diff options
author | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
---|---|---|
committer | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
commit | ba2eae5d528487900d1510fc0a160e660f2c394c (patch) | |
tree | a2c6dd5948f6ff353763cb3f83ddd734077e646e /ext/ply | |
parent | 86777c9db174c74be49667bce3dda99f8ba23696 (diff) | |
download | gem5-ba2eae5d528487900d1510fc0a160e660f2c394c.tar.xz |
New directory structure:
- simulator source now in 'src' subdirectory
- imported files from 'ext' repository
- support building in arbitrary places, including
outside of the source tree. See comment at top
of SConstruct file for more details.
Regression tests are temporarily disabled; that
system needs more extensive revisions.
SConstruct:
Update for new directory structure.
Modify to support build trees that are not subdirectories
of the source tree. See comment at top of file for
more details.
Regression tests are temporarily disabled.
src/arch/SConscript:
src/arch/isa_parser.py:
src/python/SConscript:
Update for new directory structure.
--HG--
rename : build/SConstruct => SConstruct
rename : build/default_options/ALPHA_FS => build_opts/ALPHA_FS
rename : build/default_options/ALPHA_FS_TL => build_opts/ALPHA_FS_TL
rename : build/default_options/ALPHA_SE => build_opts/ALPHA_SE
rename : build/default_options/MIPS_SE => build_opts/MIPS_SE
rename : build/default_options/SPARC_SE => build_opts/SPARC_SE
rename : Doxyfile => src/Doxyfile
rename : SConscript => src/SConscript
rename : arch/SConscript => src/arch/SConscript
rename : arch/alpha/SConscript => src/arch/alpha/SConscript
rename : arch/alpha/aout_machdep.h => src/arch/alpha/aout_machdep.h
rename : arch/alpha/arguments.cc => src/arch/alpha/arguments.cc
rename : arch/alpha/arguments.hh => src/arch/alpha/arguments.hh
rename : arch/alpha/ecoff_machdep.h => src/arch/alpha/ecoff_machdep.h
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/ev5.hh => src/arch/alpha/ev5.hh
rename : arch/alpha/faults.cc => src/arch/alpha/faults.cc
rename : arch/alpha/faults.hh => src/arch/alpha/faults.hh
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/freebsd/system.hh => src/arch/alpha/freebsd/system.hh
rename : arch/alpha/isa/branch.isa => src/arch/alpha/isa/branch.isa
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/fp.isa => src/arch/alpha/isa/fp.isa
rename : arch/alpha/isa/int.isa => src/arch/alpha/isa/int.isa
rename : arch/alpha/isa/main.isa => src/arch/alpha/isa/main.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa/opcdec.isa => src/arch/alpha/isa/opcdec.isa
rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa
rename : arch/alpha/isa/unimp.isa => src/arch/alpha/isa/unimp.isa
rename : arch/alpha/isa/unknown.isa => src/arch/alpha/isa/unknown.isa
rename : arch/alpha/isa/util.isa => src/arch/alpha/isa/util.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/aligned.hh => src/arch/alpha/linux/aligned.hh
rename : arch/alpha/linux/hwrpb.hh => src/arch/alpha/linux/hwrpb.hh
rename : arch/alpha/linux/linux.cc => src/arch/alpha/linux/linux.cc
rename : arch/alpha/linux/linux.hh => src/arch/alpha/linux/linux.hh
rename : arch/alpha/linux/process.cc => src/arch/alpha/linux/process.cc
rename : arch/alpha/linux/process.hh => src/arch/alpha/linux/process.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/linux/system.hh => src/arch/alpha/linux/system.hh
rename : arch/alpha/linux/thread_info.hh => src/arch/alpha/linux/thread_info.hh
rename : arch/alpha/linux/threadinfo.hh => src/arch/alpha/linux/threadinfo.hh
rename : arch/alpha/osfpal.cc => src/arch/alpha/osfpal.cc
rename : arch/alpha/osfpal.hh => src/arch/alpha/osfpal.hh
rename : arch/alpha/process.cc => src/arch/alpha/process.cc
rename : arch/alpha/process.hh => src/arch/alpha/process.hh
rename : arch/alpha/regfile.hh => src/arch/alpha/regfile.hh
rename : arch/alpha/stacktrace.cc => src/arch/alpha/stacktrace.cc
rename : arch/alpha/stacktrace.hh => src/arch/alpha/stacktrace.hh
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/system.hh => src/arch/alpha/system.hh
rename : arch/alpha/tlb.cc => src/arch/alpha/tlb.cc
rename : arch/alpha/tlb.hh => src/arch/alpha/tlb.hh
rename : arch/alpha/tru64/process.cc => src/arch/alpha/tru64/process.cc
rename : arch/alpha/tru64/process.hh => src/arch/alpha/tru64/process.hh
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : arch/alpha/tru64/system.hh => src/arch/alpha/tru64/system.hh
rename : arch/alpha/tru64/tru64.cc => src/arch/alpha/tru64/tru64.cc
rename : arch/alpha/tru64/tru64.hh => src/arch/alpha/tru64/tru64.hh
rename : arch/alpha/types.hh => src/arch/alpha/types.hh
rename : arch/alpha/utility.hh => src/arch/alpha/utility.hh
rename : arch/alpha/vtophys.cc => src/arch/alpha/vtophys.cc
rename : arch/alpha/vtophys.hh => src/arch/alpha/vtophys.hh
rename : arch/isa_parser.py => src/arch/isa_parser.py
rename : arch/isa_specific.hh => src/arch/isa_specific.hh
rename : arch/mips/SConscript => src/arch/mips/SConscript
rename : arch/mips/faults.cc => src/arch/mips/faults.cc
rename : arch/mips/faults.hh => src/arch/mips/faults.hh
rename : arch/mips/isa/base.isa => src/arch/mips/isa/base.isa
rename : arch/mips/isa/bitfields.isa => src/arch/mips/isa/bitfields.isa
rename : arch/mips/isa/decoder.isa => src/arch/mips/isa/decoder.isa
rename : arch/mips/isa/formats/basic.isa => src/arch/mips/isa/formats/basic.isa
rename : arch/mips/isa/formats/branch.isa => src/arch/mips/isa/formats/branch.isa
rename : arch/mips/isa/formats/formats.isa => src/arch/mips/isa/formats/formats.isa
rename : arch/mips/isa/formats/fp.isa => src/arch/mips/isa/formats/fp.isa
rename : arch/mips/isa/formats/int.isa => src/arch/mips/isa/formats/int.isa
rename : arch/mips/isa/formats/mem.isa => src/arch/mips/isa/formats/mem.isa
rename : arch/mips/isa/formats/noop.isa => src/arch/mips/isa/formats/noop.isa
rename : arch/mips/isa/formats/tlbop.isa => src/arch/mips/isa/formats/tlbop.isa
rename : arch/mips/isa/formats/trap.isa => src/arch/mips/isa/formats/trap.isa
rename : arch/mips/isa/formats/unimp.isa => src/arch/mips/isa/formats/unimp.isa
rename : arch/mips/isa/formats/unknown.isa => src/arch/mips/isa/formats/unknown.isa
rename : arch/mips/isa/formats/util.isa => src/arch/mips/isa/formats/util.isa
rename : arch/mips/isa/includes.isa => src/arch/mips/isa/includes.isa
rename : arch/mips/isa/main.isa => src/arch/mips/isa/main.isa
rename : arch/mips/isa/operands.isa => src/arch/mips/isa/operands.isa
rename : arch/mips/isa_traits.cc => src/arch/mips/isa_traits.cc
rename : arch/mips/isa_traits.hh => src/arch/mips/isa_traits.hh
rename : arch/mips/linux/linux.cc => src/arch/mips/linux/linux.cc
rename : arch/mips/linux/linux.hh => src/arch/mips/linux/linux.hh
rename : arch/mips/linux/process.cc => src/arch/mips/linux/process.cc
rename : arch/mips/linux/process.hh => src/arch/mips/linux/process.hh
rename : arch/mips/process.cc => src/arch/mips/process.cc
rename : arch/mips/process.hh => src/arch/mips/process.hh
rename : arch/mips/regfile/float_regfile.hh => src/arch/mips/regfile/float_regfile.hh
rename : arch/mips/regfile/int_regfile.hh => src/arch/mips/regfile/int_regfile.hh
rename : arch/mips/regfile/misc_regfile.hh => src/arch/mips/regfile/misc_regfile.hh
rename : arch/mips/regfile/regfile.hh => src/arch/mips/regfile/regfile.hh
rename : arch/mips/stacktrace.hh => src/arch/mips/stacktrace.hh
rename : arch/mips/types.hh => src/arch/mips/types.hh
rename : arch/mips/utility.hh => src/arch/mips/utility.hh
rename : arch/sparc/SConscript => src/arch/sparc/SConscript
rename : arch/sparc/faults.cc => src/arch/sparc/faults.cc
rename : arch/sparc/faults.hh => src/arch/sparc/faults.hh
rename : arch/sparc/isa/base.isa => src/arch/sparc/isa/base.isa
rename : arch/sparc/isa/bitfields.isa => src/arch/sparc/isa/bitfields.isa
rename : arch/sparc/isa/decoder.isa => src/arch/sparc/isa/decoder.isa
rename : arch/sparc/isa/formats.isa => src/arch/sparc/isa/formats.isa
rename : arch/sparc/isa/formats/basic.isa => src/arch/sparc/isa/formats/basic.isa
rename : arch/sparc/isa/formats/branch.isa => src/arch/sparc/isa/formats/branch.isa
rename : arch/sparc/isa/formats/integerop.isa => src/arch/sparc/isa/formats/integerop.isa
rename : arch/sparc/isa/formats/mem.isa => src/arch/sparc/isa/formats/mem.isa
rename : arch/sparc/isa/formats/nop.isa => src/arch/sparc/isa/formats/nop.isa
rename : arch/sparc/isa/formats/priv.isa => src/arch/sparc/isa/formats/priv.isa
rename : arch/sparc/isa/formats/trap.isa => src/arch/sparc/isa/formats/trap.isa
rename : arch/sparc/isa/formats/unknown.isa => src/arch/sparc/isa/formats/unknown.isa
rename : arch/sparc/isa/includes.isa => src/arch/sparc/isa/includes.isa
rename : arch/sparc/isa/main.isa => src/arch/sparc/isa/main.isa
rename : arch/sparc/isa/operands.isa => src/arch/sparc/isa/operands.isa
rename : arch/sparc/isa_traits.hh => src/arch/sparc/isa_traits.hh
rename : arch/sparc/linux/linux.cc => src/arch/sparc/linux/linux.cc
rename : arch/sparc/linux/linux.hh => src/arch/sparc/linux/linux.hh
rename : arch/sparc/linux/process.cc => src/arch/sparc/linux/process.cc
rename : arch/sparc/linux/process.hh => src/arch/sparc/linux/process.hh
rename : arch/sparc/process.cc => src/arch/sparc/process.cc
rename : arch/sparc/process.hh => src/arch/sparc/process.hh
rename : arch/sparc/regfile.hh => src/arch/sparc/regfile.hh
rename : arch/sparc/solaris/process.cc => src/arch/sparc/solaris/process.cc
rename : arch/sparc/solaris/process.hh => src/arch/sparc/solaris/process.hh
rename : arch/sparc/solaris/solaris.cc => src/arch/sparc/solaris/solaris.cc
rename : arch/sparc/solaris/solaris.hh => src/arch/sparc/solaris/solaris.hh
rename : arch/sparc/stacktrace.hh => src/arch/sparc/stacktrace.hh
rename : arch/sparc/system.cc => src/arch/sparc/system.cc
rename : arch/sparc/system.hh => src/arch/sparc/system.hh
rename : arch/sparc/utility.hh => src/arch/sparc/utility.hh
rename : base/bitfield.hh => src/base/bitfield.hh
rename : base/callback.hh => src/base/callback.hh
rename : base/chunk_generator.hh => src/base/chunk_generator.hh
rename : base/circlebuf.cc => src/base/circlebuf.cc
rename : base/circlebuf.hh => src/base/circlebuf.hh
rename : base/compression/lzss_compression.cc => src/base/compression/lzss_compression.cc
rename : base/compression/lzss_compression.hh => src/base/compression/lzss_compression.hh
rename : base/compression/null_compression.hh => src/base/compression/null_compression.hh
rename : base/cprintf.cc => src/base/cprintf.cc
rename : base/cprintf.hh => src/base/cprintf.hh
rename : base/cprintf_formats.hh => src/base/cprintf_formats.hh
rename : base/crc.cc => src/base/crc.cc
rename : base/crc.hh => src/base/crc.hh
rename : base/date.cc => src/base/date.cc
rename : base/dbl_list.hh => src/base/dbl_list.hh
rename : base/endian.hh => src/base/endian.hh
rename : base/fast_alloc.cc => src/base/fast_alloc.cc
rename : base/fast_alloc.hh => src/base/fast_alloc.hh
rename : base/fenv.hh => src/base/fenv.hh
rename : base/fifo_buffer.cc => src/base/fifo_buffer.cc
rename : base/fifo_buffer.hh => src/base/fifo_buffer.hh
rename : base/hashmap.hh => src/base/hashmap.hh
rename : base/hostinfo.cc => src/base/hostinfo.cc
rename : base/hostinfo.hh => src/base/hostinfo.hh
rename : base/hybrid_pred.cc => src/base/hybrid_pred.cc
rename : base/hybrid_pred.hh => src/base/hybrid_pred.hh
rename : base/inet.cc => src/base/inet.cc
rename : base/inet.hh => src/base/inet.hh
rename : base/inifile.cc => src/base/inifile.cc
rename : base/inifile.hh => src/base/inifile.hh
rename : base/intmath.cc => src/base/intmath.cc
rename : base/intmath.hh => src/base/intmath.hh
rename : base/kgdb.h => src/base/kgdb.h
rename : base/loader/aout_object.cc => src/base/loader/aout_object.cc
rename : base/loader/aout_object.hh => src/base/loader/aout_object.hh
rename : base/loader/coff_sym.h => src/base/loader/coff_sym.h
rename : base/loader/coff_symconst.h => src/base/loader/coff_symconst.h
rename : base/loader/ecoff_object.cc => src/base/loader/ecoff_object.cc
rename : base/loader/ecoff_object.hh => src/base/loader/ecoff_object.hh
rename : base/loader/elf_object.cc => src/base/loader/elf_object.cc
rename : base/loader/elf_object.hh => src/base/loader/elf_object.hh
rename : base/loader/exec_aout.h => src/base/loader/exec_aout.h
rename : base/loader/exec_ecoff.h => src/base/loader/exec_ecoff.h
rename : base/loader/object_file.cc => src/base/loader/object_file.cc
rename : base/loader/object_file.hh => src/base/loader/object_file.hh
rename : base/loader/symtab.cc => src/base/loader/symtab.cc
rename : base/loader/symtab.hh => src/base/loader/symtab.hh
rename : base/match.cc => src/base/match.cc
rename : base/match.hh => src/base/match.hh
rename : base/misc.cc => src/base/misc.cc
rename : base/misc.hh => src/base/misc.hh
rename : base/mod_num.hh => src/base/mod_num.hh
rename : base/mysql.cc => src/base/mysql.cc
rename : base/mysql.hh => src/base/mysql.hh
rename : base/output.cc => src/base/output.cc
rename : base/output.hh => src/base/output.hh
rename : base/pollevent.cc => src/base/pollevent.cc
rename : base/pollevent.hh => src/base/pollevent.hh
rename : base/predictor.hh => src/base/predictor.hh
rename : base/random.cc => src/base/random.cc
rename : base/random.hh => src/base/random.hh
rename : base/range.cc => src/base/range.cc
rename : base/range.hh => src/base/range.hh
rename : base/refcnt.hh => src/base/refcnt.hh
rename : base/remote_gdb.cc => src/base/remote_gdb.cc
rename : base/remote_gdb.hh => src/base/remote_gdb.hh
rename : base/res_list.hh => src/base/res_list.hh
rename : base/sat_counter.cc => src/base/sat_counter.cc
rename : base/sat_counter.hh => src/base/sat_counter.hh
rename : base/sched_list.hh => src/base/sched_list.hh
rename : base/socket.cc => src/base/socket.cc
rename : base/socket.hh => src/base/socket.hh
rename : base/statistics.cc => src/base/statistics.cc
rename : base/statistics.hh => src/base/statistics.hh
rename : base/stats/events.cc => src/base/stats/events.cc
rename : base/stats/events.hh => src/base/stats/events.hh
rename : base/stats/flags.hh => src/base/stats/flags.hh
rename : base/stats/mysql.cc => src/base/stats/mysql.cc
rename : base/stats/mysql.hh => src/base/stats/mysql.hh
rename : base/stats/mysql_run.hh => src/base/stats/mysql_run.hh
rename : base/stats/output.hh => src/base/stats/output.hh
rename : base/stats/statdb.cc => src/base/stats/statdb.cc
rename : base/stats/statdb.hh => src/base/stats/statdb.hh
rename : base/stats/text.cc => src/base/stats/text.cc
rename : base/stats/text.hh => src/base/stats/text.hh
rename : base/stats/types.hh => src/base/stats/types.hh
rename : base/stats/visit.cc => src/base/stats/visit.cc
rename : base/stats/visit.hh => src/base/stats/visit.hh
rename : base/str.cc => src/base/str.cc
rename : base/str.hh => src/base/str.hh
rename : base/time.cc => src/base/time.cc
rename : base/time.hh => src/base/time.hh
rename : base/timebuf.hh => src/base/timebuf.hh
rename : base/trace.cc => src/base/trace.cc
rename : base/trace.hh => src/base/trace.hh
rename : base/traceflags.py => src/base/traceflags.py
rename : base/userinfo.cc => src/base/userinfo.cc
rename : base/userinfo.hh => src/base/userinfo.hh
rename : cpu/SConscript => src/cpu/SConscript
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc
rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh
rename : cpu/cpu_models.py => src/cpu/cpu_models.py
rename : cpu/exec_context.hh => src/cpu/exec_context.hh
rename : cpu/exetrace.cc => src/cpu/exetrace.cc
rename : cpu/exetrace.hh => src/cpu/exetrace.hh
rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh
rename : cpu/intr_control.cc => src/cpu/intr_control.cc
rename : cpu/intr_control.hh => src/cpu/intr_control.hh
rename : cpu/memtest/memtest.cc => src/cpu/memtest/memtest.cc
rename : cpu/memtest/memtest.hh => src/cpu/memtest/memtest.hh
rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc
rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh
rename : cpu/o3/alpha_cpu.cc => src/cpu/o3/alpha_cpu.cc
rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc
rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh
rename : cpu/o3/alpha_dyn_inst.cc => src/cpu/o3/alpha_dyn_inst.cc
rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh
rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh
rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh
rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh
rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc
rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh
rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh
rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc
rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh
rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh
rename : cpu/o3/commit.cc => src/cpu/o3/commit.cc
rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh
rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh
rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc
rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh
rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh
rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc
rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc
rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh
rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc
rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc
rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh
rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh
rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc
rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh
rename : cpu/o3/rob.cc => src/cpu/o3/rob.cc
rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh
rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh
rename : cpu/o3/sat_counter.cc => src/cpu/o3/sat_counter.cc
rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh
rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc
rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/op_class.cc => src/cpu/op_class.cc
rename : cpu/op_class.hh => src/cpu/op_class.hh
rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/ea_list.cc => src/cpu/ozone/ea_list.cc
rename : cpu/ozone/ea_list.hh => src/cpu/ozone/ea_list.hh
rename : cpu/pc_event.cc => src/cpu/pc_event.cc
rename : cpu/pc_event.hh => src/cpu/pc_event.hh
rename : cpu/profile.cc => src/cpu/profile.cc
rename : cpu/profile.hh => src/cpu/profile.hh
rename : cpu/simple/atomic.cc => src/cpu/simple/atomic.cc
rename : cpu/simple/atomic.hh => src/cpu/simple/atomic.hh
rename : cpu/simple/base.cc => src/cpu/simple/base.cc
rename : cpu/simple/base.hh => src/cpu/simple/base.hh
rename : cpu/simple/timing.cc => src/cpu/simple/timing.cc
rename : cpu/simple/timing.hh => src/cpu/simple/timing.hh
rename : cpu/smt.hh => src/cpu/smt.hh
rename : cpu/static_inst.cc => src/cpu/static_inst.cc
rename : cpu/static_inst.hh => src/cpu/static_inst.hh
rename : cpu/trace/opt_cpu.cc => src/cpu/trace/opt_cpu.cc
rename : cpu/trace/opt_cpu.hh => src/cpu/trace/opt_cpu.hh
rename : cpu/trace/reader/ibm_reader.cc => src/cpu/trace/reader/ibm_reader.cc
rename : cpu/trace/reader/ibm_reader.hh => src/cpu/trace/reader/ibm_reader.hh
rename : cpu/trace/reader/itx_reader.cc => src/cpu/trace/reader/itx_reader.cc
rename : cpu/trace/reader/itx_reader.hh => src/cpu/trace/reader/itx_reader.hh
rename : cpu/trace/reader/m5_reader.cc => src/cpu/trace/reader/m5_reader.cc
rename : cpu/trace/reader/m5_reader.hh => src/cpu/trace/reader/m5_reader.hh
rename : cpu/trace/reader/mem_trace_reader.cc => src/cpu/trace/reader/mem_trace_reader.cc
rename : cpu/trace/reader/mem_trace_reader.hh => src/cpu/trace/reader/mem_trace_reader.hh
rename : cpu/trace/trace_cpu.cc => src/cpu/trace/trace_cpu.cc
rename : cpu/trace/trace_cpu.hh => src/cpu/trace/trace_cpu.hh
rename : dev/alpha_access.h => src/dev/alpha_access.h
rename : dev/alpha_console.cc => src/dev/alpha_console.cc
rename : dev/alpha_console.hh => src/dev/alpha_console.hh
rename : dev/baddev.cc => src/dev/baddev.cc
rename : dev/baddev.hh => src/dev/baddev.hh
rename : dev/disk_image.cc => src/dev/disk_image.cc
rename : dev/disk_image.hh => src/dev/disk_image.hh
rename : dev/etherbus.cc => src/dev/etherbus.cc
rename : dev/etherbus.hh => src/dev/etherbus.hh
rename : dev/etherdump.cc => src/dev/etherdump.cc
rename : dev/etherdump.hh => src/dev/etherdump.hh
rename : dev/etherint.cc => src/dev/etherint.cc
rename : dev/etherint.hh => src/dev/etherint.hh
rename : dev/etherlink.cc => src/dev/etherlink.cc
rename : dev/etherlink.hh => src/dev/etherlink.hh
rename : dev/etherpkt.cc => src/dev/etherpkt.cc
rename : dev/etherpkt.hh => src/dev/etherpkt.hh
rename : dev/ethertap.cc => src/dev/ethertap.cc
rename : dev/ethertap.hh => src/dev/ethertap.hh
rename : dev/ide_atareg.h => src/dev/ide_atareg.h
rename : dev/ide_ctrl.cc => src/dev/ide_ctrl.cc
rename : dev/ide_ctrl.hh => src/dev/ide_ctrl.hh
rename : dev/ide_disk.cc => src/dev/ide_disk.cc
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : dev/ide_wdcreg.h => src/dev/ide_wdcreg.h
rename : dev/io_device.cc => src/dev/io_device.cc
rename : dev/io_device.hh => src/dev/io_device.hh
rename : dev/isa_fake.cc => src/dev/isa_fake.cc
rename : dev/isa_fake.hh => src/dev/isa_fake.hh
rename : dev/ns_gige.cc => src/dev/ns_gige.cc
rename : dev/ns_gige.hh => src/dev/ns_gige.hh
rename : dev/ns_gige_reg.h => src/dev/ns_gige_reg.h
rename : dev/pciconfigall.cc => src/dev/pciconfigall.cc
rename : dev/pciconfigall.hh => src/dev/pciconfigall.hh
rename : dev/pcidev.cc => src/dev/pcidev.cc
rename : dev/pcidev.hh => src/dev/pcidev.hh
rename : dev/pcireg.h => src/dev/pcireg.h
rename : dev/pitreg.h => src/dev/pitreg.h
rename : dev/pktfifo.cc => src/dev/pktfifo.cc
rename : dev/pktfifo.hh => src/dev/pktfifo.hh
rename : dev/platform.cc => src/dev/platform.cc
rename : dev/platform.hh => src/dev/platform.hh
rename : dev/rtcreg.h => src/dev/rtcreg.h
rename : dev/simconsole.cc => src/dev/simconsole.cc
rename : dev/simconsole.hh => src/dev/simconsole.hh
rename : dev/simple_disk.cc => src/dev/simple_disk.cc
rename : dev/simple_disk.hh => src/dev/simple_disk.hh
rename : dev/sinic.cc => src/dev/sinic.cc
rename : dev/sinic.hh => src/dev/sinic.hh
rename : dev/sinicreg.hh => src/dev/sinicreg.hh
rename : dev/tsunami.cc => src/dev/tsunami.cc
rename : dev/tsunami.hh => src/dev/tsunami.hh
rename : dev/tsunami_cchip.cc => src/dev/tsunami_cchip.cc
rename : dev/tsunami_cchip.hh => src/dev/tsunami_cchip.hh
rename : dev/tsunami_io.cc => src/dev/tsunami_io.cc
rename : dev/tsunami_io.hh => src/dev/tsunami_io.hh
rename : dev/tsunami_pchip.cc => src/dev/tsunami_pchip.cc
rename : dev/tsunami_pchip.hh => src/dev/tsunami_pchip.hh
rename : dev/tsunamireg.h => src/dev/tsunamireg.h
rename : dev/uart.cc => src/dev/uart.cc
rename : dev/uart.hh => src/dev/uart.hh
rename : dev/uart8250.cc => src/dev/uart8250.cc
rename : dev/uart8250.hh => src/dev/uart8250.hh
rename : kern/kernel_stats.cc => src/kern/kernel_stats.cc
rename : kern/kernel_stats.hh => src/kern/kernel_stats.hh
rename : kern/linux/events.cc => src/kern/linux/events.cc
rename : kern/linux/events.hh => src/kern/linux/events.hh
rename : kern/linux/linux.hh => src/kern/linux/linux.hh
rename : kern/linux/linux_syscalls.cc => src/kern/linux/linux_syscalls.cc
rename : kern/linux/linux_syscalls.hh => src/kern/linux/linux_syscalls.hh
rename : kern/linux/printk.cc => src/kern/linux/printk.cc
rename : kern/linux/printk.hh => src/kern/linux/printk.hh
rename : kern/linux/sched.hh => src/kern/linux/sched.hh
rename : kern/solaris/solaris.hh => src/kern/solaris/solaris.hh
rename : kern/system_events.cc => src/kern/system_events.cc
rename : kern/system_events.hh => src/kern/system_events.hh
rename : kern/tru64/dump_mbuf.cc => src/kern/tru64/dump_mbuf.cc
rename : kern/tru64/dump_mbuf.hh => src/kern/tru64/dump_mbuf.hh
rename : kern/tru64/mbuf.hh => src/kern/tru64/mbuf.hh
rename : kern/tru64/printf.cc => src/kern/tru64/printf.cc
rename : kern/tru64/printf.hh => src/kern/tru64/printf.hh
rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh
rename : kern/tru64/tru64_events.cc => src/kern/tru64/tru64_events.cc
rename : kern/tru64/tru64_events.hh => src/kern/tru64/tru64_events.hh
rename : kern/tru64/tru64_syscalls.cc => src/kern/tru64/tru64_syscalls.cc
rename : kern/tru64/tru64_syscalls.hh => src/kern/tru64/tru64_syscalls.hh
rename : mem/bridge.cc => src/mem/bridge.cc
rename : mem/bridge.hh => src/mem/bridge.hh
rename : mem/bus.cc => src/mem/bus.cc
rename : mem/bus.hh => src/mem/bus.hh
rename : mem/cache/prefetch/tagged_prefetcher_impl.hh => src/mem/cache/prefetch/tagged_prefetcher_impl.hh
rename : mem/config/prefetch.hh => src/mem/config/prefetch.hh
rename : mem/mem_object.cc => src/mem/mem_object.cc
rename : mem/mem_object.hh => src/mem/mem_object.hh
rename : mem/packet.cc => src/mem/packet.cc
rename : mem/packet.hh => src/mem/packet.hh
rename : mem/page_table.cc => src/mem/page_table.cc
rename : mem/page_table.hh => src/mem/page_table.hh
rename : mem/physical.cc => src/mem/physical.cc
rename : mem/physical.hh => src/mem/physical.hh
rename : mem/port.cc => src/mem/port.cc
rename : mem/port.hh => src/mem/port.hh
rename : mem/request.hh => src/mem/request.hh
rename : mem/translating_port.cc => src/mem/translating_port.cc
rename : mem/translating_port.hh => src/mem/translating_port.hh
rename : mem/vport.cc => src/mem/vport.cc
rename : mem/vport.hh => src/mem/vport.hh
rename : python/SConscript => src/python/SConscript
rename : python/m5/__init__.py => src/python/m5/__init__.py
rename : python/m5/config.py => src/python/m5/config.py
rename : python/m5/convert.py => src/python/m5/convert.py
rename : python/m5/multidict.py => src/python/m5/multidict.py
rename : python/m5/objects/AlphaConsole.py => src/python/m5/objects/AlphaConsole.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py
rename : python/m5/objects/AlphaTLB.py => src/python/m5/objects/AlphaTLB.py
rename : python/m5/objects/BadDevice.py => src/python/m5/objects/BadDevice.py
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/BaseCache.py => src/python/m5/objects/BaseCache.py
rename : python/m5/objects/Bridge.py => src/python/m5/objects/Bridge.py
rename : python/m5/objects/Bus.py => src/python/m5/objects/Bus.py
rename : python/m5/objects/CoherenceProtocol.py => src/python/m5/objects/CoherenceProtocol.py
rename : python/m5/objects/Device.py => src/python/m5/objects/Device.py
rename : python/m5/objects/DiskImage.py => src/python/m5/objects/DiskImage.py
rename : python/m5/objects/Ethernet.py => src/python/m5/objects/Ethernet.py
rename : python/m5/objects/Ide.py => src/python/m5/objects/Ide.py
rename : python/m5/objects/IntrControl.py => src/python/m5/objects/IntrControl.py
rename : python/m5/objects/MemObject.py => src/python/m5/objects/MemObject.py
rename : python/m5/objects/MemTest.py => src/python/m5/objects/MemTest.py
rename : python/m5/objects/Pci.py => src/python/m5/objects/Pci.py
rename : python/m5/objects/PhysicalMemory.py => src/python/m5/objects/PhysicalMemory.py
rename : python/m5/objects/Platform.py => src/python/m5/objects/Platform.py
rename : python/m5/objects/Process.py => src/python/m5/objects/Process.py
rename : python/m5/objects/Repl.py => src/python/m5/objects/Repl.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/SimConsole.py => src/python/m5/objects/SimConsole.py
rename : python/m5/objects/SimpleDisk.py => src/python/m5/objects/SimpleDisk.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : python/m5/objects/Tsunami.py => src/python/m5/objects/Tsunami.py
rename : python/m5/objects/Uart.py => src/python/m5/objects/Uart.py
rename : python/m5/smartdict.py => src/python/m5/smartdict.py
rename : sim/async.hh => src/sim/async.hh
rename : sim/builder.cc => src/sim/builder.cc
rename : sim/builder.hh => src/sim/builder.hh
rename : sim/byteswap.hh => src/sim/byteswap.hh
rename : sim/debug.cc => src/sim/debug.cc
rename : sim/debug.hh => src/sim/debug.hh
rename : sim/eventq.cc => src/sim/eventq.cc
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/faults.cc => src/sim/faults.cc
rename : sim/faults.hh => src/sim/faults.hh
rename : sim/host.hh => src/sim/host.hh
rename : sim/main.cc => src/sim/main.cc
rename : sim/param.cc => src/sim/param.cc
rename : sim/param.hh => src/sim/param.hh
rename : sim/process.cc => src/sim/process.cc
rename : sim/process.hh => src/sim/process.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/root.cc => src/sim/root.cc
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/serialize.hh => src/sim/serialize.hh
rename : sim/sim_events.cc => src/sim/sim_events.cc
rename : sim/sim_events.hh => src/sim/sim_events.hh
rename : sim/sim_exit.hh => src/sim/sim_exit.hh
rename : sim/sim_object.cc => src/sim/sim_object.cc
rename : sim/sim_object.hh => src/sim/sim_object.hh
rename : sim/startup.cc => src/sim/startup.cc
rename : sim/startup.hh => src/sim/startup.hh
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/stats.hh => src/sim/stats.hh
rename : sim/syscall_emul.cc => src/sim/syscall_emul.cc
rename : sim/syscall_emul.hh => src/sim/syscall_emul.hh
rename : sim/system.cc => src/sim/system.cc
rename : sim/system.hh => src/sim/system.hh
rename : sim/vptr.hh => src/sim/vptr.hh
rename : test/Makefile => src/unittest/Makefile
rename : test/bitvectest.cc => src/unittest/bitvectest.cc
rename : test/circletest.cc => src/unittest/circletest.cc
rename : test/cprintftest.cc => src/unittest/cprintftest.cc
rename : test/foo.ini => src/unittest/foo.ini
rename : test/genini.py => src/unittest/genini.py
rename : test/initest.cc => src/unittest/initest.cc
rename : test/initest.ini => src/unittest/initest.ini
rename : test/lru_test.cc => src/unittest/lru_test.cc
rename : test/nmtest.cc => src/unittest/nmtest.cc
rename : test/offtest.cc => src/unittest/offtest.cc
rename : test/paramtest.cc => src/unittest/paramtest.cc
rename : test/rangetest.cc => src/unittest/rangetest.cc
rename : test/sized_test.cc => src/unittest/sized_test.cc
rename : test/stattest.cc => src/unittest/stattest.cc
rename : test/strnumtest.cc => src/unittest/strnumtest.cc
rename : test/symtest.cc => src/unittest/symtest.cc
rename : test/tokentest.cc => src/unittest/tokentest.cc
rename : test/tracetest.cc => src/unittest/tracetest.cc
extra : convert_revision : cab6a5271ca1b368193cd948e5d3dcc47ab1bd48
Diffstat (limited to 'ext/ply')
98 files changed, 8572 insertions, 0 deletions
diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES new file mode 100644 index 000000000..9c7334066 --- /dev/null +++ b/ext/ply/CHANGES @@ -0,0 +1,158 @@ +Version 1.3 +------------------------------ +12/10/02: jmdyck + Various minor adjustments to the code that Dave checked in today. + Updated test/yacc_{inf,unused}.exp to reflect today's changes. + +12/10/02: beazley + Incorporated a variety of minor bug fixes to empty production + handling and infinite recursion checking. Contributed by + Michael Dyck. + +12/10/02: beazley + Removed bogus recover() method call in yacc.restart() + +Version 1.2 +------------------------------ +11/27/02: beazley + Lexer and parser objects are now available as an attribute + of tokens and slices respectively. For example: + + def t_NUMBER(t): + r'\d+' + print t.lexer + + def p_expr_plus(t): + 'expr: expr PLUS expr' + print t.lexer + print t.parser + + This can be used for state management (if needed). + +10/31/02: beazley + Modified yacc.py to work with Python optimize mode. To make + this work, you need to use + + yacc.yacc(optimize=1) + + Furthermore, you need to first run Python in normal mode + to generate the necessary parsetab.py files. After that, + you can use python -O or python -OO. + + Note: optimized mode turns off a lot of error checking. + Only use when you are sure that your grammar is working. + Make sure parsetab.py is up to date! + +10/30/02: beazley + Added cloning of Lexer objects. For example: + + import copy + l = lex.lex() + lc = copy.copy(l) + + l.input("Some text") + lc.input("Some other text") + ... + + This might be useful if the same "lexer" is meant to + be used in different contexts---or if multiple lexers + are running concurrently. + +10/30/02: beazley + Fixed subtle bug with first set computation and empty productions. + Patch submitted by Michael Dyck. + +10/30/02: beazley + Fixed error messages to use "filename:line: message" instead + of "filename:line. message". 
This makes error reporting more + friendly to emacs. Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file. Terminals and nonterminals + are sorted instead of being printed in random order. + Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file output. Rules are now printed + in a way that's easier to understand. Contributed by Russ Cox. + +10/30/02: beazley + Added 'nonassoc' associativity support. This can be used + to disable the chaining of operators like a < b < c. + To use, simply specify 'nonassoc' in the precedence table + + precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator + ) + + Patch contributed by Russ Cox. + +10/30/02: beazley + Modified the lexer to provide optional support for Python -O and -OO + modes. To make this work, Python *first* needs to be run in + unoptimized mode. This reads the lexing information and creates a + file "lextab.py". Then, run lex like this: + + # module foo.py + ... + ... + lex.lex(optimize=1) + + Once the lextab file has been created, subsequent calls to + lex.lex() will read data from the lextab file instead of using + introspection. In optimized mode (-O, -OO) everything should + work normally despite the loss of doc strings. + + To change the name of the file 'lextab.py' use the following: + + lex.lex(lextab="footab") + + (this creates a file footab.py) + + +Version 1.1 October 25, 2001 +------------------------------ + +10/25/01: beazley + Modified the table generator to produce much more compact data. + This should greatly reduce the size of the parsetab.py[c] file. + Caveat: the tables still need to be constructed so a little more + work is done in parsetab on import. + +10/25/01: beazley + There may be a possible bug in the cycle detector that reports errors + about infinite recursion. 
I'm having a little trouble tracking it + down, but if you get this problem, you can disable the cycle + detector as follows: + + yacc.yacc(check_recursion = 0) + +10/25/01: beazley + Fixed a bug in lex.py that sometimes caused illegal characters to be + reported incorrectly. Reported by Sverre Jørgensen. + +7/8/01 : beazley + Added a reference to the underlying lexer object when tokens are handled by + functions. The lexer is available as the 'lexer' attribute. This + was added to provide better lexing support for languages such as Fortran + where certain types of tokens can't be conveniently expressed as regular + expressions (and where the tokenizing function may want to perform a + little backtracking). Suggested by Pearu Peterson. + +6/20/01 : beazley + Modified yacc() function so that an optional starting symbol can be specified. + For example: + + yacc.yacc(start="statement") + + Normally yacc always treats the first production rule as the starting symbol. + However, if you are debugging your grammar it may be useful to specify + an alternative starting symbol. Idea suggested by Rich Salz. + +Version 1.0 June 18, 2001 +-------------------------- +Initial public offering + diff --git a/ext/ply/COPYING b/ext/ply/COPYING new file mode 100644 index 000000000..b1e3f5a26 --- /dev/null +++ b/ext/ply/COPYING @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. 
+ + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. 
However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ext/ply/README b/ext/ply/README new file mode 100644 index 000000000..35b458d4c --- /dev/null +++ b/ext/ply/README @@ -0,0 +1,249 @@ +PLY (Python Lex-Yacc) Version 1.2 (November 27, 2002) + +David M. Beazley +Department of Computer Science +University of Chicago +Chicago, IL 60637 +beazley@cs.uchicago.edu + +Copyright (C) 2001 David M. 
Beazley + +$Header: /home/stever/bk/newmem2/ext/ply/README 1.1 03/06/06 14:53:34-00:00 stever@ $ + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +See the file COPYING for a complete copy of the LGPL. + +Introduction +============ + +PLY is a 100% Python implementation of the common parsing tools lex +and yacc. Although several other parsing tools are available for +Python, there are several reasons why you might want to consider PLY: + + - The tools are very closely modeled after traditional lex/yacc. + If you know how to use these tools in C, you will find PLY + to be similar. + + - PLY provides *very* extensive error reporting and diagnostic + information to assist in parser construction. The original + implementation was developed for instructional purposes. As + a result, the system tries to identify the most common types + of errors made by novice users. + + - PLY provides full support for empty productions, error recovery, + precedence specifiers, and moderately ambiguous grammars. + + - Parsing is based on LR-parsing which is fast, memory efficient, + better suited to large grammars, and which has a number of nice + properties when dealing with syntax errors and other parsing problems. 
+ Currently, PLY builds its parsing tables using the SLR algorithm which + is slightly weaker than LALR(1) used in traditional yacc. + + - Like John Aycock's excellent SPARK toolkit, PLY uses Python + reflection to build lexers and parsers. This greatly simplifies + the task of parser construction since it reduces the number of files + and eliminates the need to run a separate lex/yacc tool before + running your program. + + - PLY can be used to build parsers for "real" programming languages. + Although it is not ultra-fast due to its Python implementation, + PLY can be used to parse grammars consisting of several hundred + rules (as might be found for a language like C). The lexer and LR + parser are also reasonably efficient when parsing typically + sized programs. + +The original version of PLY was developed for an Introduction to +Compilers course where students used it to build a compiler for a +simple Pascal-like language. Their compiler had to include lexical +analysis, parsing, type checking, type inference, and generation of +assembly code for the SPARC processor. Because of this, the current +implementation has been extensively tested and debugged. In addition, +most of the API and error checking steps have been adapted to address +common usability problems. + +How to Use +========== + +PLY consists of two files : lex.py and yacc.py. To use the system, +simply copy these files to your project and import them like standard +Python modules. + +The file doc/ply.html contains complete documentation on how to use +the system. + +The example directory contains several different examples including a +PLY specification for ANSI C as given in K&R 2nd Ed. Note: To use +the examples, you will need to copy the lex.py and yacc.py files to +the example directory. + +A simple example is found at the end of this document + +Requirements +============ +PLY requires the use of Python 2.0 or greater. It should work on +just about any platform. 
+ +Resources +========= + +More information about PLY can be obtained on the PLY webpage at: + + http://systems.cs.uchicago.edu/ply + +For a detailed overview of parsing theory, consult the excellent +book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and +Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown +may also be useful. + +Given that this is the first release, I welcome your comments on how +to improve the current implementation. See the TODO file for things that +still need to be done. + +Acknowledgments +=============== + +A special thanks is in order for all of the students in CS326 who +suffered through about 25 different versions of these tools :-). + +Example +======= + +Here is a simple example showing a PLY implementation of a calculator with variables. + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +# Ignored characters +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Precedence rules for the arithmetic operators +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names (for storing variables) +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def 
p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) + + + + + + + + + + + + + + + + + diff --git a/ext/ply/TODO b/ext/ply/TODO new file mode 100644 index 000000000..b2978150d --- /dev/null +++ b/ext/ply/TODO @@ -0,0 +1,22 @@ +The PLY to-do list: + +$Header: /home/stever/bk/newmem2/ext/ply/TODO 1.1 03/06/06 14:53:34-00:00 stever@ $ + +1. Create a Python package using distutils + +2. More interesting parsing examples. + +3. Work on the ANSI C grammar so that it can actually parse C programs. To do this, + some extra code needs to be added to the lexer to deal with typedef names and enumeration + constants. + +4. Get LALR(1) to work. Hard, but not impossible. + +5. More tests in the test directory. + +6. Performance improvements and cleanup in yacc.py. + +7. More documentation. + +8. Lots and lots of cleanup. 
+ diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html new file mode 100644 index 000000000..2596066fe --- /dev/null +++ b/ext/ply/doc/ply.html @@ -0,0 +1,1642 @@ +<html> +<head> +<title>PLY (Python Lex-Yacc)</title> +</head> +<body bgcolor="#ffffff"> + +<h1>PLY (Python Lex-Yacc)</h1> + +<b> +David M. Beazley <br> +Department of Computer Science <br> +University of Chicago <br> +Chicago, IL 60637 <br> +beazley@cs.uchicago.edu <br> +</b> + +<p> +Documentation version: $Header: /home/stever/bk/newmem2/ext/ply/doc/ply.html 1.1 03/06/06 14:53:34-00:00 stever@ $ + +<h2>Introduction</h2> + +PLY is a Python-only implementation of the popular compiler +construction tools lex and yacc. The implementation borrows ideas +from a number of previous efforts; most notably John Aycock's SPARK +toolkit. However, the overall flavor of the implementation is more +closely modeled after the C version of lex and yacc. The other +significant feature of PLY is that it provides extensive input +validation and error reporting--much more so than other Python parsing +tools. + +<p> +Early versions of PLY were developed to support the Introduction to +Compilers Course at the University of Chicago. In this course, +students built a fully functional compiler for a simple Pascal-like +language. Their compiler, implemented entirely in Python, had to +include lexical analysis, parsing, type checking, type inference, +nested scoping, and code generation for the SPARC processor. +Approximately 30 different compiler implementations were completed in +this course. Most of PLY's interface and operation has been motivated by common +usability problems encountered by students. + +<p> +Because PLY was primarily developed as an instructional tool, you will +find it to be <em>MUCH</em> more picky about token and grammar rule +specification than most other Python parsing tools. In part, this +added formality is meant to catch common programming mistakes made by +novice users. 
However, advanced users will also find such features to +be useful when building complicated grammars for real programming +languages. It should also be noted that PLY does not provide much in the way +of bells and whistles (e.g., automatic construction of abstract syntax trees, +tree traversal, etc.). Instead, you will find a bare-bones, yet +fully capable lex/yacc implementation written entirely in Python. + +<p> +The rest of this document assumes that you are somewhat familiar with +parsing theory, syntax directed translation, and automatic tools such +as lex and yacc. If you are unfamiliar with these topics, you will +probably want to consult an introductory text such as "Compilers: +Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. "Lex +and Yacc" by John Levine may also be handy. + +<h2>PLY Overview</h2> + +PLY consists of two separate tools; <tt>lex.py</tt> and +<tt>yacc.py</tt>. <tt>lex.py</tt> is used to break input text into a +collection of tokens specified by a collection of regular expression +rules. <tt>yacc.py</tt> is used to recognize language syntax that has +been specified in the form of a context free grammar. Currently, +<tt>yacc.py</tt> uses LR parsing and generates its parsing tables +using the SLR algorithm. LALR(1) parsing may be supported in a future +release. + +<p> +The two tools are meant to work together. Specifically, +<tt>lex.py</tt> provides an external interface in the form of a +<tt>token()</tt> function that returns the next valid token on the +input stream. <tt>yacc.py</tt> calls this repeatedly to retrieve +tokens and invoke grammar rules. The output of <tt>yacc.py</tt> is +often an Abstract Syntax Tree (AST). However, this is entirely up to +the user. If desired, <tt>yacc.py</tt> can also be used to implement +simple one-pass compilers. 
+ +<p> +Like its Unix counterpart, <tt>yacc.py</tt> provides most of the +features you expect including extensive error checking, grammar +validation, support for empty productions, error tokens, and ambiguity +resolution via precedence rules. The primary difference between +<tt>yacc.py</tt> and <tt>yacc</tt> is the use of SLR parsing instead +of LALR(1). Although this slightly restricts the types of grammars +that can be successfully parsed, it is sufficiently powerful to handle most +kinds of normal programming language constructs. + +<p> +Finally, it is important to note that PLY relies on reflection +(introspection) to build its lexers and parsers. Unlike traditional +lex/yacc which require a special input file that is converted into a +separate source file, the specifications given to PLY <em>are</em> +valid Python programs. This means that there are no extra source +files nor is there a special compiler construction step (e.g., running +yacc to generate Python code for the compiler). + +<h2>Lex Example</h2> + +<tt>lex.py</tt> is used to write tokenizers. To do this, each token +must be defined by a regular expression rule. The following file +implements a very simple lexer for tokenizing simple integer expressions: + +<blockquote> +<pre> +# ------------------------------------------------------------ +# calclex.py +# +# tokenizer for a simple expression evaluator for +# numbers and +,-,*,/ +# ------------------------------------------------------------ +import lex + +# List of token names. This is always required +tokens = ( + 'NUMBER', + 'PLUS', + 'MINUS', + 'TIMES', + 'DIVIDE', + 'LPAREN', + 'RPAREN', +) + +# Regular expression rules for simple tokens +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' + +# A regular expression rule with some action code +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Line %d: Number %s is too large!" 
% (t.lineno,t.value) + t.value = 0 + return t + +# Define a rule so we can track line numbers +def t_newline(t): + r'\n+' + t.lineno += len(t.value) + +# A string containing ignored characters (spaces and tabs) +t_ignore = ' \t' + +# Error handling rule +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +lex.lex() + +# Test it out +data = ''' +3 + 4 * 10 + + -20 *2 +''' + +# Give the lexer some input +lex.input(data) + +# Tokenize +while 1: + tok = lex.token() + if not tok: break # No more input + print tok +</pre> +</blockquote> + +In the example, the <tt>tokens</tt> list defines all of the possible +token names that can be produced by the lexer. This list is always required +and is used to perform a variety of validation checks. Following the <tt>tokens</tt> +list, regular expressions are written for each token. Each of these +rules are defined by making declarations with a special prefix <tt>t_</tt> to indicate that it +defines a token. For simple tokens, the regular expression can +be specified as strings such as this (note: Python raw strings are used since they are the +most convenient way to write regular expression strings): + +<blockquote> +<pre> +t_PLUS = r'\+' +</pre> +</blockquote> + +In this case, the name following the <tt>t_</tt> must exactly match one of the +names supplied in <tt>tokens</tt>. If some kind of action needs to be performed, +a token rule can be specified as a function. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Number %s is too large!" % t.value + t.value = 0 + return t +</pre> +</blockquote> + +In this case, the regular expression rule is specified in the function documentation string. +The function always takes a single argument which is an instance of +<tt>LexToken</tt>. 
This object has attributes of <tt>t.type</tt> which is the token type, +<tt>t.value</tt> which is the lexeme, and <tt>t.lineno</tt> which is the current line number. +By default, <tt>t.type</tt> is set to the name following the <tt>t_</tt> prefix. The action +function can modify the contents of the <tt>LexToken</tt> object as appropriate. However, +when it is done, the resulting token should be returned. If no value is returned by the action +function, the token is simply discarded and the next token is read. + +<p> +The rule <tt>t_newline()</tt> illustrates a regular expression rule +for a discarded token. In this case, a rule is written to match +newlines so that proper line number tracking can be performed. +By returning no value, the function causes the newline character to be +discarded. + +<p> +The special <tt>t_ignore</tt> rule is reserved by <tt>lex.py</tt> for characters +that should be completely ignored in the input stream. +Usually this is used to skip over whitespace and other non-essential characters. +Although it is possible to define a regular expression rule for whitespace in a manner +similar to <tt>t_newline()</tt>, the use of <tt>t_ignore</tt> provides substantially better +lexing performance because it is handled as a special case and is checked in a much +more efficient manner than the normal regular expression rules. + +<p> +Finally, the <tt>t_error()</tt> +function is used to handle lexing errors that occur when illegal +characters are detected. In this case, the <tt>t.value</tt> attribute contains the +rest of the input string that has not been tokenized. In the example, we simply print +the offending character and skip ahead one character by calling <tt>t.skip(1)</tt>. + +<p> +To build the lexer, the function <tt>lex.lex()</tt> is used. This function +uses Python reflection (or introspection) to read the regular expression rules +out of the calling context and build the lexer. 
Once the lexer has been built, two functions can +be used to control the lexer. + +<ul> +<li><tt>lex.input(data)</tt>. Reset the lexer and store a new input string. +<li><tt>lex.token()</tt>. Return the next token. Returns a special <tt>LexToken</tt> instance on success or +None if the end of the input text has been reached. +</ul> + +The code at the bottom of the example shows how the lexer is actually used. When executed, +the following output will be produced: + +<blockquote> +<pre> +$ python example.py +LexToken(NUMBER,3,2) +LexToken(PLUS,'+',2) +LexToken(NUMBER,4,2) +LexToken(TIMES,'*',2) +LexToken(NUMBER,10,2) +LexToken(PLUS,'+',3) +LexToken(MINUS,'-',3) +LexToken(NUMBER,20,3) +LexToken(TIMES,'*',3) +LexToken(NUMBER,2,3) +</pre> +</blockquote> + +<h2>Lex Implementation Notes</h2> + +<ul> +<li><tt>lex.py</tt> uses the <tt>re</tt> module to do its pattern matching. When building the master regular expression, +rules are added in the following order: +<p> +<ol> +<li>All tokens defined by functions are added in the same order as they appear in the lexer file. +<li>Tokens defined by strings are added by sorting them in order of decreasing regular expression length (longer expressions +are added first). +</ol> +<p> +Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you +wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular +expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions, +the order can be explicitly controlled since rules appearing first are checked first. + +<P> +<li>The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this +rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data +such as open files or sockets. 
This limitation is primarily a side-effect of using the <tt>re</tt> module. + +<p> +<li> +To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function +like this: + +<blockquote> +<pre> +reserved = { + 'if' : 'IF', + 'then' : 'THEN', + 'else' : 'ELSE', + 'while' : 'WHILE', + ... +} + +def t_ID(t): + r'[a-zA-Z_][a-zA-Z_0-9]*' + t.type = reserved.get(t.value,'ID') # Check for reserved words + return t +</pre> +</blockquote> + +<p> +<li>The lexer requires tokens to be defined as class instances with <tt>t.type</tt>, <tt>t.value</tt>, and <tt>t.lineno</tt> +attributes. By default, tokens are created as instances of the <tt>LexToken</tt> class defined internally to <tt>lex.py</tt>. +If desired, you can create new kinds of tokens provided that they have the three required attributes. However, +in practice, it is probably safer to stick with the default. + +<p> +<li>The only safe attribute for assigning token properties is <tt>t.value</tt>. In some cases, you may want to attach +a number of different properties to a token (e.g., symbol table entries for identifiers). To do this, replace <tt>t.value</tt> +with a tuple or class instance. For example: + +<blockquote> +<pre> +def t_ID(t): + ... + # For identifiers, create a (lexeme, symtab) tuple + t.value = (t.value, symbol_lookup(t.value)) + ... + return t +</pre> +</blockquote> + +Although allowed, do NOT assign additional attributes to the token object. For example, +<blockquote> +<pre> +def t_ID(t): + ... + # Bad implementation of above + t.symtab = symbol_lookup(t.value) + ... +</pre> +</blockquote> + +The reason you don't want to do this is that the <tt>yacc.py</tt> +module only provides public access to the <tt>t.value</tt> attribute of each token. +Therefore, any other attributes you assign are inaccessible (if you are familiar +with the internals of C lex/yacc, <tt>t.value</tt> is the same as <tt>yylval.tok</tt>). 
+ +<p> +<li>To track line numbers, the lexer internally maintains a line +number variable. Each token automatically gets the value of the +current line number in the <tt>t.lineno</tt> attribute. To modify the +current line number, simply change the <tt>t.lineno</tt> attribute +in a function rule (as previously shown for +<tt>t_newline()</tt>). Even if the resulting token is discarded, +changes to the line number remain in effect for subsequent tokens. + +<p> +<li>To support multiple scanners in the same application, the <tt>lex.lex()</tt> function +actually returns a special <tt>Lexer</tt> object. This object has two methods +<tt>input()</tt> and <tt>token()</tt> that can be used to supply input and get tokens. For example: + +<blockquote> +<pre> +lexer = lex.lex() +lexer.input(sometext) +while 1: + tok = lexer.token() + if not tok: break + print tok +</pre> +</blockquote> + +The functions <tt>lex.input()</tt> and <tt>lex.token()</tt> are bound to the <tt>input()</tt> +and <tt>token()</tt> methods of the last lexer created by the lex module. + + +<p> +<li>To reduce compiler startup time and improve performance, the lexer can be built in optimized mode as follows: + +<blockquote> +<pre> +lex.lex(optimize=1) +</pre> +</blockquote> + +When used, most error checking and validation is disabled. This provides a slight performance +gain while tokenizing and tends to chop a few tenths of a second off startup time. Since it disables +error checking, this mode is not the default and is not recommended during development. However, once +you have your compiler fully working, it is usually safe to disable the error checks. + +<p> +<li>You can enable some additional debugging by building the lexer like this: + +<blockquote> +<pre> +lex.lex(debug=1) +</pre> +</blockquote> + +<p> +<li>To help you debug your lexer, <tt>lex.py</tt> comes with a simple main program which will either +tokenize input read from standard input or from a file. 
To use it, simply put this in your lexer: + +<blockquote> +<pre> +if __name__ == '__main__': + lex.runmain() +</pre> +</blockquote> + +Then, run your lexer as a main program such as <tt>python mylex.py</tt>. + +<p> +<li>Since the lexer is written entirely in Python, its performance is +largely determined by that of the Python <tt>re</tt> module. Although +the lexer has been written to be as efficient as possible, it's not +blazingly fast when used on very large input files. Sorry. If +performance is a concern, you might consider upgrading to the most +recent version of Python, creating a hand-written lexer, or offloading +the lexer into a C extension module. In defense of <tt>lex.py</tt>, +its performance is not <em>that</em> bad when used on reasonably +sized input files. For instance, lexing a 4700 line C program with +32000 input tokens takes about 20 seconds on a 200 MHz PC. Obviously, +it will run much faster on a more speedy machine. + +</ul> + +<h2>Parsing basics</h2> + +<tt>yacc.py</tt> is used to parse language syntax. Before showing an +example, there are a few important bits of background that must be +mentioned. First, <tt>syntax</tt> is usually specified in terms of a +context free grammar (CFG). For example, if you wanted to parse +simple arithmetic expressions, you might first write an unambiguous +grammar specification like this: + +<blockquote> +<pre> +expression : expression + term + | expression - term + | term + +term : term * factor + | term / factor + | factor + +factor : NUMBER + | ( expression ) +</pre> +</blockquote> + +Next, the semantic behavior of a language is often specified using a +technique known as syntax directed translation. In syntax directed +translation, attributes are attached to each symbol in a given grammar +rule along with an action. Whenever a particular grammar rule is +recognized, the action describes what to do. 
For example, given the +expression grammar above, you might write the specification for a +simple calculator like this: + +<blockquote> +<pre> +Grammar Action +-------------------------------- -------------------------------------------- +expression0 : expression1 + term expression0.val = expression1.val + term.val + | expression1 - term expression0.val = expression1.val - term.val + | term expression0.val = term.val + +term0 : term1 * factor term0.val = term1.val * factor.val + | term1 / factor term0.val = term1.val / factor.val + | factor term0.val = factor.val + +factor : NUMBER factor.val = int(NUMBER.lexval) + | ( expression ) factor.val = expression.val +</pre> +</blockquote> + +Finally, Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a +bottom up technique that tries to recognize the right-hand-side of various grammar rules. +Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the +grammar symbols are replaced by the grammar symbol on the left-hand-side. + +<p> +LR parsing is commonly implemented by shifting grammar symbols onto a stack and looking at the stack and the next +input token for patterns. 
The details of the algorithm can be found in a compiler text, but the +following example illustrates the steps that are performed if you wanted to parse the expression +<tt>3 + 5 * (10 - 20)</tt> using the grammar defined above: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 + 5 * ( 10 - 20 )$ Shift 3 +2 $ 3 + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +3 $ factor + 5 * ( 10 - 20 )$ Reduce term : factor +4 $ term + 5 * ( 10 - 20 )$ Reduce expr : term +5 $ expr + 5 * ( 10 - 20 )$ Shift + +6 $ expr + 5 * ( 10 - 20 )$ Shift 5 +7 $ expr + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +8 $ expr + factor * ( 10 - 20 )$ Reduce term : factor +9 $ expr + term * ( 10 - 20 )$ Shift * +10 $ expr + term * ( 10 - 20 )$ Shift ( +11 $ expr + term * ( 10 - 20 )$ Shift 10 +12 $ expr + term * ( 10 - 20 )$ Reduce factor : NUMBER +13 $ expr + term * ( factor - 20 )$ Reduce term : factor +14 $ expr + term * ( term - 20 )$ Reduce expr : term +15 $ expr + term * ( expr - 20 )$ Shift - +16 $ expr + term * ( expr - 20 )$ Shift 20 +17 $ expr + term * ( expr - 20 )$ Reduce factor : NUMBER +18 $ expr + term * ( expr - factor )$ Reduce term : factor +19 $ expr + term * ( expr - term )$ Reduce expr : expr - term +20 $ expr + term * ( expr )$ Shift ) +21 $ expr + term * ( expr ) $ Reduce factor : (expr) +22 $ expr + term * factor $ Reduce term : term * factor +23 $ expr + term $ Reduce expr : expr + term +24 $ expr $ Reduce expr +25 $ $ Success! +</pre> +</blockquote> + +When parsing the expression, an underlying state machine and the current input token determine what to do next. +If the next token looks like part of a valid grammar rule (based on other items on the stack), it is generally shifted +onto the stack. If the top of the stack contains a valid right-hand-side of a grammar rule, it is +usually "reduced" and the symbols replaced with the symbol on the left-hand-side. 
When this reduction occurs, the +appropriate action is triggered (if defined). If the input token can't be shifted and the top of stack doesn't match +any grammar rules, a syntax error has occurred and the parser must take some kind of recovery step (or bail out). + +<p> +It is important to note that the underlying implementation is actually built around a large finite-state machine +and some tables. The construction of these tables is quite complicated and beyond the scope of this discussion. +However, subtle details of this process explain why, in the example above, the parser chooses to shift a token +onto the stack in step 9 rather than reducing the rule <tt>expr : expr + term</tt>. + +<h2>Yacc example</h2> + +Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is +how you would do it with <tt>yacc.py</tt>: + +<blockquote> +<pre> +# Yacc example + +import yacc + +# Get the token map from the lexer. This is required. +from calclex import tokens + +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] + +def p_expression_term(t): + 'expression : term' + t[0] = t[1] + +def p_term_times(t): + 'term : term TIMES factor' + t[0] = t[1] * t[3] + +def p_term_div(t): + 'term : term DIVIDE factor' + t[0] = t[1] / t[3] + +def p_term_factor(t): + 'term : factor' + t[0] = t[1] + +def p_factor_num(t): + 'factor : NUMBER' + t[0] = t[1] + +def p_factor_expr(t): + 'factor : LPAREN expression RPAREN' + t[0] = t[2] + +# Error rule for syntax errors +def p_error(t): + print "Syntax error in input!" 
+ +# Build the parser +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + result = yacc.parse(s) + print result +</pre> +</blockquote> + +In this example, each grammar rule is defined by a Python function where the docstring to that function contains the +appropriate context-free grammar specification (an idea borrowed from John Aycock's SPARK toolkit). Each function accepts a single +argument <tt>t</tt> that is a sequence containing the values of each grammar symbol in the corresponding rule. The values of +<tt>t[i]</tt> are mapped to grammar symbols as shown here: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + # ^ ^ ^ ^ + # t[0] t[1] t[2] t[3] + + t[0] = t[1] + t[3] +</pre> +</blockquote> + +For tokens, the "value" in the corresponding <tt>t[i]</tt> is the +<em>same</em> as the value of the <tt>t.value</tt> attribute assigned +in the lexer module. For non-terminals, the value is determined by +whatever is placed in <tt>t[0]</tt> when rules are reduced. This +value can be anything at all. However, it is probably most common for +the value to be a simple Python type, a tuple, or an instance. In this example, we +are relying on the fact that the <tt>NUMBER</tt> token stores an integer value in its value +field. All of the other rules simply perform various types of integer operations and store +the result. + +<p> +The first rule defined in the yacc specification determines the starting grammar +symbol (in this case, a rule for <tt>expression</tt> appears first). Whenever +the starting rule is reduced by the parser and no more input is available, parsing +stops and the final value is returned (this value will be whatever the top-most rule +placed in <tt>t[0]</tt>). + +<p>The <tt>p_error(t)</tt> rule is defined to catch syntax errors. See the error handling section +below for more detail. + +<p> +To build the parser, call the <tt>yacc.yacc()</tt> function. 
This function +looks at the module and attempts to construct all of the LR parsing tables for the grammar +you have specified. The first time <tt>yacc.yacc()</tt> is invoked, you will get a message +such as this: + +<blockquote> +<pre> +$ python calcparse.py +yacc: Generating SLR parsing table... +calc > +</pre> +</blockquote> + +Since table construction is relatively expensive (especially for large +grammars), the resulting parsing table is written to the current +directory in a file called <tt>parsetab.py</tt>. In addition, a +debugging file called <tt>parser.out</tt> is created. On subsequent +executions, <tt>yacc</tt> will reload the table from +<tt>parsetab.py</tt> unless it has detected a change in the underlying +grammar (in which case the tables and <tt>parsetab.py</tt> file are +regenerated). + +<p> +If any errors are detected in your grammar specification, <tt>yacc.py</tt> will produce +diagnostic messages and possibly raise an exception. Some of the errors that can be detected include: + +<ul> +<li>Duplicated function names (if more than one rule function have the same name in the grammar file). +<li>Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars. +<li>Badly specified grammar rules. +<li>Infinite recursion (rules that can never terminate). +<li>Unused rules and tokens +<li>Undefined rules and tokens +</ul> + +The next few sections now discuss a few finer points of grammar construction. + +<h2>Combining Grammar Rule Functions</h2> + +When grammar rules are similar, they can be combined into a single function. 
+For example, consider the two rules in our earlier example: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] +</pre> +</blockquote> + +Instead of writing two functions, you might write a single function like this: + +<blockquote> +<pre> +def p_expression(t): + '''expression : expression PLUS term + | expression MINUS term''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] +</pre> +</blockquote> + +In general, the doc string for any given function can contain multiple grammar rules. So, it would +have also been legal (although possibly confusing) to write this: + +<blockquote> +<pre> +def p_binary_operators(t): + '''expression : expression PLUS term + | expression MINUS term + term : term TIMES factor + | term DIVIDE factor''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] + elif t[2] == '*': + t[0] = t[1] * t[3] + elif t[2] == '/': + t[0] = t[1] / t[3] +</pre> +</blockquote> + +When combining grammar rules into a single function, it is usually a good idea for all of the rules to have +a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more +complicated than necessary. + +<h2>Empty Productions</h2> + +<tt>yacc.py</tt> can handle empty productions by defining a rule like this: + +<blockquote> +<pre> +def p_empty(t): + 'empty :' + pass +</pre> +</blockquote> + +Now to use the empty production, simply use 'empty' as a symbol. For example: + +<blockquote> +<pre> +def p_optitem(t): + 'optitem : item' + ' | empty' + ... +</pre> +</blockquote> + +<h2>Dealing With Ambiguous Grammars</h2> + +The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity. +However, in many situations, it is extremely difficult or awkward to write grammars in this format. 
A +much more natural way to express the grammar is in a more compact form like this: + +<blockquote> +<pre> +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +Unfortunately, this grammar specification is ambiguous. For example, if you are parsing the string +"3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped. +For example, does this expression mean "(3 * 4) + 5" or is it "3 * (4+5)"? + +<p> +When an ambiguous grammar is given to <tt>yacc.py</tt> it will print messages about "shift/reduce conflicts" +or "reduce/reduce conflicts". A shift/reduce conflict is caused when the parser generator can't decide +whether or not to reduce a rule or shift a symbol on the parsing stack. For example, consider +the string "3 * 4 + 5" and the internal parsing stack: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 * 4 + 5$ Shift 3 +2 $ 3 * 4 + 5$ Reduce : expression : NUMBER +3 $ expr * 4 + 5$ Shift * +4 $ expr * 4 + 5$ Shift 4 +5 $ expr * 4 + 5$ Reduce: expression : NUMBER +6 $ expr * expr + 5$ SHIFT/REDUCE CONFLICT ???? +</pre> +</blockquote> + +In this case, when the parser reaches step 6, it has two options. One is to reduce the +rule <tt>expr : expr * expr</tt> on the stack. The other option is to shift the +token <tt>+</tt> on the stack. Both options are perfectly legal from the rules +of the context-free-grammar. + +<p> +By default, all shift/reduce conflicts are resolved in favor of shifting. Therefore, in the above +example, the parser will always shift the <tt>+</tt> instead of reducing. Although this +strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic +expressions. 
In fact, in the above example, the decision to shift <tt>+</tt> is completely wrong---we should have +reduced <tt>expr * expr</tt> since multiplication has higher precedence than addition. + +<p>To resolve ambiguity, especially in expression grammars, <tt>yacc.py</tt> allows individual +tokens to be assigned a precedence level and associativity. This is done by adding a variable +<tt>precedence</tt> to the grammar file like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), +) +</pre> +</blockquote> + +This declaration specifies that <tt>PLUS</tt>/<tt>MINUS</tt> have +the same precedence level and are left-associative and that +<tt>TIMES</tt>/<tt>DIVIDE</tt> have the same precedence and are left-associative. +Furthermore, the declaration specifies that <tt>TIMES</tt>/<tt>DIVIDE</tt> have higher +precedence than <tt>PLUS</tt>/<tt>MINUS</tt> (since they appear later in the +precedence specification). + +<p> +The precedence specification is used to attach a numerical precedence value and associativity direction +to each grammar rule. This is always determined by the precedence of the right-most terminal symbol. Therefore, +if PLUS/MINUS had a precedence of 1 and TIMES/DIVIDE had a precedence of 2, the grammar rules +would have precedence values as follows: + +<blockquote> +<pre> +expression : expression PLUS expression # prec = 1, left + | expression MINUS expression # prec = 1, left + | expression TIMES expression # prec = 2, left + | expression DIVIDE expression # prec = 2, left + | LPAREN expression RPAREN # prec = unknown + | NUMBER # prec = unknown +</pre> +</blockquote> + +When shift/reduce conflicts are encountered, the parser generator resolves the conflict by +looking at the precedence rules and associativity specifiers. + +<p> +<ol> +<li>If the current token has higher precedence, it is shifted. +<li>If the grammar rule on the stack has higher precedence, the rule is reduced. 
+<li>If the current token and the grammar rule have the same precedence, the +rule is reduced for left associativity, whereas the token is shifted for right associativity. +<li>If nothing is known about the precedence, shift/reduce conflicts are resolved in +favor of shifting (the default). +</ol> + +<p> +When shift/reduce conflicts are resolved using the first three techniques (with the help of +precedence rules), <tt>yacc.py</tt> will report no errors or conflicts in the grammar. + +<p> +One problem with the precedence specifier technique is that it is sometimes necessary to +change the precedence of an operator in certain contexts. For example, consider a unary-minus operator +in "3 + 4 * -5". Normally, unary minus has a very high precedence--being evaluated before the multiply. +However, in our precedence specifier, MINUS has a lower precedence than TIMES. To deal with this, +precedence rules can be given for fictitious tokens like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +Now, in the grammar file, we can write our unary minus rule like this: + +<blockquote> +<pre> +def p_expr_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] +</pre> +</blockquote> + +In this case, <tt>%prec UMINUS</tt> overrides the default rule precedence--setting it to that +of UMINUS in the precedence specifier. + +<p> +It is also possible to specify non-associativity in the <tt>precedence</tt> table. This would +be used when you <em>don't</em> want operations to chain together. For example, suppose +you wanted to support comparison operators like <tt><</tt> and <tt>></tt> but you didn't want to allow +combinations like <tt>a < b < c</tt>. 
To do this, simply specify a rule like this: + +<blockquote> +<pre> +precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +<p> +Reduce/reduce conflicts are caused when there are multiple grammar +rules that can be applied to a given set of symbols. This kind of +conflict is almost always bad and is always resolved by picking the +rule that appears first in the grammar file. Reduce/reduce conflicts +are almost always caused when different sets of grammar rules somehow +generate the same set of symbols. For example: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER + | ID EQUALS expression + +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +In this case, a reduce/reduce conflict exists between these two rules: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER +expression : NUMBER +</pre> +</blockquote> + +For example, if you wrote "a = 5", the parser can't figure out if this +is supposed to be reduced as <tt>assignment : ID EQUALS NUMBER</tt> or +whether it's supposed to reduce the 5 as an expression and then reduce +the rule <tt>assignment : ID EQUALS expression</tt>. + +<h2>The parser.out file</h2> + +Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR +parsing algorithm. To assist in debugging, <tt>yacc.py</tt> creates a debugging file called +'parser.out' when it generates the parsing table. 
The contents of this file look like the following: + +<blockquote> +<pre> +Unused terminals: + + +Grammar + +Rule 1 expression -> expression PLUS expression +Rule 2 expression -> expression MINUS expression +Rule 3 expression -> expression TIMES expression +Rule 4 expression -> expression DIVIDE expression +Rule 5 expression -> NUMBER +Rule 6 expression -> LPAREN expression RPAREN + +Terminals, with rules where they appear + +TIMES : 3 +error : +MINUS : 2 +RPAREN : 6 +LPAREN : 6 +DIVIDE : 4 +PLUS : 1 +NUMBER : 5 + +Nonterminals, with rules where they appear + +expression : 1 1 2 2 3 3 4 4 6 0 + + +Parsing method: SLR + + +state 0 + + S' -> . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 1 + + S' -> expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 2 + + expression -> LPAREN . expression RPAREN + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 3 + + expression -> NUMBER . + + $ reduce using rule 5 + PLUS reduce using rule 5 + MINUS reduce using rule 5 + TIMES reduce using rule 5 + DIVIDE reduce using rule 5 + RPAREN reduce using rule 5 + + +state 4 + + expression -> expression TIMES . expression + expression -> . 
expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 5 + + expression -> expression MINUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 6 + + expression -> expression PLUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 7 + + expression -> expression DIVIDE . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 8 + + expression -> LPAREN expression . RPAREN + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + RPAREN shift and go to state 13 + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 9 + + expression -> expression TIMES expression . + expression -> expression . PLUS expression + expression -> expression . 
MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 3 + PLUS reduce using rule 3 + MINUS reduce using rule 3 + TIMES reduce using rule 3 + DIVIDE reduce using rule 3 + RPAREN reduce using rule 3 + + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 10 + + expression -> expression MINUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 2 + PLUS reduce using rule 2 + MINUS reduce using rule 2 + RPAREN reduce using rule 2 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 11 + + expression -> expression PLUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 1 + PLUS reduce using rule 1 + MINUS reduce using rule 1 + RPAREN reduce using rule 1 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 1 ] + ! DIVIDE [ reduce using rule 1 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 12 + + expression -> expression DIVIDE expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 4 + PLUS reduce using rule 4 + MINUS reduce using rule 4 + TIMES reduce using rule 4 + DIVIDE reduce using rule 4 + RPAREN reduce using rule 4 + + ! 
PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 13 + + expression -> LPAREN expression RPAREN . + + $ reduce using rule 6 + PLUS reduce using rule 6 + MINUS reduce using rule 6 + TIMES reduce using rule 6 + DIVIDE reduce using rule 6 + RPAREN reduce using rule 6 +</pre> +</blockquote> + +In the file, each state of the grammar is described. Within each state the "." indicates the current +location of the parse within any applicable grammar rules. In addition, the actions for each valid +input token are listed. When a shift/reduce or reduce/reduce conflict arises, rules <em>not</em> selected +are prefixed with an !. For example: + +<blockquote> +<pre> + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] +</pre> +</blockquote> + +By looking at these rules (and with a little practice), you can usually track down the source +of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are +bad. However, the only way to be sure that they are resolved correctly is to look at <tt>parser.out</tt>. + +<h2>Syntax Error Handling</h2> + +When a syntax error occurs during parsing, the error is immediately +detected (i.e., the parser does not read any more tokens beyond the +source of the error). Error recovery in LR parsers is a delicate +topic that involves ancient rituals and black-magic. The recovery mechanism +provided by <tt>yacc.py</tt> is comparable to Unix yacc so you may want to +consult a book like O'Reilly's "Lex and Yacc" for some of the finer details. + +<p> +When a syntax error occurs, <tt>yacc.py</tt> performs the following steps: + +<ol> +<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function +is called with the offending token as an argument. 
Afterwards, the parser enters +an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it +has successfully shifted at least 3 tokens onto the parsing stack. + +<p> +<li>If no recovery action is taken in <tt>p_error()</tt>, the offending lookahead token is replaced +with a special <tt>error</tt> token. + +<p> +<li>If the offending lookahead token is already set to <tt>error</tt>, the top item of the parsing stack is +deleted. + +<p> +<li>If the entire parsing stack is unwound, the parser enters a restart state and attempts to start +parsing from its initial state. + +<p> +<li>If a grammar rule accepts <tt>error</tt> as a token, it will be +shifted onto the parsing stack. + +<p> +<li>If the top item of the parsing stack is <tt>error</tt>, lookahead tokens will be discarded until the +parser can successfully shift a new symbol or reduce a rule involving <tt>error</tt>. +</ol> + +<h4>Recovery and resynchronization with error rules</h4> + +The most well-behaved approach for handling syntax errors is to write grammar rules that include the <tt>error</tt> +token. For example, suppose your language had a grammar rule for a print statement like this: + +<blockquote> +<pre> +def p_statement_print(t): + 'statement : PRINT expr SEMI' + ... +</pre> +</blockquote> + +To account for the possibility of a bad expression, you might write an additional grammar rule like this: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error SEMI' + print "Syntax error in print statement. Bad expression" + +</pre> +</blockquote> + +In this case, the <tt>error</tt> token will match any sequence of +tokens that might appear up to the first semicolon that is +encountered. Once the semicolon is reached, the rule will be +invoked and the <tt>error</tt> token will go away. + +<p> +This type of recovery is sometimes known as parser resynchronization. 
+The <tt>error</tt> token acts as a wildcard for any bad input text and +the token immediately following <tt>error</tt> acts as a +synchronization token. + +<p> +It is important to note that the <tt>error</tt> token usually does not appear as the last token +on the right in an error rule. For example: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error' + print "Syntax error in print statement. Bad expression" +</pre> +</blockquote> + +This is because the first bad token encountered will cause the rule to +be reduced--which may make it difficult to recover if more bad tokens +immediately follow. + +<h4>Panic mode recovery</h4> + +An alternative error recovery scheme is to enter a panic mode recovery in which tokens are +discarded to a point where the parser might be able to recover in some sensible manner. + +<p> +Panic mode recovery is implemented entirely in the <tt>p_error()</tt> function. For example, this +function starts discarding tokens until it reaches a closing '}'. Then, it restarts the +parser in its initial state. + +<blockquote> +<pre> +def p_error(t): + print "Whoa. You are seriously hosed." + # Read ahead looking for a closing '}' + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'RBRACE': break + yacc.restart() +</pre> +</blockquote> + +<p> +This function simply discards the bad token and tells the parser that the error was ok. + +<blockquote> +<pre> +def p_error(t): + print "Syntax error at token", t.type + # Just discard the token and tell the parser it's okay. + yacc.errok() +</pre> +</blockquote> + +<P> +Within the <tt>p_error()</tt> function, three functions are available to control the behavior +of the parser: +<p> +<ul> +<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery +mode. 
This will prevent an <tt>error</tt> token from being generated and will reset the internal +error counters so that the next syntax error will call <tt>p_error()</tt> again. + +<p> +<li><tt>yacc.token()</tt>. This returns the next token on the input stream. + +<p> +<li><tt>yacc.restart()</tt>. This discards the entire parsing stack and resets the parser +to its initial state. +</ul> + +Note: these functions are only available when invoking <tt>p_error()</tt> and are not available +at any other time. + +<p> +To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be +useful if trying to synchronize on special characters. For example: + +<blockquote> +<pre> +def p_error(t): + # Read ahead looking for a terminating ";" + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'SEMI': break + yacc.errok() + + # Return SEMI to the parser as the next lookahead token + return tok +</pre> +</blockquote> + +<h4>General comments on error handling</h4> + +For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable +technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy +to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want +to discard huge portions of the input text to find a valid restart point. + +<h2>Line Number Tracking</h2> + +<tt>yacc.py</tt> automatically tracks line numbers for all of the grammar symbols and tokens it processes. To retrieve the line +numbers, two functions are used in grammar rules: + +<ul> +<li><tt>t.lineno(num)</tt>. Return the starting line number for symbol <em>num</em> +<li><tt>t.linespan(num)</tt>. Return a tuple (startline,endline) with the starting and ending line number for symbol <em>num</em>. 
+</ul> + +For example: + +<blockquote> +<pre> +def p_expression(t): + 'expression : expression PLUS expression' + t.lineno(1) # Line number of the left expression + t.lineno(2) # line number of the PLUS operator + t.lineno(3) # line number of the right expression + ... + start,end = t.linespan(3) # Start,end lines of the right expression + +</pre> +</blockquote> + +Since line numbers are managed internally by the parser, there is usually no need to modify the line +numbers. However, if you want to save the line numbers in a parse-tree node, you will need to make your own +private copy. + +<h2>AST Construction</h2> + +<tt>yacc.py</tt> provides no special functions for constructing an abstract syntax tree. However, such +construction is easy enough to do on your own. Simply create a data structure for abstract syntax tree nodes +and assign nodes to <tt>t[0]</tt> in each rule. + +For example: + +<blockquote> +<pre> +class Expr: pass + +class BinOp(Expr): + def __init__(self,left,op,right): + self.type = "binop" + self.left = left + self.right = right + self.op = op + +class Number(Expr): + def __init__(self,value): + self.type = "number" + self.value = value + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = BinOp(t[1],t[2],t[3]) + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = Number(t[1]) +</pre> +</blockquote> + +To simplify tree traversal, it may make sense to pick a very generic tree structure for your parse tree nodes. 
+For example: + +<blockquote> +<pre> +class Node: + def __init__(self,type,children=None,leaf=None): + self.type = type + if children: + self.children = children + else: + self.children = [ ] + self.leaf = leaf + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = Node("binop", [t[1],t[3]], t[2]) +</pre> +</blockquote> + +<h2>Yacc implementation notes</h2> + +<ul> +<li>By default, <tt>yacc.py</tt> relies on <tt>lex.py</tt> for tokenizing. However, an alternative tokenizer +can be supplied as follows: + +<blockquote> +<pre> +yacc.parse(lexer=x) +</pre> +</blockquote> +in this case, <tt>x</tt> must be a Lexer object that minimally has a <tt>x.token()</tt> method for retrieving the next +token. If an input string is given to <tt>yacc.parse()</tt>, the lexer must also have an <tt>x.input()</tt> method. + +<p> +<li>By default, the yacc generates tables in debugging mode (which produces the parser.out file and other output). +To disable this, use + +<blockquote> +<pre> +yacc.yacc(debug=0) +</pre> +</blockquote> + +<p> +<li>To change the name of the <tt>parsetab.py</tt> file, use: + +<blockquote> +<pre> +yacc.yacc(tabmodule="foo") +</pre> +</blockquote> + +<P> +<li>To print copious amounts of debugging during parsing, use: + +<blockquote> +<pre> +yacc.parse(debug=1) +</pre> +</blockquote> + +<p> +<li>The <tt>yacc.yacc()</tt> function really returns a parser object. If you want to support multiple +parsers in the same application, do this: + +<blockquote> +<pre> +p = yacc.yacc() +... +p.parse() +</pre> +</blockquote> + +Note: The function <tt>yacc.parse()</tt> is bound to the last parser that was generated. + +<p> +<li>Since the generation of the SLR tables is relatively expensive, previously generated tables are +cached and reused if possible. 
The decision to regenerate the tables is determined by taking an MD5 +checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated. + +<p> +It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules +and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow +machine. Please be patient. + +<p> +<li>Since LR parsing is mostly driven by tables, the performance of the parser is largely independent of the +size of the grammar. The biggest bottlenecks will be the lexer and the complexity of your grammar rules. +</ul> + +<h2>Parser and Lexer State Management</h2> + +In advanced parsing applications, you may want to have multiple +parsers and lexers. Furthermore, the parser may want to control the +behavior of the lexer in some way. + +<p> +To do this, it is important to note that both the lexer and parser are +actually implemented as objects. These objects are returned by the +<tt>lex()</tt> and <tt>yacc()</tt> functions respectively. For example: + +<blockquote> +<pre> +lexer = lex.lex() # Return lexer object +parser = yacc.yacc() # Return parser object +</pre> +</blockquote> + +Within lexer and parser rules, these objects are also available. In the lexer, +the "lexer" attribute of a token refers to the lexer object in use. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + ... + print t.lexer # Show lexer object +</pre> +</blockquote> + +In the parser, the "lexer" and "parser" attributes refer to the lexer +and parser objects respectively. + +<blockquote> +<pre> +def p_expr_plus(t): + 'expr : expr PLUS expr' + ... + print t.parser # Show parser object + print t.lexer # Show lexer object +</pre> +</blockquote> + +If necessary, arbitrary attributes can be attached to the lexer or parser object. 
+For example, if you wanted to have different parsing modes, you could attach a mode +attribute to the parser object and look at it later. + +<h2>Using Python's Optimized Mode</h2> + +Because PLY uses information from doc-strings, parsing and lexing +information must be gathered while running the Python interpreter in +normal mode (i.e., not with the -O or -OO options). However, if you +specify optimized mode like this: + +<blockquote> +<pre> +lex.lex(optimize=1) +yacc.yacc(optimize=1) +</pre> +</blockquote> + +then PLY can later be used when Python runs in optimized mode. To make this work, +make sure you first run Python in normal mode. Once the lexing and parsing tables +have been generated the first time, run Python in optimized mode. PLY will use +the tables without the need for doc strings. + +<p> +Beware: running PLY in optimized mode disables a lot of error +checking. You should only do this when your project has stabilized +and you don't need to do any debugging. + +<h2>Where to go from here?</h2> + +The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a +compilers textbook for the theory and underlying implementation details or LR parsing. + +</body> +</html> + + + + + + + diff --git a/ext/ply/example/ansic/README b/ext/ply/example/ansic/README new file mode 100644 index 000000000..e049d3b4e --- /dev/null +++ b/ext/ply/example/ansic/README @@ -0,0 +1,2 @@ +This example is incomplete. Was going to specify an ANSI C parser. +This is part of it. diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py new file mode 100644 index 000000000..afd995208 --- /dev/null +++ b/ext/ply/example/ansic/clex.py @@ -0,0 +1,161 @@ +# ---------------------------------------------------------------------- +# clex.py +# +# A lexer for ANSI C. 
# ----------------------------------------------------------------------

import lex

# Reserved words.  Each entry is the token type of one C keyword; the
# identifier rule later in this file lower-cases these names to recognise
# the keywords in the input.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

# Complete list of token types: the reserved words plus everything below.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters (space, tab, form feed)
t_ignore = ' \t\x0c'

# Newlines
def t_NEWLINE(t):
    r'\n+'
    # The docstring above is the rule's regular expression, not documentation.
    # Advance the line counter by the number of newlines matched.  Nothing is
    # returned, so this rule produces no token of its own.
    t.lineno += t.value.count("\n")

# Operators.  Each pattern is a regular expression, so characters that are
# regex metacharacters (+ * | ^) must be backslash-escaped to match literally.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
# A bare '^' is the start-of-input anchor and matches the empty string;
# it has to be escaped to recognise the caret character itself.
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
+t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'^=' + +# Increment/decrement +t_PLUSPLUS = r'\+\+' +t_MINUSMINUS = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_CONDOP = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers and reserved words + +reserved_map = { } +for r in reserved: + reserved_map[r.lower()] = r + +def t_ID(t): + r'[A-Za-z_][\w_]*' + t.type = reserved_map.get(t.value,"ID") + return t + +# Integer literal +t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' + +# String literal +t_SCONST = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comments +def t_comment(t): + r' /\*(.|\n)*?\*/' + t.lineno += t.value.count('\n') + +# Preprocessor directive (ignored) +def t_preprocessor(t): + r'\#(.)*?\n' + t.lineno += 1 + +def t_error(t): + print "Illegal character %s" % repr(t.value[0]) + t.skip(1) + +lexer = lex.lex(optimize=1) +if __name__ == "__main__": + lex.runmain(lexer) + + + + + diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py new file mode 100644 index 000000000..ddfd5c72b --- /dev/null +++ b/ext/ply/example/ansic/cparse.py @@ -0,0 +1,859 @@ +# ----------------------------------------------------------------------------- +# cparse.py +# +# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. 
+# ----------------------------------------------------------------------------- + +import yacc +import clex + +# Get the token map +tokens = clex.tokens + +# translation-unit: + +def p_translation_unit_1(t): + 'translation_unit : external_declaration' + pass + +def p_translation_unit_2(t): + 'translation_unit : translation_unit external_declaration' + pass + +# external-declaration: + +def p_external_declaration_1(t): + 'external_declaration : function_definition' + pass + +def p_external_declaration_2(t): + 'external_declaration : declaration' + pass + +# function-definition: + +def p_function_definition_1(t): + 'function_definition : declaration_specifiers declarator declaration_list compound_statement' + pass + +def p_function_definition_2(t): + 'function_definition : declarator declaration_list compound_statement' + pass + +def p_function_definition_3(t): + 'function_definition : declarator compound_statement' + pass + +def p_function_definition_4(t): + 'function_definition : declaration_specifiers declarator compound_statement' + pass + +# declaration: + +def p_declaration_1(t): + 'declaration : declaration_specifiers init_declarator_list SEMI' + pass + +def p_declaration_2(t): + 'declaration : declaration_specifiers SEMI' + pass + +# declaration-list: + +def p_declaration_list_1(t): + 'declaration_list : declaration' + pass + +def p_declaration_list_2(t): + 'declaration_list : declaration_list declaration ' + pass + +# declaration-specifiers +def p_declaration_specifiers_1(t): + 'declaration_specifiers : storage_class_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_2(t): + 'declaration_specifiers : type_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_3(t): + 'declaration_specifiers : type_qualifier declaration_specifiers' + pass + +def p_declaration_specifiers_4(t): + 'declaration_specifiers : storage_class_specifier' + pass + +def p_declaration_specifiers_5(t): + 'declaration_specifiers : type_specifier' + 
pass + +def p_declaration_specifiers_6(t): + 'declaration_specifiers : type_qualifier' + pass + +# storage-class-specifier +def p_storage_class_specifier(t): + '''storage_class_specifier : AUTO + | REGISTER + | STATIC + | EXTERN + | TYPEDEF + ''' + pass + +# type-specifier: +def p_type_specifier(t): + '''type_specifier : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPEID + ''' + pass + +# type-qualifier: +def p_type_qualifier(t): + '''type_qualifier : CONST + | VOLATILE''' + pass + +# struct-or-union-specifier + +def p_struct_or_union_specifier_1(t): + 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_2(t): + 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_3(t): + 'struct_or_union_specifier : struct_or_union ID' + pass + +# struct-or-union: +def p_struct_or_union(t): + '''struct_or_union : STRUCT + | UNION + ''' + pass + +# struct-declaration-list: + +def p_struct_declaration_list_1(t): + 'struct_declaration_list : struct_declaration' + pass + +def p_struct_declaration_list_2(t): + 'struct_declaration_list : struct_declarator_list struct_declaration' + pass + +# init-declarator-list: + +def p_init_declarator_list_1(t): + 'init_declarator_list : init_declarator' + pass + +def p_init_declarator_list_2(t): + 'init_declarator_list : init_declarator_list COMMA init_declarator' + pass + +# init-declarator + +def p_init_declarator_1(t): + 'init_declarator : declarator' + pass + +def p_init_declarator_2(t): + 'init_declarator : declarator EQUALS initializer' + pass + +# struct-declaration: + +def p_struct_declaration(t): + 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' + pass + +# specifier-qualifier-list: + +def p_specifier_qualifier_list_1(t): + 'specifier_qualifier_list : type_specifier 
specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_2(t): + 'specifier_qualifier_list : type_specifier' + pass + +def p_specifier_qualifier_list_3(t): + 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_4(t): + 'specifier_qualifier_list : type_qualifier' + pass + +# struct-declarator-list: + +def p_struct_declarator_list_1(t): + 'struct_declarator_list : struct_declarator' + pass + +def p_struct_declarator_list_2(t): + 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' + pass + +# struct-declarator: + +def p_struct_declarator_1(t): + 'struct_declarator : declarator' + pass + +def p_struct_declarator_2(t): + 'struct_declarator : declarator COLON constant_expression' + pass + +def p_struct_declarator_3(t): + 'struct_declarator : COLON constant_expression' + pass + +# enum-specifier: + +def p_enum_specifier_1(t): + 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_2(t): + 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_3(t): + 'enum_specifier : ENUM ID' + pass + +# enumerator_list: +def p_enumerator_list_1(t): + 'enumerator_list : enumerator' + pass + +def p_enumerator_list_2(t): + 'enumerator_list : enumerator_list COMMA enumerator' + pass + +# enumerator: +def p_enumerator_1(t): + 'enumerator : ID' + pass + +def p_enumerator_2(t): + 'enumerator : ID EQUALS constant_expression' + pass + +# declarator: + +def p_declarator_1(t): + 'declarator : pointer direct_declarator' + pass + +def p_declarator_2(t): + 'declarator : direct_declarator' + pass + +# direct-declarator: + +def p_direct_declarator_1(t): + 'direct_declarator : ID' + pass + +def p_direct_declarator_2(t): + 'direct_declarator : LPAREN declarator RPAREN' + pass + +def p_direct_declarator_3(t): + 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_declarator_4(t): + 'direct_declarator : 
direct_declarator LPAREN parameter_type_list RPAREN ' + pass + +def p_direct_declarator_5(t): + 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' + pass + +def p_direct_declarator_6(t): + 'direct_declarator : direct_declarator LPAREN RPAREN ' + pass + +# pointer: +def p_pointer_1(t): + 'pointer : TIMES type_qualifier_list' + pass + +def p_pointer_2(t): + 'pointer : TIMES' + pass + +def p_pointer_3(t): + 'pointer : TIMES type_qualifier_list pointer' + pass + +def p_pointer_4(t): + 'pointer : TIMES pointer' + pass + +# type-qualifier-list: + +def p_type_qualifier_list_1(t): + 'type_qualifier_list : type_qualifier' + pass + +def p_type_qualifier_list_2(t): + 'type_qualifier_list : type_qualifier_list type_qualifier' + pass + +# parameter-type-list: + +def p_parameter_type_list_1(t): + 'parameter_type_list : parameter_list' + pass + +def p_parameter_type_list_2(t): + 'parameter_type_list : parameter_list COMMA ELLIPSIS' + pass + +# parameter-list: + +def p_parameter_list_1(t): + 'parameter_list : parameter_declaration' + pass + +def p_parameter_list_2(t): + 'parameter_list : parameter_list COMMA parameter_declaration' + pass + +# parameter-declaration: +def p_parameter_declaration_1(t): + 'parameter_declaration : declaration_specifiers declarator' + pass + +def p_parameter_declaration_2(t): + 'parameter_declaration : declaration_specifiers abstract_declarator_opt' + pass + +# identifier-list: +def p_identifier_list_1(t): + 'identifier_list : ID' + pass + +def p_identifier_list_2(t): + 'identifier_list : identifier_list COMMA ID' + pass + +# initializer: + +def p_initializer_1(t): + 'initializer : assignment_expression' + pass + +def p_initializer_2(t): + '''initializer : LBRACE initializer_list RBRACE + | LBRACE initializer_list COMMA RBRACE''' + pass + +# initializer-list: + +def p_initializer_list_1(t): + 'initializer_list : initializer' + pass + +def p_initializer_list_2(t): + 'initializer_list : initializer_list COMMA initializer' + pass + +# 
type-name: + +def p_type_name(t): + 'type_name : specifier_qualifier_list abstract_declarator_opt' + pass + +def p_abstract_declarator_opt_1(t): + 'abstract_declarator_opt : empty' + pass + +def p_abstract_declarator_opt_2(t): + 'abstract_declarator_opt : abstract_declarator' + pass + +# abstract-declarator: + +def p_abstract_declarator_1(t): + 'abstract_declarator : pointer ' + pass + +def p_abstract_declarator_2(t): + 'abstract_declarator : pointer direct_abstract_declarator' + pass + +def p_abstract_declarator_3(t): + 'abstract_declarator : direct_abstract_declarator' + pass + +# direct-abstract-declarator: + +def p_direct_abstract_declarator_1(t): + 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' + pass + +def p_direct_abstract_declarator_2(t): + 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_3(t): + 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_4(t): + 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' + pass + +def p_direct_abstract_declarator_5(t): + 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' + pass + +# Optional fields in abstract declarators + +def p_constant_expression_opt_1(t): + 'constant_expression_opt : empty' + pass + +def p_constant_expression_opt_2(t): + 'constant_expression_opt : constant_expression' + pass + +def p_parameter_type_list_opt_1(t): + 'parameter_type_list_opt : empty' + pass + +def p_parameter_type_list_opt_2(t): + 'parameter_type_list_opt : parameter_type_list' + pass + +# statement: + +def p_statement(t): + ''' + statement : labeled_statement + | expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + ''' + pass + +# labeled-statement: + +def p_labeled_statement_1(t): + 'labeled_statement : ID COLON statement' + pass + +def 
p_labeled_statement_2(t): + 'labeled_statement : CASE constant_expression COLON statement' + pass + +def p_labeled_statement_3(t): + 'labeled_statement : DEFAULT COLON statement' + pass + +# expression-statement: +def p_expression_statement(t): + 'expression_statement : expression_opt SEMI' + pass + +# compound-statement: + +def p_compound_statement_1(t): + 'compound_statement : LBRACE declaration_list statement_list RBRACE' + pass + +def p_compound_statement_2(t): + 'compound_statement : LBRACE statement_list RBRACE' + pass + +def p_compound_statement_3(t): + 'compound_statement : LBRACE declaration_list RBRACE' + pass + +def p_compound_statement_4(t): + 'compound_statement : LBRACE RBRACE' + pass + +# statement-list: + +def p_statement_list_1(t): + 'statement_list : statement' + pass + +def p_statement_list_2(t): + 'statement_list : statement_list statement' + pass + +# selection-statement + +def p_selection_statement_1(t): + 'selection_statement : IF LPAREN expression RPAREN statement' + pass + +def p_selection_statement_2(t): + 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' + pass + +def p_selection_statement_3(t): + 'selection_statement : SWITCH LPAREN expression RPAREN statement ' + pass + +# iteration_statement: + +def p_iteration_statement_1(t): + 'iteration_statement : WHILE LPAREN expression RPAREN statement' + pass + +def p_iteration_statement_2(t): + 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' + pass + +def p_iteration_statement_3(t): + 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' + pass + +# jump_statement: + +def p_jump_statement_1(t): + 'jump_statement : GOTO ID SEMI' + pass + +def p_jump_statement_2(t): + 'jump_statement : CONTINUE SEMI' + pass + +def p_jump_statement_3(t): + 'jump_statement : BREAK SEMI' + pass + +def p_jump_statement_4(t): + 'jump_statement : RETURN expression_opt SEMI' + pass + +def p_expression_opt_1(t): + 
'expression_opt : empty' + pass + +def p_expression_opt_2(t): + 'expression_opt : expression' + pass + +# expression: +def p_expression_1(t): + 'expression : assignment_expression' + pass + +def p_expression_2(t): + 'expression : expression COMMA assignment_expression' + pass + +# assigment_expression: +def p_assignment_expression_1(t): + 'assignment_expression : conditional_expression' + pass + +def p_assignment_expression_2(t): + 'assignment_expression : unary_expression assignment_operator assignment_expression' + pass + +# assignment_operator: +def p_assignment_operator(t): + ''' + assignment_operator : EQUALS + | TIMESEQUAL + | DIVEQUAL + | MODEQUAL + | PLUSEQUAL + | MINUSEQUAL + | LSHIFTEQUAL + | RSHIFTEQUAL + | ANDEQUAL + | OREQUAL + | XOREQUAL + ''' + pass + +# conditional-expression +def p_conditional_expression_1(t): + 'conditional_expression : logical_or_expression' + pass + +def p_conditional_expression_2(t): + 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' + pass + +# constant-expression + +def p_constant_expression(t): + 'constant_expression : conditional_expression' + pass + +# logical-or-expression + +def p_logical_or_expression_1(t): + 'logical_or_expression : logical_and_expression' + pass + +def p_logical_or_expression_2(t): + 'logical_or_expression : logical_or_expression LOR logical_and_expression' + pass + +# logical-and-expression + +def p_logical_and_expression_1(t): + 'logical_and_expression : inclusive_or_expression' + pass + +def p_logical_and_expression_2(t): + 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' + pass + +# inclusive-or-expression: + +def p_inclusive_or_expression_1(t): + 'inclusive_or_expression : exclusive_or_expression' + pass + +def p_inclusive_or_expression_2(t): + 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' + pass + +# exclusive-or-expression: + +def p_exclusive_or_expression_1(t): + 
'exclusive_or_expression : and_expression' + pass + +def p_exclusive_or_expression_2(t): + 'exclusive_or_expression : exclusive_or_expression XOR and_expression' + pass + +# AND-expression + +def p_and_expression_1(t): + 'and_expression : equality_expression' + pass + +def p_and_expression_2(t): + 'and_expression : and_expression AND equality_expression' + pass + + +# equality-expression: +def p_equality_expression_1(t): + 'equality_expression : relational_expression' + pass + +def p_equality_expression_2(t): + 'equality_expression : equality_expression EQ relational_expression' + pass + +def p_equality_expression_3(t): + 'equality_expression : equality_expression NE relational_expression' + pass + + +# relational-expression: +def p_relational_expression_1(t): + 'relational_expression : shift_expression' + pass + +def p_relational_expression_2(t): + 'relational_expression : relational_expression LT shift_expression' + pass + +def p_relational_expression_3(t): + 'relational_expression : relational_expression GT shift_expression' + pass + +def p_relational_expression_4(t): + 'relational_expression : relational_expression LE shift_expression' + pass + +def p_relational_expression_5(t): + 'relational_expression : relational_expression GE shift_expression' + pass + +# shift-expression + +def p_shift_expression_1(t): + 'shift_expression : additive_expression' + pass + +def p_shift_expression_2(t): + 'shift_expression : shift_expression LSHIFT additive_expression' + pass + +def p_shift_expression_3(t): + 'shift_expression : shift_expression RSHIFT additive_expression' + pass + +# additive-expression + +def p_additive_expression_1(t): + 'additive_expression : multiplicative_expression' + pass + +def p_additive_expression_2(t): + 'additive_expression : additive_expression PLUS multiplicative_expression' + pass + +def p_additive_expression_3(t): + 'additive_expression : additive_expression MINUS multiplicative_expression' + pass + +# multiplicative-expression + +def 
p_multiplicative_expression_1(t): + 'multiplicative_expression : cast_expression' + pass + +def p_multiplicative_expression_2(t): + 'multiplicative_expression : multiplicative_expression TIMES cast_expression' + pass + +def p_multiplicative_expression_3(t): + 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' + pass + +def p_multiplicative_expression_4(t): + 'multiplicative_expression : multiplicative_expression MOD cast_expression' + pass + +# cast-expression: + +def p_cast_expression_1(t): + 'cast_expression : unary_expression' + pass + +def p_cast_expression_2(t): + 'cast_expression : LPAREN type_name RPAREN cast_expression' + pass + +# unary-expression: +def p_unary_expression_1(t): + 'unary_expression : postfix_expression' + pass + +def p_unary_expression_2(t): + 'unary_expression : PLUSPLUS unary_expression' + pass + +def p_unary_expression_3(t): + 'unary_expression : MINUSMINUS unary_expression' + pass + +def p_unary_expression_4(t): + 'unary_expression : unary_operator cast_expression' + pass + +def p_unary_expression_5(t): + 'unary_expression : SIZEOF unary_expression' + pass + +def p_unary_expression_6(t): + 'unary_expression : SIZEOF LPAREN type_name RPAREN' + pass + +#unary-operator +def p_unary_operator(t): + '''unary_operator : AND + | TIMES + | PLUS + | MINUS + | NOT + | LNOT ''' + pass + +# postfix-expression: +def p_postfix_expression_1(t): + 'postfix_expression : primary_expression' + pass + +def p_postfix_expression_2(t): + 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' + pass + +def p_postfix_expression_3(t): + 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' + pass + +def p_postfix_expression_4(t): + 'postfix_expression : postfix_expression LPAREN RPAREN' + pass + +def p_postfix_expression_5(t): + 'postfix_expression : postfix_expression PERIOD ID' + pass + +def p_postfix_expression_6(t): + 'postfix_expression : postfix_expression ARROW ID' + pass + +def 
p_postfix_expression_7(t): + 'postfix_expression : postfix_expression PLUSPLUS' + pass + +def p_postfix_expression_8(t): + 'postfix_expression : postfix_expression MINUSMINUS' + pass + +# primary-expression: +def p_primary_expression(t): + '''primary_expression : ID + | constant + | SCONST + | LPAREN expression RPAREN''' + pass + +# argument-expression-list: +def p_argument_expression_list(t): + '''argument_expression_list : assignment_expression + | argument_expression_list COMMA assignment_expression''' + pass + +# constant: +def p_constant(t): + '''constant : ICONST + | FCONST + | CCONST''' + pass + + +def p_empty(t): + 'empty : ' + pass + +def p_error(t): + print "Whoa. We're hosed" + +import profile +# Build the grammar +profile.run("yacc.yacc()") + + + + diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py new file mode 100644 index 000000000..aeb23c246 --- /dev/null +++ b/ext/ply/example/calc/calc.py @@ -0,0 +1,108 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) diff --git a/ext/ply/example/hedit/hedit.py 
b/ext/ply/example/hedit/hedit.py new file mode 100644 index 000000000..f00427bf5 --- /dev/null +++ b/ext/ply/example/hedit/hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. +# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain() + + diff --git a/ext/ply/example/optcalc/README b/ext/ply/example/optcalc/README new file mode 100644 index 000000000..6d196f0ee --- /dev/null +++ b/ext/ply/example/optcalc/README @@ -0,0 +1,9 @@ +An example showing how to use Python optimized mode. +To run: + + - First run 'python calc.py' + + - Then run 'python -OO calc.py' + +If working corretly, the second version should run the +same way. diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py new file mode 100644 index 000000000..fa66cda5b --- /dev/null +++ b/ext/ply/example/optcalc/calc.py @@ -0,0 +1,110 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex(optimize=1) + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + elif t[2] == '<': t[0] = t[1] < t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc(optimize=1) + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) + diff --git 
a/ext/ply/lex.py b/ext/ply/lex.py new file mode 100644 index 000000000..7ad7a394b --- /dev/null +++ b/ext/ply/lex.py @@ -0,0 +1,681 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/lex.py 1.1 03/06/06 14:53:34-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This module automatically constructs a lexical analysis module from regular +# expression rules defined in a user-defined module. The idea is essentially the same +# as that used in John Aycock's Spark framework, but the implementation works +# at the module level rather than requiring the use of classes. +# +# This module tries to provide an interface that is closely modeled after +# the traditional lex interface in Unix. It also differs from Spark +# in that: +# +# - It provides more extensive error checking and reporting if +# the user supplies a set of regular expressions that can't +# be compiled or if there is any other kind of a problem in +# the specification. 
+# +# - The interface is geared towards LALR(1) and LR(1) parser +# generators. That is, tokens are generated one at a time +# rather than being generated in advance all in one step. +# +# There are a few limitations of this module +# +# - The module interface makes it somewhat awkward to support more +# than one lexer at a time. Although somewhat inelegant from a +# design perspective, this is rarely a practical concern for +# most compiler projects. +# +# - The lexer requires that the entire input text be read into +# a string before scanning. I suppose that most machines have +# enough memory to make this a minor issue, but it makes +# the lexer somewhat difficult to use in interactive sessions +# or with streaming data. +# +#----------------------------------------------------------------------------- + +r""" +lex.py + +This module builds lex-like scanners based on regular expression rules. +To use the module, simply write a collection of regular expression rules +and actions like this: + +# lexer.py +import lex + +# Define a list of valid tokens +tokens = ( + 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS' + ) + +# Define tokens as functions +def t_IDENTIFIER(t): + r' [a-zA-Z_](\w|_)* ' + return t + +def t_NUMBER(t): + r' \d+ ' + return t + +# Some simple tokens with no actions +t_PLUS = r'\+' +t_MINUS = r'-' + +# Initialize the lexer +lex.lex() + +The tokens list is required and contains a complete list of all valid +token types that the lexer is allowed to produce. Token types are +restricted to be valid identifiers. This means that 'MINUS' is a valid +token type whereas '-' is not. + +Rules are defined by writing a function with a name of the form +t_rulename. Each rule must accept a single argument which is +a token object generated by the lexer. This token has the following +attributes: + + t.type = type string of the token. This is initially set to the + name of the rule without the leading t_ + t.value = The value of the lexeme. 
+ t.lineno = The value of the line number where the token was encountered + +For example, the t_NUMBER() rule above might be called with the following: + + t.type = 'NUMBER' + t.value = '42' + t.lineno = 3 + +Each rule returns the token object it would like to supply to the +parser. In most cases, the token t is returned with few, if any +modifications. To discard a token for things like whitespace or +comments, simply return nothing. For instance: + +def t_whitespace(t): + r' \s+ ' + pass + +For faster lexing, you can also define this in terms of the ignore set like this: + +t_ignore = ' \t' + +The characters in this string are ignored by the lexer. Use of this feature can speed +up parsing significantly since scanning will immediately proceed to the next token. + +lex requires that the token returned by each rule has an attribute +t.type. Other than this, rules are free to return any kind of token +object that they wish and may construct a new type of token object +from the attributes of t (provided the new object has the required +type attribute). + +If illegal characters are encountered, the scanner executes the +function t_error(t) where t is a token representing the rest of the +string that hasn't been matched. If this function isn't defined, a +LexError exception is raised. The .text attribute of this exception +object contains the part of the string that wasn't matched. + +The t.skip(n) method can be used to skip ahead n characters in the +input stream. This is usually only used in the error handling rule. +For instance, the following rule would print an error message and +continue: + +def t_error(t): + print "Illegal character in input %s" % t.value[0] + t.skip(1) + +Of course, a nice scanner might wish to skip more than one character +if the input looks very corrupted. + +The lex module defines a t.lineno attribute on each token that can be used +to track the current line number in the input. 
The value of this +variable is not modified by lex so it is up to your lexer module +to correctly update its value depending on the lexical properties +of the input language. To do this, you might write rules such as +the following: + +def t_newline(t): + r' \n+ ' + t.lineno += t.value.count("\n") + +To initialize your lexer so that it can be used, simply call the lex.lex() +function in your rule file. If there are any errors in your +specification, warning messages or an exception will be generated to +alert you to the problem. + +(dave: this needs to be rewritten) +To use the newly constructed lexer from another module, simply do +this: + + import lex + import lexer + lex.input("position = initial + rate*60") + + while 1: + token = lex.token() # Get a token + if not token: break # No more tokens + ... do whatever ... + +Assuming that the module 'lexer' has initialized lex as shown +above, parsing modules can safely import 'lex' without having +to import the rule file or any additional information about the +scanner you have defined. 
+""" + +# ----------------------------------------------------------------------------- + + +__version__ = "1.3" + +import re, types, sys, copy + +# Exception thrown when invalid token encountered and no default +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class +class LexToken: + def __str__(self): + return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno) + def __repr__(self): + return str(self) + def skip(self,n): + try: + self._skipn += n + except AttributeError: + self._skipn = n + +# ----------------------------------------------------------------------------- +# Lexer class +# +# input() - Store a new string in the lexer +# token() - Get the next token +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = None # Ignored characters + self.lineno = 1 # Current line number + self.debug = 0 # Debugging mode + self.optimize = 0 # Optimized mode + self.token = self.errtoken + + def __copy__(self): + c = Lexer() + c.lexre = self.lexre + c.lexdata = self.lexdata + c.lexpos = self.lexpos + c.lexlen = self.lexlen + c.lenindexfunc = self.lexindexfunc + c.lexerrorf = self.lexerrorf + c.lextokens = self.lextokens + c.lexignore = self.lexignore + c.lineno = self.lineno + c.optimize = self.optimize + c.token = c.realtoken + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + if not 
isinstance(s,types.StringType): + raise ValueError, "Expected a string" + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + self.token = self.realtoken + + # Change the token routine to point to realtoken() + global token + if token == self.errtoken: + token = self.token + + # ------------------------------------------------------------ + # errtoken() - Return error if token is called with no data + # ------------------------------------------------------------ + def errtoken(self): + raise RuntimeError, "No input string given with input()" + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def realtoken(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + m = self.lexre.match(lexdata,lexpos) + if m: + i = m.lastindex + lexpos = m.end() + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexer = self + func,tok.type = self.lexindexfunc[i] + if not func: + self.lexpos = lexpos + return tok + + # If token is processed by a function, call it + self.lexpos = lexpos + newtok = func(tok) + self.lineno = tok.lineno # Update line number + + # Every function must return a token, if nothing, we just move to next token + if not newtok: continue + + # Verify type of the token. 
If not in the token map, raise an error + if not self.optimize: + if not self.lextokens.has_key(newtok.type): + raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.func_code.co_filename, func.func_code.co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + oldpos = lexpos + newtok = self.lexerrorf(tok) + lexpos += getattr(tok,"_skipn",0) + if oldpos == lexpos: + # Error method didn't change text position at all. This is an error. + self.lexpos = lexpos + raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + if not newtok: continue + self.lexpos = lexpos + return newtok + + self.lexpos = lexpos + raise LexError, ("No match found", lexdata[lexpos:]) + + # No more input data + self.lexpos = lexpos + 1 + return None + + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This checks to see if there are duplicated t_rulename() functions or strings +# in the parser input file. This is done using a simple regular expression +# match on each line in the filename. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea what the file is. 
Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# ----------------------------------------------------------------------------- +# _read_lextab(module) +# +# Reads lexer table from a lextab file instead of using introspection. +# ----------------------------------------------------------------------------- + +def _read_lextab(lexer, fdict, module): + exec "import %s as lextab" % module + lexer.lexre = re.compile(lextab._lexre, re.VERBOSE) + lexer.lexindexfunc = lextab._lextab + for i in range(len(lextab._lextab)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lexer.lexindexfunc[i] = (fdict[t[0]],t[1]) + lexer.lextokens = lextab._lextokens + lexer.lexignore = lextab._lexignore + if lextab._lexerrorf: + lexer.lexerrorf = fdict[lextab._lexerrorf] + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,debug=0,optimize=0,lextab="lextab"): + ldict = None + regex = "" + error = 0 + files = { } + lexer = Lexer() + lexer.debug = debug + lexer.optimize = optimize + global token,input + + if module: + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. 
+ try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + if optimize and lextab: + try: + _read_lextab(lexer,ldict, lextab) + if not lexer.lexignore: lexer.lexignore = "" + token = lexer.token + input = lexer.input + return lexer + + except ImportError: + pass + + # Get the tokens map + tokens = ldict.get("tokens",None) + if not tokens: + raise SyntaxError,"lex: module does not define 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise SyntaxError,"lex: tokens must be a list or tuple." + + # Build a dictionary of valid token names + lexer.lextokens = { } + if not optimize: + + # Utility function for verifying tokens + def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + + for n in tokens: + if not is_identifier(n): + print "lex: Bad token name '%s'" % n + error = 1 + if lexer.lextokens.has_key(n): + print "lex: Warning. Token '%s' multiply defined." 
% n + lexer.lextokens[n] = None + else: + for n in tokens: lexer.lextokens[n] = None + + + if debug: + print "lex: tokens = '%s'" % lexer.lextokens.keys() + + # Get a list of symbols with the t_ prefix + tsymbols = [f for f in ldict.keys() if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + fsymbols = [ ] + ssymbols = [ ] + for f in tsymbols: + if isinstance(ldict[f],types.FunctionType): + fsymbols.append(ldict[f]) + elif isinstance(ldict[f],types.StringType): + ssymbols.append((f,ldict[f])) + else: + print "lex: %s not defined as a function or string" % f + error = 1 + + # Sort the functions by line number + fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Sort the strings by regular expression length + ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) + + # Check for non-empty symbols + if len(fsymbols) == 0 and len(ssymbols) == 0: + raise SyntaxError,"lex: no rules of the form t_rulename are defined." + + # Add all of the rules defined with actions first + for f in fsymbols: + + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + files[file] = None + + if not optimize: + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + error = 1 + continue + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_ignore': + print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_error': + lexer.lexerrorf = f + continue + + if f.__doc__: + if not optimize: + try: + c = re.compile(f.__doc__, re.VERBOSE) + except re.error,e: + print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) + error = 1 + continue + + if debug: + print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__) + + # Okay. 
The regular expression seemed okay. Let's append it to the master regular + # expression we're building + + if (regex): regex += "|" + regex += "(?P<%s>%s)" % (f.__name__,f.__doc__) + else: + print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) + + # Now add all of the simple rules + for name,r in ssymbols: + + if name == 't_ignore': + lexer.lexignore = r + continue + + if not optimize: + if name == 't_error': + raise SyntaxError,"lex: Rule 't_error' must be defined as a function" + error = 1 + continue + + if not lexer.lextokens.has_key(name[2:]): + print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:]) + error = 1 + continue + try: + c = re.compile(r,re.VERBOSE) + except re.error,e: + print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) + error = 1 + continue + if debug: + print "lex: Adding rule %s -> '%s'" % (name,r) + + if regex: regex += "|" + regex += "(?P<%s>%s)" % (name,r) + + if not optimize: + for f in files.keys(): + if not validate_file(f): + error = 1 + try: + if debug: + print "lex: regex = '%s'" % regex + lexer.lexre = re.compile(regex, re.VERBOSE) + + # Build the index to function map for the matching engine + lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1) + for f,i in lexer.lexre.groupindex.items(): + handle = ldict[f] + if isinstance(handle,types.FunctionType): + lexer.lexindexfunc[i] = (handle,handle.__name__[2:]) + else: + # If rule was specified as a string, we build an anonymous + # callback function to carry out the action + lexer.lexindexfunc[i] = (None,f[2:]) + + # If a lextab was specified, we create a file containing the precomputed + # regular expression and index table + + if lextab and optimize: + lt = open(lextab+".py","w") + lt.write("# %s.py. This file automatically created by PLY. 
Don't edit.\n" % lextab) + lt.write("_lexre = %s\n" % repr(regex)) + lt.write("_lextab = [\n"); + for i in range(0,len(lexer.lexindexfunc)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1]))) + else: + lt.write(" (None,%s),\n" % repr(t[1])) + else: + lt.write(" None,\n") + + lt.write("]\n"); + lt.write("_lextokens = %s\n" % repr(lexer.lextokens)) + lt.write("_lexignore = %s\n" % repr(lexer.lexignore)) + if (lexer.lexerrorf): + lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__)) + else: + lt.write("_lexerrorf = None\n") + lt.close() + + except re.error,e: + print "lex: Fatal error. Unable to compile regular expression rules. %s" % e + error = 1 + if error: + raise SyntaxError,"lex: Unable to build lexer." + if not lexer.lexerrorf: + print "lex: Warning. no t_error rule is defined." + + if not lexer.lexignore: lexer.lexignore = "" + + # Create global versions of the token() and input() functions + token = lexer.token + input = lexer.input + + return lexer + +# ----------------------------------------------------------------------------- +# run() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + print "Reading from standard input (type EOF to end):" + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno) + + + + diff --git a/ext/ply/test/README b/ext/ply/test/README new file mode 100644 index 000000000..bca748497 --- /dev/null +++ b/ext/ply/test/README @@ -0,0 +1,9 @@ +This directory mostly contains tests for various types of error +conditions. 
To run: + + $ python testlex.py . + $ python testyacc.py . + +(make sure lex.py and yacc.py exist in this directory before +running the tests). + diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py new file mode 100644 index 000000000..f8eb91a09 --- /dev/null +++ b/ext/ply/test/calclex.py @@ -0,0 +1,46 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + + + diff --git a/ext/ply/test/lex_doc1.exp b/ext/ply/test/lex_doc1.exp new file mode 100644 index 000000000..29381911d --- /dev/null +++ b/ext/ply/test/lex_doc1.exp @@ -0,0 +1 @@ +./lex_doc1.py:15: No regular expression defined for rule 't_NUMBER' diff --git a/ext/ply/test/lex_doc1.py b/ext/ply/test/lex_doc1.py new file mode 100644 index 000000000..fb0fb885e --- /dev/null +++ b/ext/ply/test/lex_doc1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Missing documentation string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + pass + +def t_error(t): + pass + + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup1.exp b/ext/ply/test/lex_dup1.exp new file mode 100644 index 000000000..22bca3190 --- /dev/null +++ b/ext/ply/test/lex_dup1.exp @@ -0,0 +1,2 @@ +./lex_dup1.py:17: 
Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup1.py b/ext/ply/test/lex_dup1.py new file mode 100644 index 000000000..88bbe00e9 --- /dev/null +++ b/ext/ply/test/lex_dup1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup2.exp b/ext/ply/test/lex_dup2.exp new file mode 100644 index 000000000..883bdad46 --- /dev/null +++ b/ext/ply/test/lex_dup2.exp @@ -0,0 +1,2 @@ +./lex_dup2.py:19: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup2.py b/ext/ply/test/lex_dup2.py new file mode 100644 index 000000000..65e0b21a2 --- /dev/null +++ b/ext/ply/test/lex_dup2.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + r'\d+' + pass + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup3.exp b/ext/ply/test/lex_dup3.exp new file mode 100644 index 000000000..916612aa1 --- /dev/null +++ b/ext/ply/test/lex_dup3.exp @@ -0,0 +1,2 @@ +./lex_dup3.py:17: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_dup3.py b/ext/ply/test/lex_dup3.py new file mode 100644 index 000000000..424101823 --- /dev/null +++ b/ext/ply/test/lex_dup3.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_empty.exp b/ext/ply/test/lex_empty.exp new file mode 100644 index 000000000..af38602d5 --- /dev/null +++ b/ext/ply/test/lex_empty.exp @@ -0,0 +1 @@ +SyntaxError: lex: no rules of the form t_rulename are defined. diff --git a/ext/ply/test/lex_empty.py b/ext/ply/test/lex_empty.py new file mode 100644 index 000000000..6472832f1 --- /dev/null +++ b/ext/ply/test/lex_empty.py @@ -0,0 +1,18 @@ +# lex_token.py +# +# No rules defined + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error1.exp b/ext/ply/test/lex_error1.exp new file mode 100644 index 000000000..baa19e5b3 --- /dev/null +++ b/ext/ply/test/lex_error1.exp @@ -0,0 +1 @@ +lex: Warning. no t_error rule is defined. 
diff --git a/ext/ply/test/lex_error1.py b/ext/ply/test/lex_error1.py new file mode 100644 index 000000000..ed7980346 --- /dev/null +++ b/ext/ply/test/lex_error1.py @@ -0,0 +1,22 @@ +# lex_token.py +# +# Missing t_error() rule + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error2.exp b/ext/ply/test/lex_error2.exp new file mode 100644 index 000000000..fb1b55c8b --- /dev/null +++ b/ext/ply/test/lex_error2.exp @@ -0,0 +1 @@ +SyntaxError: lex: Rule 't_error' must be defined as a function diff --git a/ext/ply/test/lex_error2.py b/ext/ply/test/lex_error2.py new file mode 100644 index 000000000..80020f72b --- /dev/null +++ b/ext/ply/test/lex_error2.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# t_error defined, but not function + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_error = "foo" + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error3.exp b/ext/ply/test/lex_error3.exp new file mode 100644 index 000000000..936828f93 --- /dev/null +++ b/ext/ply/test/lex_error3.exp @@ -0,0 +1,2 @@ +./lex_error3.py:17: Rule 't_error' requires an argument. +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_error3.py b/ext/ply/test/lex_error3.py new file mode 100644 index 000000000..46facf589 --- /dev/null +++ b/ext/ply/test/lex_error3.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but with wrong # args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error4.exp b/ext/ply/test/lex_error4.exp new file mode 100644 index 000000000..242516576 --- /dev/null +++ b/ext/ply/test/lex_error4.exp @@ -0,0 +1,2 @@ +./lex_error4.py:17: Rule 't_error' has too many arguments. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_error4.py b/ext/ply/test/lex_error4.py new file mode 100644 index 000000000..d777fee84 --- /dev/null +++ b/ext/ply/test/lex_error4.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but too many args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t,s): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_hedit.exp b/ext/ply/test/lex_hedit.exp new file mode 100644 index 000000000..0b09827c6 --- /dev/null +++ b/ext/ply/test/lex_hedit.exp @@ -0,0 +1,3 @@ +(H_EDIT_DESCRIPTOR,'abc',1) +(H_EDIT_DESCRIPTOR,'abcdefghij',1) +(H_EDIT_DESCRIPTOR,'xy',1) diff --git a/ext/ply/test/lex_hedit.py b/ext/ply/test/lex_hedit.py new file mode 100644 index 000000000..68f9fcbd1 --- /dev/null +++ b/ext/ply/test/lex_hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. 
+# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain(data="3Habc 10Habcdefghij 2Hxy") + + + diff --git a/ext/ply/test/lex_ignore.exp b/ext/ply/test/lex_ignore.exp new file mode 100644 index 000000000..c3b04a154 --- /dev/null +++ b/ext/ply/test/lex_ignore.exp @@ -0,0 +1,2 @@ +./lex_ignore.py:17: Rule 't_ignore' must be defined as a string. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_ignore.py b/ext/ply/test/lex_ignore.py new file mode 100644 index 000000000..49c303f81 --- /dev/null +++ b/ext/ply/test/lex_ignore.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Improperly specific ignore declaration + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_ignore(t): + ' \t' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_re1.exp b/ext/ply/test/lex_re1.exp new file mode 100644 index 000000000..634eefefe --- /dev/null +++ b/ext/ply/test/lex_re1.exp @@ -0,0 +1,2 @@ +lex: Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_re1.py b/ext/ply/test/lex_re1.py new file mode 100644 index 000000000..4a055ad72 --- /dev/null +++ b/ext/ply/test/lex_re1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Bad regular expression in a string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'(\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_rule1.exp b/ext/ply/test/lex_rule1.exp new file mode 100644 index 000000000..0c23ca294 --- /dev/null +++ b/ext/ply/test/lex_rule1.exp @@ -0,0 +1,2 @@ +lex: t_NUMBER not defined as a function or string +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_rule1.py b/ext/ply/test/lex_rule1.py new file mode 100644 index 000000000..ff3764ea1 --- /dev/null +++ b/ext/ply/test/lex_rule1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Rule defined as some other type + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = 1 + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token1.exp b/ext/ply/test/lex_token1.exp new file mode 100644 index 000000000..3792831fa --- /dev/null +++ b/ext/ply/test/lex_token1.exp @@ -0,0 +1 @@ +SyntaxError: lex: module does not define 'tokens' diff --git a/ext/ply/test/lex_token1.py b/ext/ply/test/lex_token1.py new file mode 100644 index 000000000..e8eca2b63 --- /dev/null +++ b/ext/ply/test/lex_token1.py @@ -0,0 +1,19 @@ +# lex_token.py +# +# Tests for absence of tokens variable + +import lex + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token2.exp b/ext/ply/test/lex_token2.exp new file mode 100644 index 000000000..3f98fe51d --- /dev/null +++ b/ext/ply/test/lex_token2.exp @@ -0,0 +1 @@ +SyntaxError: lex: tokens must be a list or tuple. 
diff --git a/ext/ply/test/lex_token2.py b/ext/ply/test/lex_token2.py new file mode 100644 index 000000000..38b34dabe --- /dev/null +++ b/ext/ply/test/lex_token2.py @@ -0,0 +1,21 @@ +# lex_token.py +# +# Tests for tokens of wrong type + +import lex + +tokens = "PLUS MINUS NUMBER" + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token3.exp b/ext/ply/test/lex_token3.exp new file mode 100644 index 000000000..d991d3c37 --- /dev/null +++ b/ext/ply/test/lex_token3.exp @@ -0,0 +1,2 @@ +lex: Rule 't_MINUS' defined for an unspecified token MINUS. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_token3.py b/ext/ply/test/lex_token3.py new file mode 100644 index 000000000..909f9180d --- /dev/null +++ b/ext/ply/test/lex_token3.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# tokens is right type, but is missing a token for one rule + +import lex + +tokens = [ + "PLUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token4.exp b/ext/ply/test/lex_token4.exp new file mode 100644 index 000000000..3dd88e05a --- /dev/null +++ b/ext/ply/test/lex_token4.exp @@ -0,0 +1,2 @@ +lex: Bad token name '-' +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_token4.py b/ext/ply/test/lex_token4.py new file mode 100644 index 000000000..d77d1662c --- /dev/null +++ b/ext/ply/test/lex_token4.py @@ -0,0 +1,26 @@ +# lex_token.py +# +# Bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "-", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token5.exp b/ext/ply/test/lex_token5.exp new file mode 100644 index 000000000..d7bcb2e7c --- /dev/null +++ b/ext/ply/test/lex_token5.exp @@ -0,0 +1 @@ +lex.LexError: ./lex_token5.py:16: Rule 't_NUMBER' returned an unknown token type 'NUM' diff --git a/ext/ply/test/lex_token5.py b/ext/ply/test/lex_token5.py new file mode 100644 index 000000000..d9b0c96aa --- /dev/null +++ b/ext/ply/test/lex_token5.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Return a bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' + +def t_NUMBER(t): + r'\d+' + t.type = "NUM" + return t + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() +lex.input("1234") +t = lex.token() + + diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py new file mode 100755 index 000000000..df000b83d --- /dev/null +++ b/ext/ply/test/testlex.py @@ -0,0 +1,57 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testlex.py +# +# Run tests for the lexing module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testlex.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testlex.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"lex_*.py")) + +print "**** Running tests for lex ****" + +for t 
in f: + name = t[:-3] + print "Testing %-32s" % name, + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py new file mode 100644 index 000000000..a185cbb29 --- /dev/null +++ b/ext/ply/test/testyacc.py @@ -0,0 +1,58 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testyacc.py +# +# Run tests for the yacc module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testyacc.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testyacc.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"yacc_*.py")) + +print "**** Running tests for yacc ****" + +for t in f: + name = t[:-3] + print "Testing %-32s" % name, + os.system("rm -f %s/parsetab.*" % dirname) + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/yacc_badargs.exp b/ext/ply/test/yacc_badargs.exp new file mode 100644 index 000000000..b145c51f2 --- /dev/null +++ b/ext/ply/test/yacc_badargs.exp @@ -0,0 +1,3 @@ +./yacc_badargs.py:21: Rule 'p_statement_assign' has too many arguments. 
+./yacc_badargs.py:25: Rule 'p_statement_expr' requires an argument. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badargs.py b/ext/ply/test/yacc_badargs.py new file mode 100644 index 000000000..12075efcc --- /dev/null +++ b/ext/ply/test/yacc_badargs.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badargs.py +# +# Rules with wrong # args +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t,s): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec.exp b/ext/ply/test/yacc_badprec.exp new file mode 100644 index 000000000..7764b0246 --- /dev/null +++ b/ext/ply/test/yacc_badprec.exp @@ -0,0 +1 @@ +yacc.YaccError: precedence must be a list or tuple. 
diff --git a/ext/ply/test/yacc_badprec.py b/ext/ply/test/yacc_badprec.py new file mode 100644 index 000000000..55bf7720d --- /dev/null +++ b/ext/ply/test/yacc_badprec.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec.py +# +# Bad precedence specifier +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = "blah" + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec2.exp b/ext/ply/test/yacc_badprec2.exp new file mode 100644 index 000000000..1df1427b2 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.exp @@ -0,0 +1,3 @@ +yacc: Invalid precedence table. +yacc: Generating SLR parsing table... 
+yacc: 4 shift/reduce conflicts diff --git a/ext/ply/test/yacc_badprec2.py b/ext/ply/test/yacc_badprec2.py new file mode 100644 index 000000000..9cbc99827 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec2.py +# +# Bad precedence +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + 42, + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badrule.exp b/ext/ply/test/yacc_badrule.exp new file mode 100644 index 000000000..553779778 --- /dev/null +++ b/ext/ply/test/yacc_badrule.exp @@ -0,0 +1,5 @@ +./yacc_badrule.py:22: Syntax error. Expected ':' +./yacc_badrule.py:26: Syntax error in rule 'statement' +./yacc_badrule.py:31: Syntax error. Expected ':' +./yacc_badrule.py:40: Syntax error. 
Expected ':' +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badrule.py b/ext/ply/test/yacc_badrule.py new file mode 100644 index 000000000..cad3a967e --- /dev/null +++ b/ext/ply/test/yacc_badrule.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badrule.py +# +# Syntax problems in the rule strings +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression: MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badtok.exp b/ext/ply/test/yacc_badtok.exp new file mode 100644 index 000000000..f6e64726c --- /dev/null +++ b/ext/ply/test/yacc_badtok.exp @@ -0,0 +1 @@ +yacc.YaccError: tokens must be a list or tuple. 
diff --git a/ext/ply/test/yacc_badtok.py b/ext/ply/test/yacc_badtok.py new file mode 100644 index 000000000..a17d26aaa --- /dev/null +++ b/ext/ply/test/yacc_badtok.py @@ -0,0 +1,68 @@ +# ----------------------------------------------------------------------------- +# yacc_badtok.py +# +# A grammar, but tokens is a bad datatype +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +tokens = "Hello" + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_dup.exp b/ext/ply/test/yacc_dup.exp new file mode 100644 index 000000000..99f3fe22c --- /dev/null +++ b/ext/ply/test/yacc_dup.exp @@ -0,0 +1,4 @@ +./yacc_dup.py:25: Function p_statement redefined. Previously defined on line 21 +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_dup.py b/ext/ply/test/yacc_dup.py new file mode 100644 index 000000000..557cd0ae1 --- /dev/null +++ b/ext/ply/test/yacc_dup.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_dup.py +# +# Duplicated rule name +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error1.exp b/ext/ply/test/yacc_error1.exp new file mode 100644 index 000000000..980fc905c --- /dev/null +++ b/ext/ply/test/yacc_error1.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error1.py:59: p_error() requires 1 argument. 
diff --git a/ext/ply/test/yacc_error1.py b/ext/ply/test/yacc_error1.py new file mode 100644 index 000000000..413004520 --- /dev/null +++ b/ext/ply/test/yacc_error1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t,s): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error2.exp b/ext/ply/test/yacc_error2.exp new file mode 100644 index 000000000..d0573b4dd --- /dev/null +++ b/ext/ply/test/yacc_error2.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error2.py:59: p_error() requires 1 argument. 
diff --git a/ext/ply/test/yacc_error2.py b/ext/ply/test/yacc_error2.py new file mode 100644 index 000000000..d4fd1d219 --- /dev/null +++ b/ext/ply/test/yacc_error2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error3.exp b/ext/ply/test/yacc_error3.exp new file mode 100644 index 000000000..31eaee754 --- /dev/null +++ b/ext/ply/test/yacc_error3.exp @@ -0,0 +1 @@ +yacc.YaccError: 'p_error' defined, but is not a function. 
diff --git a/ext/ply/test/yacc_error3.py b/ext/ply/test/yacc_error3.py new file mode 100644 index 000000000..7093fab48 --- /dev/null +++ b/ext/ply/test/yacc_error3.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +p_error = "blah" + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_inf.exp b/ext/ply/test/yacc_inf.exp new file mode 100644 index 000000000..a7f47dada --- /dev/null +++ b/ext/ply/test/yacc_inf.exp @@ -0,0 +1,5 @@ +yacc: Warning. Token 'NUMBER' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Infinite recursion detected for symbol 'statement'. +yacc: Infinite recursion detected for symbol 'expression'. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_inf.py b/ext/ply/test/yacc_inf.py new file mode 100644 index 000000000..885e2c4df --- /dev/null +++ b/ext/ply/test/yacc_inf.py @@ -0,0 +1,55 @@ +# ----------------------------------------------------------------------------- +# yacc_inf.py +# +# Infinite recursion +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_missing1.exp b/ext/ply/test/yacc_missing1.exp new file mode 100644 index 000000000..065d6a54a --- /dev/null +++ b/ext/ply/test/yacc_missing1.exp @@ -0,0 +1,2 @@ +./yacc_missing1.py:22: Symbol 'location' used, but not defined as a token or a rule. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_missing1.py b/ext/ply/test/yacc_missing1.py new file mode 100644 index 000000000..e63904d0e --- /dev/null +++ b/ext/ply/test/yacc_missing1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_missing1.py +# +# Grammar with a missing rule +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : location EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nodoc.exp b/ext/ply/test/yacc_nodoc.exp new file mode 100644 index 000000000..3f52a3287 --- /dev/null +++ b/ext/ply/test/yacc_nodoc.exp @@ -0,0 +1,2 @@ +./yacc_nodoc.py:25: No documentation string specified in function 'p_statement_expr' +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_nodoc.py b/ext/ply/test/yacc_nodoc.py new file mode 100644 index 000000000..e3941bdaa --- /dev/null +++ b/ext/ply/test/yacc_nodoc.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_nodoc.py +# +# Rule with a missing doc-string +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_noerror.exp b/ext/ply/test/yacc_noerror.exp new file mode 100644 index 000000000..986fa31fa --- /dev/null +++ b/ext/ply/test/yacc_noerror.exp @@ -0,0 +1,2 @@ +yacc: Warning. no p_error() function is defined. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_noerror.py b/ext/ply/test/yacc_noerror.py new file mode 100644 index 000000000..d92f48ea6 --- /dev/null +++ b/ext/ply/test/yacc_noerror.py @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# yacc_noerror.py +# +# No p_error() rule defined. +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nop.exp b/ext/ply/test/yacc_nop.exp new file mode 100644 index 000000000..062878b9e --- /dev/null +++ b/ext/ply/test/yacc_nop.exp @@ -0,0 +1,2 @@ +./yacc_nop.py:25: Warning. Possible grammar rule 'statement_expr' defined without p_ prefix. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_nop.py b/ext/ply/test/yacc_nop.py new file mode 100644 index 000000000..c599ffd5d --- /dev/null +++ b/ext/ply/test/yacc_nop.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_nop.py +# +# Possible grammar rule defined without p_ prefix +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notfunc.exp b/ext/ply/test/yacc_notfunc.exp new file mode 100644 index 000000000..271167341 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.exp @@ -0,0 +1,4 @@ +yacc: Warning. 'p_statement_assign' not defined as a function +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_notfunc.py b/ext/ply/test/yacc_notfunc.py new file mode 100644 index 000000000..f61663d60 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_notfunc.py +# +# p_rule not defined as a function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +p_statement_assign = "Blah" + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notok.exp b/ext/ply/test/yacc_notok.exp new file mode 100644 index 000000000..708f6f597 --- /dev/null +++ b/ext/ply/test/yacc_notok.exp @@ -0,0 +1 @@ +yacc.YaccError: module does not define a list 'tokens' diff --git a/ext/ply/test/yacc_notok.py b/ext/ply/test/yacc_notok.py new file mode 100644 index 000000000..dfa0059be --- /dev/null +++ b/ext/ply/test/yacc_notok.py @@ -0,0 +1,66 @@ +# 
----------------------------------------------------------------------------- +# yacc_notok.py +# +# A grammar, but we forgot to import the tokens list +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_rr.exp b/ext/ply/test/yacc_rr.exp new file mode 100644 index 000000000..0ec556d16 --- /dev/null +++ b/ext/ply/test/yacc_rr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... 
+yacc: 1 reduce/reduce conflict diff --git a/ext/ply/test/yacc_rr.py b/ext/ply/test/yacc_rr.py new file mode 100644 index 000000000..c061c2c17 --- /dev/null +++ b/ext/ply/test/yacc_rr.py @@ -0,0 +1,71 @@ +# ----------------------------------------------------------------------------- +# yacc_rr.py +# +# A grammar with a reduce/reduce conflict +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_2(t): + 'statement : NAME EQUALS NUMBER' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_simple.exp b/ext/ply/test/yacc_simple.exp new file mode 100644 index 000000000..de7964b6f --- /dev/null +++ b/ext/ply/test/yacc_simple.exp @@ -0,0 +1 @@ +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_simple.py b/ext/ply/test/yacc_simple.py new file mode 100644 index 000000000..7b4b40b17 --- /dev/null +++ b/ext/ply/test/yacc_simple.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_sr.exp b/ext/ply/test/yacc_sr.exp new file mode 100644 index 000000000..7225ad94b --- /dev/null +++ b/ext/ply/test/yacc_sr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... 
# +yacc: 20 shift/reduce conflicts
# diff --git a/ext/ply/test/yacc_sr.py b/ext/ply/test/yacc_sr.py
# new file mode 100644
# index 000000000..4341f6997
# --- /dev/null
# +++ b/ext/ply/test/yacc_sr.py
# @@ -0,0 +1,62 @@
# -----------------------------------------------------------------------------
# yacc_sr.py
#
# A grammar with shift-reduce conflicts
# -----------------------------------------------------------------------------
import sys
sys.tracebacklimit = 0

from calclex import tokens

# Parsing rules  (intentional: no precedence table, so the binary operators conflict)

# dictionary of names
names = { }

def p_statement_assign(t):
    'statement : NAME EQUALS expression'
    names[t[1]] = t[3]

def p_statement_expr(t):
    'statement : expression'
    print(t[1])

def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]   # operator is t[2]; t[3] is the right operand

def p_expression_uminus(t):
    'expression : MINUS expression'
    t[0] = -t[2]

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    t[0] = t[2]

def p_expression_number(t):
    'expression : NUMBER'
    t[0] = t[1]

def p_expression_name(t):
    'expression : NAME'
    try:
        t[0] = names[t[1]]
    except LookupError:
        print("Undefined name '%s'" % t[1])
        t[0] = 0

def p_error(t):
    print("Syntax error at '%s'" % t.value)

import yacc
yacc.yacc()




# diff --git a/ext/ply/test/yacc_term1.exp b/ext/ply/test/yacc_term1.exp
# new file mode 100644
# index 000000000..422d2bacd
# --- /dev/null
# +++ b/ext/ply/test/yacc_term1.exp
# @@ -0,0 +1,2 @@
# +./yacc_term1.py:22: Illegal rule name 'NUMBER'. Already defined as a token.
# +yacc.YaccError: Unable to construct parser.
# diff --git a/ext/ply/test/yacc_term1.py b/ext/ply/test/yacc_term1.py
# new file mode 100644
# index 000000000..97a2e7a60
# --- /dev/null
# +++ b/ext/ply/test/yacc_term1.py
# @@ -0,0 +1,67 @@
# -----------------------------------------------------------------------------
# yacc_term1.py
#
# Terminal used on the left-hand-side
# -----------------------------------------------------------------------------
import sys
sys.tracebacklimit = 0

from calclex import tokens

# Parsing rules
precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','UMINUS'),
    )

# dictionary of names
names = { }

def p_statement_assign(t):   # intentional: the token NUMBER is used as a rule name
    'NUMBER : NAME EQUALS expression'
    names[t[1]] = t[3]

def p_statement_expr(t):
    'statement : expression'
    print(t[1])

def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]   # operator is t[2]; t[3] is the right operand

def p_expression_uminus(t):
    'expression : MINUS expression %prec UMINUS'
    t[0] = -t[2]

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    t[0] = t[2]

def p_expression_number(t):
    'expression : NUMBER'
    t[0] = t[1]

def p_expression_name(t):
    'expression : NAME'
    try:
        t[0] = names[t[1]]
    except LookupError:
        print("Undefined name '%s'" % t[1])
        t[0] = 0

def p_error(t):
    print("Syntax error at '%s'" % t.value)

import yacc
yacc.yacc()




# diff --git a/ext/ply/test/yacc_unused.exp b/ext/ply/test/yacc_unused.exp
# new file mode 100644
# index 000000000..390754de3
# --- /dev/null
# +++ b/ext/ply/test/yacc_unused.exp
# @@ -0,0 +1,4 @@
# +./yacc_unused.py:60: Symbol 'COMMA' used, but not defined as a token or a rule.
# +yacc: Symbol 'COMMA' is unreachable.
# +yacc: Symbol 'exprlist' is unreachable.
# +yacc.YaccError: Unable to construct parser.
# diff --git a/ext/ply/test/yacc_unused.py b/ext/ply/test/yacc_unused.py
# new file mode 100644
# index 000000000..4cbd63327
# --- /dev/null
# +++ b/ext/ply/test/yacc_unused.py
# @@ -0,0 +1,76 @@
# -----------------------------------------------------------------------------
# yacc_unused.py
#
# A grammar with an unused rule
# -----------------------------------------------------------------------------
import sys
sys.tracebacklimit = 0

from calclex import tokens

# Parsing rules
precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','UMINUS'),
    )

# dictionary of names
names = { }

def p_statement_assign(t):
    'statement : NAME EQUALS expression'
    names[t[1]] = t[3]

def p_statement_expr(t):
    'statement : expression'
    print(t[1])

def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]   # operator is t[2]; t[3] is the right operand

def p_expression_uminus(t):
    'expression : MINUS expression %prec UMINUS'
    t[0] = -t[2]

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    t[0] = t[2]

def p_expression_number(t):
    'expression : NUMBER'
    t[0] = t[1]

def p_expression_name(t):
    'expression : NAME'
    try:
        t[0] = names[t[1]]
    except LookupError:
        print("Undefined name '%s'" % t[1])
        t[0] = 0

def p_expr_list(t):   # intentional: exprlist is never used and COMMA is not a token
    'exprlist : exprlist COMMA expression'
    pass

def p_expr_list_2(t):
    'exprlist : expression'
    pass


def p_error(t):
    print("Syntax error at '%s'" % t.value)

import yacc
yacc.yacc()




# diff --git a/ext/ply/test/yacc_uprec.exp b/ext/ply/test/yacc_uprec.exp
# new file mode 100644
# index 000000000..b1a71a250
# --- /dev/null
# +++ b/ext/ply/test/yacc_uprec.exp
# @@ -0,0 +1,2 @@
# +./yacc_uprec.py:35: Nothing known about the precedence of 'UMINUS'
# +yacc.YaccError: Unable to construct parser.
# diff --git a/ext/ply/test/yacc_uprec.py b/ext/ply/test/yacc_uprec.py
# new file mode 100644
# index 000000000..139ce6318
# --- /dev/null
# +++ b/ext/ply/test/yacc_uprec.py
# @@ -0,0 +1,62 @@
# -----------------------------------------------------------------------------
# yacc_uprec.py
#
# A grammar with a bad %prec specifier
# -----------------------------------------------------------------------------
import sys
sys.tracebacklimit = 0

from calclex import tokens

# Parsing rules  (intentional: no precedence table, so %prec UMINUS below is unknown)

# dictionary of names
names = { }

def p_statement_assign(t):
    'statement : NAME EQUALS expression'
    names[t[1]] = t[3]

def p_statement_expr(t):
    'statement : expression'
    print(t[1])

def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]   # operator is t[2]; t[3] is the right operand

def p_expression_uminus(t):
    'expression : MINUS expression %prec UMINUS'
    t[0] = -t[2]

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    t[0] = t[2]

def p_expression_number(t):
    'expression : NUMBER'
    t[0] = t[1]

def p_expression_name(t):
    'expression : NAME'
    try:
        t[0] = names[t[1]]
    except LookupError:
        print("Undefined name '%s'" % t[1])
        t[0] = 0

def p_error(t):
    print("Syntax error at '%s'" % t.value)

import yacc
yacc.yacc()




# diff --git a/ext/ply/yacc.py b/ext/ply/yacc.py
# new file mode 100644
# index 000000000..1041745ed
# --- /dev/null
# +++ b/ext/ply/yacc.py
# @@ -0,0 +1,1846 @@
#-----------------------------------------------------------------------------
# ply: yacc.py
#
# Author: David M. Beazley (beazley@cs.uchicago.edu)
#         Department of Computer Science
#         University of Chicago
#         Chicago, IL  60637
#
# Copyright (C) 2001, David M.
Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. Roughly speaking, this module is a cross between +# John Aycock's Spark system and the GNU bison utility. +# +# Disclaimer: This is a work in progress. SLR parsing seems to work fairly +# well and there is extensive error checking. LALR(1) is in progress. The +# rest of this file is a bit of a mess. Please pardon the dust. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). 
#-----------------------------------------------------------------------------

__version__ = "1.3"

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'SLR'            # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

import re, types, sys, cStringIO, md5, os.path

# Exception raised for yacc-related errors
class YaccError(Exception):   pass

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)

class YaccSymbol:
    # Both conversions show only the grammar symbol type, which is what the
    # parser's debug trace prints for each stack entry.
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).
# The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.

class YaccSlice:
    """Sequence-like view over the symbols of the production being reduced.

    Indexing reads and writes the .value attribute of the wrapped symbols;
    index 0 is the result slot of the rule.
    """

    def __init__(self,s):
        self.slice = s
        self.pbstack = []          # Symbols the rule asked to push back

    def __getitem__(self,n):
        return self.slice[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        """Return the line number of item n, or 0 if it has none."""
        return getattr(self.slice[n],"lineno",0)

    def linespan(self,n):
        """Return (startline,endline) for item n; endline defaults to startline."""
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def pushback(self,n):
        """Queue the last n right-hand-side symbols to be re-read as lookahead.

        Raises ValueError if n is not positive or exceeds the number of
        right-hand-side symbols (slot 0 is the result and cannot be pushed).
        """
        if n <= 0:
            raise ValueError("Expected a positive value")
        if n > (len(self.slice)-1):
            raise ValueError("Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1))
        # Rightmost symbol first, so popping the lookahead stack restores input order.
        for i in range(0,n):
            self.pbstack.append(self.slice[-i-1])

# The LR Parsing engine.   This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.

class Parser:
    def __init__(self,magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.

        if magic != "xyzzy":
            raise YaccError("Can't instantiate Parser. Use yacc() instead.")

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc   = None          # Error handling function
        self.action      = { }           # LR Action table
        self.goto        = { }           # LR goto table
        self.require     = { }           # Attribute require table
        self.method      = "Unknown LR"  # Table construction method used

    def errok(self):
        """Leave error-recovery mode (resets the recovery counter)."""
        self.errorcount     = 0

    def restart(self):
        """Discard both parse stacks and return to the start state (0,$)."""
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0):
        """Run the LR parse loop.

        input  -- text handed to lexer.input(); skipped only when None
        lexer  -- object providing input() and token(); defaults to the lex module
        debug  -- if true, print the lookahead and symbol stack on every step

        Returns the .value of the top symbol when the accept action is
        reached, or None when parsing is abandoned at end of input.
        """
        # Module-level helpers published for the duration of a p_error() call.
        global errok,token,restart

        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto    = self.goto              # Local reference to goto table
        prod    = self.productions       # Local reference to production list
        pslice  = YaccSlice(None)        # Slice object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            import lex as lexer

        pslice.lexer = lexer

        # If input was supplied, pass to lexer.  Compare against None so that
        # an empty string still replaces whatever the lexer held before.
        if input is not None:
            lexer.input(input)

        # Tokenize function
        get_token = lexer.token

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        errtoken   = None               # Err token

        # The start state is assumed to be (0,$)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$'
        symstack.append(sym)

        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$'
            if debug:
                print("%-20s : %s" % (lookahead, [xx.type for xx in symstack]))

            # Check the action table
            s = statestack[-1]
            ltype = lookahead.type
            t = actions.get((s,ltype),None)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    if ltype == '$':
                        # Error, end of input
                        print("yacc: Parse error. EOF")
                        return
                    statestack.append(t)
                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if self.errorcount > 0:
                        self.errorcount -= 1

                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        try:
                            sym.lineno = targ[1].lineno
                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
                        except AttributeError:
                            sym.lineno = 0
                        del symstack[-plen:]
                        del statestack[-plen:]
                    else:
                        sym.lineno = 0
                        targ = [ sym ]
                    pslice.slice = targ
                    pslice.pbstack = []
                    # Call the grammar rule with our special slice object
                    p.func(pslice)

                    # If there was a pushback, put that on the stack
                    if pslice.pbstack:
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)
                        lookahead = None

                    symstack.append(sym)
                    statestack.append(goto[statestack[-1],pname])
                    continue

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:
                # We have some kind of parsing error here.  To handle this,
                # we are going to push the current token onto the tokenstack
                # and replace it with an 'error' token.  If there are any synchronization
                # rules, they may catch it.
                #
                # In addition to pushing the error token, we call the user defined p_error()
                # function if this is the first syntax error.   This function is only called
                # if errorcount == 0.

                if not self.errorcount:
                    self.errorcount = error_count
                    errtoken = lookahead
                    if errtoken.type == '$':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        try:
                            tok = self.errorfunc(errtoken)
                        finally:
                            # Remove the helpers even if the handler raises
                            del errok, token, restart

                        if not self.errorcount:
                            # User must have done some kind of panic mode recovery on their own.  The returned token
                            # is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # errtoken is the same object as lookahead here
                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
                            else: lineno = 0
                            if lineno:
                                print("yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type))
                            else:
                                print("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            print("yacc: Parse error in input. EOF")
                            return

                else:
                    self.errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$':
                    lookahead = None
                    errtoken = None
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Parser Construction ===
#
# The following functions and variables are used to implement the yacc() function
# itself.   This is pretty hairy stuff involving lots of error checking,
# construction of LR items, kernels, and so forth.   Although a lot of
# this work is done using global variables, the resulting Parser object
# is completely self contained--meaning that it is safe to repeatedly
# call yacc() with different grammars in the same application.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# validate_file()
#
# This function checks to see if there are duplicated p_rulename() functions
# in the parser module file.  Without this function, it is really easy for
# users to make mistakes by cutting and pasting code fragments (and it's a real
# bugger to try and figure out why the resulting parser doesn't work).
# Therefore, we just do a little regular expression pattern matching of def statements
# to try and detect duplicates.
# -----------------------------------------------------------------------------

def validate_file(filename):
    """Report p_* functions that are defined more than once in filename.

    Returns 1 when no duplicate was found, or when the file cannot be checked
    (not a '.py' file, or it cannot be read); returns 0 after printing one
    message per redefinition.
    """
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea. Assume it's okay.

    try:
        f = open(filename)
        try:
            lines = f.readlines()
        finally:
            f.close()                  # Close even if the read fails
    except IOError:
        return 1                       # Oh well

    # Match def p_funcname(
    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    counthash = { }                    # Function name -> line of first definition
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print("%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev))
                noerror = 0
        linen += 1
    return noerror

# This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
def validate_dict(d):
    """Print warnings about suspicious entries of the module dictionary d.

    Warns about names starting with 'p_' that are not functions, and about
    one-argument functions whose docstring looks like a grammar rule (second
    space-separated word is ':') but whose name lacks the p_ prefix.
    """
    for n,v in d.items():
        if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
        if n[0:2] == 't_': continue

        if n[0:2] == 'p_':
            print("yacc: Warning. '%s' not defined as a function" % n)
        if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
            try:
                doc = v.__doc__.split(" ")
                if doc[1] == ':':
                    print("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n))
            except (AttributeError, IndexError):
                # No docstring at all, or a docstring with a single word
                pass

# -----------------------------------------------------------------------------
#                           === GRAMMAR FUNCTIONS ===
#
# The following global variables and functions are used to store, manipulate,
# and verify the grammar rules specified by the user.
# -----------------------------------------------------------------------------

# Initialize all of the global variables used during grammar construction
def initialize_vars():
    """Reset every module-level table used while building a grammar."""
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    Productions  = [None]  # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar

    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap      = { }     # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First        = { }     # A dictionary of precomputed FIRST(x) symbols

    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level)

    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc    = None    # User defined error handler

    Signature    = md5.new()   # Digital signature of the grammar rules, precedence
                               # and other information.  Used to determine when a
                               # parsing table needs to be regenerated.

    Requires     = { }     # Requires list

    # File objects used when creating the parser.out debugging file
    global _vf, _vfc
    _vf           = cStringIO.StringIO()
    _vfc          = cStringIO.StringIO()

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# It has a few required attributes:
#
#       name     - Name of the production (nonterminal)
#       prod     - A list of symbols making up its production
#       number   - Production number.
#
# In addition, a few additional attributes are used to help with debugging or
# optimization of table generation.
#
#       file     - File where production action is defined.
#       line     - Line number where action is defined
#       func     - Action function
#       prec     - Precedence level
#       lr_next  - Next LR item. Example, if we are ' E -> E . PLUS E'
#                  then lr_next refers to 'E -> E PLUS . E'
#       lr_index - LR item index (location of the ".") in the prod list.
#       len      - Length of the production (number of symbols on right hand side)
# -----------------------------------------------------------------------------

class Production:
    def __init__(self,**kw):
        # Every keyword argument becomes an attribute of the instance
        for k,v in kw.items():
            setattr(self,k,v)
        self.lr_index = -1
        self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
        self.usyms = [ ]

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return str(self)

    # Compute lr_items from the production
    def lr_item(self,n):
        """Return the LR item with the dot at position n, or None if n is past the end."""
        if n > len(self.prod): return None
        p = Production()
        p.name = self.name
        p.prod = list(self.prod)
        p.number = self.number
        p.lr_index = n
        p.prod.insert(n,".")
        p.prod = tuple(p.prod)
        p.len = len(p.prod)       # Counts the inserted "." as well
        p.usyms = self.usyms      # Shared with the parent production, not copied

        # Precompute list of productions immediately following
        try:
            p.lrafter = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            # Dot is at the end, or the next symbol has no productions
            p.lrafter = []
        try:
            # NOTE(review): for n == 0 the index -1 wraps around to the last
            # symbol instead of raising IndexError -- confirm this is intended.
            p.lrbefore = p.prod[n-1]
        except IndexError:
            p.lrbefore = None

        return p

class MiniProduction:
    pass

# Utility function
def is_identifier(s):
    """Return 1 if every character of s is alphanumeric or '_', else 0.

    An empty string yields 1 because there is no offending character.
    """
    for c in s:
        if not (c.isalnum() or c == '_'): return 0
    return 1

# -----------------------------------------------------------------------------
# add_production()
#
# Given an action function,
# this function assembles a production rule.
# The production rule is assumed to be found in the function's docstring.
# This rule has the general syntax:
#
#              name1 ::= production1
#                     |  production2
#                     |  production3
#                    ...
#                     |  productionn
#              name2 ::= production1
#                     |  production2
#                    ...
# -----------------------------------------------------------------------------

def add_production(f,file,line,prodname,syms):
    """Register the rule 'prodname -> syms' with action function f.

    file and line locate the rule for diagnostics.  syms is modified in
    place when it contains a %prec specifier.  Returns 0 on success and -1
    after printing a message when the rule is rejected.
    """

    if prodname in Terminals:
        print("%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname))
        return -1
    if prodname == 'error':
        print("%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname))
        return -1

    if not is_identifier(prodname):
        print("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
        return -1

    for s in syms:
        if not is_identifier(s) and s != '%prec':
            print("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
            return -1

    # See if the rule is already in the rulemap
    rulekey = "%s -> %s" % (prodname,syms)
    if rulekey in Prodmap:
        m = Prodmap[rulekey]
        print("%s:%d: Duplicate rule %s." % (file,line, m))
        print("%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line))
        return -1

    p = Production()
    p.name = prodname
    p.prod = syms
    p.file = file
    p.line = line
    p.func = f
    p.number = len(Productions)


    Productions.append(p)
    Prodmap[rulekey] = p
    if prodname not in Nonterminals:
        Nonterminals[prodname] = [ ]

    # Add all terminals to Terminals
    i = 0
    while i < len(p.prod):
        t = p.prod[i]
        if t == '%prec':
            try:
                precname = p.prod[i+1]
            except IndexError:
                print("%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line))
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                print("%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname))
                return -1
            else:
                p.prec = prec
            # Drop both '%prec' and the name after it; i is not advanced
            del p.prod[i]
            del p.prod[i]
            continue

        if t in Terminals:
            Terminals[t].append(p.number)
            # Is a terminal.  We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
        else:
            if t not in Nonterminals:
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
        i += 1

    if not hasattr(p,"prec"):
        p.prec = ('right',0)

    # Set final length of productions
    p.len  = len(p.prod)
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production
    p.usyms = [ ]
    for s in p.prod:
        if s not in p.usyms:
            p.usyms.append(s)

    # Add to the global productions list
    try:
        Prodnames[p.name].append(p)
    except KeyError:
        Prodnames[p.name] = [ p ]
    return 0

# Given a raw rule function, this function rips out its doc string
# and adds rules to the grammar

def add_function(f):
    """Parse the docstring of rule function f and add each rule it declares.

    Returns 0 when every rule was added, otherwise a negative value.
    """
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename
    error = 0

    if f.func_code.co_argcount > 1:
        print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__))
        return -1

    if f.func_code.co_argcount < 1:
        print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__))
        return -1

    if f.__doc__:
        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()
        lastp = None
        dline = line
        for ps in pstrings:
            dline += 1
            p = ps.split()
            if not p: continue
            try:
                if p[0] == '|':
                    # This is a continuation of a previous rule
                    if not lastp:
                        print("%s:%d: Misplaced '|'." % (file,dline))
                        return -1
                    prodname = lastp
                    syms = p[1:]           # Empty list when nothing follows '|'
                else:
                    prodname = p[0]
                    lastp = prodname
                    assign = p[1]          # IndexError here is reported below
                    syms = p[2:]
                    if assign != ':' and assign != '::=':
                        print("%s:%d: Syntax error. Expected ':'" % (file,dline))
                        return -1
                e = add_production(f,file,dline,prodname,syms)
                error += e
            except StandardError:
                print("%s:%d: Syntax error in rule '%s'" % (file,dline,ps))
                error -= 1
    else:
        print("%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__))
    return error


# Cycle checking code (Michael Dyck)

def compute_reachable():
    '''
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    '''
    Reachable = { }
    for s in Terminals:
        Reachable[s] = 0
    for s in Nonterminals:
        Reachable[s] = 0

    mark_reachable_from( Productions[0].prod[0], Reachable )

    for s in Nonterminals:
        if not Reachable[s]:
            print("yacc: Symbol '%s' is unreachable." % s)

def mark_reachable_from(s, Reachable):
    '''
    Mark all symbols that are reachable from symbol s.
    '''
    # Explicit work list instead of recursion, so the depth of the grammar
    # is not limited by the interpreter's recursion limit.
    pending = [ s ]
    while pending:
        sym = pending.pop()
        if Reachable[sym]:
            # We've already reached this symbol.
            continue
        Reachable[sym] = 1
        for p in Prodnames.get(sym,[]):
            pending.extend(p.prod)

# -----------------------------------------------------------------------------
# compute_terminates()
#
# This function looks at the various parsing rules and tries to detect
# infinite recursion cycles (grammar rules where there is no possible way
# to derive a string of only terminals).
# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Print a message for every symbol that cannot derive a string of only
    terminals.  Returns 1 if any such symbol was reported, otherwise 0.
    '''
    Terminates = {}

    # Terminals:
    for t in Terminals:
        Terminates[t] = 1

    Terminates['$'] = 1

    # Nonterminals:

    # Initialize to false:
    for n in Nonterminals:
        Terminates[n] = 0

    # Then propagate termination until no change:
    while 1:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
            for p in pl:
                # Production p terminates iff all of its rhs symbols terminate.
                for s in p.prod:
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.
                        p_terminates = 0
                        break
                else:
                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.
                    p_terminates = 1

                if p_terminates:
                    # symbol n terminates!
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # Don't need to consider any more productions for this n.
                    break

        if not some_change:
            break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if not terminates:
            if s not in Prodnames and s not in Terminals and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                pass
            else:
                print("yacc: Infinite recursion detected for symbol '%s'." % s)
                some_error = 1

    return some_error

# -----------------------------------------------------------------------------
# verify_productions()
#
# This function examines all of the supplied rules to see if they seem valid.
# -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    """Check the collected grammar and print diagnostics.

    Reports undefined symbols, unused tokens and unused rules, writes the
    grammar summary to the debug buffer when yaccdebug is set, and runs the
    reachability and termination checks when cycle_check is true.
    Returns a non-zero value if an error was found.
    """
    error = 0
    for p in Productions:
        if not p: continue

        for s in p.prod:
            if s not in Prodnames and s not in Terminals and s != 'error':
                print("%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s))
                error = 1

    unused_tok = 0
    # Now verify all of the tokens
    if yaccdebug:
        _vf.write("Unused terminals:\n\n")
    for s,v in Terminals.items():
        if s != 'error' and not v:
            print("yacc: Warning. Token '%s' defined, but not used." % s)
            # NOTE(review): width of the leading indent inside this literal -- confirm against upstream
            if yaccdebug: _vf.write(" %s\n"% s)
            unused_tok += 1

    # Print out all of the productions
    if yaccdebug:
        _vf.write("\nGrammar\n\n")
        for i in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))

    unused_prod = 0
    # Verify the use of all productions
    for s,v in Nonterminals.items():
        if not v:
            p = Prodnames[s][0]
            print("%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s))
            unused_prod += 1


    if unused_tok == 1:
        print("yacc: Warning. There is 1 unused token.")
    if unused_tok > 1:
        print("yacc: Warning. There are %d unused tokens." % unused_tok)

    if unused_prod == 1:
        print("yacc: Warning. There is 1 unused rule.")
    if unused_prod > 1:
        print("yacc: Warning. There are %d unused rules." % unused_prod)

    if yaccdebug:
        _vf.write("\nTerminals, with rules where they appear\n\n")
        ks = list(Terminals.keys())
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
        _vf.write("\nNonterminals, with rules where they appear\n\n")
        ks = list(Nonterminals.keys())
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))

    if (cycle_check):
        compute_reachable()
        error += compute_terminates()
#        error += check_cycles()
    return error

# -----------------------------------------------------------------------------
# build_lritems()
#
# This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways:  First, they are uniquely
# numbered and placed in the list LRitems.  Second, a linked list of LR items
# is built for each production.
# For example:
#
#    E -> E PLUS E
#
# Creates the list
#
#    [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
# -----------------------------------------------------------------------------

def build_lritems():
    '''
    Attach the chain of LR items to every production and number the items.

    For each entry of Productions, lr_item(i) is requested for i = 0, 1, 2, ...
    until it returns a false value.  Every item obtained is given lr_num (its
    index in LRitems) and appended to LRitems.  The production's lr_next
    points at item 0, each item's lr_next at the following item, and the last
    item's lr_next holds the false value that ended the walk.
    '''
    for prod in Productions:
        # NOTE(review): the result of this first call is never used; it is
        # kept only because lr_item() is defined elsewhere and may have side
        # effects -- confirm and drop if it has none.
        prod.lr_item(0)

        previous = prod
        position = 0
        while 1:
            item = prod.lr_item(position)
            previous.lr_next = item
            if not item:
                break
            item.lr_num = len(LRitems)
            LRitems.append(item)
            previous = item
            position += 1

    # In order for the rest of the parser generator to work, we need to
    # guarantee that no more lritems are generated.  Therefore, we nuke
    # the p.lr_item method.  (Only used in debugging)
    # Production.lr_item = None

# -----------------------------------------------------------------------------
# add_precedence()
#
# Given a list of precedence rules, add to the precedence table.
# -----------------------------------------------------------------------------

def add_precedence(plist):
    '''
    Record the precedence entries of plist in the global Precedence table.

    Each entry is indexed as (assoc, term1, term2, ...), where assoc must be
    'left', 'right' or 'nonassoc'.  The precedence level of an entry is its
    1-based position in plist, and every term is stored as
    Precedence[term] = (assoc, level).

    Returns -1 immediately when an entry names an unknown associativity.
    Otherwise returns the number of problems reported (terms that already had
    a precedence, and entries that could not be processed at all).
    '''
    associativities = ('left', 'right', 'nonassoc')
    error = 0
    plevel = 0
    for entry in plist:
        plevel += 1
        try:
            prec = entry[0]
            terms = entry[1:]
            if prec not in associativities:
                print("yacc: Invalid precedence '%s'" % prec)
                return -1
            for term in terms:
                if term in Precedence:
                    print("yacc: Precedence already specified for terminal '%s'" % term)
                    error += 1
                else:
                    Precedence[term] = (prec, plevel)
        except Exception:
            # A malformed entry (not indexable, unhashable term, ...) is
            # reported and counted.  This used to be a bare "except:", which
            # also swallowed KeyboardInterrupt and SystemExit.
            print("yacc: Invalid precedence table.")
            error += 1

    return error

# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar.  This is just a rule S' -> start where start
# is the starting symbol.
# -----------------------------------------------------------------------------

def augment_grammar(start=None):
    '''
    Install the augmented rule S' -> start as Productions[0].

    When start is not given, the name of Productions[1] is used.  The new
    rule is also recorded as a user of the start symbol by appending rule
    number 0 to Nonterminals[start].
    '''
    if not start:
        start = Productions[1].name

    Productions[0] = Production(name="S'", prod=[start], number=0, len=1,
                                prec=('right', 0), func=None)
    Productions[0].usyms = [start]
    Nonterminals[start].append(0)


# -------------------------------------------------------------------------
# first()
#
# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
#
# During execution of compute_first1, the result may be incomplete.
# Afterward (e.g., when called from compute_follow()), it will be complete.
# -------------------------------------------------------------------------
def first(beta):
    '''
    Return FIRST1 of the symbol sequence beta as a new list.

    Symbols are looked up in the global First table.  The list contains
    '<empty>' (as its last element) only when every symbol of beta can
    produce empty, which includes the case of an empty beta.
    '''
    result = []
    for symbol in beta:
        nullable = False

        # Everything in First[symbol] except <empty> belongs to the result.
        for tok in First[symbol]:
            if tok == '<empty>':
                nullable = True
            elif tok not in result:
                result.append(tok)

        if not nullable:
            # This symbol always consumes input, so later symbols of beta
            # cannot contribute.
            break
    else:
        # The loop was never broken: every symbol in beta produces empty,
        # so beta produces empty as well.
        result.append('<empty>')

    return result


# FOLLOW(x)
# Given a non-terminal.  This function computes the set of all symbols
# that might follow it.  Dragon book, p. 189.

def compute_follow(start=None):
    '''
    Fill the global Follow table with a list of symbols per nonterminal.

    The start symbol (Productions[1].name when start is not given) gets '$'.
    The remaining entries are propagated over Productions[1:] until a full
    pass adds nothing.  Relies on the First table being complete.
    '''
    for name in Nonterminals.keys():
        Follow[name] = []

    if not start:
        start = Productions[1].name

    # '$' may follow the start symbol
    Follow[start] = ['$']

    changed = True
    while changed:
        changed = False
        for rule in Productions[1:]:
            rhs = rule.prod
            last = len(rhs) - 1
            for pos, symbol in enumerate(rhs):
                if symbol not in Nonterminals:
                    continue

                # A nonterminal: whatever can start the rest of the rule
                # can follow it.
                followers = Follow[symbol]
                tail_nullable = False
                for tok in first(rhs[pos+1:]):
                    if tok == '<empty>':
                        tail_nullable = True
                    elif tok not in followers:
                        followers.append(tok)
                        changed = True

                if tail_nullable or pos == last:
                    # The rest of the rule can vanish, so anything that
                    # follows the rule's own name also follows this symbol.
                    for tok in Follow[rule.name]:
                        if tok not in followers:
                            followers.append(tok)
                            changed = True

    # Debug dump, deliberately disabled.
    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))

# -------------------------------------------------------------------------
# compute_first1()
#
# Compute the value of FIRST1(X) for all symbols
# -------------------------------------------------------------------------
def compute_first1():
    '''
    Fill the global First table for every terminal and nonterminal.

    A terminal's entry is a list holding just itself; '$' and '#' get the
    same treatment.  Nonterminal entries start empty and are grown from
    first() of each of their productions until a full pass adds nothing.
    '''
    # Terminals:
    for term in Terminals.keys():
        First[term] = [term]

    First['$'] = ['$']
    First['#'] = ['#']  # what's this for?

    # Nonterminals start out as the empty set ...
    for nonterm in Nonterminals.keys():
        First[nonterm] = []

    # ... and symbols are propagated until nothing changes.
    changed = True
    while changed:
        changed = False
        for nonterm in Nonterminals.keys():
            known = First[nonterm]
            for rule in Prodnames[nonterm]:
                for tok in first(rule.prod):
                    if tok not in known:
                        known.append(tok)
                        changed = True

    # Debug dump, deliberately disabled.
    if 0 and yaccdebug:
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                      (k, " ".join([str(s) for s in First[k]])))

# -----------------------------------------------------------------------------
# === SLR Generation ===
#
# The following functions are used to construct SLR (Simple LR) parsing tables
# as described on p.221-229 of the dragon book.
# -----------------------------------------------------------------------------

# Global variables for the LR parsing engine
def lr_init_vars():
    '''
    Reset the module-level LR table state to empty values.

    Rebinds _lr_action, _lr_goto and _lr_goto_cache to fresh dictionaries
    and _lr_method to "Unknown".
    '''
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache

    _lr_action = {}           # Action table
    _lr_goto = {}             # Goto table
    _lr_method = "Unknown"    # LR method used
    _lr_goto_cache = {}       # Cache consulted by lr0_goto()

# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

_add_count = 0       # Counter used to detect cycles

def lr0_closure(I):
    '''
    Return the LR(0) closure of the item list I as a new list.

    The result starts with a copy of I.  For every production listed in an
    item's lrafter, that production's lr_next item is appended once per
    call.  "Once" is enforced by stamping the production's lr0_added with
    the current value of the global _add_count, which is incremented at the
    start of every call.  I itself is not modified.
    '''
    global _add_count
    _add_count += 1

    closure = I[:]

    # The list is extended while it is being walked, so this single pass also
    # visits every item appended along the way; no outer "repeat until
    # nothing was added" loop is needed.
    for item in closure:
        for prod in item.lrafter:
            if prod.lr0_added == _add_count:
                continue
            # Add B --> .G to the closure
            closure.append(prod.lr_next)
            prod.lr0_added = _add_count

    return closure

# Compute the LR(0) goto function goto(I,X) where I is a set
# of LR(0) items and X is a grammar symbol. This function is written
# in a way that guarantees uniqueness of the generated goto sets
# (i.e. the same goto set will never be returned as two different Python
# objects).  With uniqueness, we can later do fast set comparisons using
# id(obj) instead of element-wise comparison.
+ +def lr0_goto(I,x): + # First we look for a previously cached entry + g = _lr_goto_cache.get((id(I),x),None) + if g: return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = _lr_goto_cache.get(x,None) + if not s: + s = { } + _lr_goto_cache[x] = s + + gs = [ ] + for p in I: + n = p.lr_next + if n and n.lrbefore == x: + s1 = s.get(id(n),None) + if not s1: + s1 = { } + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$',None) + if not g: + if gs: + g = lr0_closure(gs) + s['$'] = g + else: + s['$'] = gs + _lr_goto_cache[(id(I),x)] = g + return g + +# Compute the kernel of a set of LR(0) items +def lr0_kernel(I): + KI = [ ] + for p in I: + if p.name == "S'" or p.lr_index > 0 or p.len == 0: + KI.append(p) + + return KI + +_lr0_cidhash = { } + +# Compute the LR(0) sets of item function +def lr0_items(): + + C = [ lr0_closure([Productions[0].lr_next]) ] + i = 0 + for I in C: + _lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = { } + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms.keys(): + g = lr0_goto(I,x) + if not g: continue + if _lr0_cidhash.has_key(id(g)): continue + _lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + +# ----------------------------------------------------------------------------- +# slr_parse_table() +# +# This function constructs an SLR table. +# ----------------------------------------------------------------------------- +def slr_parse_table(): + global _lr_method + goto = _lr_goto # Goto array + action = _lr_action # Action array + actionp = { } # Action production array (temporary) + + _lr_method = "SLR" + + n_srconflict = 0 + n_rrconflict = 0 + + if yaccdebug: + _vf.write("\n\nParsing method: SLR\n\n") + + # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items + # This determines the number of states + + C = lr0_items() + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + + if yaccdebug: + _vf.write("\nstate %d\n\n" % st) + for p in I: + _vf.write(" (%d) %s\n" % (p.number, str(p))) + _vf.write("\n") + + for p in I: + try: + if p.prod[-1] == ".": + if p.name == "S'": + # Start symbol. Accept! + action[st,"$"] = 0 + actionp[st,"$"] = p + else: + # We are at the end of a production. Reduce! + for a in Follow[p.name]: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = action.get((st,a),None) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[actionp[st,a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + action[st,a] = -p.number + actionp[st,a] = p + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + n_srconflict += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the shift + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + n_srconflict +=1 + elif r < 0: + # Reduce/reduce conflict. 
In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + action[st,a] = -p.number + actionp[st,a] = p + # print "Reduce/reduce conflict in state %d" % st + n_rrconflict += 1 + _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) + _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = -p.number + actionp[st,a] = p + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if Terminals.has_key(a): + g = lr0_goto(I,a) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = action.get((st,a),None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + print "Shift/shift conflict in state %d" % st + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[actionp[st,a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): + # We decide to shift here... highest precedence to shift + action[st,a] = j + actionp[st,a] = p + if not slevel and not rlevel: + n_srconflict += 1 + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + n_srconflict +=1 + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! 
shift/reduce conflict for %s resolved as reduce.\n" % a) + + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = j + actionp[st,a] = p + + except StandardError,e: + raise YaccError, "Hosed in slr_parse_table", e + + # Print the actions associated with each terminal + if yaccdebug: + for a,p,m in actlist: + if action.has_key((st,a)): + if p is actionp[st,a]: + _vf.write(" %-15s %s\n" % (a,m)) + _vf.write("\n") + for a,p,m in actlist: + if action.has_key((st,a)): + if p is not actionp[st,a]: + _vf.write(" ! %-15s [ %s ]\n" % (a,m)) + + # Construct the goto table for this state + if yaccdebug: + _vf.write("\n") + nkeys = { } + for ii in I: + for s in ii.usyms: + if Nonterminals.has_key(s): + nkeys[s] = None + for n in nkeys.keys(): + g = lr0_goto(I,n) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + goto[st,n] = j + if yaccdebug: + _vf.write(" %-15s shift and go to state %d\n" % (n,j)) + + st += 1 + + if n_srconflict == 1: + print "yacc: %d shift/reduce conflict" % n_srconflict + if n_srconflict > 1: + print "yacc: %d shift/reduce conflicts" % n_srconflict + if n_rrconflict == 1: + print "yacc: %d reduce/reduce conflict" % n_rrconflict + if n_rrconflict > 1: + print "yacc: %d reduce/reduce conflicts" % n_rrconflict + + +# ----------------------------------------------------------------------------- +# ==== LALR(1) Parsing ==== +# **** UNFINISHED! 6/16/01 +# ----------------------------------------------------------------------------- + + +# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where +# p is a LR0 item and a is a terminal + +_lr1_add_count = 0 + +def lr1_closure(I): + global _lr1_add_count + + _lr1_add_count += 1 + + J = I[:] + + # Loop over items (p,a) in I. + ji = 0 + while ji < len(J): + p,a = J[ji] + # p = [ A -> alpha . B beta] + + # For each production B -> gamma + for B in p.lr1_after: + f = tuple(p.lr1_beta + (a,)) + + # For each terminal b in first(Beta a) + for b in first(f): + # Check if (B -> . 
gamma, b) is in J + # Only way this can happen is if the add count mismatches + pn = B.lr_next + if pn.lr_added.get(b,0) == _lr1_add_count: continue + pn.lr_added[b] = _lr1_add_count + J.append((pn,b)) + ji += 1 + + return J + +def lalr_parse_table(): + + # Compute some lr1 information about all of the productions + for p in LRitems: + try: + after = p.prod[p.lr_index + 1] + p.lr1_after = Prodnames[after] + p.lr1_beta = p.prod[p.lr_index + 2:] + except LookupError: + p.lr1_after = [ ] + p.lr1_beta = [ ] + p.lr_added = { } + + # Compute the LR(0) items + C = lr0_items() + CK = [] + for I in C: + CK.append(lr0_kernel(I)) + + print CK + +# ----------------------------------------------------------------------------- +# ==== LR Utility functions ==== +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _lr_write_tables() +# +# This function writes the LR parsing tables to a file +# ----------------------------------------------------------------------------- + +def lr_write_tables(modulename=tab_module): + filename = modulename + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. 
+ +_lr_method = %s + +_lr_signature = %s +""" % (filename, repr(_lr_method), repr(Signature.digest()))) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for k,v in _lr_action.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_action[(_x,_k)] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in _lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for k,v in _lr_goto.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_goto[(_x,_k)] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in _lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in Productions: + if p: + if (p.func): + f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) + else: + f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) + else: + f.write(" None,\n") + f.write("]\n") + f.close() + + except IOError,e: + print "Unable to create '%s'" % filename + 
print e + return + +def lr_read_tables(module=tab_module,optimize=0): + global _lr_action, _lr_goto, _lr_productions, _lr_method + try: + exec "import %s as parsetab" % module + + if (optimize) or (Signature.digest() == parsetab._lr_signature): + _lr_action = parsetab._lr_action + _lr_goto = parsetab._lr_goto + _lr_productions = parsetab._lr_productions + _lr_method = parsetab._lr_method + return 1 + else: + return 0 + + except (ImportError,AttributeError): + return 0 + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build the parser module +# ----------------------------------------------------------------------------- + +def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0): + global yaccdebug + yaccdebug = debug + + initialize_vars() + files = { } + error = 0 + + # Add starting symbol to signature + if start: + Signature.update(start) + + # Try to figure out what module we are working with + if module: + # User supplied a module object. + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. + # Throw an exception and unwind the traceback to get the globals + + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + # If running in optimized mode. 
We're going to + + if (optimize and lr_read_tables(tabmodule,1)): + # Read parse table + del Productions[:] + for p in _lr_productions: + if not p: + Productions.append(None) + else: + m = MiniProduction() + m.name = p[0] + m.len = p[1] + m.file = p[3] + m.line = p[4] + if p[2]: + m.func = ldict[p[2]] + Productions.append(m) + + else: + # Get the tokens map + tokens = ldict.get("tokens",None) + + if not tokens: + raise YaccError,"module does not define a list 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise YaccError,"tokens must be a list or tuple." + + # Check to see if a requires dictionary is defined. + requires = ldict.get("require",None) + if requires: + if not (isinstance(requires,types.DictType)): + raise YaccError,"require must be a dictionary." + + for r,v in requires.items(): + try: + if not (isinstance(v,types.ListType)): + raise TypeError + v1 = [x.split(".") for x in v] + Requires[r] = v1 + except StandardError: + print "Invalid specification for rule '%s' in require. Expected a list of strings" % r + + + # Build the dictionary of terminals. We a record a 0 in the + # dictionary to track whether or not a terminal is actually + # used in the grammar + + if 'error' in tokens: + print "yacc: Illegal token 'error'. Is a reserved word." + raise YaccError,"Illegal token name" + + for n in tokens: + if Terminals.has_key(n): + print "yacc: Warning. Token '%s' multiply defined." % n + Terminals[n] = [ ] + + Terminals['error'] = [ ] + + # Get the precedence map (if any) + prec = ldict.get("precedence",None) + if prec: + if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): + raise YaccError,"precedence must be a list or tuple." 
+ add_precedence(prec) + Signature.update(repr(prec)) + + for n in tokens: + if not Precedence.has_key(n): + Precedence[n] = ('right',0) # Default, right associative, 0 precedence + + # Look for error handler + ef = ldict.get('p_error',None) + if ef: + if not isinstance(ef,types.FunctionType): + raise YaccError,"'p_error' defined, but is not a function." + eline = ef.func_code.co_firstlineno + efile = ef.func_code.co_filename + files[efile] = None + + if (ef.func_code.co_argcount != 1): + raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) + global Errorfunc + Errorfunc = ef + else: + print "yacc: Warning. no p_error() function is defined." + + # Get the list of built-in functions with p_ prefix + symbols = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_' + and ldict[f].__name__ != 'p_error')] + + # Check for non-empty symbols + if len(symbols) == 0: + raise YaccError,"no rules of the form p_rulename are defined." + + # Sort the symbols by line number + symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Add all of the symbols to the grammar + for f in symbols: + if (add_function(f)) < 0: + error += 1 + else: + files[f.func_code.co_filename] = None + + # Make a signature of the docstrings + for f in symbols: + if f.__doc__: + Signature.update(f.__doc__) + + lr_init_vars() + + if error: + raise YaccError,"Unable to construct parser." 
+ + if not lr_read_tables(tabmodule): + + # Validate files + for filename in files.keys(): + if not validate_file(filename): + error = 1 + + # Validate dictionary + validate_dict(ldict) + + if start and not Prodnames.has_key(start): + raise YaccError,"Bad starting symbol '%s'" % start + + augment_grammar(start) + error = verify_productions(cycle_check=check_recursion) + otherfunc = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')] + + if error: + raise YaccError,"Unable to construct parser." + + build_lritems() + compute_first1() + compute_follow(start) + + if method == 'SLR': + slr_parse_table() + elif method == 'LALR1': + lalr_parse_table() + return + else: + raise YaccError, "Unknown parsing method '%s'" % method + + lr_write_tables(tabmodule) + + if yaccdebug: + try: + f = open(debug_file,"w") + f.write(_vfc.getvalue()) + f.write("\n\n") + f.write(_vf.getvalue()) + f.close() + except IOError,e: + print "yacc: can't create '%s'" % debug_file,e + + # Made it here. Create a parser object and set up its internal state. + # Set global parse() method to bound method of parser object. + + p = Parser("xyzzy") + p.productions = Productions + p.errorfunc = Errorfunc + p.action = _lr_action + p.goto = _lr_goto + p.method = _lr_method + p.require = Requires + + global parse + parse = p.parse + + # Clean up all of the globals we created + if (not optimize): + yacc_cleanup() + return p + +# yacc_cleanup function. 
# Delete all of the global variables
# used during table construction

def yacc_cleanup():
    '''
    Unbind the module globals that only exist for table construction.

    yacc() calls this after building the parser unless optimize is set.
    Every name below must currently be bound; deleting an unbound one raises
    NameError.
    '''
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    global Productions, Prodnames, Prodmap, Terminals, Nonterminals
    global First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires
    global _vf, _vfc

    # LR tables and the goto cache
    del _lr_action
    del _lr_goto
    del _lr_method
    del _lr_goto_cache

    # Grammar description
    del Productions
    del Prodnames
    del Prodmap
    del Terminals
    del Nonterminals
    del First
    del Follow
    del Precedence
    del LRitems
    del Errorfunc
    del Signature
    del Requires

    # Debugging output buffers
    del _vf
    del _vfc


# Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args, **kwargs):
    '''
    Placeholder for the module-level parse() entry point.

    yacc() rebinds the global name parse to the parse method of the parser
    it builds; until then any call ends up here and raises YaccError.
    '''
    raise YaccError("yacc: No parser built with yacc()")