diff options
author | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
---|---|---|
committer | Steve Reinhardt <stever@eecs.umich.edu> | 2006-05-22 14:29:33 -0400 |
commit | ba2eae5d528487900d1510fc0a160e660f2c394c (patch) | |
tree | a2c6dd5948f6ff353763cb3f83ddd734077e646e /ext | |
parent | 86777c9db174c74be49667bce3dda99f8ba23696 (diff) | |
download | gem5-ba2eae5d528487900d1510fc0a160e660f2c394c.tar.xz |
New directory structure:
- simulator source now in 'src' subdirectory
- imported files from 'ext' repository
- support building in arbitrary places, including
outside of the source tree. See comment at top
of SConstruct file for more details.
Regression tests are temporarily disabled; that
system needs more extensive revisions.
SConstruct:
Update for new directory structure.
Modify to support build trees that are not subdirectories
of the source tree. See comment at top of file for
more details.
Regression tests are temporarily disabled.
src/arch/SConscript:
src/arch/isa_parser.py:
src/python/SConscript:
Update for new directory structure.
--HG--
rename : build/SConstruct => SConstruct
rename : build/default_options/ALPHA_FS => build_opts/ALPHA_FS
rename : build/default_options/ALPHA_FS_TL => build_opts/ALPHA_FS_TL
rename : build/default_options/ALPHA_SE => build_opts/ALPHA_SE
rename : build/default_options/MIPS_SE => build_opts/MIPS_SE
rename : build/default_options/SPARC_SE => build_opts/SPARC_SE
rename : Doxyfile => src/Doxyfile
rename : SConscript => src/SConscript
rename : arch/SConscript => src/arch/SConscript
rename : arch/alpha/SConscript => src/arch/alpha/SConscript
rename : arch/alpha/aout_machdep.h => src/arch/alpha/aout_machdep.h
rename : arch/alpha/arguments.cc => src/arch/alpha/arguments.cc
rename : arch/alpha/arguments.hh => src/arch/alpha/arguments.hh
rename : arch/alpha/ecoff_machdep.h => src/arch/alpha/ecoff_machdep.h
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/ev5.hh => src/arch/alpha/ev5.hh
rename : arch/alpha/faults.cc => src/arch/alpha/faults.cc
rename : arch/alpha/faults.hh => src/arch/alpha/faults.hh
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/freebsd/system.hh => src/arch/alpha/freebsd/system.hh
rename : arch/alpha/isa/branch.isa => src/arch/alpha/isa/branch.isa
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/fp.isa => src/arch/alpha/isa/fp.isa
rename : arch/alpha/isa/int.isa => src/arch/alpha/isa/int.isa
rename : arch/alpha/isa/main.isa => src/arch/alpha/isa/main.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa/opcdec.isa => src/arch/alpha/isa/opcdec.isa
rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa
rename : arch/alpha/isa/unimp.isa => src/arch/alpha/isa/unimp.isa
rename : arch/alpha/isa/unknown.isa => src/arch/alpha/isa/unknown.isa
rename : arch/alpha/isa/util.isa => src/arch/alpha/isa/util.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/aligned.hh => src/arch/alpha/linux/aligned.hh
rename : arch/alpha/linux/hwrpb.hh => src/arch/alpha/linux/hwrpb.hh
rename : arch/alpha/linux/linux.cc => src/arch/alpha/linux/linux.cc
rename : arch/alpha/linux/linux.hh => src/arch/alpha/linux/linux.hh
rename : arch/alpha/linux/process.cc => src/arch/alpha/linux/process.cc
rename : arch/alpha/linux/process.hh => src/arch/alpha/linux/process.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/linux/system.hh => src/arch/alpha/linux/system.hh
rename : arch/alpha/linux/thread_info.hh => src/arch/alpha/linux/thread_info.hh
rename : arch/alpha/linux/threadinfo.hh => src/arch/alpha/linux/threadinfo.hh
rename : arch/alpha/osfpal.cc => src/arch/alpha/osfpal.cc
rename : arch/alpha/osfpal.hh => src/arch/alpha/osfpal.hh
rename : arch/alpha/process.cc => src/arch/alpha/process.cc
rename : arch/alpha/process.hh => src/arch/alpha/process.hh
rename : arch/alpha/regfile.hh => src/arch/alpha/regfile.hh
rename : arch/alpha/stacktrace.cc => src/arch/alpha/stacktrace.cc
rename : arch/alpha/stacktrace.hh => src/arch/alpha/stacktrace.hh
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/system.hh => src/arch/alpha/system.hh
rename : arch/alpha/tlb.cc => src/arch/alpha/tlb.cc
rename : arch/alpha/tlb.hh => src/arch/alpha/tlb.hh
rename : arch/alpha/tru64/process.cc => src/arch/alpha/tru64/process.cc
rename : arch/alpha/tru64/process.hh => src/arch/alpha/tru64/process.hh
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : arch/alpha/tru64/system.hh => src/arch/alpha/tru64/system.hh
rename : arch/alpha/tru64/tru64.cc => src/arch/alpha/tru64/tru64.cc
rename : arch/alpha/tru64/tru64.hh => src/arch/alpha/tru64/tru64.hh
rename : arch/alpha/types.hh => src/arch/alpha/types.hh
rename : arch/alpha/utility.hh => src/arch/alpha/utility.hh
rename : arch/alpha/vtophys.cc => src/arch/alpha/vtophys.cc
rename : arch/alpha/vtophys.hh => src/arch/alpha/vtophys.hh
rename : arch/isa_parser.py => src/arch/isa_parser.py
rename : arch/isa_specific.hh => src/arch/isa_specific.hh
rename : arch/mips/SConscript => src/arch/mips/SConscript
rename : arch/mips/faults.cc => src/arch/mips/faults.cc
rename : arch/mips/faults.hh => src/arch/mips/faults.hh
rename : arch/mips/isa/base.isa => src/arch/mips/isa/base.isa
rename : arch/mips/isa/bitfields.isa => src/arch/mips/isa/bitfields.isa
rename : arch/mips/isa/decoder.isa => src/arch/mips/isa/decoder.isa
rename : arch/mips/isa/formats/basic.isa => src/arch/mips/isa/formats/basic.isa
rename : arch/mips/isa/formats/branch.isa => src/arch/mips/isa/formats/branch.isa
rename : arch/mips/isa/formats/formats.isa => src/arch/mips/isa/formats/formats.isa
rename : arch/mips/isa/formats/fp.isa => src/arch/mips/isa/formats/fp.isa
rename : arch/mips/isa/formats/int.isa => src/arch/mips/isa/formats/int.isa
rename : arch/mips/isa/formats/mem.isa => src/arch/mips/isa/formats/mem.isa
rename : arch/mips/isa/formats/noop.isa => src/arch/mips/isa/formats/noop.isa
rename : arch/mips/isa/formats/tlbop.isa => src/arch/mips/isa/formats/tlbop.isa
rename : arch/mips/isa/formats/trap.isa => src/arch/mips/isa/formats/trap.isa
rename : arch/mips/isa/formats/unimp.isa => src/arch/mips/isa/formats/unimp.isa
rename : arch/mips/isa/formats/unknown.isa => src/arch/mips/isa/formats/unknown.isa
rename : arch/mips/isa/formats/util.isa => src/arch/mips/isa/formats/util.isa
rename : arch/mips/isa/includes.isa => src/arch/mips/isa/includes.isa
rename : arch/mips/isa/main.isa => src/arch/mips/isa/main.isa
rename : arch/mips/isa/operands.isa => src/arch/mips/isa/operands.isa
rename : arch/mips/isa_traits.cc => src/arch/mips/isa_traits.cc
rename : arch/mips/isa_traits.hh => src/arch/mips/isa_traits.hh
rename : arch/mips/linux/linux.cc => src/arch/mips/linux/linux.cc
rename : arch/mips/linux/linux.hh => src/arch/mips/linux/linux.hh
rename : arch/mips/linux/process.cc => src/arch/mips/linux/process.cc
rename : arch/mips/linux/process.hh => src/arch/mips/linux/process.hh
rename : arch/mips/process.cc => src/arch/mips/process.cc
rename : arch/mips/process.hh => src/arch/mips/process.hh
rename : arch/mips/regfile/float_regfile.hh => src/arch/mips/regfile/float_regfile.hh
rename : arch/mips/regfile/int_regfile.hh => src/arch/mips/regfile/int_regfile.hh
rename : arch/mips/regfile/misc_regfile.hh => src/arch/mips/regfile/misc_regfile.hh
rename : arch/mips/regfile/regfile.hh => src/arch/mips/regfile/regfile.hh
rename : arch/mips/stacktrace.hh => src/arch/mips/stacktrace.hh
rename : arch/mips/types.hh => src/arch/mips/types.hh
rename : arch/mips/utility.hh => src/arch/mips/utility.hh
rename : arch/sparc/SConscript => src/arch/sparc/SConscript
rename : arch/sparc/faults.cc => src/arch/sparc/faults.cc
rename : arch/sparc/faults.hh => src/arch/sparc/faults.hh
rename : arch/sparc/isa/base.isa => src/arch/sparc/isa/base.isa
rename : arch/sparc/isa/bitfields.isa => src/arch/sparc/isa/bitfields.isa
rename : arch/sparc/isa/decoder.isa => src/arch/sparc/isa/decoder.isa
rename : arch/sparc/isa/formats.isa => src/arch/sparc/isa/formats.isa
rename : arch/sparc/isa/formats/basic.isa => src/arch/sparc/isa/formats/basic.isa
rename : arch/sparc/isa/formats/branch.isa => src/arch/sparc/isa/formats/branch.isa
rename : arch/sparc/isa/formats/integerop.isa => src/arch/sparc/isa/formats/integerop.isa
rename : arch/sparc/isa/formats/mem.isa => src/arch/sparc/isa/formats/mem.isa
rename : arch/sparc/isa/formats/nop.isa => src/arch/sparc/isa/formats/nop.isa
rename : arch/sparc/isa/formats/priv.isa => src/arch/sparc/isa/formats/priv.isa
rename : arch/sparc/isa/formats/trap.isa => src/arch/sparc/isa/formats/trap.isa
rename : arch/sparc/isa/formats/unknown.isa => src/arch/sparc/isa/formats/unknown.isa
rename : arch/sparc/isa/includes.isa => src/arch/sparc/isa/includes.isa
rename : arch/sparc/isa/main.isa => src/arch/sparc/isa/main.isa
rename : arch/sparc/isa/operands.isa => src/arch/sparc/isa/operands.isa
rename : arch/sparc/isa_traits.hh => src/arch/sparc/isa_traits.hh
rename : arch/sparc/linux/linux.cc => src/arch/sparc/linux/linux.cc
rename : arch/sparc/linux/linux.hh => src/arch/sparc/linux/linux.hh
rename : arch/sparc/linux/process.cc => src/arch/sparc/linux/process.cc
rename : arch/sparc/linux/process.hh => src/arch/sparc/linux/process.hh
rename : arch/sparc/process.cc => src/arch/sparc/process.cc
rename : arch/sparc/process.hh => src/arch/sparc/process.hh
rename : arch/sparc/regfile.hh => src/arch/sparc/regfile.hh
rename : arch/sparc/solaris/process.cc => src/arch/sparc/solaris/process.cc
rename : arch/sparc/solaris/process.hh => src/arch/sparc/solaris/process.hh
rename : arch/sparc/solaris/solaris.cc => src/arch/sparc/solaris/solaris.cc
rename : arch/sparc/solaris/solaris.hh => src/arch/sparc/solaris/solaris.hh
rename : arch/sparc/stacktrace.hh => src/arch/sparc/stacktrace.hh
rename : arch/sparc/system.cc => src/arch/sparc/system.cc
rename : arch/sparc/system.hh => src/arch/sparc/system.hh
rename : arch/sparc/utility.hh => src/arch/sparc/utility.hh
rename : base/bitfield.hh => src/base/bitfield.hh
rename : base/callback.hh => src/base/callback.hh
rename : base/chunk_generator.hh => src/base/chunk_generator.hh
rename : base/circlebuf.cc => src/base/circlebuf.cc
rename : base/circlebuf.hh => src/base/circlebuf.hh
rename : base/compression/lzss_compression.cc => src/base/compression/lzss_compression.cc
rename : base/compression/lzss_compression.hh => src/base/compression/lzss_compression.hh
rename : base/compression/null_compression.hh => src/base/compression/null_compression.hh
rename : base/cprintf.cc => src/base/cprintf.cc
rename : base/cprintf.hh => src/base/cprintf.hh
rename : base/cprintf_formats.hh => src/base/cprintf_formats.hh
rename : base/crc.cc => src/base/crc.cc
rename : base/crc.hh => src/base/crc.hh
rename : base/date.cc => src/base/date.cc
rename : base/dbl_list.hh => src/base/dbl_list.hh
rename : base/endian.hh => src/base/endian.hh
rename : base/fast_alloc.cc => src/base/fast_alloc.cc
rename : base/fast_alloc.hh => src/base/fast_alloc.hh
rename : base/fenv.hh => src/base/fenv.hh
rename : base/fifo_buffer.cc => src/base/fifo_buffer.cc
rename : base/fifo_buffer.hh => src/base/fifo_buffer.hh
rename : base/hashmap.hh => src/base/hashmap.hh
rename : base/hostinfo.cc => src/base/hostinfo.cc
rename : base/hostinfo.hh => src/base/hostinfo.hh
rename : base/hybrid_pred.cc => src/base/hybrid_pred.cc
rename : base/hybrid_pred.hh => src/base/hybrid_pred.hh
rename : base/inet.cc => src/base/inet.cc
rename : base/inet.hh => src/base/inet.hh
rename : base/inifile.cc => src/base/inifile.cc
rename : base/inifile.hh => src/base/inifile.hh
rename : base/intmath.cc => src/base/intmath.cc
rename : base/intmath.hh => src/base/intmath.hh
rename : base/kgdb.h => src/base/kgdb.h
rename : base/loader/aout_object.cc => src/base/loader/aout_object.cc
rename : base/loader/aout_object.hh => src/base/loader/aout_object.hh
rename : base/loader/coff_sym.h => src/base/loader/coff_sym.h
rename : base/loader/coff_symconst.h => src/base/loader/coff_symconst.h
rename : base/loader/ecoff_object.cc => src/base/loader/ecoff_object.cc
rename : base/loader/ecoff_object.hh => src/base/loader/ecoff_object.hh
rename : base/loader/elf_object.cc => src/base/loader/elf_object.cc
rename : base/loader/elf_object.hh => src/base/loader/elf_object.hh
rename : base/loader/exec_aout.h => src/base/loader/exec_aout.h
rename : base/loader/exec_ecoff.h => src/base/loader/exec_ecoff.h
rename : base/loader/object_file.cc => src/base/loader/object_file.cc
rename : base/loader/object_file.hh => src/base/loader/object_file.hh
rename : base/loader/symtab.cc => src/base/loader/symtab.cc
rename : base/loader/symtab.hh => src/base/loader/symtab.hh
rename : base/match.cc => src/base/match.cc
rename : base/match.hh => src/base/match.hh
rename : base/misc.cc => src/base/misc.cc
rename : base/misc.hh => src/base/misc.hh
rename : base/mod_num.hh => src/base/mod_num.hh
rename : base/mysql.cc => src/base/mysql.cc
rename : base/mysql.hh => src/base/mysql.hh
rename : base/output.cc => src/base/output.cc
rename : base/output.hh => src/base/output.hh
rename : base/pollevent.cc => src/base/pollevent.cc
rename : base/pollevent.hh => src/base/pollevent.hh
rename : base/predictor.hh => src/base/predictor.hh
rename : base/random.cc => src/base/random.cc
rename : base/random.hh => src/base/random.hh
rename : base/range.cc => src/base/range.cc
rename : base/range.hh => src/base/range.hh
rename : base/refcnt.hh => src/base/refcnt.hh
rename : base/remote_gdb.cc => src/base/remote_gdb.cc
rename : base/remote_gdb.hh => src/base/remote_gdb.hh
rename : base/res_list.hh => src/base/res_list.hh
rename : base/sat_counter.cc => src/base/sat_counter.cc
rename : base/sat_counter.hh => src/base/sat_counter.hh
rename : base/sched_list.hh => src/base/sched_list.hh
rename : base/socket.cc => src/base/socket.cc
rename : base/socket.hh => src/base/socket.hh
rename : base/statistics.cc => src/base/statistics.cc
rename : base/statistics.hh => src/base/statistics.hh
rename : base/stats/events.cc => src/base/stats/events.cc
rename : base/stats/events.hh => src/base/stats/events.hh
rename : base/stats/flags.hh => src/base/stats/flags.hh
rename : base/stats/mysql.cc => src/base/stats/mysql.cc
rename : base/stats/mysql.hh => src/base/stats/mysql.hh
rename : base/stats/mysql_run.hh => src/base/stats/mysql_run.hh
rename : base/stats/output.hh => src/base/stats/output.hh
rename : base/stats/statdb.cc => src/base/stats/statdb.cc
rename : base/stats/statdb.hh => src/base/stats/statdb.hh
rename : base/stats/text.cc => src/base/stats/text.cc
rename : base/stats/text.hh => src/base/stats/text.hh
rename : base/stats/types.hh => src/base/stats/types.hh
rename : base/stats/visit.cc => src/base/stats/visit.cc
rename : base/stats/visit.hh => src/base/stats/visit.hh
rename : base/str.cc => src/base/str.cc
rename : base/str.hh => src/base/str.hh
rename : base/time.cc => src/base/time.cc
rename : base/time.hh => src/base/time.hh
rename : base/timebuf.hh => src/base/timebuf.hh
rename : base/trace.cc => src/base/trace.cc
rename : base/trace.hh => src/base/trace.hh
rename : base/traceflags.py => src/base/traceflags.py
rename : base/userinfo.cc => src/base/userinfo.cc
rename : base/userinfo.hh => src/base/userinfo.hh
rename : cpu/SConscript => src/cpu/SConscript
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc
rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh
rename : cpu/cpu_models.py => src/cpu/cpu_models.py
rename : cpu/exec_context.hh => src/cpu/exec_context.hh
rename : cpu/exetrace.cc => src/cpu/exetrace.cc
rename : cpu/exetrace.hh => src/cpu/exetrace.hh
rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh
rename : cpu/intr_control.cc => src/cpu/intr_control.cc
rename : cpu/intr_control.hh => src/cpu/intr_control.hh
rename : cpu/memtest/memtest.cc => src/cpu/memtest/memtest.cc
rename : cpu/memtest/memtest.hh => src/cpu/memtest/memtest.hh
rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc
rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh
rename : cpu/o3/alpha_cpu.cc => src/cpu/o3/alpha_cpu.cc
rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc
rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh
rename : cpu/o3/alpha_dyn_inst.cc => src/cpu/o3/alpha_dyn_inst.cc
rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh
rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh
rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh
rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh
rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc
rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh
rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh
rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc
rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh
rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh
rename : cpu/o3/commit.cc => src/cpu/o3/commit.cc
rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh
rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh
rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc
rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh
rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh
rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc
rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc
rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh
rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc
rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc
rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh
rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh
rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc
rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh
rename : cpu/o3/rob.cc => src/cpu/o3/rob.cc
rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh
rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh
rename : cpu/o3/sat_counter.cc => src/cpu/o3/sat_counter.cc
rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh
rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc
rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/op_class.cc => src/cpu/op_class.cc
rename : cpu/op_class.hh => src/cpu/op_class.hh
rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/ea_list.cc => src/cpu/ozone/ea_list.cc
rename : cpu/ozone/ea_list.hh => src/cpu/ozone/ea_list.hh
rename : cpu/pc_event.cc => src/cpu/pc_event.cc
rename : cpu/pc_event.hh => src/cpu/pc_event.hh
rename : cpu/profile.cc => src/cpu/profile.cc
rename : cpu/profile.hh => src/cpu/profile.hh
rename : cpu/simple/atomic.cc => src/cpu/simple/atomic.cc
rename : cpu/simple/atomic.hh => src/cpu/simple/atomic.hh
rename : cpu/simple/base.cc => src/cpu/simple/base.cc
rename : cpu/simple/base.hh => src/cpu/simple/base.hh
rename : cpu/simple/timing.cc => src/cpu/simple/timing.cc
rename : cpu/simple/timing.hh => src/cpu/simple/timing.hh
rename : cpu/smt.hh => src/cpu/smt.hh
rename : cpu/static_inst.cc => src/cpu/static_inst.cc
rename : cpu/static_inst.hh => src/cpu/static_inst.hh
rename : cpu/trace/opt_cpu.cc => src/cpu/trace/opt_cpu.cc
rename : cpu/trace/opt_cpu.hh => src/cpu/trace/opt_cpu.hh
rename : cpu/trace/reader/ibm_reader.cc => src/cpu/trace/reader/ibm_reader.cc
rename : cpu/trace/reader/ibm_reader.hh => src/cpu/trace/reader/ibm_reader.hh
rename : cpu/trace/reader/itx_reader.cc => src/cpu/trace/reader/itx_reader.cc
rename : cpu/trace/reader/itx_reader.hh => src/cpu/trace/reader/itx_reader.hh
rename : cpu/trace/reader/m5_reader.cc => src/cpu/trace/reader/m5_reader.cc
rename : cpu/trace/reader/m5_reader.hh => src/cpu/trace/reader/m5_reader.hh
rename : cpu/trace/reader/mem_trace_reader.cc => src/cpu/trace/reader/mem_trace_reader.cc
rename : cpu/trace/reader/mem_trace_reader.hh => src/cpu/trace/reader/mem_trace_reader.hh
rename : cpu/trace/trace_cpu.cc => src/cpu/trace/trace_cpu.cc
rename : cpu/trace/trace_cpu.hh => src/cpu/trace/trace_cpu.hh
rename : dev/alpha_access.h => src/dev/alpha_access.h
rename : dev/alpha_console.cc => src/dev/alpha_console.cc
rename : dev/alpha_console.hh => src/dev/alpha_console.hh
rename : dev/baddev.cc => src/dev/baddev.cc
rename : dev/baddev.hh => src/dev/baddev.hh
rename : dev/disk_image.cc => src/dev/disk_image.cc
rename : dev/disk_image.hh => src/dev/disk_image.hh
rename : dev/etherbus.cc => src/dev/etherbus.cc
rename : dev/etherbus.hh => src/dev/etherbus.hh
rename : dev/etherdump.cc => src/dev/etherdump.cc
rename : dev/etherdump.hh => src/dev/etherdump.hh
rename : dev/etherint.cc => src/dev/etherint.cc
rename : dev/etherint.hh => src/dev/etherint.hh
rename : dev/etherlink.cc => src/dev/etherlink.cc
rename : dev/etherlink.hh => src/dev/etherlink.hh
rename : dev/etherpkt.cc => src/dev/etherpkt.cc
rename : dev/etherpkt.hh => src/dev/etherpkt.hh
rename : dev/ethertap.cc => src/dev/ethertap.cc
rename : dev/ethertap.hh => src/dev/ethertap.hh
rename : dev/ide_atareg.h => src/dev/ide_atareg.h
rename : dev/ide_ctrl.cc => src/dev/ide_ctrl.cc
rename : dev/ide_ctrl.hh => src/dev/ide_ctrl.hh
rename : dev/ide_disk.cc => src/dev/ide_disk.cc
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : dev/ide_wdcreg.h => src/dev/ide_wdcreg.h
rename : dev/io_device.cc => src/dev/io_device.cc
rename : dev/io_device.hh => src/dev/io_device.hh
rename : dev/isa_fake.cc => src/dev/isa_fake.cc
rename : dev/isa_fake.hh => src/dev/isa_fake.hh
rename : dev/ns_gige.cc => src/dev/ns_gige.cc
rename : dev/ns_gige.hh => src/dev/ns_gige.hh
rename : dev/ns_gige_reg.h => src/dev/ns_gige_reg.h
rename : dev/pciconfigall.cc => src/dev/pciconfigall.cc
rename : dev/pciconfigall.hh => src/dev/pciconfigall.hh
rename : dev/pcidev.cc => src/dev/pcidev.cc
rename : dev/pcidev.hh => src/dev/pcidev.hh
rename : dev/pcireg.h => src/dev/pcireg.h
rename : dev/pitreg.h => src/dev/pitreg.h
rename : dev/pktfifo.cc => src/dev/pktfifo.cc
rename : dev/pktfifo.hh => src/dev/pktfifo.hh
rename : dev/platform.cc => src/dev/platform.cc
rename : dev/platform.hh => src/dev/platform.hh
rename : dev/rtcreg.h => src/dev/rtcreg.h
rename : dev/simconsole.cc => src/dev/simconsole.cc
rename : dev/simconsole.hh => src/dev/simconsole.hh
rename : dev/simple_disk.cc => src/dev/simple_disk.cc
rename : dev/simple_disk.hh => src/dev/simple_disk.hh
rename : dev/sinic.cc => src/dev/sinic.cc
rename : dev/sinic.hh => src/dev/sinic.hh
rename : dev/sinicreg.hh => src/dev/sinicreg.hh
rename : dev/tsunami.cc => src/dev/tsunami.cc
rename : dev/tsunami.hh => src/dev/tsunami.hh
rename : dev/tsunami_cchip.cc => src/dev/tsunami_cchip.cc
rename : dev/tsunami_cchip.hh => src/dev/tsunami_cchip.hh
rename : dev/tsunami_io.cc => src/dev/tsunami_io.cc
rename : dev/tsunami_io.hh => src/dev/tsunami_io.hh
rename : dev/tsunami_pchip.cc => src/dev/tsunami_pchip.cc
rename : dev/tsunami_pchip.hh => src/dev/tsunami_pchip.hh
rename : dev/tsunamireg.h => src/dev/tsunamireg.h
rename : dev/uart.cc => src/dev/uart.cc
rename : dev/uart.hh => src/dev/uart.hh
rename : dev/uart8250.cc => src/dev/uart8250.cc
rename : dev/uart8250.hh => src/dev/uart8250.hh
rename : kern/kernel_stats.cc => src/kern/kernel_stats.cc
rename : kern/kernel_stats.hh => src/kern/kernel_stats.hh
rename : kern/linux/events.cc => src/kern/linux/events.cc
rename : kern/linux/events.hh => src/kern/linux/events.hh
rename : kern/linux/linux.hh => src/kern/linux/linux.hh
rename : kern/linux/linux_syscalls.cc => src/kern/linux/linux_syscalls.cc
rename : kern/linux/linux_syscalls.hh => src/kern/linux/linux_syscalls.hh
rename : kern/linux/printk.cc => src/kern/linux/printk.cc
rename : kern/linux/printk.hh => src/kern/linux/printk.hh
rename : kern/linux/sched.hh => src/kern/linux/sched.hh
rename : kern/solaris/solaris.hh => src/kern/solaris/solaris.hh
rename : kern/system_events.cc => src/kern/system_events.cc
rename : kern/system_events.hh => src/kern/system_events.hh
rename : kern/tru64/dump_mbuf.cc => src/kern/tru64/dump_mbuf.cc
rename : kern/tru64/dump_mbuf.hh => src/kern/tru64/dump_mbuf.hh
rename : kern/tru64/mbuf.hh => src/kern/tru64/mbuf.hh
rename : kern/tru64/printf.cc => src/kern/tru64/printf.cc
rename : kern/tru64/printf.hh => src/kern/tru64/printf.hh
rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh
rename : kern/tru64/tru64_events.cc => src/kern/tru64/tru64_events.cc
rename : kern/tru64/tru64_events.hh => src/kern/tru64/tru64_events.hh
rename : kern/tru64/tru64_syscalls.cc => src/kern/tru64/tru64_syscalls.cc
rename : kern/tru64/tru64_syscalls.hh => src/kern/tru64/tru64_syscalls.hh
rename : mem/bridge.cc => src/mem/bridge.cc
rename : mem/bridge.hh => src/mem/bridge.hh
rename : mem/bus.cc => src/mem/bus.cc
rename : mem/bus.hh => src/mem/bus.hh
rename : mem/cache/prefetch/tagged_prefetcher_impl.hh => src/mem/cache/prefetch/tagged_prefetcher_impl.hh
rename : mem/config/prefetch.hh => src/mem/config/prefetch.hh
rename : mem/mem_object.cc => src/mem/mem_object.cc
rename : mem/mem_object.hh => src/mem/mem_object.hh
rename : mem/packet.cc => src/mem/packet.cc
rename : mem/packet.hh => src/mem/packet.hh
rename : mem/page_table.cc => src/mem/page_table.cc
rename : mem/page_table.hh => src/mem/page_table.hh
rename : mem/physical.cc => src/mem/physical.cc
rename : mem/physical.hh => src/mem/physical.hh
rename : mem/port.cc => src/mem/port.cc
rename : mem/port.hh => src/mem/port.hh
rename : mem/request.hh => src/mem/request.hh
rename : mem/translating_port.cc => src/mem/translating_port.cc
rename : mem/translating_port.hh => src/mem/translating_port.hh
rename : mem/vport.cc => src/mem/vport.cc
rename : mem/vport.hh => src/mem/vport.hh
rename : python/SConscript => src/python/SConscript
rename : python/m5/__init__.py => src/python/m5/__init__.py
rename : python/m5/config.py => src/python/m5/config.py
rename : python/m5/convert.py => src/python/m5/convert.py
rename : python/m5/multidict.py => src/python/m5/multidict.py
rename : python/m5/objects/AlphaConsole.py => src/python/m5/objects/AlphaConsole.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py
rename : python/m5/objects/AlphaTLB.py => src/python/m5/objects/AlphaTLB.py
rename : python/m5/objects/BadDevice.py => src/python/m5/objects/BadDevice.py
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/BaseCache.py => src/python/m5/objects/BaseCache.py
rename : python/m5/objects/Bridge.py => src/python/m5/objects/Bridge.py
rename : python/m5/objects/Bus.py => src/python/m5/objects/Bus.py
rename : python/m5/objects/CoherenceProtocol.py => src/python/m5/objects/CoherenceProtocol.py
rename : python/m5/objects/Device.py => src/python/m5/objects/Device.py
rename : python/m5/objects/DiskImage.py => src/python/m5/objects/DiskImage.py
rename : python/m5/objects/Ethernet.py => src/python/m5/objects/Ethernet.py
rename : python/m5/objects/Ide.py => src/python/m5/objects/Ide.py
rename : python/m5/objects/IntrControl.py => src/python/m5/objects/IntrControl.py
rename : python/m5/objects/MemObject.py => src/python/m5/objects/MemObject.py
rename : python/m5/objects/MemTest.py => src/python/m5/objects/MemTest.py
rename : python/m5/objects/Pci.py => src/python/m5/objects/Pci.py
rename : python/m5/objects/PhysicalMemory.py => src/python/m5/objects/PhysicalMemory.py
rename : python/m5/objects/Platform.py => src/python/m5/objects/Platform.py
rename : python/m5/objects/Process.py => src/python/m5/objects/Process.py
rename : python/m5/objects/Repl.py => src/python/m5/objects/Repl.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/SimConsole.py => src/python/m5/objects/SimConsole.py
rename : python/m5/objects/SimpleDisk.py => src/python/m5/objects/SimpleDisk.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : python/m5/objects/Tsunami.py => src/python/m5/objects/Tsunami.py
rename : python/m5/objects/Uart.py => src/python/m5/objects/Uart.py
rename : python/m5/smartdict.py => src/python/m5/smartdict.py
rename : sim/async.hh => src/sim/async.hh
rename : sim/builder.cc => src/sim/builder.cc
rename : sim/builder.hh => src/sim/builder.hh
rename : sim/byteswap.hh => src/sim/byteswap.hh
rename : sim/debug.cc => src/sim/debug.cc
rename : sim/debug.hh => src/sim/debug.hh
rename : sim/eventq.cc => src/sim/eventq.cc
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/faults.cc => src/sim/faults.cc
rename : sim/faults.hh => src/sim/faults.hh
rename : sim/host.hh => src/sim/host.hh
rename : sim/main.cc => src/sim/main.cc
rename : sim/param.cc => src/sim/param.cc
rename : sim/param.hh => src/sim/param.hh
rename : sim/process.cc => src/sim/process.cc
rename : sim/process.hh => src/sim/process.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/root.cc => src/sim/root.cc
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/serialize.hh => src/sim/serialize.hh
rename : sim/sim_events.cc => src/sim/sim_events.cc
rename : sim/sim_events.hh => src/sim/sim_events.hh
rename : sim/sim_exit.hh => src/sim/sim_exit.hh
rename : sim/sim_object.cc => src/sim/sim_object.cc
rename : sim/sim_object.hh => src/sim/sim_object.hh
rename : sim/startup.cc => src/sim/startup.cc
rename : sim/startup.hh => src/sim/startup.hh
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/stats.hh => src/sim/stats.hh
rename : sim/syscall_emul.cc => src/sim/syscall_emul.cc
rename : sim/syscall_emul.hh => src/sim/syscall_emul.hh
rename : sim/system.cc => src/sim/system.cc
rename : sim/system.hh => src/sim/system.hh
rename : sim/vptr.hh => src/sim/vptr.hh
rename : test/Makefile => src/unittest/Makefile
rename : test/bitvectest.cc => src/unittest/bitvectest.cc
rename : test/circletest.cc => src/unittest/circletest.cc
rename : test/cprintftest.cc => src/unittest/cprintftest.cc
rename : test/foo.ini => src/unittest/foo.ini
rename : test/genini.py => src/unittest/genini.py
rename : test/initest.cc => src/unittest/initest.cc
rename : test/initest.ini => src/unittest/initest.ini
rename : test/lru_test.cc => src/unittest/lru_test.cc
rename : test/nmtest.cc => src/unittest/nmtest.cc
rename : test/offtest.cc => src/unittest/offtest.cc
rename : test/paramtest.cc => src/unittest/paramtest.cc
rename : test/rangetest.cc => src/unittest/rangetest.cc
rename : test/sized_test.cc => src/unittest/sized_test.cc
rename : test/stattest.cc => src/unittest/stattest.cc
rename : test/strnumtest.cc => src/unittest/strnumtest.cc
rename : test/symtest.cc => src/unittest/symtest.cc
rename : test/tokentest.cc => src/unittest/tokentest.cc
rename : test/tracetest.cc => src/unittest/tracetest.cc
extra : convert_revision : cab6a5271ca1b368193cd948e5d3dcc47ab1bd48
Diffstat (limited to 'ext')
113 files changed, 10335 insertions, 0 deletions
diff --git a/ext/dnet/LICENSE b/ext/dnet/LICENSE new file mode 100644 index 000000000..95ecd51e6 --- /dev/null +++ b/ext/dnet/LICENSE @@ -0,0 +1,28 @@ + + Copyright (c) 2000-2004 Dug Song <dugsong@monkey.org> + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The names of the authors and copyright holders may not be used to + endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/ext/dnet/dnet/addr.h b/ext/dnet/dnet/addr.h new file mode 100644 index 000000000..584e3aba3 --- /dev/null +++ b/ext/dnet/dnet/addr.h @@ -0,0 +1,67 @@ +/* + * addr.h + * + * Network address operations. 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: addr.h,v 1.12 2003/02/27 03:44:55 dugsong Exp $ + */ + +#ifndef DNET_ADDR_H +#define DNET_ADDR_H + +#define ADDR_TYPE_NONE 0 /* No address set */ +#define ADDR_TYPE_ETH 1 /* Ethernet */ +#define ADDR_TYPE_IP 2 /* Internet Protocol v4 */ +#define ADDR_TYPE_IP6 3 /* Internet Protocol v6 */ + +struct addr { + uint16_t addr_type; + uint16_t addr_bits; + union { + eth_addr_t __eth; + ip_addr_t __ip; + ip6_addr_t __ip6; + + uint8_t __data8[16]; + uint16_t __data16[8]; + uint32_t __data32[4]; + } __addr_u; +}; +#define addr_eth __addr_u.__eth +#define addr_ip __addr_u.__ip +#define addr_ip6 __addr_u.__ip6 +#define addr_data8 __addr_u.__data8 +#define addr_data16 __addr_u.__data16 +#define addr_data32 __addr_u.__data32 + +#define addr_pack(addr, type, bits, data, len) do { \ + (addr)->addr_type = type; \ + (addr)->addr_bits = bits; \ + memmove((addr)->addr_data8, (char *)data, len); \ +} while (0) + +__BEGIN_DECLS +int addr_cmp(const struct addr *a, const struct addr *b); + +int addr_bcast(const struct addr *a, struct addr *b); +int addr_net(const struct addr *a, struct addr *b); + +char *addr_ntop(const struct addr *src, char *dst, size_t size); +int addr_pton(const char *src, struct addr *dst); + +char *addr_ntoa(const struct addr *a); +#define addr_aton addr_pton + +int addr_ntos(const struct addr *a, struct sockaddr *sa); +int addr_ston(const struct sockaddr *sa, struct addr *a); + +int addr_btos(uint16_t bits, struct sockaddr *sa); +int addr_stob(const struct sockaddr *sa, uint16_t *bits); + +int addr_btom(uint16_t bits, void *mask, size_t size); +int addr_mtob(const void *mask, size_t size, uint16_t *bits); +__END_DECLS + +#endif /* DNET_ADDR_H */ diff --git a/ext/dnet/dnet/arp.h b/ext/dnet/dnet/arp.h new file mode 100644 index 000000000..d3c162410 --- /dev/null +++ b/ext/dnet/dnet/arp.h @@ -0,0 +1,103 @@ +/* + * arp.h + * + * Address Resolution Protocol. 
+ * RFC 826 + * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: arp.h,v 1.12 2003/03/16 17:39:17 dugsong Exp $ + */ + +#ifndef DNET_ARP_H +#define DNET_ARP_H + +#define ARP_HDR_LEN 8 /* base ARP header length */ +#define ARP_ETHIP_LEN 20 /* base ARP message length */ + +#ifndef __GNUC__ +# define __attribute__(x) +# pragma pack(1) +#endif + +/* + * ARP header + */ +struct arp_hdr { + uint16_t ar_hrd; /* format of hardware address */ + uint16_t ar_pro; /* format of protocol address */ + uint8_t ar_hln; /* length of hardware address (ETH_ADDR_LEN) */ + uint8_t ar_pln; /* length of protocol address (IP_ADDR_LEN) */ + uint16_t ar_op; /* operation */ +}; + +/* + * Hardware address format + */ +#define ARP_HRD_ETH 0x0001 /* ethernet hardware */ +#define ARP_HRD_IEEE802 0x0006 /* IEEE 802 hardware */ + +/* + * Protocol address format + */ +#define ARP_PRO_IP 0x0800 /* IP protocol */ + +/* + * ARP operation + */ +#define ARP_OP_REQUEST 1 /* request to resolve ha given pa */ +#define ARP_OP_REPLY 2 /* response giving hardware address */ +#define ARP_OP_REVREQUEST 3 /* request to resolve pa given ha */ +#define ARP_OP_REVREPLY 4 /* response giving protocol address */ + +/* + * Ethernet/IP ARP message + */ +struct arp_ethip { + uint8_t ar_sha[ETH_ADDR_LEN]; /* sender hardware address */ + uint8_t ar_spa[IP_ADDR_LEN]; /* sender protocol address */ + uint8_t ar_tha[ETH_ADDR_LEN]; /* target hardware address */ + uint8_t ar_tpa[IP_ADDR_LEN]; /* target protocol address */ +}; + +/* + * ARP cache entry + */ +struct arp_entry { + struct addr arp_pa; /* protocol address */ + struct addr arp_ha; /* hardware address */ +}; + +#ifndef __GNUC__ +# pragma pack() +#endif + +#define arp_pack_hdr_ethip(hdr, op, sha, spa, tha, tpa) do { \ + struct arp_hdr *pack_arp_p = (struct arp_hdr *)(hdr); \ + struct arp_ethip *pack_ethip_p = (struct arp_ethip *) \ + ((uint8_t *)(hdr) + ARP_HDR_LEN); \ + pack_arp_p->ar_hrd = htons(ARP_HRD_ETH); \ + pack_arp_p->ar_pro = htons(ARP_PRO_IP); 
\ + pack_arp_p->ar_hln = ETH_ADDR_LEN; \ + pack_arp_p->ar_pln = IP_ADDR_LEN; \ + pack_arp_p->ar_op = htons(op); \ + memmove(pack_ethip_p->ar_sha, &(sha), ETH_ADDR_LEN); \ + memmove(pack_ethip_p->ar_spa, &(spa), IP_ADDR_LEN); \ + memmove(pack_ethip_p->ar_tha, &(tha), ETH_ADDR_LEN); \ + memmove(pack_ethip_p->ar_tpa, &(tpa), IP_ADDR_LEN); \ +} while (0) + +typedef struct arp_handle arp_t; + +typedef int (*arp_handler)(const struct arp_entry *entry, void *arg); + +__BEGIN_DECLS +arp_t *arp_open(void); +int arp_add(arp_t *arp, const struct arp_entry *entry); +int arp_delete(arp_t *arp, const struct arp_entry *entry); +int arp_get(arp_t *arp, struct arp_entry *entry); +int arp_loop(arp_t *arp, arp_handler callback, void *arg); +arp_t *arp_close(arp_t *arp); +__END_DECLS + +#endif /* DNET_ARP_H */ diff --git a/ext/dnet/dnet/blob.h b/ext/dnet/dnet/blob.h new file mode 100644 index 000000000..a3be7897d --- /dev/null +++ b/ext/dnet/dnet/blob.h @@ -0,0 +1,56 @@ +/* + * blob.h + * + * Binary blob handling. 
+ * + * Copyright (c) 2002 Dug Song <dugsong@monkey.org> + * + * $Id: blob.h,v 1.2 2002/04/05 03:06:44 dugsong Exp $ + */ + +#ifndef DNET_BLOB_H +#define DNET_BLOB_H + +typedef struct blob { + u_char *base; /* start of data */ + int off; /* offset into data */ + int end; /* end of data */ + int size; /* size of allocation */ +} blob_t; + +__BEGIN_DECLS +blob_t *blob_new(void); + +int blob_read(blob_t *b, void *buf, int len); +int blob_write(blob_t *b, const void *buf, int len); + +int blob_seek(blob_t *b, int off, int whence); +#define blob_skip(b, l) blob_seek(b, l, SEEK_CUR) +#define blob_rewind(b) blob_seek(b, 0, SEEK_SET) + +#define blob_offset(b) ((b)->off) +#define blob_left(b) ((b)->end - (b)->off) + +int blob_index(blob_t *b, const void *buf, int len); +int blob_rindex(blob_t *b, const void *buf, int len); + +int blob_pack(blob_t *b, const char *fmt, ...); +int blob_unpack(blob_t *b, const char *fmt, ...); + +int blob_insert(blob_t *b, const void *buf, int len); +int blob_delete(blob_t *b, void *buf, int len); + +int blob_print(blob_t *b, char *style, int len); + +blob_t *blob_free(blob_t *b); + +int blob_register_alloc(size_t size, void *(*bmalloc)(size_t), + void (*bfree)(void *), void *(*brealloc)(void *, size_t)); +#ifdef va_start +typedef int (*blob_fmt_cb)(int pack, int len, blob_t *b, va_list *arg); + +int blob_register_pack(char c, blob_fmt_cb fmt_cb); +#endif +__END_DECLS + +#endif /* DNET_BLOB_H */ diff --git a/ext/dnet/dnet/eth.h b/ext/dnet/dnet/eth.h new file mode 100644 index 000000000..da3033066 --- /dev/null +++ b/ext/dnet/dnet/eth.h @@ -0,0 +1,77 @@ +/* + * eth.h + * + * Ethernet. 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: eth.h,v 1.15 2004/01/03 08:47:23 dugsong Exp $ + */ + +#ifndef DNET_ETH_H +#define DNET_ETH_H + +#define ETH_ADDR_LEN 6 +#define ETH_ADDR_BITS 48 +#define ETH_TYPE_LEN 2 +#define ETH_CRC_LEN 4 +#define ETH_HDR_LEN 14 + +#define ETH_LEN_MIN 64 /* minimum frame length with CRC */ +#define ETH_LEN_MAX 1518 /* maximum frame length with CRC */ + +#define ETH_MTU (ETH_LEN_MAX - ETH_HDR_LEN - ETH_CRC_LEN) +#define ETH_MIN (ETH_LEN_MIN - ETH_HDR_LEN - ETH_CRC_LEN) + +typedef struct eth_addr { + uint8_t data[ETH_ADDR_LEN]; +} eth_addr_t; + +struct eth_hdr { + eth_addr_t eth_dst; /* destination address */ + eth_addr_t eth_src; /* source address */ + uint16_t eth_type; /* payload type */ +}; + +/* + * Ethernet payload types - http://standards.ieee.org/regauth/ethertype + */ +#define ETH_TYPE_PUP 0x0200 /* PUP protocol */ +#define ETH_TYPE_IP 0x0800 /* IP protocol */ +#define ETH_TYPE_ARP 0x0806 /* address resolution protocol */ +#define ETH_TYPE_REVARP 0x8035 /* reverse addr resolution protocol */ +#define ETH_TYPE_8021Q 0x8100 /* IEEE 802.1Q VLAN tagging */ +#define ETH_TYPE_IPV6 0x86DD /* IPv6 protocol */ +#define ETH_TYPE_MPLS 0x8847 /* MPLS */ +#define ETH_TYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */ +#define ETH_TYPE_PPPOEDISC 0x8863 /* PPP Over Ethernet Discovery Stage */ +#define ETH_TYPE_PPPOE 0x8864 /* PPP Over Ethernet Session Stage */ +#define ETH_TYPE_LOOPBACK 0x9000 /* used to test interfaces */ + +#define ETH_IS_MULTICAST(ea) (*(ea) & 0x01) /* is address mcast/bcast? 
*/ + +#define ETH_ADDR_BROADCAST "\xff\xff\xff\xff\xff\xff" + +#define eth_pack_hdr(h, dst, src, type) do { \ + struct eth_hdr *eth_pack_p = (struct eth_hdr *)(h); \ + memmove(&eth_pack_p->eth_dst, &(dst), ETH_ADDR_LEN); \ + memmove(&eth_pack_p->eth_src, &(src), ETH_ADDR_LEN); \ + eth_pack_p->eth_type = htons(type); \ +} while (0) + +typedef struct eth_handle eth_t; + +__BEGIN_DECLS +eth_t *eth_open(const char *device); +int eth_get(eth_t *e, eth_addr_t *ea); +int eth_set(eth_t *e, const eth_addr_t *ea); +size_t eth_send(eth_t *e, const void *buf, size_t len); +eth_t *eth_close(eth_t *e); + +char *eth_ntop(const eth_addr_t *eth, char *dst, size_t len); +int eth_pton(const char *src, eth_addr_t *dst); +char *eth_ntoa(const eth_addr_t *eth); +#define eth_aton eth_pton +__END_DECLS + +#endif /* DNET_ETH_H */ diff --git a/ext/dnet/dnet/fw.h b/ext/dnet/dnet/fw.h new file mode 100644 index 000000000..ebda8e7eb --- /dev/null +++ b/ext/dnet/dnet/fw.h @@ -0,0 +1,54 @@ +/* + * fw.h + * + * Network firewalling operations. 
+ * + * Copyright (c) 2001 Dug Song <dugsong@monkey.org> + * + * $Id: fw.h,v 1.13 2002/12/14 04:02:36 dugsong Exp $ + */ + +#ifndef DNET_FW_H +#define DNET_FW_H + +struct fw_rule { + char fw_device[INTF_NAME_LEN]; /* interface name */ + uint8_t fw_op; /* operation */ + uint8_t fw_dir; /* direction */ + uint8_t fw_proto; /* IP protocol */ + struct addr fw_src; /* src address / net */ + struct addr fw_dst; /* dst address / net */ + uint16_t fw_sport[2]; /* range / ICMP type */ + uint16_t fw_dport[2]; /* range / ICMP code */ +}; + +#define FW_OP_ALLOW 1 +#define FW_OP_BLOCK 2 + +#define FW_DIR_IN 1 +#define FW_DIR_OUT 2 + +#define fw_pack_rule(rule, dev, op, dir, p, s, d, sp1, sp2, dp1, dp2) \ +do { \ + strlcpy((rule)->fw_device, dev, sizeof((rule)->fw_device)); \ + (rule)->fw_op = op; (rule)->fw_dir = dir; \ + (rule)->fw_proto = p; \ + memmove(&(rule)->fw_src, &(s), sizeof((rule)->fw_src)); \ + memmove(&(rule)->fw_dst, &(d), sizeof((rule)->fw_dst)); \ + (rule)->fw_sport[0] = sp1; (rule)->fw_sport[1] = sp2; \ + (rule)->fw_dport[0] = dp1; (rule)->fw_dport[1] = dp2; \ +} while (0) + +typedef struct fw_handle fw_t; + +typedef int (*fw_handler)(const struct fw_rule *rule, void *arg); + +__BEGIN_DECLS +fw_t *fw_open(void); +int fw_add(fw_t *f, const struct fw_rule *rule); +int fw_delete(fw_t *f, const struct fw_rule *rule); +int fw_loop(fw_t *f, fw_handler callback, void *arg); +fw_t *fw_close(fw_t *f); +__END_DECLS + +#endif /* DNET_FW_H */ diff --git a/ext/dnet/dnet/icmp.h b/ext/dnet/dnet/icmp.h new file mode 100644 index 000000000..e997d5887 --- /dev/null +++ b/ext/dnet/dnet/icmp.h @@ -0,0 +1,265 @@ +/* + * icmp.h + * + * Internet Control Message Protocol. 
+ * RFC 792, 950, 1256, 1393, 1475, 2002, 2521 + * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: icmp.h,v 1.14 2003/03/16 17:39:17 dugsong Exp $ + */ + +#ifndef DNET_ICMP_H +#define DNET_ICMP_H + +#define ICMP_HDR_LEN 4 /* base ICMP header length */ +#define ICMP_LEN_MIN 8 /* minimum ICMP message size, with header */ + +#ifndef __GNUC__ +# define __attribute__(x) +# pragma pack(1) +#endif + +/* + * ICMP header + */ +struct icmp_hdr { + uint8_t icmp_type; /* type of message, see below */ + uint8_t icmp_code; /* type sub code */ + uint16_t icmp_cksum; /* ones complement cksum of struct */ +}; + +/* + * Types (icmp_type) and codes (icmp_code) - + * http://www.iana.org/assignments/icmp-parameters + */ +#define ICMP_CODE_NONE 0 /* for types without codes */ +#define ICMP_ECHOREPLY 0 /* echo reply */ +#define ICMP_UNREACH 3 /* dest unreachable, codes: */ +#define ICMP_UNREACH_NET 0 /* bad net */ +#define ICMP_UNREACH_HOST 1 /* bad host */ +#define ICMP_UNREACH_PROTO 2 /* bad protocol */ +#define ICMP_UNREACH_PORT 3 /* bad port */ +#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ +#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ +#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ +#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ +#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ +#define ICMP_UNREACH_NET_PROHIB 9 /* for crypto devs */ +#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ +#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ +#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ +#define ICMP_UNREACH_FILTER_PROHIB 13 /* prohibited access */ +#define ICMP_UNREACH_HOST_PRECEDENCE 14 /* precedence error */ +#define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* precedence cutoff */ +#define ICMP_SRCQUENCH 4 /* packet lost, slow down */ +#define ICMP_REDIRECT 5 /* shorter route, codes: */ +#define ICMP_REDIRECT_NET 0 /* for network */ +#define ICMP_REDIRECT_HOST 1 /* for host */ +#define ICMP_REDIRECT_TOSNET 2 /* for tos and net 
*/ +#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define ICMP_ALTHOSTADDR 6 /* alternate host address */ +#define ICMP_ECHO 8 /* echo service */ +#define ICMP_RTRADVERT 9 /* router advertise, codes: */ +#define ICMP_RTRADVERT_NORMAL 0 /* normal */ +#define ICMP_RTRADVERT_NOROUTE_COMMON 16 /* selective routing */ +#define ICMP_RTRSOLICIT 10 /* router solicitation */ +#define ICMP_TIMEXCEED 11 /* time exceeded, code: */ +#define ICMP_TIMEXCEED_INTRANS 0 /* ttl==0 in transit */ +#define ICMP_TIMEXCEED_REASS 1 /* ttl==0 in reass */ +#define ICMP_PARAMPROB 12 /* ip header bad */ +#define ICMP_PARAMPROB_ERRATPTR 0 /* req. opt. absent */ +#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ +#define ICMP_PARAMPROB_LENGTH 2 /* bad length */ +#define ICMP_TSTAMP 13 /* timestamp request */ +#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ +#define ICMP_INFO 15 /* information request */ +#define ICMP_INFOREPLY 16 /* information reply */ +#define ICMP_MASK 17 /* address mask request */ +#define ICMP_MASKREPLY 18 /* address mask reply */ +#define ICMP_TRACEROUTE 30 /* traceroute */ +#define ICMP_DATACONVERR 31 /* data conversion error */ +#define ICMP_MOBILE_REDIRECT 32 /* mobile host redirect */ +#define ICMP_IPV6_WHEREAREYOU 33 /* IPv6 where-are-you */ +#define ICMP_IPV6_IAMHERE 34 /* IPv6 i-am-here */ +#define ICMP_MOBILE_REG 35 /* mobile registration req */ +#define ICMP_MOBILE_REGREPLY 36 /* mobile registration reply */ +#define ICMP_DNS 37 /* domain name request */ +#define ICMP_DNSREPLY 38 /* domain name reply */ +#define ICMP_SKIP 39 /* SKIP */ +#define ICMP_PHOTURIS 40 /* Photuris */ +#define ICMP_PHOTURIS_UNKNOWN_INDEX 0 /* unknown sec index */ +#define ICMP_PHOTURIS_AUTH_FAILED 1 /* auth failed */ +#define ICMP_PHOTURIS_DECOMPRESS_FAILED 2 /* decompress failed */ +#define ICMP_PHOTURIS_DECRYPT_FAILED 3 /* decrypt failed */ +#define ICMP_PHOTURIS_NEED_AUTHN 4 /* no authentication */ +#define ICMP_PHOTURIS_NEED_AUTHZ 5 /* no authorization */ +#define 
ICMP_TYPE_MAX 40 + +#define ICMP_INFOTYPE(type) \ + ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ + (type) == ICMP_RTRADVERT || (type) == ICMP_RTRSOLICIT || \ + (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ + (type) == ICMP_INFO || (type) == ICMP_INFOREPLY || \ + (type) == ICMP_MASK || (type) == ICMP_MASKREPLY) + +/* + * Echo message data + */ +struct icmp_msg_echo { + uint16_t icmp_id; + uint16_t icmp_seq; + uint8_t icmp_data __flexarr; /* optional data */ +}; + +/* + * Fragmentation-needed (unreachable) message data + */ +struct icmp_msg_needfrag { + uint16_t icmp_void; /* must be zero */ + uint16_t icmp_mtu; /* MTU of next-hop network */ + uint8_t icmp_ip __flexarr; /* IP hdr + 8 bytes of pkt */ +}; + +/* + * Unreachable, source quench, redirect, time exceeded, + * parameter problem message data + */ +struct icmp_msg_quote { + uint32_t icmp_void; /* must be zero */ +#define icmp_gwaddr icmp_void /* router IP address to use */ +#define icmp_pptr icmp_void /* ptr to bad octet field */ + uint8_t icmp_ip __flexarr; /* IP hdr + 8 bytes of pkt */ +}; + +/* + * Router advertisement message data, RFC 1256 + */ +struct icmp_msg_rtradvert { + uint8_t icmp_num_addrs; /* # of address / pref pairs */ + uint8_t icmp_wpa; /* words / address == 2 */ + uint16_t icmp_lifetime; /* route lifetime in seconds */ + struct icmp_msg_rtr_data { + uint32_t icmp_void; +#define icmp_gwaddr icmp_void /* router IP address */ + uint32_t icmp_pref; /* router preference (usu 0) */ + } icmp_rtr __flexarr; /* variable # of routers */ +}; +#define ICMP_RTR_PREF_NODEFAULT 0x80000000 /* do not use as default gw */ + +/* + * Timestamp message data + */ +struct icmp_msg_tstamp { + uint32_t icmp_id; /* identifier */ + uint32_t icmp_seq; /* sequence number */ + uint32_t icmp_ts_orig; /* originate timestamp */ + uint32_t icmp_ts_rx; /* receive timestamp */ + uint32_t icmp_ts_tx; /* transmit timestamp */ +}; + +/* + * Address mask message data, RFC 950 + */ +struct icmp_msg_mask { + 
uint32_t icmp_id; /* identifier */ + uint32_t icmp_seq; /* sequence number */ + uint32_t icmp_mask; /* address mask */ +}; + +/* + * Traceroute message data, RFC 1393, RFC 1812 + */ +struct icmp_msg_traceroute { + uint16_t icmp_id; /* identifier */ + uint16_t icmp_void; /* unused */ + uint16_t icmp_ohc; /* outbound hop count */ + uint16_t icmp_rhc; /* return hop count */ + uint32_t icmp_speed; /* link speed, bytes/sec */ + uint32_t icmp_mtu; /* MTU in bytes */ +}; + +/* + * Domain name reply message data, RFC 1788 + */ +struct icmp_msg_dnsreply { + uint16_t icmp_id; /* identifier */ + uint16_t icmp_seq; /* sequence number */ + uint32_t icmp_ttl; /* time-to-live */ + uint8_t icmp_names __flexarr; /* variable number of names */ +}; + +/* + * Generic identifier, sequence number data + */ +struct icmp_msg_idseq { + uint16_t icmp_id; + uint16_t icmp_seq; +}; + +/* + * ICMP message union + */ +union icmp_msg { + struct icmp_msg_echo echo; /* ICMP_ECHO{REPLY} */ + struct icmp_msg_quote unreach; /* ICMP_UNREACH */ + struct icmp_msg_needfrag needfrag; /* ICMP_UNREACH_NEEDFRAG */ + struct icmp_msg_quote srcquench; /* ICMP_SRCQUENCH */ + struct icmp_msg_quote redirect; /* ICMP_REDIRECT (set to 0) */ + uint32_t rtrsolicit; /* ICMP_RTRSOLICIT */ + struct icmp_msg_rtradvert rtradvert; /* ICMP_RTRADVERT */ + struct icmp_msg_quote timexceed; /* ICMP_TIMEXCEED */ + struct icmp_msg_quote paramprob; /* ICMP_PARAMPROB */ + struct icmp_msg_tstamp tstamp; /* ICMP_TSTAMP{REPLY} */ + struct icmp_msg_idseq info; /* ICMP_INFO{REPLY} */ + struct icmp_msg_mask mask; /* ICMP_MASK{REPLY} */ + struct icmp_msg_traceroute traceroute; /* ICMP_TRACEROUTE */ + struct icmp_msg_idseq dns; /* ICMP_DNS */ + struct icmp_msg_dnsreply dnsreply; /* ICMP_DNSREPLY */ +}; + +#ifndef __GNUC__ +# pragma pack() +#endif + +#define icmp_pack_hdr(hdr, type, code) do { \ + struct icmp_hdr *icmp_pack_p = (struct icmp_hdr *)(hdr); \ + icmp_pack_p->icmp_type = type; icmp_pack_p->icmp_code = code; \ +} while (0) + 
+#define icmp_pack_hdr_echo(hdr, type, code, id, seq, data, len) do { \ + struct icmp_msg_echo *echo_pack_p = (struct icmp_msg_echo *) \ + ((uint8_t *)(hdr) + ICMP_HDR_LEN); \ + icmp_pack_hdr(hdr, type, code); \ + echo_pack_p->icmp_id = htons(id); \ + echo_pack_p->icmp_seq = htons(seq); \ + memmove(echo_pack_p->icmp_data, data, len); \ +} while (0) + +#define icmp_pack_hdr_quote(hdr, type, code, word, pkt, len) do { \ + struct icmp_msg_quote *quote_pack_p = (struct icmp_msg_quote *) \ + ((uint8_t *)(hdr) + ICMP_HDR_LEN); \ + icmp_pack_hdr(hdr, type, code); \ + quote_pack_p->icmp_void = htonl(word); \ + memmove(quote_pack_p->icmp_ip, pkt, len); \ +} while (0) + +#define icmp_pack_hdr_mask(hdr, type, code, id, seq, mask) do { \ + struct icmp_msg_mask *mask_pack_p = (struct icmp_msg_mask *) \ + ((uint8_t *)(hdr) + ICMP_HDR_LEN); \ + icmp_pack_hdr(hdr, type, code); \ + mask_pack_p->icmp_id = htons(id); \ + mask_pack_p->icmp_seq = htons(seq); \ + mask_pack_p->icmp_mask = htonl(mask); \ +} while (0) + +#define icmp_pack_hdr_needfrag(hdr, type, code, mtu, pkt, len) do { \ + struct icmp_msg_needfrag *frag_pack_p = \ + (struct icmp_msg_needfrag *)((uint8_t *)(hdr) + ICMP_HDR_LEN); \ + icmp_pack_hdr(hdr, type, code); \ + frag_pack_p->icmp_void = 0; \ + frag_pack_p->icmp_mtu = htons(mtu); \ + memmove(frag_pack_p->icmp_ip, pkt, len); \ +} while (0) + +#endif /* DNET_ICMP_H */ diff --git a/ext/dnet/dnet/intf.h b/ext/dnet/dnet/intf.h new file mode 100644 index 000000000..38acd4356 --- /dev/null +++ b/ext/dnet/dnet/intf.h @@ -0,0 +1,68 @@ +/* + * intf.c + * + * Network interface operations. 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: intf.h,v 1.16 2004/01/13 07:41:09 dugsong Exp $ + */ + +#ifndef DNET_INTF_H +#define DNET_INTF_H + +/* + * Interface entry + */ +#define INTF_NAME_LEN 16 + +struct intf_entry { + u_int intf_len; /* length of entry */ + char intf_name[INTF_NAME_LEN]; /* interface name */ + u_short intf_type; /* interface type (r/o) */ + u_short intf_flags; /* interface flags */ + u_int intf_mtu; /* interface MTU */ + struct addr intf_addr; /* interface address */ + struct addr intf_dst_addr; /* point-to-point dst */ + struct addr intf_link_addr; /* link-layer address */ + u_int intf_alias_num; /* number of aliases */ + struct addr intf_alias_addrs __flexarr; /* array of aliases */ +}; + +/* + * MIB-II interface types - http://www.iana.org/assignments/ianaiftype-mib + */ +#define INTF_TYPE_OTHER 1 /* other */ +#define INTF_TYPE_ETH 6 /* Ethernet */ +#define INTF_TYPE_TOKENRING 9 /* Token Ring */ +#define INTF_TYPE_FDDI 15 /* FDDI */ +#define INTF_TYPE_PPP 23 /* Point-to-Point Protocol */ +#define INTF_TYPE_LOOPBACK 24 /* software loopback */ +#define INTF_TYPE_SLIP 28 /* Serial Line Interface Protocol */ +#define INTF_TYPE_TUN 53 /* proprietary virtual/internal */ + +/* + * Interface flags + */ +#define INTF_FLAG_UP 0x01 /* enable interface */ +#define INTF_FLAG_LOOPBACK 0x02 /* is a loopback net (r/o) */ +#define INTF_FLAG_POINTOPOINT 0x04 /* point-to-point link (r/o) */ +#define INTF_FLAG_NOARP 0x08 /* disable ARP */ +#define INTF_FLAG_BROADCAST 0x10 /* supports broadcast (r/o) */ +#define INTF_FLAG_MULTICAST 0x20 /* supports multicast (r/o) */ + +typedef struct intf_handle intf_t; + +typedef int (*intf_handler)(const struct intf_entry *entry, void *arg); + +__BEGIN_DECLS +intf_t *intf_open(void); +int intf_get(intf_t *i, struct intf_entry *entry); +int intf_get_src(intf_t *i, struct intf_entry *entry, struct addr *src); +int intf_get_dst(intf_t *i, struct intf_entry *entry, struct addr *dst); +int 
intf_set(intf_t *i, const struct intf_entry *entry); +int intf_loop(intf_t *i, intf_handler callback, void *arg); +intf_t *intf_close(intf_t *i); +__END_DECLS + +#endif /* DNET_INTF_H */ diff --git a/ext/dnet/dnet/ip.h b/ext/dnet/dnet/ip.h new file mode 100644 index 000000000..95b7718fb --- /dev/null +++ b/ext/dnet/dnet/ip.h @@ -0,0 +1,487 @@ +/* + * ip.h + * + * Internet Protocol (RFC 791). + * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: ip.h,v 1.23 2003/03/16 17:39:17 dugsong Exp $ + */ + +#ifndef DNET_IP_H +#define DNET_IP_H + +#define IP_ADDR_LEN 4 /* IP address length */ +#define IP_ADDR_BITS 32 /* IP address bits */ + +#define IP_HDR_LEN 20 /* base IP header length */ +#define IP_OPT_LEN 2 /* base IP option length */ +#define IP_OPT_LEN_MAX 40 +#define IP_HDR_LEN_MAX (IP_HDR_LEN + IP_OPT_LEN_MAX) + +#define IP_LEN_MAX 65535 +#define IP_LEN_MIN IP_HDR_LEN + +typedef uint32_t ip_addr_t; + +#ifndef __GNUC__ +# define __attribute__(x) +# pragma pack(1) +#endif + +/* + * IP header, without options + */ +struct ip_hdr { +#if DNET_BYTESEX == DNET_BIG_ENDIAN + uint8_t ip_v:4, /* version */ + ip_hl:4; /* header length (incl any options) */ +#elif DNET_BYTESEX == DNET_LIL_ENDIAN + uint8_t ip_hl:4, + ip_v:4; +#else +# error "need to include <dnet.h>" +#endif + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length (incl header) */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset and flags */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + ip_addr_t ip_src; /* source address */ + ip_addr_t ip_dst; /* destination address */ +}; + +/* + * Type of service (ip_tos), RFC 1349 ("obsoleted by RFC 2474") + */ +#define IP_TOS_DEFAULT 0x00 /* default */ +#define IP_TOS_LOWDELAY 0x10 /* low delay */ +#define IP_TOS_THROUGHPUT 0x08 /* high throughput */ +#define IP_TOS_RELIABILITY 0x04 /* high reliability */ +#define IP_TOS_LOWCOST 0x02 /* low monetary cost - 
XXX */ +#define IP_TOS_ECT 0x02 /* ECN-capable transport */ +#define IP_TOS_CE 0x01 /* congestion experienced */ + +/* + * IP precedence (high 3 bits of ip_tos), hopefully unused + */ +#define IP_TOS_PREC_ROUTINE 0x00 +#define IP_TOS_PREC_PRIORITY 0x20 +#define IP_TOS_PREC_IMMEDIATE 0x40 +#define IP_TOS_PREC_FLASH 0x60 +#define IP_TOS_PREC_FLASHOVERRIDE 0x80 +#define IP_TOS_PREC_CRITIC_ECP 0xa0 +#define IP_TOS_PREC_INTERNETCONTROL 0xc0 +#define IP_TOS_PREC_NETCONTROL 0xe0 + +/* + * Fragmentation flags (ip_off) + */ +#define IP_RF 0x8000 /* reserved */ +#define IP_DF 0x4000 /* don't fragment */ +#define IP_MF 0x2000 /* more fragments (not last frag) */ +#define IP_OFFMASK 0x1fff /* mask for fragment offset */ + +/* + * Time-to-live (ip_ttl), seconds + */ +#define IP_TTL_DEFAULT 64 /* default ttl, RFC 1122, RFC 1340 */ +#define IP_TTL_MAX 255 /* maximum ttl */ + +/* + * Protocol (ip_p) - http://www.iana.org/assignments/protocol-numbers + */ +#define IP_PROTO_IP 0 /* dummy for IP */ +#define IP_PROTO_HOPOPTS IP_PROTO_IP /* IPv6 hop-by-hop options */ +#define IP_PROTO_ICMP 1 /* ICMP */ +#define IP_PROTO_IGMP 2 /* IGMP */ +#define IP_PROTO_GGP 3 /* gateway-gateway protocol */ +#define IP_PROTO_IPIP 4 /* IP in IP */ +#define IP_PROTO_ST 5 /* ST datagram mode */ +#define IP_PROTO_TCP 6 /* TCP */ +#define IP_PROTO_CBT 7 /* CBT */ +#define IP_PROTO_EGP 8 /* exterior gateway protocol */ +#define IP_PROTO_IGP 9 /* interior gateway protocol */ +#define IP_PROTO_BBNRCC 10 /* BBN RCC monitoring */ +#define IP_PROTO_NVP 11 /* Network Voice Protocol */ +#define IP_PROTO_PUP 12 /* PARC universal packet */ +#define IP_PROTO_ARGUS 13 /* ARGUS */ +#define IP_PROTO_EMCON 14 /* EMCON */ +#define IP_PROTO_XNET 15 /* Cross Net Debugger */ +#define IP_PROTO_CHAOS 16 /* Chaos */ +#define IP_PROTO_UDP 17 /* UDP */ +#define IP_PROTO_MUX 18 /* multiplexing */ +#define IP_PROTO_DCNMEAS 19 /* DCN measurement */ +#define IP_PROTO_HMP 20 /* Host Monitoring Protocol */ +#define IP_PROTO_PRM 21 /* 
Packet Radio Measurement */ +#define IP_PROTO_IDP 22 /* Xerox NS IDP */ +#define IP_PROTO_TRUNK1 23 /* Trunk-1 */ +#define IP_PROTO_TRUNK2 24 /* Trunk-2 */ +#define IP_PROTO_LEAF1 25 /* Leaf-1 */ +#define IP_PROTO_LEAF2 26 /* Leaf-2 */ +#define IP_PROTO_RDP 27 /* "Reliable Datagram" proto */ +#define IP_PROTO_IRTP 28 /* Inet Reliable Transaction */ +#define IP_PROTO_TP 29 /* ISO TP class 4 */ +#define IP_PROTO_NETBLT 30 /* Bulk Data Transfer */ +#define IP_PROTO_MFPNSP 31 /* MFE Network Services */ +#define IP_PROTO_MERITINP 32 /* Merit Internodal Protocol */ +#define IP_PROTO_SEP 33 /* Sequential Exchange proto */ +#define IP_PROTO_3PC 34 /* Third Party Connect proto */ +#define IP_PROTO_IDPR 35 /* Interdomain Policy Route */ +#define IP_PROTO_XTP 36 /* Xpress Transfer Protocol */ +#define IP_PROTO_DDP 37 /* Datagram Delivery Proto */ +#define IP_PROTO_CMTP 38 /* IDPR Ctrl Message Trans */ +#define IP_PROTO_TPPP 39 /* TP++ Transport Protocol */ +#define IP_PROTO_IL 40 /* IL Transport Protocol */ +#define IP_PROTO_IPV6 41 /* IPv6 */ +#define IP_PROTO_SDRP 42 /* Source Demand Routing */ +#define IP_PROTO_ROUTING 43 /* IPv6 routing header */ +#define IP_PROTO_FRAGMENT 44 /* IPv6 fragmentation header */ +#define IP_PROTO_RSVP 46 /* Reservation protocol */ +#define IP_PROTO_GRE 47 /* General Routing Encap */ +#define IP_PROTO_MHRP 48 /* Mobile Host Routing */ +#define IP_PROTO_ENA 49 /* ENA */ +#define IP_PROTO_ESP 50 /* Encap Security Payload */ +#define IP_PROTO_AH 51 /* Authentication Header */ +#define IP_PROTO_INLSP 52 /* Integated Net Layer Sec */ +#define IP_PROTO_SWIPE 53 /* SWIPE */ +#define IP_PROTO_NARP 54 /* NBMA Address Resolution */ +#define IP_PROTO_MOBILE 55 /* Mobile IP, RFC 2004 */ +#define IP_PROTO_TLSP 56 /* Transport Layer Security */ +#define IP_PROTO_SKIP 57 /* SKIP */ +#define IP_PROTO_ICMPV6 58 /* ICMP for IPv6 */ +#define IP_PROTO_NONE 59 /* IPv6 no next header */ +#define IP_PROTO_DSTOPTS 60 /* IPv6 destination options */ +#define 
IP_PROTO_ANYHOST 61 /* any host internal proto */ +#define IP_PROTO_CFTP 62 /* CFTP */ +#define IP_PROTO_ANYNET 63 /* any local network */ +#define IP_PROTO_EXPAK 64 /* SATNET and Backroom EXPAK */ +#define IP_PROTO_KRYPTOLAN 65 /* Kryptolan */ +#define IP_PROTO_RVD 66 /* MIT Remote Virtual Disk */ +#define IP_PROTO_IPPC 67 /* Inet Pluribus Packet Core */ +#define IP_PROTO_DISTFS 68 /* any distributed fs */ +#define IP_PROTO_SATMON 69 /* SATNET Monitoring */ +#define IP_PROTO_VISA 70 /* VISA Protocol */ +#define IP_PROTO_IPCV 71 /* Inet Packet Core Utility */ +#define IP_PROTO_CPNX 72 /* Comp Proto Net Executive */ +#define IP_PROTO_CPHB 73 /* Comp Protocol Heart Beat */ +#define IP_PROTO_WSN 74 /* Wang Span Network */ +#define IP_PROTO_PVP 75 /* Packet Video Protocol */ +#define IP_PROTO_BRSATMON 76 /* Backroom SATNET Monitor */ +#define IP_PROTO_SUNND 77 /* SUN ND Protocol */ +#define IP_PROTO_WBMON 78 /* WIDEBAND Monitoring */ +#define IP_PROTO_WBEXPAK 79 /* WIDEBAND EXPAK */ +#define IP_PROTO_EON 80 /* ISO CNLP */ +#define IP_PROTO_VMTP 81 /* Versatile Msg Transport*/ +#define IP_PROTO_SVMTP 82 /* Secure VMTP */ +#define IP_PROTO_VINES 83 /* VINES */ +#define IP_PROTO_TTP 84 /* TTP */ +#define IP_PROTO_NSFIGP 85 /* NSFNET-IGP */ +#define IP_PROTO_DGP 86 /* Dissimilar Gateway Proto */ +#define IP_PROTO_TCF 87 /* TCF */ +#define IP_PROTO_EIGRP 88 /* EIGRP */ +#define IP_PROTO_OSPF 89 /* Open Shortest Path First */ +#define IP_PROTO_SPRITERPC 90 /* Sprite RPC Protocol */ +#define IP_PROTO_LARP 91 /* Locus Address Resolution */ +#define IP_PROTO_MTP 92 /* Multicast Transport Proto */ +#define IP_PROTO_AX25 93 /* AX.25 Frames */ +#define IP_PROTO_IPIPENCAP 94 /* yet-another IP encap */ +#define IP_PROTO_MICP 95 /* Mobile Internet Ctrl */ +#define IP_PROTO_SCCSP 96 /* Semaphore Comm Sec Proto */ +#define IP_PROTO_ETHERIP 97 /* Ethernet in IPv4 */ +#define IP_PROTO_ENCAP 98 /* encapsulation header */ +#define IP_PROTO_ANYENC 99 /* private encryption scheme */ +#define 
IP_PROTO_GMTP 100 /* GMTP */ +#define IP_PROTO_IFMP 101 /* Ipsilon Flow Mgmt Proto */ +#define IP_PROTO_PNNI 102 /* PNNI over IP */ +#define IP_PROTO_PIM 103 /* Protocol Indep Multicast */ +#define IP_PROTO_ARIS 104 /* ARIS */ +#define IP_PROTO_SCPS 105 /* SCPS */ +#define IP_PROTO_QNX 106 /* QNX */ +#define IP_PROTO_AN 107 /* Active Networks */ +#define IP_PROTO_IPCOMP 108 /* IP Payload Compression */ +#define IP_PROTO_SNP 109 /* Sitara Networks Protocol */ +#define IP_PROTO_COMPAQPEER 110 /* Compaq Peer Protocol */ +#define IP_PROTO_IPXIP 111 /* IPX in IP */ +#define IP_PROTO_VRRP 112 /* Virtual Router Redundancy */ +#define IP_PROTO_PGM 113 /* PGM Reliable Transport */ +#define IP_PROTO_ANY0HOP 114 /* 0-hop protocol */ +#define IP_PROTO_L2TP 115 /* Layer 2 Tunneling Proto */ +#define IP_PROTO_DDX 116 /* D-II Data Exchange (DDX) */ +#define IP_PROTO_IATP 117 /* Interactive Agent Xfer */ +#define IP_PROTO_STP 118 /* Schedule Transfer Proto */ +#define IP_PROTO_SRP 119 /* SpectraLink Radio Proto */ +#define IP_PROTO_UTI 120 /* UTI */ +#define IP_PROTO_SMP 121 /* Simple Message Protocol */ +#define IP_PROTO_SM 122 /* SM */ +#define IP_PROTO_PTP 123 /* Performance Transparency */ +#define IP_PROTO_ISIS 124 /* ISIS over IPv4 */ +#define IP_PROTO_FIRE 125 /* FIRE */ +#define IP_PROTO_CRTP 126 /* Combat Radio Transport */ +#define IP_PROTO_CRUDP 127 /* Combat Radio UDP */ +#define IP_PROTO_SSCOPMCE 128 /* SSCOPMCE */ +#define IP_PROTO_IPLT 129 /* IPLT */ +#define IP_PROTO_SPS 130 /* Secure Packet Shield */ +#define IP_PROTO_PIPE 131 /* Private IP Encap in IP */ +#define IP_PROTO_SCTP 132 /* Stream Ctrl Transmission */ +#define IP_PROTO_FC 133 /* Fibre Channel */ +#define IP_PROTO_RSVPIGN 134 /* RSVP-E2E-IGNORE */ +#define IP_PROTO_RAW 255 /* Raw IP packets */ +#define IP_PROTO_RESERVED IP_PROTO_RAW /* Reserved */ +#define IP_PROTO_MAX 255 + +/* + * Option types (opt_type) - http://www.iana.org/assignments/ip-parameters + */ +#define IP_OPT_CONTROL 0x00 /* control */ 
+#define IP_OPT_DEBMEAS 0x40 /* debugging & measurement */ +#define IP_OPT_COPY 0x80 /* copy into all fragments */ +#define IP_OPT_RESERVED1 0x20 +#define IP_OPT_RESERVED2 0x60 + +#define IP_OPT_EOL 0 /* end of option list */ +#define IP_OPT_NOP 1 /* no operation */ +#define IP_OPT_SEC (2|IP_OPT_COPY) /* DoD basic security */ +#define IP_OPT_LSRR (3|IP_OPT_COPY) /* loose source route */ +#define IP_OPT_TS (4|IP_OPT_DEBMEAS) /* timestamp */ +#define IP_OPT_ESEC (5|IP_OPT_COPY) /* DoD extended security */ +#define IP_OPT_CIPSO (6|IP_OPT_COPY) /* commercial security */ +#define IP_OPT_RR 7 /* record route */ +#define IP_OPT_SATID (8|IP_OPT_COPY) /* stream ID (obsolete) */ +#define IP_OPT_SSRR (9|IP_OPT_COPY) /* strict source route */ +#define IP_OPT_ZSU 10 /* experimental measurement */ +#define IP_OPT_MTUP 11 /* MTU probe */ +#define IP_OPT_MTUR 12 /* MTU reply */ +#define IP_OPT_FINN (13|IP_OPT_COPY|IP_OPT_DEBMEAS) /* exp flow control */ +#define IP_OPT_VISA (14|IP_OPT_COPY) /* exp access control */ +#define IP_OPT_ENCODE 15 /* ??? 
*/ +#define IP_OPT_IMITD (16|IP_OPT_COPY) /* IMI traffic descriptor */ +#define IP_OPT_EIP (17|IP_OPT_COPY) /* extended IP, RFC 1385 */ +#define IP_OPT_TR (18|IP_OPT_DEBMEAS) /* traceroute */ +#define IP_OPT_ADDEXT (19|IP_OPT_COPY) /* IPv7 ext addr, RFC 1475 */ +#define IP_OPT_RTRALT (20|IP_OPT_COPY) /* router alert, RFC 2113 */ +#define IP_OPT_SDB (21|IP_OPT_COPY) /* directed bcast, RFC 1770 */ +#define IP_OPT_NSAPA (22|IP_OPT_COPY) /* NSAP addresses */ +#define IP_OPT_DPS (23|IP_OPT_COPY) /* dynamic packet state */ +#define IP_OPT_UMP (24|IP_OPT_COPY) /* upstream multicast */ +#define IP_OPT_MAX 25 + +#define IP_OPT_COPIED(o) ((o) & 0x80) +#define IP_OPT_CLASS(o) ((o) & 0x60) +#define IP_OPT_NUMBER(o) ((o) & 0x1f) +#define IP_OPT_TYPEONLY(o) ((o) == IP_OPT_EOL || (o) == IP_OPT_NOP) + +/* + * Security option data - RFC 791, 3.1 + */ +struct ip_opt_data_sec { + uint16_t s; /* security */ + uint16_t c; /* compartments */ + uint16_t h; /* handling restrictions */ + uint8_t tcc[3]; /* transmission control code */ +} __attribute__((__packed__)); + +#define IP_OPT_SEC_UNCLASS 0x0000 /* unclassified */ +#define IP_OPT_SEC_CONFID 0xf135 /* confidential */ +#define IP_OPT_SEC_EFTO 0x789a /* EFTO */ +#define IP_OPT_SEC_MMMM 0xbc4d /* MMMM */ +#define IP_OPT_SEC_PROG 0x5e26 /* PROG */ +#define IP_OPT_SEC_RESTR 0xaf13 /* restricted */ +#define IP_OPT_SEC_SECRET 0xd788 /* secret */ +#define IP_OPT_SEC_TOPSECRET 0x6bc5 /* top secret */ + +/* + * {Loose Source, Record, Strict Source} Route option data - RFC 791, 3.1 + */ +struct ip_opt_data_rr { + uint8_t ptr; /* from start of option, >= 4 */ + uint32_t iplist __flexarr; /* list of IP addresses */ +} __attribute__((__packed__)); + +/* + * Timestamp option data - RFC 791, 3.1 + */ +struct ip_opt_data_ts { + uint8_t ptr; /* from start of option, >= 5 */ +#if DNET_BYTESEX == DNET_BIG_ENDIAN + uint8_t oflw:4, /* number of IPs skipped */ + flg:4; /* address[ / timestamp] flag */ +#elif DNET_BYTESEX == DNET_LIL_ENDIAN + uint8_t flg:4, 
+ oflw:4; +#endif + uint32_t ipts __flexarr; /* IP address [/ timestamp] pairs */ +} __attribute__((__packed__)); + +#define IP_OPT_TS_TSONLY 0 /* timestamps only */ +#define IP_OPT_TS_TSADDR 1 /* IP address / timestamp pairs */ +#define IP_OPT_TS_PRESPEC 3 /* IP address / zero timestamp pairs */ + +/* + * Traceroute option data - RFC 1393, 2.2 + */ +struct ip_opt_data_tr { + uint16_t id; /* ID number */ + uint16_t ohc; /* outbound hop count */ + uint16_t rhc; /* return hop count */ + uint32_t origip; /* originator IP address */ +} __attribute__((__packed__)); + +/* + * IP option (following IP header) + */ +struct ip_opt { + uint8_t opt_type; /* option type */ + uint8_t opt_len; /* option length >= IP_OPT_LEN */ + union ip_opt_data { + struct ip_opt_data_sec sec; /* IP_OPT_SEC */ + struct ip_opt_data_rr rr; /* IP_OPT_{L,S}RR */ + struct ip_opt_data_ts ts; /* IP_OPT_TS */ + uint16_t satid; /* IP_OPT_SATID */ + uint16_t mtu; /* IP_OPT_MTU{P,R} */ + struct ip_opt_data_tr tr; /* IP_OPT_TR */ + uint32_t addext[2]; /* IP_OPT_ADDEXT */ + uint16_t rtralt; /* IP_OPT_RTRALT */ + uint32_t sdb[9]; /* IP_OPT_SDB */ + uint8_t data8[IP_OPT_LEN_MAX - IP_OPT_LEN]; + } opt_data; +} __attribute__((__packed__)); + +#ifndef __GNUC__ +# pragma pack() +#endif + +/* + * Classful addressing + */ +#define IP_CLASSA(i) (((uint32_t)(i) & htonl(0x80000000)) == \ + htonl(0x00000000)) +#define IP_CLASSA_NET (htonl(0xff000000)) +#define IP_CLASSA_NSHIFT 24 +#define IP_CLASSA_HOST (htonl(0x00ffffff)) +#define IP_CLASSA_MAX 128 + +#define IP_CLASSB(i) (((uint32_t)(i) & htonl(0xc0000000)) == \ + htonl(0x80000000)) +#define IP_CLASSB_NET (htonl(0xffff0000)) +#define IP_CLASSB_NSHIFT 16 +#define IP_CLASSB_HOST (htonl(0x0000ffff)) +#define IP_CLASSB_MAX 65536 + +#define IP_CLASSC(i) (((uint32_t)(i) & htonl(0xe0000000)) == \ + htonl(0xc0000000)) +#define IP_CLASSC_NET (htonl(0xffffff00)) +#define IP_CLASSC_NSHIFT 8 +#define IP_CLASSC_HOST (htonl(0x000000ff)) + +#define IP_CLASSD(i) (((uint32_t)(i) & 
htonl(0xf0000000)) == \ + htonl(0xe0000000)) +/* These ones aren't really net and host fields, but routing needn't know. */ +#define IP_CLASSD_NET (htonl(0xf0000000)) +#define IP_CLASSD_NSHIFT 28 +#define IP_CLASSD_HOST (htonl(0x0fffffff)) +#define IP_MULTICAST(i) IP_CLASSD(i) + +#define IP_EXPERIMENTAL(i) (((uint32_t)(i) & htonl(0xf0000000)) == \ + htonl(0xf0000000)) +#define IP_BADCLASS(i) (((uint32_t)(i) & htonl(0xf0000000)) == \ + htonl(0xf0000000)) +#define IP_LOCAL_GROUP(i) (((uint32_t)(i) & htonl(0xffffff00)) == \ + htonl(0xe0000000)) +/* + * Reserved addresses + */ +#define IP_ADDR_ANY (htonl(0x00000000)) /* 0.0.0.0 */ +#define IP_ADDR_BROADCAST (htonl(0xffffffff)) /* 255.255.255.255 */ +#define IP_ADDR_LOOPBACK (htonl(0x7f000001)) /* 127.0.0.1 */ +#define IP_ADDR_MCAST_ALL (htonl(0xe0000001)) /* 224.0.0.1 */ +#define IP_ADDR_MCAST_LOCAL (htonl(0xe00000ff)) /* 224.0.0.255 */ + +#define ip_pack_hdr(hdr, tos, len, id, off, ttl, p, src, dst) do { \ + struct ip_hdr *ip_pack_p = (struct ip_hdr *)(hdr); \ + ip_pack_p->ip_v = 4; ip_pack_p->ip_hl = 5; \ + ip_pack_p->ip_tos = tos; ip_pack_p->ip_len = htons(len); \ + ip_pack_p->ip_id = htons(id); ip_pack_p->ip_off = htons(off); \ + ip_pack_p->ip_ttl = ttl; ip_pack_p->ip_p = p; \ + ip_pack_p->ip_src = src; ip_pack_p->ip_dst = dst; \ +} while (0) + +typedef struct ip_handle ip_t; + +__BEGIN_DECLS +ip_t *ip_open(void); +size_t ip_send(ip_t *i, const void *buf, size_t len); +ip_t *ip_close(ip_t *i); + +char *ip_ntop(const ip_addr_t *ip, char *dst, size_t len); +int ip_pton(const char *src, ip_addr_t *dst); +char *ip_ntoa(const ip_addr_t *ip); +#define ip_aton ip_pton + +size_t ip_add_option(void *buf, size_t len, + int proto, const void *optbuf, size_t optlen); +void ip_checksum(void *buf, size_t len); + +inline int +ip_cksum_add(const void *buf, size_t len, int cksum) +{ + uint16_t *sp = (uint16_t *)buf; + int n, sn; + + sn = len / 2; + n = (sn + 15) / 16; + + /* XXX - unroll loop using Duff's device. 
*/ + switch (sn % 16) { + case 0: do { + cksum += *sp++; + case 15: + cksum += *sp++; + case 14: + cksum += *sp++; + case 13: + cksum += *sp++; + case 12: + cksum += *sp++; + case 11: + cksum += *sp++; + case 10: + cksum += *sp++; + case 9: + cksum += *sp++; + case 8: + cksum += *sp++; + case 7: + cksum += *sp++; + case 6: + cksum += *sp++; + case 5: + cksum += *sp++; + case 4: + cksum += *sp++; + case 3: + cksum += *sp++; + case 2: + cksum += *sp++; + case 1: + cksum += *sp++; + } while (--n > 0); + } + if (len & 1) + cksum += htons(*(u_char *)sp << 8); + + return (cksum); +} + +inline uint16_t +ip_cksum_carry(int x) +{ + x = (x >> 16) + (x & 0xffff); + return ~(x + (x >> 16)) & 0xffff; +} + +__END_DECLS + +#endif /* DNET_IP_H */ diff --git a/ext/dnet/dnet/ip6.h b/ext/dnet/dnet/ip6.h new file mode 100644 index 000000000..7fae29b47 --- /dev/null +++ b/ext/dnet/dnet/ip6.h @@ -0,0 +1,183 @@ +/* + * ip6.h + * + * Internet Protocol, Version 6 (RFC 2460). + * + * Copyright (c) 2002 Dug Song <dugsong@monkey.org> + * + * $Id: ip6.h,v 1.6 2004/02/23 10:01:15 dugsong Exp $ + */ + +#ifndef DNET_IP6_H +#define DNET_IP6_H + +#define IP6_ADDR_LEN 16 +#define IP6_ADDR_BITS 128 + +#define IP6_HDR_LEN 40 /* IPv6 header length */ +#define IP6_LEN_MIN IP6_HDR_LEN +#define IP6_LEN_MAX 65535 /* non-jumbo payload */ + +#define IP6_MTU_MIN 1280 /* minimum MTU (1024 + 256) */ + +typedef struct ip6_addr { + uint8_t data[IP6_ADDR_LEN]; +} ip6_addr_t; + +#ifndef __GNUC__ +# define __attribute__(x) +# pragma pack(1) +#endif + +/* + * IPv6 header + */ +struct ip6_hdr { + union { + struct ip6_hdr_ctl { + uint32_t ip6_un1_flow; /* 20 bits of flow ID */ + uint16_t ip6_un1_plen; /* payload length */ + uint8_t ip6_un1_nxt; /* next header */ + uint8_t ip6_un1_hlim; /* hop limit */ + } ip6_un1; + uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ + } ip6_ctlun; + ip6_addr_t ip6_src; + ip6_addr_t ip6_dst; +} __attribute__((__packed__)); + +#define ip6_vfc ip6_ctlun.ip6_un2_vfc +#define 
ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow +#define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt /* IP_PROTO_* */ +#define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim + +#define IP6_VERSION 0x60 +#define IP6_VERSION_MASK 0xf0 /* ip6_vfc version */ + +#if DNET_BYTESEX == DNET_BIG_ENDIAN +#define IP6_FLOWINFO_MASK 0x0fffffff /* ip6_flow info (28 bits) */ +#define IP6_FLOWLABEL_MASK 0x000fffff /* ip6_flow label (20 bits) */ +#elif DNET_BYTESEX == DNET_LIL_ENDIAN +#define IP6_FLOWINFO_MASK 0xffffff0f /* ip6_flow info (28 bits) */ +#define IP6_FLOWLABEL_MASK 0xffff0f00 /* ip6_flow label (20 bits) */ +#endif + +/* + * Hop limit (ip6_hlim) + */ +#define IP6_HLIM_DEFAULT 64 +#define IP6_HLIM_MAX 255 + +/* + * Preferred extension header order from RFC 2460, 4.1: + * + * IP_PROTO_IPV6, IP_PROTO_HOPOPTS, IP_PROTO_DSTOPTS, IP_PROTO_ROUTING, + * IP_PROTO_FRAGMENT, IP_PROTO_AH, IP_PROTO_ESP, IP_PROTO_DSTOPTS, IP_PROTO_* + */ + +/* + * Routing header data (IP_PROTO_ROUTING) + */ +struct ip6_ext_data_routing { + uint8_t type; /* routing type */ + uint8_t segleft; /* segments left */ + /* followed by routing type specific data */ +} __attribute__((__packed__)); + +struct ip6_ext_data_routing0 { + uint8_t type; /* always zero */ + uint8_t segleft; /* segments left */ + uint8_t reserved; /* reserved field */ + uint8_t slmap[3]; /* strict/loose bit map */ + ip6_addr_t addr[1]; /* up to 23 addresses */ +} __attribute__((__packed__)); + +/* + * Fragment header data (IP_PROTO_FRAGMENT) + */ +struct ip6_ext_data_fragment { + uint16_t offlg; /* offset, reserved, and flag */ + uint32_t ident; /* identification */ +} __attribute__((__packed__)); + +/* + * Fragmentation offset, reserved, and flags (offlg) + */ +#if DNET_BYTESEX == DNET_BIG_ENDIAN +#define IP6_OFF_MASK 0xfff8 /* mask out offset from offlg */ +#define IP6_RESERVED_MASK 0x0006 /* reserved bits in offlg */ +#define IP6_MORE_FRAG 0x0001 /* more-fragments flag */ +#elif DNET_BYTESEX == 
DNET_LIL_ENDIAN +#define IP6_OFF_MASK 0xf8ff /* mask out offset from offlg */ +#define IP6_RESERVED_MASK 0x0600 /* reserved bits in offlg */ +#define IP6_MORE_FRAG 0x0100 /* more-fragments flag */ +#endif + +/* + * Option types, for IP_PROTO_HOPOPTS, IP_PROTO_DSTOPTS headers + */ +#define IP6_OPT_PAD1 0x00 /* 00 0 00000 */ +#define IP6_OPT_PADN 0x01 /* 00 0 00001 */ +#define IP6_OPT_JUMBO 0xC2 /* 11 0 00010 = 194 */ +#define IP6_OPT_JUMBO_LEN 6 +#define IP6_OPT_RTALERT 0x05 /* 00 0 00101 */ +#define IP6_OPT_RTALERT_LEN 4 +#define IP6_OPT_RTALERT_MLD 0 /* Datagram contains an MLD message */ +#define IP6_OPT_RTALERT_RSVP 1 /* Datagram contains an RSVP message */ +#define IP6_OPT_RTALERT_ACTNET 2 /* contains an Active Networks msg */ +#define IP6_OPT_LEN_MIN 2 + +#define IP6_OPT_TYPE(o) ((o) & 0xC0) /* high 2 bits of opt_type */ +#define IP6_OPT_TYPE_SKIP 0x00 /* continue processing on failure */ +#define IP6_OPT_TYPE_DISCARD 0x40 /* discard packet on failure */ +#define IP6_OPT_TYPE_FORCEICMP 0x80 /* discard and send ICMP on failure */ +#define IP6_OPT_TYPE_ICMP 0xC0 /* ...only if non-multicast dst */ + +#define IP6_OPT_MUTABLE 0x20 /* option data may change en route */ + +/* + * Extension header (chained via {ip6,ext}_nxt, following IPv6 header) + */ +struct ip6_ext_hdr { + uint8_t ext_nxt; /* next header */ + uint8_t ext_len; /* following length in units of 8 octets */ + union { + struct ip6_ext_data_routing routing; + struct ip6_ext_data_fragment fragment; + } ext_data; +} __attribute__((__packed__)); + +#ifndef __GNUC__ +# pragma pack() +#endif + +/* + * Reserved addresses + */ +#define IP6_ADDR_UNSPEC \ + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" +#define IP6_ADDR_LOOPBACK \ + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" + +#define ip6_pack_hdr(hdr, fc, fl, plen, nxt, hlim, src, dst) do { \ + struct ip6_hdr *ip6 = (struct ip6_hdr *)(hdr); \ + ip6->ip6_flow = htonl(((uint32_t)(fc) << 28) & \ + (IP6_FLOWLABEL_MASK | 
(fl))); \ + ip6->ip6_vfc = (IP6_VERSION | ((fc) >> 4)); \ + ip6->ip6_plen = htons((plen)); \ + ip6->ip6_nxt = (nxt); ip6->ip6_hlim = (hlim); \ + memmove(&ip6->ip6_src, &(src), IP6_ADDR_LEN); \ + memmove(&ip6->ip6_dst, &(dst), IP6_ADDR_LEN); \ +} while (0); + +__BEGIN_DECLS +char *ip6_ntop(const ip6_addr_t *ip6, char *dst, size_t size); +int ip6_pton(const char *src, ip6_addr_t *dst); +char *ip6_ntoa(const ip6_addr_t *ip6); +#define ip6_aton ip6_pton + +void ip6_checksum(void *buf, size_t len); +__END_DECLS + +#endif /* DNET_IP6_H */ diff --git a/ext/dnet/dnet/os.h b/ext/dnet/dnet/os.h new file mode 100644 index 000000000..cae244781 --- /dev/null +++ b/ext/dnet/dnet/os.h @@ -0,0 +1,117 @@ +/* + * os.h + * + * Sleazy OS-specific defines. + * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: os.h,v 1.10 2004/05/04 03:19:42 dugsong Exp $ + */ + +#ifndef DNET_OS_H +#define DNET_OS_H + +#ifdef _WIN32 +# include <windows.h> +# include <winsock2.h> +# include <stdint.h> +/* XXX */ +# undef IP_OPT_LSRR +# undef IP_OPT_TS +# undef IP_OPT_RR +# undef IP_OPT_SSRR +#else +# include <sys/param.h> +# include <sys/types.h> +# include <sys/socket.h> +# include <netinet/in.h> +# include <arpa/inet.h> +# include <netdb.h> +# ifdef __bsdi__ +# include <machine/types.h> + typedef u_int8_t uint8_t; + typedef u_int16_t uint16_t; + typedef u_int32_t uint32_t; + typedef u_int64_t uint64_t; +# else +# include <inttypes.h> +# endif +#endif + +#define DNET_LIL_ENDIAN 1234 +#define DNET_BIG_ENDIAN 4321 + +/* BSD and IRIX */ +#ifdef BYTE_ORDER +#if BYTE_ORDER == LITTLE_ENDIAN +# define DNET_BYTESEX DNET_LIL_ENDIAN +#elif BYTE_ORDER == BIG_ENDIAN +# define DNET_BYTESEX DNET_BIG_ENDIAN +#endif +#endif + +/* Linux */ +#ifdef __BYTE_ORDER +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define DNET_BYTESEX DNET_LIL_ENDIAN +#elif __BYTE_ORDER == __BIG_ENDIAN +# define DNET_BYTESEX DNET_BIG_ENDIAN +#endif +#endif + +/* Solaris */ +#if defined(_BIT_FIELDS_LTOH) +# define DNET_BYTESEX 
DNET_LIL_ENDIAN +#elif defined (_BIT_FIELDS_HTOL) +# define DNET_BYTESEX DNET_BIG_ENDIAN +#endif + +/* Nastiness from old BIND code. */ +#ifndef DNET_BYTESEX +# if defined(vax) || defined(ns32000) || defined(sun386) || defined(i386) || \ + defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \ + defined(__alpha__) || defined(__alpha) +# define DNET_BYTESEX DNET_LIL_ENDIAN +# elif defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \ + defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \ + defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\ + defined(apollo) || defined(__convex__) || defined(_CRAY) || \ + defined(__hppa) || defined(__hp9000) || \ + defined(__hp9000s300) || defined(__hp9000s700) || defined(__ia64) || \ + defined (BIT_ZERO_ON_LEFT) || defined(m68k) +# define DNET_BYTESEX DNET_BIG_ENDIAN +# else +# error "bytesex unknown" +# endif +#endif + +/* C++ support. */ +#undef __BEGIN_DECLS +#undef __END_DECLS +#ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# define __END_DECLS } /* extern "C" */ +#else +# define __BEGIN_DECLS +# define __END_DECLS +#endif + +/* Support for flexible arrays. */ +#undef __flexarr +#if defined(__GNUC__) && ((__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)) +/* GCC 2.97 supports C99 flexible array members. */ +# define __flexarr [] +#else +# ifdef __GNUC__ +# define __flexarr [0] +# else +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __flexarr [] +# else +/* Some other non-C99 compiler. Approximate with [1]. */ +# define __flexarr [1] +# endif +# endif +#endif + +#endif /* DNET_OS_H */ diff --git a/ext/dnet/dnet/rand.h b/ext/dnet/dnet/rand.h new file mode 100644 index 000000000..49121930c --- /dev/null +++ b/ext/dnet/dnet/rand.h @@ -0,0 +1,33 @@ +/* + * rand.h + * + * Pseudo-random number generation, based on OpenBSD arc4random(). 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * Copyright (c) 1996 David Mazieres <dm@lcs.mit.edu> + * + * $Id: rand.h,v 1.4 2002/04/07 19:01:25 dugsong Exp $ + */ + +#ifndef DNET_RAND_H +#define DNET_RAND_H + +typedef struct rand_handle rand_t; + +__BEGIN_DECLS +rand_t *rand_open(void); + +int rand_get(rand_t *r, void *buf, size_t len); +int rand_set(rand_t *r, const void *seed, size_t len); +int rand_add(rand_t *r, const void *buf, size_t len); + +uint8_t rand_uint8(rand_t *r); +uint16_t rand_uint16(rand_t *r); +uint32_t rand_uint32(rand_t *r); + +int rand_shuffle(rand_t *r, void *base, size_t nmemb, size_t size); + +rand_t *rand_close(rand_t *r); +__END_DECLS + +#endif /* DNET_RAND_H */ diff --git a/ext/dnet/dnet/route.h b/ext/dnet/dnet/route.h new file mode 100644 index 000000000..74c21419c --- /dev/null +++ b/ext/dnet/dnet/route.h @@ -0,0 +1,35 @@ +/* + * route.h + * + * Kernel route table operations. + * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: route.h,v 1.6 2002/02/04 04:02:22 dugsong Exp $ + */ + +#ifndef DNET_ROUTE_H +#define DNET_ROUTE_H + +/* + * Routing table entry + */ +struct route_entry { + struct addr route_dst; /* destination address */ + struct addr route_gw; /* gateway address */ +}; + +typedef struct route_handle route_t; + +typedef int (*route_handler)(const struct route_entry *entry, void *arg); + +__BEGIN_DECLS +route_t *route_open(void); +int route_add(route_t *r, const struct route_entry *entry); +int route_delete(route_t *r, const struct route_entry *entry); +int route_get(route_t *r, struct route_entry *entry); +int route_loop(route_t *r, route_handler callback, void *arg); +route_t *route_close(route_t *r); +__END_DECLS + +#endif /* DNET_ROUTE_H */ diff --git a/ext/dnet/dnet/tcp.h b/ext/dnet/dnet/tcp.h new file mode 100644 index 000000000..008946384 --- /dev/null +++ b/ext/dnet/dnet/tcp.h @@ -0,0 +1,158 @@ +/* + * tcp.h + * + * Transmission Control Protocol (RFC 793). 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: tcp.h,v 1.17 2004/02/23 10:02:11 dugsong Exp $ + */ + +#ifndef DNET_TCP_H +#define DNET_TCP_H + +#define TCP_HDR_LEN 20 /* base TCP header length */ +#define TCP_OPT_LEN 2 /* base TCP option length */ +#define TCP_OPT_LEN_MAX 40 +#define TCP_HDR_LEN_MAX (TCP_HDR_LEN + TCP_OPT_LEN_MAX) + +#ifndef __GNUC__ +# define __attribute__(x) +# pragma pack(1) +#endif + +/* + * TCP header, without options + */ +struct tcp_hdr { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + uint32_t th_seq; /* sequence number */ + uint32_t th_ack; /* acknowledgment number */ +#if DNET_BYTESEX == DNET_BIG_ENDIAN + uint8_t th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#elif DNET_BYTESEX == DNET_LIL_ENDIAN + uint8_t th_x2:4, + th_off:4; +#else +# error "need to include <dnet.h>" +#endif + uint8_t th_flags; /* control flags */ + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; + +/* + * TCP control flags (th_flags) + */ +#define TH_FIN 0x01 /* end of data */ +#define TH_SYN 0x02 /* synchronize sequence numbers */ +#define TH_RST 0x04 /* reset connection */ +#define TH_PUSH 0x08 /* push */ +#define TH_ACK 0x10 /* acknowledgment number set */ +#define TH_URG 0x20 /* urgent pointer set */ +#define TH_ECE 0x40 /* ECN echo, RFC 3168 */ +#define TH_CWR 0x80 /* congestion window reduced */ + +#define TCP_PORT_MAX 65535 /* maximum port */ +#define TCP_WIN_MAX 65535 /* maximum (unscaled) window */ + +/* + * Sequence number comparison macros + */ +#define TCP_SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define TCP_SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define TCP_SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define TCP_SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * TCP FSM states + */ +#define TCP_STATE_CLOSED 0 /* closed */ +#define TCP_STATE_LISTEN 1 /* listening for connection */ +#define TCP_STATE_SYN_SENT 2 /* active, have sent SYN */ +#define 
TCP_STATE_SYN_RECEIVED 3 /* have sent and received SYN */ + +#define TCP_STATE_ESTABLISHED 4 /* established */ +#define TCP_STATE_CLOSE_WAIT 5 /* rcvd FIN, waiting for close */ + +#define TCP_STATE_FIN_WAIT_1 6 /* have closed, sent FIN */ +#define TCP_STATE_CLOSING 7 /* closed xchd FIN, await FIN-ACK */ +#define TCP_STATE_LAST_ACK 8 /* had FIN and close, await FIN-ACK */ + +#define TCP_STATE_FIN_WAIT_2 9 /* have closed, FIN is acked */ +#define TCP_STATE_TIME_WAIT 10 /* in 2*MSL quiet wait after close */ +#define TCP_STATE_MAX 11 + +/* + * Options (opt_type) - http://www.iana.org/assignments/tcp-parameters + */ +#define TCP_OPT_EOL 0 /* end of option list */ +#define TCP_OPT_NOP 1 /* no operation */ +#define TCP_OPT_MSS 2 /* maximum segment size */ +#define TCP_OPT_WSCALE 3 /* window scale factor, RFC 1072 */ +#define TCP_OPT_SACKOK 4 /* SACK permitted, RFC 2018 */ +#define TCP_OPT_SACK 5 /* SACK, RFC 2018 */ +#define TCP_OPT_ECHO 6 /* echo (obsolete), RFC 1072 */ +#define TCP_OPT_ECHOREPLY 7 /* echo reply (obsolete), RFC 1072 */ +#define TCP_OPT_TIMESTAMP 8 /* timestamp, RFC 1323 */ +#define TCP_OPT_POCONN 9 /* partial order conn, RFC 1693 */ +#define TCP_OPT_POSVC 10 /* partial order service, RFC 1693 */ +#define TCP_OPT_CC 11 /* connection count, RFC 1644 */ +#define TCP_OPT_CCNEW 12 /* CC.NEW, RFC 1644 */ +#define TCP_OPT_CCECHO 13 /* CC.ECHO, RFC 1644 */ +#define TCP_OPT_ALTSUM 14 /* alt checksum request, RFC 1146 */ +#define TCP_OPT_ALTSUMDATA 15 /* alt checksum data, RFC 1146 */ +#define TCP_OPT_SKEETER 16 /* Skeeter */ +#define TCP_OPT_BUBBA 17 /* Bubba */ +#define TCP_OPT_TRAILSUM 18 /* trailer checksum */ +#define TCP_OPT_MD5 19 /* MD5 signature, RFC 2385 */ +#define TCP_OPT_SCPS 20 /* SCPS capabilities */ +#define TCP_OPT_SNACK 21 /* selective negative acks */ +#define TCP_OPT_REC 22 /* record boundaries */ +#define TCP_OPT_CORRUPT 23 /* corruption experienced */ +#define TCP_OPT_SNAP 24 /* SNAP */ +#define TCP_OPT_TCPCOMP 26 /* TCP compression filter */ 
+#define TCP_OPT_MAX 27 + +#define TCP_OPT_TYPEONLY(type) \ + ((type) == TCP_OPT_EOL || (type) == TCP_OPT_NOP) + +/* + * TCP option (following TCP header) + */ +struct tcp_opt { + uint8_t opt_type; /* option type */ + uint8_t opt_len; /* option length >= TCP_OPT_LEN */ + union tcp_opt_data { + uint16_t mss; /* TCP_OPT_MSS */ + uint8_t wscale; /* TCP_OPT_WSCALE */ + uint16_t sack[19]; /* TCP_OPT_SACK */ + uint32_t echo; /* TCP_OPT_ECHO{REPLY} */ + uint32_t timestamp[2]; /* TCP_OPT_TIMESTAMP */ + uint32_t cc; /* TCP_OPT_CC{NEW,ECHO} */ + uint8_t cksum; /* TCP_OPT_ALTSUM */ + uint8_t md5[16]; /* TCP_OPT_MD5 */ + uint8_t data8[TCP_OPT_LEN_MAX - TCP_OPT_LEN]; + } opt_data; +} __attribute__((__packed__)); + +#ifndef __GNUC__ +# pragma pack() +#endif + +#define tcp_pack_hdr(hdr, sport, dport, seq, ack, flags, win, urp) do { \ + struct tcp_hdr *tcp_pack_p = (struct tcp_hdr *)(hdr); \ + tcp_pack_p->th_sport = htons(sport); \ + tcp_pack_p->th_dport = htons(dport); \ + tcp_pack_p->th_seq = htonl(seq); \ + tcp_pack_p->th_ack = htonl(ack); \ + tcp_pack_p->th_x2 = 0; tcp_pack_p->th_off = 5; \ + tcp_pack_p->th_flags = flags; \ + tcp_pack_p->th_win = htons(win); \ + tcp_pack_p->th_urp = htons(urp); \ +} while (0) + +#endif /* DNET_TCP_H */ diff --git a/ext/dnet/dnet/udp.h b/ext/dnet/dnet/udp.h new file mode 100644 index 000000000..73839a92a --- /dev/null +++ b/ext/dnet/dnet/udp.h @@ -0,0 +1,32 @@ +/* + * udp.h + * + * User Datagram Protocol (RFC 768). 
+ * + * Copyright (c) 2000 Dug Song <dugsong@monkey.org> + * + * $Id: udp.h,v 1.8 2002/04/02 05:05:39 dugsong Exp $ + */ + +#ifndef DNET_UDP_H +#define DNET_UDP_H + +#define UDP_HDR_LEN 8 + +struct udp_hdr { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + uint16_t uh_ulen; /* udp length (including header) */ + uint16_t uh_sum; /* udp checksum */ +}; + +#define UDP_PORT_MAX 65535 + +#define udp_pack_hdr(hdr, sport, dport, ulen) do { \ + struct udp_hdr *udp_pack_p = (struct udp_hdr *)(hdr); \ + udp_pack_p->uh_sport = htons(sport); \ + udp_pack_p->uh_dport = htons(dport); \ + udp_pack_p->uh_ulen = htons(ulen); \ +} while (0) + +#endif /* DNET_UDP_H */ diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES new file mode 100644 index 000000000..9c7334066 --- /dev/null +++ b/ext/ply/CHANGES @@ -0,0 +1,158 @@ +Version 1.3 +------------------------------ +12/10/02: jmdyck + Various minor adjustments to the code that Dave checked in today. + Updated test/yacc_{inf,unused}.exp to reflect today's changes. + +12/10/02: beazley + Incorporated a variety of minor bug fixes to empty production + handling and infinite recursion checking. Contributed by + Michael Dyck. + +12/10/02: beazley + Removed bogus recover() method call in yacc.restart() + +Version 1.2 +------------------------------ +11/27/02: beazley + Lexer and parser objects are now available as an attribute + of tokens and slices respectively. For example: + + def t_NUMBER(t): + r'\d+' + print t.lexer + + def p_expr_plus(t): + 'expr: expr PLUS expr' + print t.lexer + print t.parser + + This can be used for state management (if needed). + +10/31/02: beazley + Modified yacc.py to work with Python optimize mode. To make + this work, you need to use + + yacc.yacc(optimize=1) + + Furthermore, you need to first run Python in normal mode + to generate the necessary parsetab.py files. After that, + you can use python -O or python -OO. + + Note: optimized mode turns off a lot of error checking. 
+ Only use when you are sure that your grammar is working. + Make sure parsetab.py is up to date! + +10/30/02: beazley + Added cloning of Lexer objects. For example: + + import copy + l = lex.lex() + lc = copy.copy(l) + + l.input("Some text") + lc.input("Some other text") + ... + + This might be useful if the same "lexer" is meant to + be used in different contexts---or if multiple lexers + are running concurrently. + +10/30/02: beazley + Fixed subtle bug with first set computation and empty productions. + Patch submitted by Michael Dyck. + +10/30/02: beazley + Fixed error messages to use "filename:line: message" instead + of "filename:line. message". This makes error reporting more + friendly to emacs. Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file. Terminals and nonterminals + are sorted instead of being printed in random order. + Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file output. Rules are now printed + in a way that's easier to understand. Contributed by Russ Cox. + +10/30/02: beazley + Added 'nonassoc' associativity support. This can be used + to disable the chaining of operators like a < b < c. + To use, simply specify 'nonassoc' in the precedence table + + precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator + ) + + Patch contributed by Russ Cox. + +10/30/02: beazley + Modified the lexer to provide optional support for Python -O and -OO + modes. To make this work, Python *first* needs to be run in + unoptimized mode. This reads the lexing information and creates a + file "lextab.py". Then, run lex like this: + + # module foo.py + ... + ... + lex.lex(optimize=1) + + Once the lextab file has been created, subsequent calls to + lex.lex() will read data from the lextab file instead of using + introspection. 
In optimized mode (-O, -OO) everything should + work normally despite the loss of doc strings. + + To change the name of the file 'lextab.py' use the following: + + lex.lex(lextab="footab") + + (this creates a file footab.py) + + +Version 1.1 October 25, 2001 +------------------------------ + +10/25/01: beazley + Modified the table generator to produce much more compact data. + This should greatly reduce the size of the parsetab.py[c] file. + Caveat: the tables still need to be constructed so a little more + work is done in parsetab on import. + +10/25/01: beazley + There may be a possible bug in the cycle detector that reports errors + about infinite recursion. I'm having a little trouble tracking it + down, but if you get this problem, you can disable the cycle + detector as follows: + + yacc.yacc(check_recursion = 0) + +10/25/01: beazley + Fixed a bug in lex.py that sometimes caused illegal characters to be + reported incorrectly. Reported by Sverre Jørgensen. + +7/8/01 : beazley + Added a reference to the underlying lexer object when tokens are handled by + functions. The lexer is available as the 'lexer' attribute. This + was added to provide better lexing support for languages such as Fortran + where certain types of tokens can't be conveniently expressed as regular + expressions (and where the tokenizing function may want to perform a + little backtracking). Suggested by Pearu Peterson. + +6/20/01 : beazley + Modified yacc() function so that an optional starting symbol can be specified. + For example: + + yacc.yacc(start="statement") + + Normally yacc always treats the first production rule as the starting symbol. + However, if you are debugging your grammar it may be useful to specify + an alternative starting symbol. Idea suggested by Rich Salz. 
+ +Version 1.0 June 18, 2001 +-------------------------- +Initial public offering + diff --git a/ext/ply/COPYING b/ext/ply/COPYING new file mode 100644 index 000000000..b1e3f5a26 --- /dev/null +++ b/ext/ply/COPYING @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. 
+ + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. 
We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. 
+ + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. 
+ + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. 
+ + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. 
+ + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. 
If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. 
+ + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. 
However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ext/ply/README b/ext/ply/README new file mode 100644 index 000000000..35b458d4c --- /dev/null +++ b/ext/ply/README @@ -0,0 +1,249 @@ +PLY (Python Lex-Yacc) Version 1.2 (November 27, 2002) + +David M. Beazley +Department of Computer Science +University of Chicago +Chicago, IL 60637 +beazley@cs.uchicago.edu + +Copyright (C) 2001 David M. Beazley + +$Header: /home/stever/bk/newmem2/ext/ply/README 1.1 03/06/06 14:53:34-00:00 stever@ $ + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +See the file COPYING for a complete copy of the LGPL. 
+ +Introduction +============ + +PLY is a 100% Python implementation of the common parsing tools lex +and yacc. Although several other parsing tools are available for +Python, there are several reasons why you might want to consider PLY: + + - The tools are very closely modeled after traditional lex/yacc. + If you know how to use these tools in C, you will find PLY + to be similar. + + - PLY provides *very* extensive error reporting and diagnostic + information to assist in parser construction. The original + implementation was developed for instructional purposes. As + a result, the system tries to identify the most common types + of errors made by novice users. + + - PLY provides full support for empty productions, error recovery, + precedence specifiers, and moderately ambiguous grammars. + + - Parsing is based on LR-parsing which is fast, memory efficient, + better suited to large grammars, and which has a number of nice + properties when dealing with syntax errors and other parsing problems. + Currently, PLY builds its parsing tables using the SLR algorithm which + is slightly weaker than LALR(1) used in traditional yacc. + + - Like John Aycock's excellent SPARK toolkit, PLY uses Python + reflection to build lexers and parsers. This greatly simplifies + the task of parser construction since it reduces the number of files + and eliminates the need to run a separate lex/yacc tool before + running your program. + + - PLY can be used to build parsers for "real" programming languages. + Although it is not ultra-fast due to its Python implementation, + PLY can be used to parse grammars consisting of several hundred + rules (as might be found for a language like C). The lexer and LR + parser are also reasonably efficient when parsing typically + sized programs. + +The original version of PLY was developed for an Introduction to +Compilers course where students used it to build a compiler for a +simple Pascal-like language. 
Their compiler had to include lexical +analysis, parsing, type checking, type inference, and generation of +assembly code for the SPARC processor. Because of this, the current +implementation has been extensively tested and debugged. In addition, +most of the API and error checking steps have been adapted to address +common usability problems. + +How to Use +========== + +PLY consists of two files: lex.py and yacc.py. To use the system, +simply copy these files to your project and import them like standard +Python modules. + +The file doc/ply.html contains complete documentation on how to use +the system. + +The example directory contains several different examples including a +PLY specification for ANSI C as given in K&R 2nd Ed. Note: To use +the examples, you will need to copy the lex.py and yacc.py files to +the example directory. + +A simple example is found at the end of this document. + +Requirements +============ +PLY requires the use of Python 2.0 or greater. It should work on +just about any platform. + +Resources +========= + +More information about PLY can be obtained on the PLY webpage at: + + http://systems.cs.uchicago.edu/ply + +For a detailed overview of parsing theory, consult the excellent +book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and +Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown +may also be useful. + +Given that this is the first release, I welcome your comments on how +to improve the current implementation. See the TODO file for things that +still need to be done. + +Acknowledgments +=============== + +A special thanks is in order for all of the students in CS326 who +suffered through about 25 different versions of these tools :-). + +Example +======= + +Here is a simple example showing a PLY implementation of a calculator with variables. + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. 
+# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +# Ignored characters +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Precedence rules for the arithmetic operators +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names (for storing variables) +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + 
yacc.parse(s) + + + + + + + + + + + + + + + + + diff --git a/ext/ply/TODO b/ext/ply/TODO new file mode 100644 index 000000000..b2978150d --- /dev/null +++ b/ext/ply/TODO @@ -0,0 +1,22 @@ +The PLY to-do list: + +$Header: /home/stever/bk/newmem2/ext/ply/TODO 1.1 03/06/06 14:53:34-00:00 stever@ $ + +1. Create a Python package using distutils + +2. More interesting parsing examples. + +3. Work on the ANSI C grammar so that it can actually parse C programs. To do this, + some extra code needs to be added to the lexer to deal with typedef names and enumeration + constants. + +4. Get LALR(1) to work. Hard, but not impossible. + +5. More tests in the test directory. + +6. Performance improvements and cleanup in yacc.py. + +7. More documentation. + +8. Lots and lots of cleanup. + diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html new file mode 100644 index 000000000..2596066fe --- /dev/null +++ b/ext/ply/doc/ply.html @@ -0,0 +1,1642 @@ +<html> +<head> +<title>PLY (Python Lex-Yacc)</title> +</head> +<body bgcolor="#ffffff"> + +<h1>PLY (Python Lex-Yacc)</h1> + +<b> +David M. Beazley <br> +Department of Computer Science <br> +University of Chicago <br> +Chicago, IL 60637 <br> +beazley@cs.uchicago.edu <br> +</b> + +<p> +Documentation version: $Header: /home/stever/bk/newmem2/ext/ply/doc/ply.html 1.1 03/06/06 14:53:34-00:00 stever@ $ + +<h2>Introduction</h2> + +PLY is a Python-only implementation of the popular compiler +construction tools lex and yacc. The implementation borrows ideas +from a number of previous efforts; most notably John Aycock's SPARK +toolkit. However, the overall flavor of the implementation is more +closely modeled after the C version of lex and yacc. The other +significant feature of PLY is that it provides extensive input +validation and error reporting--much more so than other Python parsing +tools. + +<p> +Early versions of PLY were developed to support the Introduction to +Compilers Course at the University of Chicago. 
In this course, +students built a fully functional compiler for a simple Pascal-like +language. Their compiler, implemented entirely in Python, had to +include lexical analysis, parsing, type checking, type inference, +nested scoping, and code generation for the SPARC processor. +Approximately 30 different compiler implementations were completed in +this course. Most of PLY's interface and operation has been motivated by common +usability problems encountered by students. + +<p> +Because PLY was primarily developed as an instructional tool, you will +find it to be <em>MUCH</em> more picky about token and grammar rule +specification than most other Python parsing tools. In part, this +added formality is meant to catch common programming mistakes made by +novice users. However, advanced users will also find such features to +be useful when building complicated grammars for real programming +languages. It should also be noted that PLY does not provide much in the way +of bells and whistles (e.g., automatic construction of abstract syntax trees, +tree traversal, etc.). Instead, you will find a bare-bones, yet +fully capable lex/yacc implementation written entirely in Python. + +<p> +The rest of this document assumes that you are somewhat familiar with +parsing theory, syntax directed translation, and automatic tools such +as lex and yacc. If you are unfamiliar with these topics, you will +probably want to consult an introductory text such as "Compilers: +Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. "Lex +and Yacc" by John Levine may also be handy. + +<h2>PLY Overview</h2> + +PLY consists of two separate tools; <tt>lex.py</tt> and +<tt>yacc.py</tt>. <tt>lex.py</tt> is used to break input text into a +collection of tokens specified by a collection of regular expression +rules. <tt>yacc.py</tt> is used to recognize language syntax that has +been specified in the form of a context free grammar. 
Currently, +<tt>yacc.py</tt> uses LR parsing and generates its parsing tables +using the SLR algorithm. LALR(1) parsing may be supported in a future +release. + +<p> +The two tools are meant to work together. Specifically, +<tt>lex.py</tt> provides an external interface in the form of a +<tt>token()</tt> function that returns the next valid token on the +input stream. <tt>yacc.py</tt> calls this repeatedly to retrieve +tokens and invoke grammar rules. The output of <tt>yacc.py</tt> is +often an Abstract Syntax Tree (AST). However, this is entirely up to +the user. If desired, <tt>yacc.py</tt> can also be used to implement +simple one-pass compilers. + +<p> +Like its Unix counterpart, <tt>yacc.py</tt> provides most of the +features you expect including extensive error checking, grammar +validation, support for empty productions, error tokens, and ambiguity +resolution via precedence rules. The primary difference between +<tt>yacc.py</tt> and <tt>yacc</tt> is the use of SLR parsing instead +of LALR(1). Although this slightly restricts the types of grammars +that can be successfully parsed, it is sufficiently powerful to handle most +kinds of normal programming language constructs. + +<p> +Finally, it is important to note that PLY relies on reflection +(introspection) to build its lexers and parsers. Unlike traditional +lex/yacc which require a special input file that is converted into a +separate source file, the specifications given to PLY <em>are</em> +valid Python programs. This means that there are no extra source +files nor is there a special compiler construction step (e.g., running +yacc to generate Python code for the compiler). + +<h2>Lex Example</h2> + +<tt>lex.py</tt> is used to write tokenizers. To do this, each token +must be defined by a regular expression rule. 
The following file +implements a very simple lexer for tokenizing simple integer expressions: + +<blockquote> +<pre> +# ------------------------------------------------------------ +# calclex.py +# +# tokenizer for a simple expression evaluator for +# numbers and +,-,*,/ +# ------------------------------------------------------------ +import lex + +# List of token names. This is always required +tokens = ( + 'NUMBER', + 'PLUS', + 'MINUS', + 'TIMES', + 'DIVIDE', + 'LPAREN', + 'RPAREN', +) + +# Regular expression rules for simple tokens +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' + +# A regular expression rule with some action code +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Line %d: Number %s is too large!" % (t.lineno,t.value) + t.value = 0 + return t + +# Define a rule so we can track line numbers +def t_newline(t): + r'\n+' + t.lineno += len(t.value) + +# A string containing ignored characters (spaces and tabs) +t_ignore = ' \t' + +# Error handling rule +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +lex.lex() + +# Test it out +data = ''' +3 + 4 * 10 + + -20 *2 +''' + +# Give the lexer some input +lex.input(data) + +# Tokenize +while 1: + tok = lex.token() + if not tok: break # No more input + print tok +</pre> +</blockquote> + +In the example, the <tt>tokens</tt> list defines all of the possible +token names that can be produced by the lexer. This list is always required +and is used to perform a variety of validation checks. Following the <tt>tokens</tt> +list, regular expressions are written for each token. Each of these +rules are defined by making declarations with a special prefix <tt>t_</tt> to indicate that it +defines a token. 
For simple tokens, the regular expression can +be specified as strings such as this (note: Python raw strings are used since they are the +most convenient way to write regular expression strings): + +<blockquote> +<pre> +t_PLUS = r'\+' +</pre> +</blockquote> + +In this case, the name following the <tt>t_</tt> must exactly match one of the +names supplied in <tt>tokens</tt>. If some kind of action needs to be performed, +a token rule can be specified as a function. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Number %s is too large!" % t.value + t.value = 0 + return t +</pre> +</blockquote> + +In this case, the regular expression rule is specified in the function documentation string. +The function always takes a single argument which is an instance of +<tt>LexToken</tt>. This object has attributes of <tt>t.type</tt> which is the token type, +<tt>t.value</tt> which is the lexeme, and <tt>t.lineno</tt> which is the current line number. +By default, <tt>t.type</tt> is set to the name following the <tt>t_</tt> prefix. The action +function can modify the contents of the <tt>LexToken</tt> object as appropriate. However, +when it is done, the resulting token should be returned. If no value is returned by the action +function, the token is simply discarded and the next token read. + +<p> +The rule <tt>t_newline()</tt> illustrates a regular expression rule +for a discarded token. In this case, a rule is written to match +newlines so that proper line number tracking can be performed. +By returning no value, the function causes the newline character to be +discarded. + +<p> +The special <tt>t_ignore</tt> rule is reserved by <tt>lex.py</tt> for characters +that should be completely ignored in the input stream. +Usually this is used to skip over whitespace and other non-essential characters. 
+Although it is possible to define a regular expression rule for whitespace in a manner +similar to <tt>t_newline()</tt>, the use of <tt>t_ignore</tt> provides substantially better +lexing performance because it is handled as a special case and is checked in a much +more efficient manner than the normal regular expression rules. + +<p> +Finally, the <tt>t_error()</tt> +function is used to handle lexing errors that occur when illegal +characters are detected. In this case, the <tt>t.value</tt> attribute contains the +rest of the input string that has not been tokenized. In the example, we simply print +the offending character and skip ahead one character by calling <tt>t.skip(1)</tt>. + +<p> +To build the lexer, the function <tt>lex.lex()</tt> is used. This function +uses Python reflection (or introspection) to read the regular expression rules +out of the calling context and build the lexer. Once the lexer has been built, two functions can +be used to control the lexer. + +<ul> +<li><tt>lex.input(data)</tt>. Reset the lexer and store a new input string. +<li><tt>lex.token()</tt>. Return the next token. Returns a special <tt>LexToken</tt> instance on success or +None if the end of the input text has been reached. +</ul> + +The code at the bottom of the example shows how the lexer is actually used. When executed, +the following output will be produced: + +<blockquote> +<pre> +$ python example.py +LexToken(NUMBER,3,2) +LexToken(PLUS,'+',2) +LexToken(NUMBER,4,2) +LexToken(TIMES,'*',2) +LexToken(NUMBER,10,2) +LexToken(PLUS,'+',3) +LexToken(MINUS,'-',3) +LexToken(NUMBER,20,3) +LexToken(TIMES,'*',3) +LexToken(NUMBER,2,3) +</pre> +</blockquote> + +<h2>Lex Implementation Notes</h2> + +<ul> +<li><tt>lex.py</tt> uses the <tt>re</tt> module to do its pattern matching. When building the master regular expression, +rules are added in the following order: +<p> +<ol> +<li>All tokens defined by functions are added in the same order as they appear in the lexer file. 
+<li>Tokens defined by strings are added by sorting them in order of decreasing regular expression length (longer expressions +are added first). +</ol> +<p> +Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you +wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular +expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions, +the order can be explicitly controlled since rules appearing first are checked first. + +<P> +<li>The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this +rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data +such as open files or sockets. This limitation is primarily a side-effect of using the <tt>re</tt> module. + +<p> +<li> +To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function +like this: + +<blockquote> +<pre> +reserved = { + 'if' : 'IF', + 'then' : 'THEN', + 'else' : 'ELSE', + 'while' : 'WHILE', + ... +} + +def t_ID(t): + r'[a-zA-Z_][a-zA-Z_0-9]*' + t.type = reserved.get(t.value,'ID') # Check for reserved words + return t +</pre> +</blockquote> + +<p> +<li>The lexer requires tokens to be defined as class instances with <tt>t.type</tt>, <tt>t.value</tt>, and <tt>t.lineno</tt> +attributes. By default, tokens are created as instances of the <tt>LexToken</tt> class defined internally to <tt>lex.py</tt>. +If desired, you can create new kinds of tokens provided that they have the three required attributes. However, +in practice, it is probably safer to stick with the default. + +<p> +<li>The only safe attribute for assigning token properties is <tt>t.value</tt>. In some cases, you may want to attach +a number of different properties to a token (e.g., symbol table entries for identifiers). 
To do this, replace <tt>t.value</tt> +with a tuple or class instance. For example: + +<blockquote> +<pre> +def t_ID(t): + ... + # For identifiers, create a (lexeme, symtab) tuple + t.value = (t.value, symbol_lookup(t.value)) + ... + return t +</pre> +</blockquote> + +Although allowed, do NOT assign additional attributes to the token object. For example, +<blockquote> +<pre> +def t_ID(t): + ... + # Bad implementation of above + t.symtab = symbol_lookup(t.value) + ... +</pre> +</blockquote> + +The reason you don't want to do this is that the <tt>yacc.py</tt> +module only provides public access to the <tt>t.value</tt> attribute of each token. +Therefore, any other attributes you assign are inaccessible (if you are familiar +with the internals of C lex/yacc, <tt>t.value</tt> is the same as <tt>yylval.tok</tt>). + +<p> +<li>To track line numbers, the lexer internally maintains a line +number variable. Each token automatically gets the value of the +current line number in the <tt>t.lineno</tt> attribute. To modify the +current line number, simply change the <tt>t.lineno</tt> attribute +in a function rule (as previously shown for +<tt>t_newline()</tt>). Even if the resulting token is discarded, +changes to the line number remain in effect for subsequent tokens. + +<p> +<li>To support multiple scanners in the same application, the <tt>lex.lex()</tt> function +actually returns a special <tt>Lexer</tt> object. This object has two methods +<tt>input()</tt> and <tt>token()</tt> that can be used to supply input and get tokens. For example: + +<blockquote> +<pre> +lexer = lex.lex() +lexer.input(sometext) +while 1: + tok = lexer.token() + if not tok: break + print tok +</pre> +</blockquote> + +The functions <tt>lex.input()</tt> and <tt>lex.token()</tt> are bound to the <tt>input()</tt> +and <tt>token()</tt> methods of the last lexer created by the lex module. 
+ + +<p> +<li>To reduce compiler startup time and improve performance, the lexer can be built in optimized mode as follows: + +<blockquote> +<pre> +lex.lex(optimize=1) +</pre> +</blockquote> + +When used, most error checking and validation is disabled. This provides a slight performance +gain while tokenizing and tends to chop a few tenths of a second off startup time. Since it disables +error checking, this mode is not the default and is not recommended during development. However, once +you have your compiler fully working, it is usually safe to disable the error checks. + +<p> +<li>You can enable some additional debugging by building the lexer like this: + +<blockquote> +<pre> +lex.lex(debug=1) +</pre> +</blockquote> + +<p> +<li>To help you debug your lexer, <tt>lex.py</tt> comes with a simple main program which will either +tokenize input read from standard input or from a file. To use it, simply put this in your lexer: + +<blockquote> +<pre> +if __name__ == '__main__': + lex.runmain() +</pre> +</blockquote> + +Then, run your lexer as a main program such as <tt>python mylex.py</tt> + +<p> +<li>Since the lexer is written entirely in Python, its performance is +largely determined by that of the Python <tt>re</tt> module. Although +the lexer has been written to be as efficient as possible, it's not +blazingly fast when used on very large input files. Sorry. If +performance is a concern, you might consider upgrading to the most +recent version of Python, creating a hand-written lexer, or offloading +the lexer into a C extension module. In defense of <tt>lex.py</tt>, +its performance is not <em>that</em> bad when used on reasonably +sized input files. For instance, lexing a 4700 line C program with +32000 input tokens takes about 20 seconds on a 200 Mhz PC. Obviously, +it will run much faster on a more speedy machine. + +</ul> + +<h2>Parsing basics</h2> + +<tt>yacc.py</tt> is used to parse language syntax. 
Before showing an +example, there are a few important bits of background that must be +mentioned. First, <tt>syntax</tt> is usually specified in terms of a +context free grammar (CFG). For example, if you wanted to parse +simple arithmetic expressions, you might first write an unambiguous +grammar specification like this: + +<blockquote> +<pre> +expression : expression + term + | expression - term + | term + +term : term * factor + | term / factor + | factor + +factor : NUMBER + | ( expression ) +</pre> +</blockquote> + +Next, the semantic behavior of a language is often specified using a +technique known as syntax directed translation. In syntax directed +translation, attributes are attached to each symbol in a given grammar +rule along with an action. Whenever a particular grammar rule is +recognized, the action describes what to do. For example, given the +expression grammar above, you might write the specification for a +simple calculator like this: + +<blockquote> +<pre> +Grammar Action +-------------------------------- -------------------------------------------- +expression0 : expression1 + term expression0.val = expression1.val + term.val + | expression1 - term expression0.val = expression1.val - term.val + | term expression0.val = term.val + +term0 : term1 * factor term0.val = term1.val * factor.val + | term1 / factor term0.val = term1.val / factor.val + | factor term0.val = factor.val + +factor : NUMBER factor.val = int(NUMBER.lexval) + | ( expression ) factor.val = expression.val +</pre> +</blockquote> + +Finally, Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a +bottom up technique that tries to recognize the right-hand-side of various grammar rules. +Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the +grammar symbols are replaced by the grammar symbol on the left-hand-side. 
+ +<p> +LR parsing is commonly implemented by shifting grammar symbols onto a stack and looking at the stack and the next +input token for patterns. The details of the algorithm can be found in a compiler text, but the +following example illustrates the steps that are performed if you wanted to parse the expression +<tt>3 + 5 * (10 - 20)</tt> using the grammar defined above: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 + 5 * ( 10 - 20 )$ Shift 3 +2 $ 3 + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +3 $ factor + 5 * ( 10 - 20 )$ Reduce term : factor +4 $ term + 5 * ( 10 - 20 )$ Reduce expr : term +5 $ expr + 5 * ( 10 - 20 )$ Shift + +6 $ expr + 5 * ( 10 - 20 )$ Shift 5 +7 $ expr + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +8 $ expr + factor * ( 10 - 20 )$ Reduce term : factor +9 $ expr + term * ( 10 - 20 )$ Shift * +10 $ expr + term * ( 10 - 20 )$ Shift ( +11 $ expr + term * ( 10 - 20 )$ Shift 10 +12 $ expr + term * ( 10 - 20 )$ Reduce factor : NUMBER +13 $ expr + term * ( factor - 20 )$ Reduce term : factor +14 $ expr + term * ( term - 20 )$ Reduce expr : term +15 $ expr + term * ( expr - 20 )$ Shift - +16 $ expr + term * ( expr - 20 )$ Shift 20 +17 $ expr + term * ( expr - 20 )$ Reduce factor : NUMBER +18 $ expr + term * ( expr - factor )$ Reduce term : factor +19 $ expr + term * ( expr - term )$ Reduce expr : expr - term +20 $ expr + term * ( expr )$ Shift ) +21 $ expr + term * ( expr ) $ Reduce factor : (expr) +22 $ expr + term * factor $ Reduce term : term * factor +23 $ expr + term $ Reduce expr : expr + term +24 $ expr $ Reduce expr +25 $ $ Success! +</pre> +</blockquote> + +When parsing the expression, an underlying state machine and the current input token determine what to do next. +If the next token looks like part of a valid grammar rule (based on other items on the stack), it is generally shifted +onto the stack. 
If the top of the stack contains a valid right-hand-side of a grammar rule, it is +usually "reduced" and the symbols replaced with the symbol on the left-hand-side. When this reduction occurs, the +appropriate action is triggered (if defined). If the input token can't be shifted and the top of stack doesn't match +any grammar rules, a syntax error has occurred and the parser must take some kind of recovery step (or bail out). + +<p> +It is important to note that the underlying implementation is actually built around a large finite-state machine +and some tables. The construction of these tables is quite complicated and beyond the scope of this discussion. +However, subtle details of this process explain why, in the example above, the parser chooses to shift a token +onto the stack in step 9 rather than reducing the rule <tt>expr : expr + term</tt>. + +<h2>Yacc example</h2> + +Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is +how you would do it with <tt>yacc.py</tt>: + +<blockquote> +<pre> +# Yacc example + +import yacc + +# Get the token map from the lexer. This is required. +from calclex import tokens + +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] + +def p_expression_term(t): + 'expression : term' + t[0] = t[1] + +def p_term_times(t): + 'term : term TIMES factor' + t[0] = t[1] * t[3] + +def p_term_div(t): + 'term : term DIVIDE factor' + t[0] = t[1] / t[3] + +def p_term_factor(t): + 'term : factor' + t[0] = t[1] + +def p_factor_num(t): + 'factor : NUMBER' + t[0] = t[1] + +def p_factor_expr(t): + 'factor : LPAREN expression RPAREN' + t[0] = t[2] + +# Error rule for syntax errors +def p_error(t): + print "Syntax error in input!" 
+ +# Build the parser +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + result = yacc.parse(s) + print result +</pre> +</blockquote> + +In this example, each grammar rule is defined by a Python function where the docstring to that function contains the +appropriate context-free grammar specification (an idea borrowed from John Aycock's SPARK toolkit). Each function accepts a single +argument <tt>t</tt> that is a sequence containing the values of each grammar symbol in the corresponding rule. The values of +<tt>t[i]</tt> are mapped to grammar symbols as shown here: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + # ^ ^ ^ ^ + # t[0] t[1] t[2] t[3] + + t[0] = t[1] + t[3] +</pre> +</blockquote> + +For tokens, the "value" in the corresponding <tt>t[i]</tt> is the +<em>same</em> as the value of the <tt>t.value</tt> attribute assigned +in the lexer module. For non-terminals, the value is determined by +whatever is placed in <tt>t[0]</tt> when rules are reduced. This +value can be anything at all. However, it is probably most common for +the value to be a simple Python type, a tuple, or an instance. In this example, we +are relying on the fact that the <tt>NUMBER</tt> token stores an integer value in its value +field. All of the other rules simply perform various types of integer operations and store +the result. + +<p> +The first rule defined in the yacc specification determines the starting grammar +symbol (in this case, a rule for <tt>expression</tt> appears first). Whenever +the starting rule is reduced by the parser and no more input is available, parsing +stops and the final value is returned (this value will be whatever the top-most rule +placed in <tt>t[0]</tt>). + +<p>The <tt>p_error(t)</tt> rule is defined to catch syntax errors. See the error handling section +below for more detail. + +<p> +To build the parser, call the <tt>yacc.yacc()</tt> function. 
This function +looks at the module and attempts to construct all of the LR parsing tables for the grammar +you have specified. The first time <tt>yacc.yacc()</tt> is invoked, you will get a message +such as this: + +<blockquote> +<pre> +$ python calcparse.py +yacc: Generating SLR parsing table... +calc > +</pre> +</blockquote> + +Since table construction is relatively expensive (especially for large +grammars), the resulting parsing table is written to the current +directory in a file called <tt>parsetab.py</tt>. In addition, a +debugging file called <tt>parser.out</tt> is created. On subsequent +executions, <tt>yacc</tt> will reload the table from +<tt>parsetab.py</tt> unless it has detected a change in the underlying +grammar (in which case the tables and <tt>parsetab.py</tt> file are +regenerated). + +<p> +If any errors are detected in your grammar specification, <tt>yacc.py</tt> will produce +diagnostic messages and possibly raise an exception. Some of the errors that can be detected include: + +<ul> +<li>Duplicated function names (if more than one rule function have the same name in the grammar file). +<li>Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars. +<li>Badly specified grammar rules. +<li>Infinite recursion (rules that can never terminate). +<li>Unused rules and tokens +<li>Undefined rules and tokens +</ul> + +The next few sections now discuss a few finer points of grammar construction. + +<h2>Combining Grammar Rule Functions</h2> + +When grammar rules are similar, they can be combined into a single function. 
+For example, consider the two rules in our earlier example: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] +</pre> +</blockquote> + +Instead of writing two functions, you might write a single function like this: + +<blockquote> +<pre> +def p_expression(t): + '''expression : expression PLUS term + | expression MINUS term''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] +</pre> +</blockquote> + +In general, the doc string for any given function can contain multiple grammar rules. So, it would +have also been legal (although possibly confusing) to write this: + +<blockquote> +<pre> +def p_binary_operators(t): + '''expression : expression PLUS term + | expression MINUS term + term : term TIMES factor + | term DIVIDE factor''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] + elif t[2] == '*': + t[0] = t[1] * t[3] + elif t[2] == '/': + t[0] = t[1] / t[3] +</pre> +</blockquote> + +When combining grammar rules into a single function, it is usually a good idea for all of the rules to have +a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more +complicated than necessary. + +<h2>Empty Productions</h2> + +<tt>yacc.py</tt> can handle empty productions by defining a rule like this: + +<blockquote> +<pre> +def p_empty(t): + 'empty :' + pass +</pre> +</blockquote> + +Now to use the empty production, simply use 'empty' as a symbol. For example: + +<blockquote> +<pre> +def p_optitem(t): + 'optitem : item' + ' | empty' + ... +</pre> +</blockquote> + +<h2>Dealing With Ambiguous Grammars</h2> + +The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity. +However, in many situations, it is extremely difficult or awkward to write grammars in this format. 
A +much more natural way to express the grammar is in a more compact form like this: + +<blockquote> +<pre> +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +Unfortunately, this grammar specification is ambiguous. For example, if you are parsing the string +"3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped. +For example, does this expression mean "(3 * 4) + 5" or is it "3 * (4+5)"? + +<p> +When an ambiguous grammar is given to <tt>yacc.py</tt> it will print messages about "shift/reduce conflicts" +or "reduce/reduce conflicts". A shift/reduce conflict is caused when the parser generator can't decide +whether or not to reduce a rule or shift a symbol on the parsing stack. For example, consider +the string "3 * 4 + 5" and the internal parsing stack: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 * 4 + 5$ Shift 3 +2 $ 3 * 4 + 5$ Reduce : expression : NUMBER +3 $ expr * 4 + 5$ Shift * +4 $ expr * 4 + 5$ Shift 4 +5 $ expr * 4 + 5$ Reduce: expression : NUMBER +6 $ expr * expr + 5$ SHIFT/REDUCE CONFLICT ???? +</pre> +</blockquote> + +In this case, when the parser reaches step 6, it has two options. One is to reduce the +rule <tt>expr : expr * expr</tt> on the stack. The other option is to shift the +token <tt>+</tt> on the stack. Both options are perfectly legal from the rules +of the context-free-grammar. + +<p> +By default, all shift/reduce conflicts are resolved in favor of shifting. Therefore, in the above +example, the parser will always shift the <tt>+</tt> instead of reducing. Although this +strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic +expressions. 
In fact, in the above example, the decision to shift <tt>+</tt> is completely wrong---we should have +reduced <tt>expr * expr</tt> since multiplication has higher precedence than addition. + +<p>To resolve ambiguity, especially in expression grammars, <tt>yacc.py</tt> allows individual +tokens to be assigned a precedence level and associativity. This is done by adding a variable +<tt>precedence</tt> to the grammar file like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), +) +</pre> +</blockquote> + +This declaration specifies that <tt>PLUS</tt>/<tt>MINUS</tt> have +the same precedence level and are left-associative and that +<tt>TIMES</tt>/<tt>DIVIDE</tt> have the same precedence and are left-associative. +Furthermore, the declaration specifies that <tt>TIMES</tt>/<tt>DIVIDE</tt> have higher +precedence than <tt>PLUS</tt>/<tt>MINUS</tt> (since they appear later in the +precedence specification). + +<p> +The precedence specification is used to attach a numerical precedence value and associativity direction +to each grammar rule. This is always determined by the precedence of the right-most terminal symbol. Therefore, +if PLUS/MINUS had a precedence of 1 and TIMES/DIVIDE had a precedence of 2, the grammar rules +would have precedence values as follows: + +<blockquote> +<pre> +expression : expression PLUS expression # prec = 1, left + | expression MINUS expression # prec = 1, left + | expression TIMES expression # prec = 2, left + | expression DIVIDE expression # prec = 2, left + | LPAREN expression RPAREN # prec = unknown + | NUMBER # prec = unknown +</pre> +</blockquote> + +When shift/reduce conflicts are encountered, the parser generator resolves the conflict by +looking at the precedence rules and associativity specifiers. + +<p> +<ol> +<li>If the current token has higher precedence, it is shifted. +<li>If the grammar rule on the stack has higher precedence, the rule is reduced. 
+<li>If the current token and the grammar rule have the same precedence, the +rule is reduced for left associativity, whereas the token is shifted for right associativity. +<li>If nothing is known about the precedence, shift/reduce conflicts are resolved in +favor of shifting (the default). +</ol> + +<p> +When shift/reduce conflicts are resolved using the first three techniques (with the help of +precedence rules), <tt>yacc.py</tt> will report no errors or conflicts in the grammar. + +<p> +One problem with the precedence specifier technique is that it is sometimes necessary to +change the precedence of an operator in certain contexts. For example, consider a unary-minus operator +in "3 + 4 * -5". Normally, unary minus has a very high precedence--being evaluated before the multiply. +However, in our precedence specifier, MINUS has a lower precedence than TIMES. To deal with this, +precedence rules can be given for fictitious tokens like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +Now, in the grammar file, we can write our unary minus rule like this: + +<blockquote> +<pre> +def p_expr_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] +</pre> +</blockquote> + +In this case, <tt>%prec UMINUS</tt> overrides the default rule precedence--setting it to that +of UMINUS in the precedence specifier. + +<p> +It is also possible to specify non-associativity in the <tt>precedence</tt> table. This would +be used when you <em>don't</em> want operations to chain together. For example, suppose +you wanted to support comparison operators like <tt>&lt;</tt> and <tt>&gt;</tt> but you didn't want to allow +combinations like <tt>a &lt; b &lt; c</tt>. 
To do this, simply specify a rule like this: + +<blockquote> +<pre> +precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +<p> +Reduce/reduce conflicts are caused when there are multiple grammar +rules that can be applied to a given set of symbols. This kind of +conflict is almost always bad and is always resolved by picking the +rule that appears first in the grammar file. Reduce/reduce conflicts +are almost always caused when different sets of grammar rules somehow +generate the same set of symbols. For example: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER + | ID EQUALS expression + +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +In this case, a reduce/reduce conflict exists between these two rules: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER +expression : NUMBER +</pre> +</blockquote> + +For example, if you wrote "a = 5", the parser can't figure out if this +is supposed to be reduced as <tt>assignment : ID EQUALS NUMBER</tt> or +whether it's supposed to reduce the 5 as an expression and then reduce +the rule <tt>assignment : ID EQUALS expression</tt>. + +<h2>The parser.out file</h2> + +Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR +parsing algorithm. To assist in debugging, <tt>yacc.py</tt> creates a debugging file called +'parser.out' when it generates the parsing table. 
The contents of this file look like the following: + +<blockquote> +<pre> +Unused terminals: + + +Grammar + +Rule 1 expression -> expression PLUS expression +Rule 2 expression -> expression MINUS expression +Rule 3 expression -> expression TIMES expression +Rule 4 expression -> expression DIVIDE expression +Rule 5 expression -> NUMBER +Rule 6 expression -> LPAREN expression RPAREN + +Terminals, with rules where they appear + +TIMES : 3 +error : +MINUS : 2 +RPAREN : 6 +LPAREN : 6 +DIVIDE : 4 +PLUS : 1 +NUMBER : 5 + +Nonterminals, with rules where they appear + +expression : 1 1 2 2 3 3 4 4 6 0 + + +Parsing method: SLR + + +state 0 + + S' -> . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 1 + + S' -> expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 2 + + expression -> LPAREN . expression RPAREN + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 3 + + expression -> NUMBER . + + $ reduce using rule 5 + PLUS reduce using rule 5 + MINUS reduce using rule 5 + TIMES reduce using rule 5 + DIVIDE reduce using rule 5 + RPAREN reduce using rule 5 + + +state 4 + + expression -> expression TIMES . expression + expression -> . 
expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 5 + + expression -> expression MINUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 6 + + expression -> expression PLUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 7 + + expression -> expression DIVIDE . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 8 + + expression -> LPAREN expression . RPAREN + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + RPAREN shift and go to state 13 + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 9 + + expression -> expression TIMES expression . + expression -> expression . PLUS expression + expression -> expression . 
MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 3 + PLUS reduce using rule 3 + MINUS reduce using rule 3 + TIMES reduce using rule 3 + DIVIDE reduce using rule 3 + RPAREN reduce using rule 3 + + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 10 + + expression -> expression MINUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 2 + PLUS reduce using rule 2 + MINUS reduce using rule 2 + RPAREN reduce using rule 2 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 11 + + expression -> expression PLUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 1 + PLUS reduce using rule 1 + MINUS reduce using rule 1 + RPAREN reduce using rule 1 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 1 ] + ! DIVIDE [ reduce using rule 1 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 12 + + expression -> expression DIVIDE expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 4 + PLUS reduce using rule 4 + MINUS reduce using rule 4 + TIMES reduce using rule 4 + DIVIDE reduce using rule 4 + RPAREN reduce using rule 4 + + ! 
PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 13 + + expression -> LPAREN expression RPAREN . + + $ reduce using rule 6 + PLUS reduce using rule 6 + MINUS reduce using rule 6 + TIMES reduce using rule 6 + DIVIDE reduce using rule 6 + RPAREN reduce using rule 6 +</pre> +</blockquote> + +In the file, each state of the grammar is described. Within each state the "." indicates the current +location of the parse within any applicable grammar rules. In addition, the actions for each valid +input token are listed. When a shift/reduce or reduce/reduce conflict arises, rules <em>not</em> selected +are prefixed with an !. For example: + +<blockquote> +<pre> + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] +</pre> +</blockquote> + +By looking at these rules (and with a little practice), you can usually track down the source +of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are +bad. However, the only way to be sure that they are resolved correctly is to look at <tt>parser.out</tt>. + +<h2>Syntax Error Handling</h2> + +When a syntax error occurs during parsing, the error is immediately +detected (i.e., the parser does not read any more tokens beyond the +source of the error). Error recovery in LR parsers is a delicate +topic that involves ancient rituals and black-magic. The recovery mechanism +provided by <tt>yacc.py</tt> is comparable to Unix yacc so you may want to +consult a book like O'Reilly's "Lex and Yacc" for some of the finer details. + +<p> +When a syntax error occurs, <tt>yacc.py</tt> performs the following steps: + +<ol> +<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function +is called with the offending token as an argument. 
Afterwards, the parser enters +an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it +has successfully shifted at least 3 tokens onto the parsing stack. + +<p> +<li>If no recovery action is taken in <tt>p_error()</tt>, the offending lookahead token is replaced +with a special <tt>error</tt> token. + +<p> +<li>If the offending lookahead token is already set to <tt>error</tt>, the top item of the parsing stack is +deleted. + +<p> +<li>If the entire parsing stack is unwound, the parser enters a restart state and attempts to start +parsing from its initial state. + +<p> +<li>If a grammar rule accepts <tt>error</tt> as a token, it will be +shifted onto the parsing stack. + +<p> +<li>If the top item of the parsing stack is <tt>error</tt>, lookahead tokens will be discarded until the +parser can successfully shift a new symbol or reduce a rule involving <tt>error</tt>. +</ol> + +<h4>Recovery and resynchronization with error rules</h4> + +The most well-behaved approach for handling syntax errors is to write grammar rules that include the <tt>error</tt> +token. For example, suppose your language had a grammar rule for a print statement like this: + +<blockquote> +<pre> +def p_statement_print(t): + 'statement : PRINT expr SEMI' + ... +</pre> +</blockquote> + +To account for the possibility of a bad expression, you might write an additional grammar rule like this: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error SEMI' + print "Syntax error in print statement. Bad expression" + +</pre> +</blockquote> + +In this case, the <tt>error</tt> token will match any sequence of +tokens that might appear up to the first semicolon that is +encountered. Once the semicolon is reached, the rule will be +invoked and the <tt>error</tt> token will go away. + +<p> +This type of recovery is sometimes known as parser resynchronization. 
+The <tt>error</tt> token acts as a wildcard for any bad input text and +the token immediately following <tt>error</tt> acts as a +synchronization token. + +<p> +It is important to note that the <tt>error</tt> token usually does not appear as the last token +on the right in an error rule. For example: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error' + print "Syntax error in print statement. Bad expression" +</pre> +</blockquote> + +This is because the first bad token encountered will cause the rule to +be reduced--which may make it difficult to recover if more bad tokens +immediately follow. + +<h4>Panic mode recovery</h4> + +An alternative error recovery scheme is to enter a panic mode recovery in which tokens are +discarded to a point where the parser might be able to recover in some sensible manner. + +<p> +Panic mode recovery is implemented entirely in the <tt>p_error()</tt> function. For example, this +function starts discarding tokens until it reaches a closing '}'. Then, it restarts the +parser in its initial state. + +<blockquote> +<pre> +def p_error(t): + print "Whoa. You are seriously hosed." + # Read ahead looking for a closing '}' + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'RBRACE': break + yacc.restart() +</pre> +</blockquote> + +<p> +This function simply discards the bad token and tells the parser that the error was ok. + +<blockquote> +<pre> +def p_error(t): + print "Syntax error at token", t.type + # Just discard the token and tell the parser it's okay. + yacc.errok() +</pre> +</blockquote> + +<P> +Within the <tt>p_error()</tt> function, three functions are available to control the behavior +of the parser: +<p> +<ul> +<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery +mode. 
This will prevent an <tt>error</tt> token from being generated and will reset the internal +error counters so that the next syntax error will call <tt>p_error()</tt> again. + +<p> +<li><tt>yacc.token()</tt>. This returns the next token on the input stream. + +<p> +<li><tt>yacc.restart()</tt>. This discards the entire parsing stack and resets the parser +to its initial state. +</ul> + +Note: these functions are only available when invoking <tt>p_error()</tt> and are not available +at any other time. + +<p> +To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be +useful if trying to synchronize on special characters. For example: + +<blockquote> +<pre> +def p_error(t): + # Read ahead looking for a terminating ";" + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'SEMI': break + yacc.errok() + + # Return SEMI to the parser as the next lookahead token + return tok +</pre> +</blockquote> + +<h4>General comments on error handling</h4> + +For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable +technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy +to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want +to discard huge portions of the input text to find a valid restart point. + +<h2>Line Number Tracking</h2> + +<tt>yacc.py</tt> automatically tracks line numbers for all of the grammar symbols and tokens it processes. To retrieve the line +numbers, two functions are used in grammar rules: + +<ul> +<li><tt>t.lineno(num)</tt>. Return the starting line number for symbol <em>num</em> +<li><tt>t.linespan(num)</tt>. Return a tuple (startline,endline) with the starting and ending line number for symbol <em>num</em>. 
+</ul> + +For example: + +<blockquote> +<pre> +def p_expression(t): + 'expression : expression PLUS expression' + t.lineno(1) # Line number of the left expression + t.lineno(2) # line number of the PLUS operator + t.lineno(3) # line number of the right expression + ... + start,end = t.linespan(3) # Start,end lines of the right expression + +</pre> +</blockquote> + +Since line numbers are managed internally by the parser, there is usually no need to modify the line +numbers. However, if you want to save the line numbers in a parse-tree node, you will need to make your own +private copy. + +<h2>AST Construction</h2> + +<tt>yacc.py</tt> provides no special functions for constructing an abstract syntax tree. However, such +construction is easy enough to do on your own. Simply create a data structure for abstract syntax tree nodes +and assign nodes to <tt>t[0]</tt> in each rule. + +For example: + +<blockquote> +<pre> +class Expr: pass + +class BinOp(Expr): + def __init__(self,left,op,right): + self.type = "binop" + self.left = left + self.right = right + self.op = op + +class Number(Expr): + def __init__(self,value): + self.type = "number" + self.value = value + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = BinOp(t[1],t[2],t[3]) + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = Number(t[1]) +</pre> +</blockquote> + +To simplify tree traversal, it may make sense to pick a very generic tree structure for your parse tree nodes. 
+For example: + +<blockquote> +<pre> +class Node: + def __init__(self,type,children=None,leaf=None): + self.type = type + if children: + self.children = children + else: + self.children = [ ] + self.leaf = leaf + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = Node("binop", [t[1],t[3]], t[2]) +</pre> +</blockquote> + +<h2>Yacc implementation notes</h2> + +<ul> +<li>By default, <tt>yacc.py</tt> relies on <tt>lex.py</tt> for tokenizing. However, an alternative tokenizer +can be supplied as follows: + +<blockquote> +<pre> +yacc.parse(lexer=x) +</pre> +</blockquote> +in this case, <tt>x</tt> must be a Lexer object that minimally has a <tt>x.token()</tt> method for retrieving the next +token. If an input string is given to <tt>yacc.parse()</tt>, the lexer must also have an <tt>x.input()</tt> method. + +<p> +<li>By default, the yacc generates tables in debugging mode (which produces the parser.out file and other output). +To disable this, use + +<blockquote> +<pre> +yacc.yacc(debug=0) +</pre> +</blockquote> + +<p> +<li>To change the name of the <tt>parsetab.py</tt> file, use: + +<blockquote> +<pre> +yacc.yacc(tabmodule="foo") +</pre> +</blockquote> + +<P> +<li>To print copious amounts of debugging during parsing, use: + +<blockquote> +<pre> +yacc.parse(debug=1) +</pre> +</blockquote> + +<p> +<li>The <tt>yacc.yacc()</tt> function really returns a parser object. If you want to support multiple +parsers in the same application, do this: + +<blockquote> +<pre> +p = yacc.yacc() +... +p.parse() +</pre> +</blockquote> + +Note: The function <tt>yacc.parse()</tt> is bound to the last parser that was generated. + +<p> +<li>Since the generation of the SLR tables is relatively expensive, previously generated tables are +cached and reused if possible. 
The decision to regenerate the tables is determined by taking an MD5 +checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated. + +<p> +It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules +and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow +machine. Please be patient. + +<p> +<li>Since LR parsing is mostly driven by tables, the performance of the parser is largely independent of the +size of the grammar. The biggest bottlenecks will be the lexer and the complexity of your grammar rules. +</ul> + +<h2>Parser and Lexer State Management</h2> + +In advanced parsing applications, you may want to have multiple +parsers and lexers. Furthermore, the parser may want to control the +behavior of the lexer in some way. + +<p> +To do this, it is important to note that both the lexer and parser are +actually implemented as objects. These objects are returned by the +<tt>lex()</tt> and <tt>yacc()</tt> functions respectively. For example: + +<blockquote> +<pre> +lexer = lex.lex() # Return lexer object +parser = yacc.yacc() # Return parser object +</pre> +</blockquote> + +Within lexer and parser rules, these objects are also available. In the lexer, +the "lexer" attribute of a token refers to the lexer object in use. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + ... + print t.lexer # Show lexer object +</pre> +</blockquote> + +In the parser, the "lexer" and "parser" attributes refer to the lexer +and parser objects respectively. + +<blockquote> +<pre> +def p_expr_plus(t): + 'expr : expr PLUS expr' + ... + print t.parser # Show parser object + print t.lexer # Show lexer object +</pre> +</blockquote> + +If necessary, arbitrary attributes can be attached to the lexer or parser object. 
+For example, if you wanted to have different parsing modes, you could attach a mode +attribute to the parser object and look at it later. + +<h2>Using Python's Optimized Mode</h2> + +Because PLY uses information from doc-strings, parsing and lexing +information must be gathered while running the Python interpreter in +normal mode (i.e., not with the -O or -OO options). However, if you +specify optimized mode like this: + +<blockquote> +<pre> +lex.lex(optimize=1) +yacc.yacc(optimize=1) +</pre> +</blockquote> + +then PLY can later be used when Python runs in optimized mode. To make this work, +make sure you first run Python in normal mode. Once the lexing and parsing tables +have been generated the first time, run Python in optimized mode. PLY will use +the tables without the need for doc strings. + +<p> +Beware: running PLY in optimized mode disables a lot of error +checking. You should only do this when your project has stabilized +and you don't need to do any debugging. + +<h2>Where to go from here?</h2> + +The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a +compilers textbook for the theory and underlying implementation details of LR parsing. + +</body> +</html> + + + + + + + diff --git a/ext/ply/example/ansic/README b/ext/ply/example/ansic/README new file mode 100644 index 000000000..e049d3b4e --- /dev/null +++ b/ext/ply/example/ansic/README @@ -0,0 +1,2 @@ +This example is incomplete. Was going to specify an ANSI C parser. +This is part of it. diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py new file mode 100644 index 000000000..afd995208 --- /dev/null +++ b/ext/ply/example/ansic/clex.py @@ -0,0 +1,161 @@ +# ---------------------------------------------------------------------- +# clex.py +# +# A lexer for ANSI C. 
+# ---------------------------------------------------------------------- + +import lex + +# Reserved words +reserved = ( + 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', + 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', + 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', + 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', + ) + +tokens = reserved + ( + # Literals (identifier, integer constant, float constant, string constant, char const) + 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + + # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', + 'LOR', 'LAND', 'LNOT', + 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', + + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) + 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', + 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + + # Increment/decrement (++,--) + 'PLUSPLUS', 'MINUSMINUS', + + # Structure dereference (->) + 'ARROW', + + # Conditional operator (?) + 'CONDOP', + + # Delimeters ( ) [ ] { } , . ; : + 'LPAREN', 'RPAREN', + 'LBRACKET', 'RBRACKET', + 'LBRACE', 'RBRACE', + 'COMMA', 'PERIOD', 'SEMI', 'COLON', + + # Ellipsis (...) + 'ELLIPSIS', + ) + +# Completely ignored characters +t_ignore = ' \t\x0c' + +# Newlines +def t_NEWLINE(t): + r'\n+' + t.lineno += t.value.count("\n") + +# Operators +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MOD = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' 
+t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'^=' + +# Increment/decrement +t_PLUSPLUS = r'\+\+' +t_MINUSMINUS = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_CONDOP = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers and reserved words + +reserved_map = { } +for r in reserved: + reserved_map[r.lower()] = r + +def t_ID(t): + r'[A-Za-z_][\w_]*' + t.type = reserved_map.get(t.value,"ID") + return t + +# Integer literal +t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' + +# String literal +t_SCONST = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comments +def t_comment(t): + r' /\*(.|\n)*?\*/' + t.lineno += t.value.count('\n') + +# Preprocessor directive (ignored) +def t_preprocessor(t): + r'\#(.)*?\n' + t.lineno += 1 + +def t_error(t): + print "Illegal character %s" % repr(t.value[0]) + t.skip(1) + +lexer = lex.lex(optimize=1) +if __name__ == "__main__": + lex.runmain(lexer) + + + + + diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py new file mode 100644 index 000000000..ddfd5c72b --- /dev/null +++ b/ext/ply/example/ansic/cparse.py @@ -0,0 +1,859 @@ +# ----------------------------------------------------------------------------- +# cparse.py +# +# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. 
+# ----------------------------------------------------------------------------- + +import yacc +import clex + +# Get the token map +tokens = clex.tokens + +# translation-unit: + +def p_translation_unit_1(t): + 'translation_unit : external_declaration' + pass + +def p_translation_unit_2(t): + 'translation_unit : translation_unit external_declaration' + pass + +# external-declaration: + +def p_external_declaration_1(t): + 'external_declaration : function_definition' + pass + +def p_external_declaration_2(t): + 'external_declaration : declaration' + pass + +# function-definition: + +def p_function_definition_1(t): + 'function_definition : declaration_specifiers declarator declaration_list compound_statement' + pass + +def p_function_definition_2(t): + 'function_definition : declarator declaration_list compound_statement' + pass + +def p_function_definition_3(t): + 'function_definition : declarator compound_statement' + pass + +def p_function_definition_4(t): + 'function_definition : declaration_specifiers declarator compound_statement' + pass + +# declaration: + +def p_declaration_1(t): + 'declaration : declaration_specifiers init_declarator_list SEMI' + pass + +def p_declaration_2(t): + 'declaration : declaration_specifiers SEMI' + pass + +# declaration-list: + +def p_declaration_list_1(t): + 'declaration_list : declaration' + pass + +def p_declaration_list_2(t): + 'declaration_list : declaration_list declaration ' + pass + +# declaration-specifiers +def p_declaration_specifiers_1(t): + 'declaration_specifiers : storage_class_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_2(t): + 'declaration_specifiers : type_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_3(t): + 'declaration_specifiers : type_qualifier declaration_specifiers' + pass + +def p_declaration_specifiers_4(t): + 'declaration_specifiers : storage_class_specifier' + pass + +def p_declaration_specifiers_5(t): + 'declaration_specifiers : type_specifier' + 
pass + +def p_declaration_specifiers_6(t): + 'declaration_specifiers : type_qualifier' + pass + +# storage-class-specifier +def p_storage_class_specifier(t): + '''storage_class_specifier : AUTO + | REGISTER + | STATIC + | EXTERN + | TYPEDEF + ''' + pass + +# type-specifier: +def p_type_specifier(t): + '''type_specifier : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPEID + ''' + pass + +# type-qualifier: +def p_type_qualifier(t): + '''type_qualifier : CONST + | VOLATILE''' + pass + +# struct-or-union-specifier + +def p_struct_or_union_specifier_1(t): + 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_2(t): + 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_3(t): + 'struct_or_union_specifier : struct_or_union ID' + pass + +# struct-or-union: +def p_struct_or_union(t): + '''struct_or_union : STRUCT + | UNION + ''' + pass + +# struct-declaration-list: + +def p_struct_declaration_list_1(t): + 'struct_declaration_list : struct_declaration' + pass + +def p_struct_declaration_list_2(t): + 'struct_declaration_list : struct_declarator_list struct_declaration' + pass + +# init-declarator-list: + +def p_init_declarator_list_1(t): + 'init_declarator_list : init_declarator' + pass + +def p_init_declarator_list_2(t): + 'init_declarator_list : init_declarator_list COMMA init_declarator' + pass + +# init-declarator + +def p_init_declarator_1(t): + 'init_declarator : declarator' + pass + +def p_init_declarator_2(t): + 'init_declarator : declarator EQUALS initializer' + pass + +# struct-declaration: + +def p_struct_declaration(t): + 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' + pass + +# specifier-qualifier-list: + +def p_specifier_qualifier_list_1(t): + 'specifier_qualifier_list : type_specifier 
specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_2(t): + 'specifier_qualifier_list : type_specifier' + pass + +def p_specifier_qualifier_list_3(t): + 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_4(t): + 'specifier_qualifier_list : type_qualifier' + pass + +# struct-declarator-list: + +def p_struct_declarator_list_1(t): + 'struct_declarator_list : struct_declarator' + pass + +def p_struct_declarator_list_2(t): + 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' + pass + +# struct-declarator: + +def p_struct_declarator_1(t): + 'struct_declarator : declarator' + pass + +def p_struct_declarator_2(t): + 'struct_declarator : declarator COLON constant_expression' + pass + +def p_struct_declarator_3(t): + 'struct_declarator : COLON constant_expression' + pass + +# enum-specifier: + +def p_enum_specifier_1(t): + 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_2(t): + 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_3(t): + 'enum_specifier : ENUM ID' + pass + +# enumerator_list: +def p_enumerator_list_1(t): + 'enumerator_list : enumerator' + pass + +def p_enumerator_list_2(t): + 'enumerator_list : enumerator_list COMMA enumerator' + pass + +# enumerator: +def p_enumerator_1(t): + 'enumerator : ID' + pass + +def p_enumerator_2(t): + 'enumerator : ID EQUALS constant_expression' + pass + +# declarator: + +def p_declarator_1(t): + 'declarator : pointer direct_declarator' + pass + +def p_declarator_2(t): + 'declarator : direct_declarator' + pass + +# direct-declarator: + +def p_direct_declarator_1(t): + 'direct_declarator : ID' + pass + +def p_direct_declarator_2(t): + 'direct_declarator : LPAREN declarator RPAREN' + pass + +def p_direct_declarator_3(t): + 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_declarator_4(t): + 'direct_declarator : 
direct_declarator LPAREN parameter_type_list RPAREN ' + pass + +def p_direct_declarator_5(t): + 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' + pass + +def p_direct_declarator_6(t): + 'direct_declarator : direct_declarator LPAREN RPAREN ' + pass + +# pointer: +def p_pointer_1(t): + 'pointer : TIMES type_qualifier_list' + pass + +def p_pointer_2(t): + 'pointer : TIMES' + pass + +def p_pointer_3(t): + 'pointer : TIMES type_qualifier_list pointer' + pass + +def p_pointer_4(t): + 'pointer : TIMES pointer' + pass + +# type-qualifier-list: + +def p_type_qualifier_list_1(t): + 'type_qualifier_list : type_qualifier' + pass + +def p_type_qualifier_list_2(t): + 'type_qualifier_list : type_qualifier_list type_qualifier' + pass + +# parameter-type-list: + +def p_parameter_type_list_1(t): + 'parameter_type_list : parameter_list' + pass + +def p_parameter_type_list_2(t): + 'parameter_type_list : parameter_list COMMA ELLIPSIS' + pass + +# parameter-list: + +def p_parameter_list_1(t): + 'parameter_list : parameter_declaration' + pass + +def p_parameter_list_2(t): + 'parameter_list : parameter_list COMMA parameter_declaration' + pass + +# parameter-declaration: +def p_parameter_declaration_1(t): + 'parameter_declaration : declaration_specifiers declarator' + pass + +def p_parameter_declaration_2(t): + 'parameter_declaration : declaration_specifiers abstract_declarator_opt' + pass + +# identifier-list: +def p_identifier_list_1(t): + 'identifier_list : ID' + pass + +def p_identifier_list_2(t): + 'identifier_list : identifier_list COMMA ID' + pass + +# initializer: + +def p_initializer_1(t): + 'initializer : assignment_expression' + pass + +def p_initializer_2(t): + '''initializer : LBRACE initializer_list RBRACE + | LBRACE initializer_list COMMA RBRACE''' + pass + +# initializer-list: + +def p_initializer_list_1(t): + 'initializer_list : initializer' + pass + +def p_initializer_list_2(t): + 'initializer_list : initializer_list COMMA initializer' + pass + +# 
type-name: + +def p_type_name(t): + 'type_name : specifier_qualifier_list abstract_declarator_opt' + pass + +def p_abstract_declarator_opt_1(t): + 'abstract_declarator_opt : empty' + pass + +def p_abstract_declarator_opt_2(t): + 'abstract_declarator_opt : abstract_declarator' + pass + +# abstract-declarator: + +def p_abstract_declarator_1(t): + 'abstract_declarator : pointer ' + pass + +def p_abstract_declarator_2(t): + 'abstract_declarator : pointer direct_abstract_declarator' + pass + +def p_abstract_declarator_3(t): + 'abstract_declarator : direct_abstract_declarator' + pass + +# direct-abstract-declarator: + +def p_direct_abstract_declarator_1(t): + 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' + pass + +def p_direct_abstract_declarator_2(t): + 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_3(t): + 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_4(t): + 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' + pass + +def p_direct_abstract_declarator_5(t): + 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' + pass + +# Optional fields in abstract declarators + +def p_constant_expression_opt_1(t): + 'constant_expression_opt : empty' + pass + +def p_constant_expression_opt_2(t): + 'constant_expression_opt : constant_expression' + pass + +def p_parameter_type_list_opt_1(t): + 'parameter_type_list_opt : empty' + pass + +def p_parameter_type_list_opt_2(t): + 'parameter_type_list_opt : parameter_type_list' + pass + +# statement: + +def p_statement(t): + ''' + statement : labeled_statement + | expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + ''' + pass + +# labeled-statement: + +def p_labeled_statement_1(t): + 'labeled_statement : ID COLON statement' + pass + +def 
p_labeled_statement_2(t): + 'labeled_statement : CASE constant_expression COLON statement' + pass + +def p_labeled_statement_3(t): + 'labeled_statement : DEFAULT COLON statement' + pass + +# expression-statement: +def p_expression_statement(t): + 'expression_statement : expression_opt SEMI' + pass + +# compound-statement: + +def p_compound_statement_1(t): + 'compound_statement : LBRACE declaration_list statement_list RBRACE' + pass + +def p_compound_statement_2(t): + 'compound_statement : LBRACE statement_list RBRACE' + pass + +def p_compound_statement_3(t): + 'compound_statement : LBRACE declaration_list RBRACE' + pass + +def p_compound_statement_4(t): + 'compound_statement : LBRACE RBRACE' + pass + +# statement-list: + +def p_statement_list_1(t): + 'statement_list : statement' + pass + +def p_statement_list_2(t): + 'statement_list : statement_list statement' + pass + +# selection-statement + +def p_selection_statement_1(t): + 'selection_statement : IF LPAREN expression RPAREN statement' + pass + +def p_selection_statement_2(t): + 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' + pass + +def p_selection_statement_3(t): + 'selection_statement : SWITCH LPAREN expression RPAREN statement ' + pass + +# iteration_statement: + +def p_iteration_statement_1(t): + 'iteration_statement : WHILE LPAREN expression RPAREN statement' + pass + +def p_iteration_statement_2(t): + 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' + pass + +def p_iteration_statement_3(t): + 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' + pass + +# jump_statement: + +def p_jump_statement_1(t): + 'jump_statement : GOTO ID SEMI' + pass + +def p_jump_statement_2(t): + 'jump_statement : CONTINUE SEMI' + pass + +def p_jump_statement_3(t): + 'jump_statement : BREAK SEMI' + pass + +def p_jump_statement_4(t): + 'jump_statement : RETURN expression_opt SEMI' + pass + +def p_expression_opt_1(t): + 
'expression_opt : empty' + pass + +def p_expression_opt_2(t): + 'expression_opt : expression' + pass + +# expression: +def p_expression_1(t): + 'expression : assignment_expression' + pass + +def p_expression_2(t): + 'expression : expression COMMA assignment_expression' + pass + +# assigment_expression: +def p_assignment_expression_1(t): + 'assignment_expression : conditional_expression' + pass + +def p_assignment_expression_2(t): + 'assignment_expression : unary_expression assignment_operator assignment_expression' + pass + +# assignment_operator: +def p_assignment_operator(t): + ''' + assignment_operator : EQUALS + | TIMESEQUAL + | DIVEQUAL + | MODEQUAL + | PLUSEQUAL + | MINUSEQUAL + | LSHIFTEQUAL + | RSHIFTEQUAL + | ANDEQUAL + | OREQUAL + | XOREQUAL + ''' + pass + +# conditional-expression +def p_conditional_expression_1(t): + 'conditional_expression : logical_or_expression' + pass + +def p_conditional_expression_2(t): + 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' + pass + +# constant-expression + +def p_constant_expression(t): + 'constant_expression : conditional_expression' + pass + +# logical-or-expression + +def p_logical_or_expression_1(t): + 'logical_or_expression : logical_and_expression' + pass + +def p_logical_or_expression_2(t): + 'logical_or_expression : logical_or_expression LOR logical_and_expression' + pass + +# logical-and-expression + +def p_logical_and_expression_1(t): + 'logical_and_expression : inclusive_or_expression' + pass + +def p_logical_and_expression_2(t): + 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' + pass + +# inclusive-or-expression: + +def p_inclusive_or_expression_1(t): + 'inclusive_or_expression : exclusive_or_expression' + pass + +def p_inclusive_or_expression_2(t): + 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' + pass + +# exclusive-or-expression: + +def p_exclusive_or_expression_1(t): + 
'exclusive_or_expression : and_expression' + pass + +def p_exclusive_or_expression_2(t): + 'exclusive_or_expression : exclusive_or_expression XOR and_expression' + pass + +# AND-expression + +def p_and_expression_1(t): + 'and_expression : equality_expression' + pass + +def p_and_expression_2(t): + 'and_expression : and_expression AND equality_expression' + pass + + +# equality-expression: +def p_equality_expression_1(t): + 'equality_expression : relational_expression' + pass + +def p_equality_expression_2(t): + 'equality_expression : equality_expression EQ relational_expression' + pass + +def p_equality_expression_3(t): + 'equality_expression : equality_expression NE relational_expression' + pass + + +# relational-expression: +def p_relational_expression_1(t): + 'relational_expression : shift_expression' + pass + +def p_relational_expression_2(t): + 'relational_expression : relational_expression LT shift_expression' + pass + +def p_relational_expression_3(t): + 'relational_expression : relational_expression GT shift_expression' + pass + +def p_relational_expression_4(t): + 'relational_expression : relational_expression LE shift_expression' + pass + +def p_relational_expression_5(t): + 'relational_expression : relational_expression GE shift_expression' + pass + +# shift-expression + +def p_shift_expression_1(t): + 'shift_expression : additive_expression' + pass + +def p_shift_expression_2(t): + 'shift_expression : shift_expression LSHIFT additive_expression' + pass + +def p_shift_expression_3(t): + 'shift_expression : shift_expression RSHIFT additive_expression' + pass + +# additive-expression + +def p_additive_expression_1(t): + 'additive_expression : multiplicative_expression' + pass + +def p_additive_expression_2(t): + 'additive_expression : additive_expression PLUS multiplicative_expression' + pass + +def p_additive_expression_3(t): + 'additive_expression : additive_expression MINUS multiplicative_expression' + pass + +# multiplicative-expression + +def 
p_multiplicative_expression_1(t): + 'multiplicative_expression : cast_expression' + pass + +def p_multiplicative_expression_2(t): + 'multiplicative_expression : multiplicative_expression TIMES cast_expression' + pass + +def p_multiplicative_expression_3(t): + 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' + pass + +def p_multiplicative_expression_4(t): + 'multiplicative_expression : multiplicative_expression MOD cast_expression' + pass + +# cast-expression: + +def p_cast_expression_1(t): + 'cast_expression : unary_expression' + pass + +def p_cast_expression_2(t): + 'cast_expression : LPAREN type_name RPAREN cast_expression' + pass + +# unary-expression: +def p_unary_expression_1(t): + 'unary_expression : postfix_expression' + pass + +def p_unary_expression_2(t): + 'unary_expression : PLUSPLUS unary_expression' + pass + +def p_unary_expression_3(t): + 'unary_expression : MINUSMINUS unary_expression' + pass + +def p_unary_expression_4(t): + 'unary_expression : unary_operator cast_expression' + pass + +def p_unary_expression_5(t): + 'unary_expression : SIZEOF unary_expression' + pass + +def p_unary_expression_6(t): + 'unary_expression : SIZEOF LPAREN type_name RPAREN' + pass + +#unary-operator +def p_unary_operator(t): + '''unary_operator : AND + | TIMES + | PLUS + | MINUS + | NOT + | LNOT ''' + pass + +# postfix-expression: +def p_postfix_expression_1(t): + 'postfix_expression : primary_expression' + pass + +def p_postfix_expression_2(t): + 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' + pass + +def p_postfix_expression_3(t): + 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' + pass + +def p_postfix_expression_4(t): + 'postfix_expression : postfix_expression LPAREN RPAREN' + pass + +def p_postfix_expression_5(t): + 'postfix_expression : postfix_expression PERIOD ID' + pass + +def p_postfix_expression_6(t): + 'postfix_expression : postfix_expression ARROW ID' + pass + +def 
p_postfix_expression_7(t): + 'postfix_expression : postfix_expression PLUSPLUS' + pass + +def p_postfix_expression_8(t): + 'postfix_expression : postfix_expression MINUSMINUS' + pass + +# primary-expression: +def p_primary_expression(t): + '''primary_expression : ID + | constant + | SCONST + | LPAREN expression RPAREN''' + pass + +# argument-expression-list: +def p_argument_expression_list(t): + '''argument_expression_list : assignment_expression + | argument_expression_list COMMA assignment_expression''' + pass + +# constant: +def p_constant(t): + '''constant : ICONST + | FCONST + | CCONST''' + pass + + +def p_empty(t): + 'empty : ' + pass + +def p_error(t): + print "Whoa. We're hosed" + +import profile +# Build the grammar +profile.run("yacc.yacc()") + + + + diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py new file mode 100644 index 000000000..aeb23c246 --- /dev/null +++ b/ext/ply/example/calc/calc.py @@ -0,0 +1,108 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) diff --git a/ext/ply/example/hedit/hedit.py 
b/ext/ply/example/hedit/hedit.py new file mode 100644 index 000000000..f00427bf5 --- /dev/null +++ b/ext/ply/example/hedit/hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. +# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain() + + diff --git a/ext/ply/example/optcalc/README b/ext/ply/example/optcalc/README new file mode 100644 index 000000000..6d196f0ee --- /dev/null +++ b/ext/ply/example/optcalc/README @@ -0,0 +1,9 @@ +An example showing how to use Python optimized mode. +To run: + + - First run 'python calc.py' + + - Then run 'python -OO calc.py' + +If working correctly, the second version should run the +same way. diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py new file mode 100644 index 000000000..fa66cda5b --- /dev/null +++ b/ext/ply/example/optcalc/calc.py @@ -0,0 +1,110 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. 
+# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex(optimize=1) + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + elif t[2] == '<': t[0] = t[1] < t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc(optimize=1) + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) + diff --git 
a/ext/ply/lex.py b/ext/ply/lex.py new file mode 100644 index 000000000..7ad7a394b --- /dev/null +++ b/ext/ply/lex.py @@ -0,0 +1,681 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/lex.py 1.1 03/06/06 14:53:34-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This module automatically constructs a lexical analysis module from regular +# expression rules defined in a user-defined module. The idea is essentially the same +# as that used in John Aycock's Spark framework, but the implementation works +# at the module level rather than requiring the use of classes. +# +# This module tries to provide an interface that is closely modeled after +# the traditional lex interface in Unix. It also differs from Spark +# in that: +# +# - It provides more extensive error checking and reporting if +# the user supplies a set of regular expressions that can't +# be compiled or if there is any other kind of a problem in +# the specification. 
+# +# - The interface is geared towards LALR(1) and LR(1) parser +# generators. That is, tokens are generated one at a time +# rather than being generated in advance all in one step. +# +# There are a few limitations of this module +# +# - The module interface makes it somewhat awkward to support more +# than one lexer at a time. Although somewhat inelegant from a +# design perspective, this is rarely a practical concern for +# most compiler projects. +# +# - The lexer requires that the entire input text be read into +# a string before scanning. I suppose that most machines have +# enough memory to make this a minor issue, but it makes +# the lexer somewhat difficult to use in interactive sessions +# or with streaming data. +# +#----------------------------------------------------------------------------- + +r""" +lex.py + +This module builds lex-like scanners based on regular expression rules. +To use the module, simply write a collection of regular expression rules +and actions like this: + +# lexer.py +import lex + +# Define a list of valid tokens +tokens = ( + 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS' + ) + +# Define tokens as functions +def t_IDENTIFIER(t): + r' [a-zA-Z_](\w|_)* ' + return t + +def t_NUMBER(t): + r' \d+ ' + return t + +# Some simple tokens with no actions +t_PLUS = r'\+' +t_MINUS = r'-' + +# Initialize the lexer +lex.lex() + +The tokens list is required and contains a complete list of all valid +token types that the lexer is allowed to produce. Token types are +restricted to be valid identifiers. This means that 'MINUS' is a valid +token type whereas '-' is not. + +Rules are defined by writing a function with a name of the form +t_rulename. Each rule must accept a single argument which is +a token object generated by the lexer. This token has the following +attributes: + + t.type = type string of the token. This is initially set to the + name of the rule without the leading t_ + t.value = The value of the lexeme. 
+ t.lineno = The value of the line number where the token was encountered + +For example, the t_NUMBER() rule above might be called with the following: + + t.type = 'NUMBER' + t.value = '42' + t.lineno = 3 + +Each rule returns the token object it would like to supply to the +parser. In most cases, the token t is returned with few, if any +modifications. To discard a token for things like whitespace or +comments, simply return nothing. For instance: + +def t_whitespace(t): + r' \s+ ' + pass + +For faster lexing, you can also define this in terms of the ignore set like this: + +t_ignore = ' \t' + +The characters in this string are ignored by the lexer. Use of this feature can speed +up parsing significantly since scanning will immediately proceed to the next token. + +lex requires that the token returned by each rule has an attribute +t.type. Other than this, rules are free to return any kind of token +object that they wish and may construct a new type of token object +from the attributes of t (provided the new object has the required +type attribute). + +If illegal characters are encountered, the scanner executes the +function t_error(t) where t is a token representing the rest of the +string that hasn't been matched. If this function isn't defined, a +LexError exception is raised. The .text attribute of this exception +object contains the part of the string that wasn't matched. + +The t.skip(n) method can be used to skip ahead n characters in the +input stream. This is usually only used in the error handling rule. +For instance, the following rule would print an error message and +continue: + +def t_error(t): + print "Illegal character in input %s" % t.value[0] + t.skip(1) + +Of course, a nice scanner might wish to skip more than one character +if the input looks very corrupted. + +The lex module defines a t.lineno attribute on each token that can be used +to track the current line number in the input. 
The value of this +variable is not modified by lex so it is up to your lexer module +to correctly update its value depending on the lexical properties +of the input language. To do this, you might write rules such as +the following: + +def t_newline(t): + r' \n+ ' + t.lineno += t.value.count("\n") + +To initialize your lexer so that it can be used, simply call the lex.lex() +function in your rule file. If there are any errors in your +specification, warning messages or an exception will be generated to +alert you to the problem. + +(dave: this needs to be rewritten) +To use the newly constructed lexer from another module, simply do +this: + + import lex + import lexer + lex.input("position = initial + rate*60") + + while 1: + token = lex.token() # Get a token + if not token: break # No more tokens + ... do whatever ... + +Assuming that the module 'lexer' has initialized lex as shown +above, parsing modules can safely import 'lex' without having +to import the rule file or any additional information about the +scanner you have defined. 
+""" + +# ----------------------------------------------------------------------------- + + +__version__ = "1.3" + +import re, types, sys, copy + +# Exception thrown when invalid token encountered and no default +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class +class LexToken: + def __str__(self): + return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno) + def __repr__(self): + return str(self) + def skip(self,n): + try: + self._skipn += n + except AttributeError: + self._skipn = n + +# ----------------------------------------------------------------------------- +# Lexer class +# +# input() - Store a new string in the lexer +# token() - Get the next token +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = None # Ignored characters + self.lineno = 1 # Current line number + self.debug = 0 # Debugging mode + self.optimize = 0 # Optimized mode + self.token = self.errtoken + + def __copy__(self): + c = Lexer() + c.lexre = self.lexre + c.lexdata = self.lexdata + c.lexpos = self.lexpos + c.lexlen = self.lexlen + c.lenindexfunc = self.lexindexfunc + c.lexerrorf = self.lexerrorf + c.lextokens = self.lextokens + c.lexignore = self.lexignore + c.lineno = self.lineno + c.optimize = self.optimize + c.token = c.realtoken + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + if not 
isinstance(s,types.StringType): + raise ValueError, "Expected a string" + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + self.token = self.realtoken + + # Change the token routine to point to realtoken() + global token + if token == self.errtoken: + token = self.token + + # ------------------------------------------------------------ + # errtoken() - Return error if token is called with no data + # ------------------------------------------------------------ + def errtoken(self): + raise RuntimeError, "No input string given with input()" + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def realtoken(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + m = self.lexre.match(lexdata,lexpos) + if m: + i = m.lastindex + lexpos = m.end() + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexer = self + func,tok.type = self.lexindexfunc[i] + if not func: + self.lexpos = lexpos + return tok + + # If token is processed by a function, call it + self.lexpos = lexpos + newtok = func(tok) + self.lineno = tok.lineno # Update line number + + # Every function must return a token, if nothing, we just move to next token + if not newtok: continue + + # Verify type of the token. 
If not in the token map, raise an error + if not self.optimize: + if not self.lextokens.has_key(newtok.type): + raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.func_code.co_filename, func.func_code.co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + oldpos = lexpos + newtok = self.lexerrorf(tok) + lexpos += getattr(tok,"_skipn",0) + if oldpos == lexpos: + # Error method didn't change text position at all. This is an error. + self.lexpos = lexpos + raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + if not newtok: continue + self.lexpos = lexpos + return newtok + + self.lexpos = lexpos + raise LexError, ("No match found", lexdata[lexpos:]) + + # No more input data + self.lexpos = lexpos + 1 + return None + + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This checks to see if there are duplicated t_rulename() functions or strings +# in the parser input file. This is done using a simple regular expression +# match on each line in the filename. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea what the file is. 
Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# ----------------------------------------------------------------------------- +# _read_lextab(module) +# +# Reads lexer table from a lextab file instead of using introspection. +# ----------------------------------------------------------------------------- + +def _read_lextab(lexer, fdict, module): + exec "import %s as lextab" % module + lexer.lexre = re.compile(lextab._lexre, re.VERBOSE) + lexer.lexindexfunc = lextab._lextab + for i in range(len(lextab._lextab)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lexer.lexindexfunc[i] = (fdict[t[0]],t[1]) + lexer.lextokens = lextab._lextokens + lexer.lexignore = lextab._lexignore + if lextab._lexerrorf: + lexer.lexerrorf = fdict[lextab._lexerrorf] + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,debug=0,optimize=0,lextab="lextab"): + ldict = None + regex = "" + error = 0 + files = { } + lexer = Lexer() + lexer.debug = debug + lexer.optimize = optimize + global token,input + + if module: + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. 
+ try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + if optimize and lextab: + try: + _read_lextab(lexer,ldict, lextab) + if not lexer.lexignore: lexer.lexignore = "" + token = lexer.token + input = lexer.input + return lexer + + except ImportError: + pass + + # Get the tokens map + tokens = ldict.get("tokens",None) + if not tokens: + raise SyntaxError,"lex: module does not define 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise SyntaxError,"lex: tokens must be a list or tuple." + + # Build a dictionary of valid token names + lexer.lextokens = { } + if not optimize: + + # Utility function for verifying tokens + def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + + for n in tokens: + if not is_identifier(n): + print "lex: Bad token name '%s'" % n + error = 1 + if lexer.lextokens.has_key(n): + print "lex: Warning. Token '%s' multiply defined." 
% n + lexer.lextokens[n] = None + else: + for n in tokens: lexer.lextokens[n] = None + + + if debug: + print "lex: tokens = '%s'" % lexer.lextokens.keys() + + # Get a list of symbols with the t_ prefix + tsymbols = [f for f in ldict.keys() if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + fsymbols = [ ] + ssymbols = [ ] + for f in tsymbols: + if isinstance(ldict[f],types.FunctionType): + fsymbols.append(ldict[f]) + elif isinstance(ldict[f],types.StringType): + ssymbols.append((f,ldict[f])) + else: + print "lex: %s not defined as a function or string" % f + error = 1 + + # Sort the functions by line number + fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Sort the strings by regular expression length + ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) + + # Check for non-empty symbols + if len(fsymbols) == 0 and len(ssymbols) == 0: + raise SyntaxError,"lex: no rules of the form t_rulename are defined." + + # Add all of the rules defined with actions first + for f in fsymbols: + + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + files[file] = None + + if not optimize: + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + error = 1 + continue + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_ignore': + print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_error': + lexer.lexerrorf = f + continue + + if f.__doc__: + if not optimize: + try: + c = re.compile(f.__doc__, re.VERBOSE) + except re.error,e: + print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) + error = 1 + continue + + if debug: + print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__) + + # Okay. 
The regular expression seemed okay. Let's append it to the master regular + # expression we're building + + if (regex): regex += "|" + regex += "(?P<%s>%s)" % (f.__name__,f.__doc__) + else: + print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) + + # Now add all of the simple rules + for name,r in ssymbols: + + if name == 't_ignore': + lexer.lexignore = r + continue + + if not optimize: + if name == 't_error': + raise SyntaxError,"lex: Rule 't_error' must be defined as a function" + error = 1 + continue + + if not lexer.lextokens.has_key(name[2:]): + print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:]) + error = 1 + continue + try: + c = re.compile(r,re.VERBOSE) + except re.error,e: + print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) + error = 1 + continue + if debug: + print "lex: Adding rule %s -> '%s'" % (name,r) + + if regex: regex += "|" + regex += "(?P<%s>%s)" % (name,r) + + if not optimize: + for f in files.keys(): + if not validate_file(f): + error = 1 + try: + if debug: + print "lex: regex = '%s'" % regex + lexer.lexre = re.compile(regex, re.VERBOSE) + + # Build the index to function map for the matching engine + lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1) + for f,i in lexer.lexre.groupindex.items(): + handle = ldict[f] + if isinstance(handle,types.FunctionType): + lexer.lexindexfunc[i] = (handle,handle.__name__[2:]) + else: + # If rule was specified as a string, we build an anonymous + # callback function to carry out the action + lexer.lexindexfunc[i] = (None,f[2:]) + + # If a lextab was specified, we create a file containing the precomputed + # regular expression and index table + + if lextab and optimize: + lt = open(lextab+".py","w") + lt.write("# %s.py. This file automatically created by PLY. 
Don't edit.\n" % lextab) + lt.write("_lexre = %s\n" % repr(regex)) + lt.write("_lextab = [\n"); + for i in range(0,len(lexer.lexindexfunc)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1]))) + else: + lt.write(" (None,%s),\n" % repr(t[1])) + else: + lt.write(" None,\n") + + lt.write("]\n"); + lt.write("_lextokens = %s\n" % repr(lexer.lextokens)) + lt.write("_lexignore = %s\n" % repr(lexer.lexignore)) + if (lexer.lexerrorf): + lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__)) + else: + lt.write("_lexerrorf = None\n") + lt.close() + + except re.error,e: + print "lex: Fatal error. Unable to compile regular expression rules. %s" % e + error = 1 + if error: + raise SyntaxError,"lex: Unable to build lexer." + if not lexer.lexerrorf: + print "lex: Warning. no t_error rule is defined." + + if not lexer.lexignore: lexer.lexignore = "" + + # Create global versions of the token() and input() functions + token = lexer.token + input = lexer.input + + return lexer + +# ----------------------------------------------------------------------------- +# run() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + print "Reading from standard input (type EOF to end):" + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno) + + + + diff --git a/ext/ply/test/README b/ext/ply/test/README new file mode 100644 index 000000000..bca748497 --- /dev/null +++ b/ext/ply/test/README @@ -0,0 +1,9 @@ +This directory mostly contains tests for various types of error +conditions. 
To run: + + $ python testlex.py . + $ python testyacc.py . + +(make sure lex.py and yacc.py exist in this directory before +running the tests). + diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py new file mode 100644 index 000000000..f8eb91a09 --- /dev/null +++ b/ext/ply/test/calclex.py @@ -0,0 +1,46 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + + + diff --git a/ext/ply/test/lex_doc1.exp b/ext/ply/test/lex_doc1.exp new file mode 100644 index 000000000..29381911d --- /dev/null +++ b/ext/ply/test/lex_doc1.exp @@ -0,0 +1 @@ +./lex_doc1.py:15: No regular expression defined for rule 't_NUMBER' diff --git a/ext/ply/test/lex_doc1.py b/ext/ply/test/lex_doc1.py new file mode 100644 index 000000000..fb0fb885e --- /dev/null +++ b/ext/ply/test/lex_doc1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Missing documentation string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + pass + +def t_error(t): + pass + + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup1.exp b/ext/ply/test/lex_dup1.exp new file mode 100644 index 000000000..22bca3190 --- /dev/null +++ b/ext/ply/test/lex_dup1.exp @@ -0,0 +1,2 @@ +./lex_dup1.py:17: 
Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup1.py b/ext/ply/test/lex_dup1.py new file mode 100644 index 000000000..88bbe00e9 --- /dev/null +++ b/ext/ply/test/lex_dup1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup2.exp b/ext/ply/test/lex_dup2.exp new file mode 100644 index 000000000..883bdad46 --- /dev/null +++ b/ext/ply/test/lex_dup2.exp @@ -0,0 +1,2 @@ +./lex_dup2.py:19: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup2.py b/ext/ply/test/lex_dup2.py new file mode 100644 index 000000000..65e0b21a2 --- /dev/null +++ b/ext/ply/test/lex_dup2.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + r'\d+' + pass + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup3.exp b/ext/ply/test/lex_dup3.exp new file mode 100644 index 000000000..916612aa1 --- /dev/null +++ b/ext/ply/test/lex_dup3.exp @@ -0,0 +1,2 @@ +./lex_dup3.py:17: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_dup3.py b/ext/ply/test/lex_dup3.py new file mode 100644 index 000000000..424101823 --- /dev/null +++ b/ext/ply/test/lex_dup3.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_empty.exp b/ext/ply/test/lex_empty.exp new file mode 100644 index 000000000..af38602d5 --- /dev/null +++ b/ext/ply/test/lex_empty.exp @@ -0,0 +1 @@ +SyntaxError: lex: no rules of the form t_rulename are defined. diff --git a/ext/ply/test/lex_empty.py b/ext/ply/test/lex_empty.py new file mode 100644 index 000000000..6472832f1 --- /dev/null +++ b/ext/ply/test/lex_empty.py @@ -0,0 +1,18 @@ +# lex_token.py +# +# No rules defined + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error1.exp b/ext/ply/test/lex_error1.exp new file mode 100644 index 000000000..baa19e5b3 --- /dev/null +++ b/ext/ply/test/lex_error1.exp @@ -0,0 +1 @@ +lex: Warning. no t_error rule is defined. 
diff --git a/ext/ply/test/lex_error1.py b/ext/ply/test/lex_error1.py new file mode 100644 index 000000000..ed7980346 --- /dev/null +++ b/ext/ply/test/lex_error1.py @@ -0,0 +1,22 @@ +# lex_token.py +# +# Missing t_error() rule + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error2.exp b/ext/ply/test/lex_error2.exp new file mode 100644 index 000000000..fb1b55c8b --- /dev/null +++ b/ext/ply/test/lex_error2.exp @@ -0,0 +1 @@ +SyntaxError: lex: Rule 't_error' must be defined as a function diff --git a/ext/ply/test/lex_error2.py b/ext/ply/test/lex_error2.py new file mode 100644 index 000000000..80020f72b --- /dev/null +++ b/ext/ply/test/lex_error2.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# t_error defined, but not function + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_error = "foo" + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error3.exp b/ext/ply/test/lex_error3.exp new file mode 100644 index 000000000..936828f93 --- /dev/null +++ b/ext/ply/test/lex_error3.exp @@ -0,0 +1,2 @@ +./lex_error3.py:17: Rule 't_error' requires an argument. +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_error3.py b/ext/ply/test/lex_error3.py new file mode 100644 index 000000000..46facf589 --- /dev/null +++ b/ext/ply/test/lex_error3.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but with wrong # args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error4.exp b/ext/ply/test/lex_error4.exp new file mode 100644 index 000000000..242516576 --- /dev/null +++ b/ext/ply/test/lex_error4.exp @@ -0,0 +1,2 @@ +./lex_error4.py:17: Rule 't_error' has too many arguments. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_error4.py b/ext/ply/test/lex_error4.py new file mode 100644 index 000000000..d777fee84 --- /dev/null +++ b/ext/ply/test/lex_error4.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but too many args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t,s): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_hedit.exp b/ext/ply/test/lex_hedit.exp new file mode 100644 index 000000000..0b09827c6 --- /dev/null +++ b/ext/ply/test/lex_hedit.exp @@ -0,0 +1,3 @@ +(H_EDIT_DESCRIPTOR,'abc',1) +(H_EDIT_DESCRIPTOR,'abcdefghij',1) +(H_EDIT_DESCRIPTOR,'xy',1) diff --git a/ext/ply/test/lex_hedit.py b/ext/ply/test/lex_hedit.py new file mode 100644 index 000000000..68f9fcbd1 --- /dev/null +++ b/ext/ply/test/lex_hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. 
+# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain(data="3Habc 10Habcdefghij 2Hxy") + + + diff --git a/ext/ply/test/lex_ignore.exp b/ext/ply/test/lex_ignore.exp new file mode 100644 index 000000000..c3b04a154 --- /dev/null +++ b/ext/ply/test/lex_ignore.exp @@ -0,0 +1,2 @@ +./lex_ignore.py:17: Rule 't_ignore' must be defined as a string. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_ignore.py b/ext/ply/test/lex_ignore.py new file mode 100644 index 000000000..49c303f81 --- /dev/null +++ b/ext/ply/test/lex_ignore.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Improperly specific ignore declaration + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_ignore(t): + ' \t' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_re1.exp b/ext/ply/test/lex_re1.exp new file mode 100644 index 000000000..634eefefe --- /dev/null +++ b/ext/ply/test/lex_re1.exp @@ -0,0 +1,2 @@ +lex: Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_re1.py b/ext/ply/test/lex_re1.py new file mode 100644 index 000000000..4a055ad72 --- /dev/null +++ b/ext/ply/test/lex_re1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Bad regular expression in a string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'(\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_rule1.exp b/ext/ply/test/lex_rule1.exp new file mode 100644 index 000000000..0c23ca294 --- /dev/null +++ b/ext/ply/test/lex_rule1.exp @@ -0,0 +1,2 @@ +lex: t_NUMBER not defined as a function or string +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_rule1.py b/ext/ply/test/lex_rule1.py new file mode 100644 index 000000000..ff3764ea1 --- /dev/null +++ b/ext/ply/test/lex_rule1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Rule defined as some other type + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = 1 + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token1.exp b/ext/ply/test/lex_token1.exp new file mode 100644 index 000000000..3792831fa --- /dev/null +++ b/ext/ply/test/lex_token1.exp @@ -0,0 +1 @@ +SyntaxError: lex: module does not define 'tokens' diff --git a/ext/ply/test/lex_token1.py b/ext/ply/test/lex_token1.py new file mode 100644 index 000000000..e8eca2b63 --- /dev/null +++ b/ext/ply/test/lex_token1.py @@ -0,0 +1,19 @@ +# lex_token.py +# +# Tests for absence of tokens variable + +import lex + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token2.exp b/ext/ply/test/lex_token2.exp new file mode 100644 index 000000000..3f98fe51d --- /dev/null +++ b/ext/ply/test/lex_token2.exp @@ -0,0 +1 @@ +SyntaxError: lex: tokens must be a list or tuple. 
diff --git a/ext/ply/test/lex_token2.py b/ext/ply/test/lex_token2.py new file mode 100644 index 000000000..38b34dabe --- /dev/null +++ b/ext/ply/test/lex_token2.py @@ -0,0 +1,21 @@ +# lex_token.py +# +# Tests for tokens of wrong type + +import lex + +tokens = "PLUS MINUS NUMBER" + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token3.exp b/ext/ply/test/lex_token3.exp new file mode 100644 index 000000000..d991d3c37 --- /dev/null +++ b/ext/ply/test/lex_token3.exp @@ -0,0 +1,2 @@ +lex: Rule 't_MINUS' defined for an unspecified token MINUS. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_token3.py b/ext/ply/test/lex_token3.py new file mode 100644 index 000000000..909f9180d --- /dev/null +++ b/ext/ply/test/lex_token3.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# tokens is right type, but is missing a token for one rule + +import lex + +tokens = [ + "PLUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token4.exp b/ext/ply/test/lex_token4.exp new file mode 100644 index 000000000..3dd88e05a --- /dev/null +++ b/ext/ply/test/lex_token4.exp @@ -0,0 +1,2 @@ +lex: Bad token name '-' +SyntaxError: lex: Unable to build lexer. 
diff --git a/ext/ply/test/lex_token4.py b/ext/ply/test/lex_token4.py new file mode 100644 index 000000000..d77d1662c --- /dev/null +++ b/ext/ply/test/lex_token4.py @@ -0,0 +1,26 @@ +# lex_token.py +# +# Bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "-", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token5.exp b/ext/ply/test/lex_token5.exp new file mode 100644 index 000000000..d7bcb2e7c --- /dev/null +++ b/ext/ply/test/lex_token5.exp @@ -0,0 +1 @@ +lex.LexError: ./lex_token5.py:16: Rule 't_NUMBER' returned an unknown token type 'NUM' diff --git a/ext/ply/test/lex_token5.py b/ext/ply/test/lex_token5.py new file mode 100644 index 000000000..d9b0c96aa --- /dev/null +++ b/ext/ply/test/lex_token5.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Return a bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' + +def t_NUMBER(t): + r'\d+' + t.type = "NUM" + return t + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() +lex.input("1234") +t = lex.token() + + diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py new file mode 100755 index 000000000..df000b83d --- /dev/null +++ b/ext/ply/test/testlex.py @@ -0,0 +1,57 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testlex.py +# +# Run tests for the lexing module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testlex.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testlex.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"lex_*.py")) + +print "**** Running tests for lex ****" + +for t 
in f: + name = t[:-3] + print "Testing %-32s" % name, + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py new file mode 100644 index 000000000..a185cbb29 --- /dev/null +++ b/ext/ply/test/testyacc.py @@ -0,0 +1,58 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testyacc.py +# +# Run tests for the yacc module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testyacc.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testyacc.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"yacc_*.py")) + +print "**** Running tests for yacc ****" + +for t in f: + name = t[:-3] + print "Testing %-32s" % name, + os.system("rm -f %s/parsetab.*" % dirname) + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/yacc_badargs.exp b/ext/ply/test/yacc_badargs.exp new file mode 100644 index 000000000..b145c51f2 --- /dev/null +++ b/ext/ply/test/yacc_badargs.exp @@ -0,0 +1,3 @@ +./yacc_badargs.py:21: Rule 'p_statement_assign' has too many arguments. 
+./yacc_badargs.py:25: Rule 'p_statement_expr' requires an argument. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badargs.py b/ext/ply/test/yacc_badargs.py new file mode 100644 index 000000000..12075efcc --- /dev/null +++ b/ext/ply/test/yacc_badargs.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badargs.py +# +# Rules with wrong # args +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t,s): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec.exp b/ext/ply/test/yacc_badprec.exp new file mode 100644 index 000000000..7764b0246 --- /dev/null +++ b/ext/ply/test/yacc_badprec.exp @@ -0,0 +1 @@ +yacc.YaccError: precedence must be a list or tuple. 
diff --git a/ext/ply/test/yacc_badprec.py b/ext/ply/test/yacc_badprec.py new file mode 100644 index 000000000..55bf7720d --- /dev/null +++ b/ext/ply/test/yacc_badprec.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec.py +# +# Bad precedence specifier +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = "blah" + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec2.exp b/ext/ply/test/yacc_badprec2.exp new file mode 100644 index 000000000..1df1427b2 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.exp @@ -0,0 +1,3 @@ +yacc: Invalid precedence table. +yacc: Generating SLR parsing table... 
+yacc: 4 shift/reduce conflicts diff --git a/ext/ply/test/yacc_badprec2.py b/ext/ply/test/yacc_badprec2.py new file mode 100644 index 000000000..9cbc99827 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec2.py +# +# Bad precedence +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + 42, + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badrule.exp b/ext/ply/test/yacc_badrule.exp new file mode 100644 index 000000000..553779778 --- /dev/null +++ b/ext/ply/test/yacc_badrule.exp @@ -0,0 +1,5 @@ +./yacc_badrule.py:22: Syntax error. Expected ':' +./yacc_badrule.py:26: Syntax error in rule 'statement' +./yacc_badrule.py:31: Syntax error. Expected ':' +./yacc_badrule.py:40: Syntax error. 
Expected ':' +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badrule.py b/ext/ply/test/yacc_badrule.py new file mode 100644 index 000000000..cad3a967e --- /dev/null +++ b/ext/ply/test/yacc_badrule.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badrule.py +# +# Syntax problems in the rule strings +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression: MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badtok.exp b/ext/ply/test/yacc_badtok.exp new file mode 100644 index 000000000..f6e64726c --- /dev/null +++ b/ext/ply/test/yacc_badtok.exp @@ -0,0 +1 @@ +yacc.YaccError: tokens must be a list or tuple. 
diff --git a/ext/ply/test/yacc_badtok.py b/ext/ply/test/yacc_badtok.py new file mode 100644 index 000000000..a17d26aaa --- /dev/null +++ b/ext/ply/test/yacc_badtok.py @@ -0,0 +1,68 @@ +# ----------------------------------------------------------------------------- +# yacc_badtok.py +# +# A grammar, but tokens is a bad datatype +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +tokens = "Hello" + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_dup.exp b/ext/ply/test/yacc_dup.exp new file mode 100644 index 000000000..99f3fe22c --- /dev/null +++ b/ext/ply/test/yacc_dup.exp @@ -0,0 +1,4 @@ +./yacc_dup.py:25: Function p_statement redefined. Previously defined on line 21 +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_dup.py b/ext/ply/test/yacc_dup.py new file mode 100644 index 000000000..557cd0ae1 --- /dev/null +++ b/ext/ply/test/yacc_dup.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_dup.py +# +# Duplicated rule name +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error1.exp b/ext/ply/test/yacc_error1.exp new file mode 100644 index 000000000..980fc905c --- /dev/null +++ b/ext/ply/test/yacc_error1.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error1.py:59: p_error() requires 1 argument. 
diff --git a/ext/ply/test/yacc_error1.py b/ext/ply/test/yacc_error1.py new file mode 100644 index 000000000..413004520 --- /dev/null +++ b/ext/ply/test/yacc_error1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t,s): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error2.exp b/ext/ply/test/yacc_error2.exp new file mode 100644 index 000000000..d0573b4dd --- /dev/null +++ b/ext/ply/test/yacc_error2.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error2.py:59: p_error() requires 1 argument. 
diff --git a/ext/ply/test/yacc_error2.py b/ext/ply/test/yacc_error2.py new file mode 100644 index 000000000..d4fd1d219 --- /dev/null +++ b/ext/ply/test/yacc_error2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error3.exp b/ext/ply/test/yacc_error3.exp new file mode 100644 index 000000000..31eaee754 --- /dev/null +++ b/ext/ply/test/yacc_error3.exp @@ -0,0 +1 @@ +yacc.YaccError: 'p_error' defined, but is not a function. 
diff --git a/ext/ply/test/yacc_error3.py b/ext/ply/test/yacc_error3.py new file mode 100644 index 000000000..7093fab48 --- /dev/null +++ b/ext/ply/test/yacc_error3.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +p_error = "blah" + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_inf.exp b/ext/ply/test/yacc_inf.exp new file mode 100644 index 000000000..a7f47dada --- /dev/null +++ b/ext/ply/test/yacc_inf.exp @@ -0,0 +1,5 @@ +yacc: Warning. Token 'NUMBER' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Infinite recursion detected for symbol 'statement'. +yacc: Infinite recursion detected for symbol 'expression'. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_inf.py b/ext/ply/test/yacc_inf.py new file mode 100644 index 000000000..885e2c4df --- /dev/null +++ b/ext/ply/test/yacc_inf.py @@ -0,0 +1,55 @@ +# ----------------------------------------------------------------------------- +# yacc_inf.py +# +# Infinite recursion +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_missing1.exp b/ext/ply/test/yacc_missing1.exp new file mode 100644 index 000000000..065d6a54a --- /dev/null +++ b/ext/ply/test/yacc_missing1.exp @@ -0,0 +1,2 @@ +./yacc_missing1.py:22: Symbol 'location' used, but not defined as a token or a rule. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_missing1.py b/ext/ply/test/yacc_missing1.py new file mode 100644 index 000000000..e63904d0e --- /dev/null +++ b/ext/ply/test/yacc_missing1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_missing1.py +# +# Grammar with a missing rule +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : location EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nodoc.exp b/ext/ply/test/yacc_nodoc.exp new file mode 100644 index 000000000..3f52a3287 --- /dev/null +++ b/ext/ply/test/yacc_nodoc.exp @@ -0,0 +1,2 @@ +./yacc_nodoc.py:25: No documentation string specified in function 'p_statement_expr' +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_nodoc.py b/ext/ply/test/yacc_nodoc.py new file mode 100644 index 000000000..e3941bdaa --- /dev/null +++ b/ext/ply/test/yacc_nodoc.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_nodoc.py +# +# Rule with a missing doc-string +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_noerror.exp b/ext/ply/test/yacc_noerror.exp new file mode 100644 index 000000000..986fa31fa --- /dev/null +++ b/ext/ply/test/yacc_noerror.exp @@ -0,0 +1,2 @@ +yacc: Warning. no p_error() function is defined. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_noerror.py b/ext/ply/test/yacc_noerror.py new file mode 100644 index 000000000..d92f48ea6 --- /dev/null +++ b/ext/ply/test/yacc_noerror.py @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# yacc_noerror.py +# +# No p_error() rule defined. +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nop.exp b/ext/ply/test/yacc_nop.exp new file mode 100644 index 000000000..062878b9e --- /dev/null +++ b/ext/ply/test/yacc_nop.exp @@ -0,0 +1,2 @@ +./yacc_nop.py:25: Warning. Possible grammar rule 'statement_expr' defined without p_ prefix. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_nop.py b/ext/ply/test/yacc_nop.py new file mode 100644 index 000000000..c599ffd5d --- /dev/null +++ b/ext/ply/test/yacc_nop.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_nop.py +# +# Possible grammar rule defined without p_ prefix +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notfunc.exp b/ext/ply/test/yacc_notfunc.exp new file mode 100644 index 000000000..271167341 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.exp @@ -0,0 +1,4 @@ +yacc: Warning. 'p_statement_assign' not defined as a function +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_notfunc.py b/ext/ply/test/yacc_notfunc.py new file mode 100644 index 000000000..f61663d60 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_notfunc.py +# +# p_rule not defined as a function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +p_statement_assign = "Blah" + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notok.exp b/ext/ply/test/yacc_notok.exp new file mode 100644 index 000000000..708f6f597 --- /dev/null +++ b/ext/ply/test/yacc_notok.exp @@ -0,0 +1 @@ +yacc.YaccError: module does not define a list 'tokens' diff --git a/ext/ply/test/yacc_notok.py b/ext/ply/test/yacc_notok.py new file mode 100644 index 000000000..dfa0059be --- /dev/null +++ b/ext/ply/test/yacc_notok.py @@ -0,0 +1,66 @@ +# 
----------------------------------------------------------------------------- +# yacc_notok.py +# +# A grammar, but we forgot to import the tokens list +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_rr.exp b/ext/ply/test/yacc_rr.exp new file mode 100644 index 000000000..0ec556d16 --- /dev/null +++ b/ext/ply/test/yacc_rr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... 
+yacc: 1 reduce/reduce conflict diff --git a/ext/ply/test/yacc_rr.py b/ext/ply/test/yacc_rr.py new file mode 100644 index 000000000..c061c2c17 --- /dev/null +++ b/ext/ply/test/yacc_rr.py @@ -0,0 +1,71 @@ +# ----------------------------------------------------------------------------- +# yacc_rr.py +# +# A grammar with a reduce/reduce conflict +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_2(t): + 'statement : NAME EQUALS NUMBER' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_simple.exp b/ext/ply/test/yacc_simple.exp new file mode 100644 index 000000000..de7964b6f --- /dev/null +++ b/ext/ply/test/yacc_simple.exp @@ -0,0 +1 @@ +yacc: Generating SLR parsing table... 
diff --git a/ext/ply/test/yacc_simple.py b/ext/ply/test/yacc_simple.py new file mode 100644 index 000000000..7b4b40b17 --- /dev/null +++ b/ext/ply/test/yacc_simple.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_sr.exp b/ext/ply/test/yacc_sr.exp new file mode 100644 index 000000000..7225ad94b --- /dev/null +++ b/ext/ply/test/yacc_sr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... 
+yacc: 20 shift/reduce conflicts diff --git a/ext/ply/test/yacc_sr.py b/ext/ply/test/yacc_sr.py new file mode 100644 index 000000000..4341f6997 --- /dev/null +++ b/ext/ply/test/yacc_sr.py @@ -0,0 +1,62 @@ +# ----------------------------------------------------------------------------- +# yacc_sr.py +# +# A grammar with shift-reduce conflicts +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_term1.exp b/ext/ply/test/yacc_term1.exp new file mode 100644 index 000000000..422d2bacd --- /dev/null +++ b/ext/ply/test/yacc_term1.exp @@ -0,0 +1,2 @@ +./yacc_term1.py:22: Illegal rule name 'NUMBER'. Already defined as a token. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_term1.py b/ext/ply/test/yacc_term1.py new file mode 100644 index 000000000..97a2e7a60 --- /dev/null +++ b/ext/ply/test/yacc_term1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_term1.py +# +# Terminal used on the left-hand-side +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'NUMBER : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_unused.exp b/ext/ply/test/yacc_unused.exp new file mode 100644 index 000000000..390754de3 --- /dev/null +++ b/ext/ply/test/yacc_unused.exp @@ -0,0 +1,4 @@ +./yacc_unused.py:60: Symbol 'COMMA' used, but not defined as a token or a rule. +yacc: Symbol 'COMMA' is unreachable. +yacc: Symbol 'exprlist' is unreachable. +yacc.YaccError: Unable to construct parser. 
diff --git a/ext/ply/test/yacc_unused.py b/ext/ply/test/yacc_unused.py new file mode 100644 index 000000000..4cbd63327 --- /dev/null +++ b/ext/ply/test/yacc_unused.py @@ -0,0 +1,76 @@ +# ----------------------------------------------------------------------------- +# yacc_unused.py +# +# A grammar with an unused rule +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_expr_list(t): + 'exprlist : exprlist COMMA expression' + pass + +def p_expr_list_2(t): + 'exprlist : expression' + pass + + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_uprec.exp b/ext/ply/test/yacc_uprec.exp new file mode 100644 index 000000000..b1a71a250 --- /dev/null +++ b/ext/ply/test/yacc_uprec.exp @@ -0,0 +1,2 @@ +./yacc_uprec.py:35: Nothing known about the precedence of 'UMINUS' 
+yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_uprec.py b/ext/ply/test/yacc_uprec.py new file mode 100644 index 000000000..139ce6318 --- /dev/null +++ b/ext/ply/test/yacc_uprec.py @@ -0,0 +1,62 @@ +# ----------------------------------------------------------------------------- +# yacc_uprec.py +# +# A grammar with a bad %prec specifier +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/yacc.py b/ext/ply/yacc.py new file mode 100644 index 000000000..1041745ed --- /dev/null +++ b/ext/ply/yacc.py @@ -0,0 +1,1846 @@ +#----------------------------------------------------------------------------- +# ply: yacc.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. 
Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. Roughly speaking, this module is a cross between +# John Aycock's Spark system and the GNU bison utility. +# +# Disclaimer: This is a work in progress. SLR parsing seems to work fairly +# well and there is extensive error checking. LALR(1) is in progress. The +# rest of this file is a bit of a mess. Please pardon the dust. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). 
+#----------------------------------------------------------------------------- + +__version__ = "1.3" + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = 1 # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'SLR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +import re, types, sys, cStringIO, md5, os.path + +# Exception raised for yacc-related errors +class YaccError(Exception): pass + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) + +class YaccSymbol: + def __str__(self): return self.type + def __repr__(self): return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). 
The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. + +class YaccSlice: + def __init__(self,s): + self.slice = s + self.pbstack = [] + + def __getitem__(self,n): + return self.slice[n].value + + def __setitem__(self,n,v): + self.slice[n].value = v + + def __len__(self): + return len(self.slice) + + def lineno(self,n): + return getattr(self.slice[n],"lineno",0) + + def linespan(self,n): + startline = getattr(self.slice[n],"lineno",0) + endline = getattr(self.slice[n],"endlineno",startline) + return startline,endline + + def pushback(self,n): + if n <= 0: + raise ValueError, "Expected a positive value" + if n > (len(self.slice)-1): + raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) + for i in range(0,n): + self.pbstack.append(self.slice[-i-1]) + +# The LR Parsing engine. This is defined as a class so that multiple parsers +# can exist in the same process. A user never instantiates this directly. +# Instead, the global yacc() function should be used to create a suitable Parser +# object. + +class Parser: + def __init__(self,magic=None): + + # This is a hack to keep users from trying to instantiate a Parser + # object directly. + + if magic != "xyzzy": + raise YaccError, "Can't instantiate Parser. Use yacc() instead." 
+ + # Reset internal state + self.productions = None # List of productions + self.errorfunc = None # Error handling function + self.action = { } # LR Action table + self.goto = { } # LR goto table + self.require = { } # Attribute require table + self.method = "Unknown LR" # Table construction method used + + def errok(self): + self.errorcount = 0 + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$' + self.symstack.append(sym) + self.statestack.append(0) + + def parse(self,input=None,lexer=None,debug=0): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table + goto = self.goto # Local reference to goto table + prod = self.productions # Local reference to production list + pslice = YaccSlice(None) # Slice object passed to grammar rules + pslice.parser = self # Parser object + self.errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + import lex as lexer + + pslice.lexer = lexer + + # If input was supplied, pass to lexer + if input: + lexer.input(input) + + # Tokenize function + get_token = lexer.token + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + errtoken = None # Err token + + # The start state is assumed to be (0,$) + statestack.append(0) + sym = YaccSymbol() + sym.type = '$' + symstack.append(sym) + + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. 
Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$' + if debug: + print "%-20s : %s" % (lookahead, [xx.type for xx in symstack]) + + # Check the action table + s = statestack[-1] + ltype = lookahead.type + t = actions.get((s,ltype),None) + + if t is not None: + if t > 0: + # shift a symbol on the stack + if ltype == '$': + # Error, end of input + print "yacc: Parse error. EOF" + return + statestack.append(t) + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if self.errorcount > 0: + self.errorcount -= 1 + + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + try: + sym.lineno = targ[1].lineno + sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) + except AttributeError: + sym.lineno = 0 + del symstack[-plen:] + del statestack[-plen:] + else: + sym.lineno = 0 + targ = [ sym ] + pslice.slice = targ + pslice.pbstack = [] + # Call the grammar rule with our special slice object + p.func(pslice) + + # Validate attributes of the resulting value attribute +# if require: +# try: +# t0 = targ[0] +# r = Requires.get(t0.type,None) +# t0d = t0.__dict__ +# if r: +# for field in r: +# tn = t0 +# for fname in field: +# try: +# tf = tn.__dict__ +# tn = tf.get(fname) +# except StandardError: +# tn = None +# if not tn: +# print "%s:%d: Rule %s doesn't set required attribute '%s'" % \ +# (p.file,p.line,p.name,".".join(field)) +# except TypeError,LookupError: +# print "Bad requires directive " % r +# pass + + + # If there was a pushback, put that on the stack + if 
pslice.pbstack: + lookaheadstack.append(lookahead) + for _t in pslice.pbstack: + lookaheadstack.append(_t) + lookahead = None + + symstack.append(sym) + statestack.append(goto[statestack[-1],pname]) + continue + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + + if t == None: + # We have some kind of parsing error here. To handle this, + # we are going to push the current token onto the tokenstack + # and replace it with an 'error' token. If there are any synchronization + # rules, they may catch it. + # + # In addition to pushing the error token, we call call the user defined p_error() + # function if this is the first syntax error. This function is only called + # if errorcount == 0. + + if not self.errorcount: + self.errorcount = error_count + errtoken = lookahead + if errtoken.type == '$': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if not self.errorcount: + # User must have done some kind of panic mode recovery on their own. The returned token + # is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type) + else: + print "yacc: Syntax error, token=%s" % errtoken.type + else: + print "yacc: Parse error in input. EOF" + return + + else: + self.errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. 
+ + if len(statestack) <= 1 and lookahead.type != '$': + lookahead = None + errtoken = None + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + + continue + + # Call an error function here + raise RuntimeError, "yacc: internal parser error!!!\n" + +# ----------------------------------------------------------------------------- +# === Parser Construction === +# +# The following functions and variables are used to implement the yacc() function +# itself. This is pretty hairy stuff involving lots of error checking, +# construction of LR items, kernels, and so forth. Although a lot of +# this work is done using global variables, the resulting Parser object +# is completely self contained--meaning that it is safe to repeatedly +# call yacc() with different grammars in the same application. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This function checks to see if there are duplicated p_rulename() functions +# in the parser module file. Without this function, it is really easy for +# users to make mistakes by cutting and pasting code fragments (and it's a real +# bugger to try and figure out why the resulting parser doesn't work). 
Therefore, +# we just do a little regular expression pattern matching of def statements +# to try and detect duplicates. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. +def validate_dict(d): + for n,v in d.items(): + if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue + if n[0:2] == 't_': continue + + if n[0:2] == 'p_': + print "yacc: Warning. '%s' not defined as a function" % n + if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n) + except StandardError: + pass + +# ----------------------------------------------------------------------------- +# === GRAMMAR FUNCTIONS === +# +# The following global variables and functions are used to store, manipulate, +# and verify the grammar rules specified by the user. 
+# ----------------------------------------------------------------------------- + +# Initialize all of the global variables used during grammar construction +def initialize_vars(): + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + First = { } # A dictionary of precomputed FIRST(x) symbols + + Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + LRitems = [ ] # A list of all LR items for the grammar. These are the + # productions with the "dot" like E -> E . PLUS E + + Errorfunc = None # User defined error handler + + Signature = md5.new() # Digital signature of the grammar rules, precedence + # and other information. Used to determined when a + # parsing table needs to be regenerated. + + Requires = { } # Requires list + + # File objects used when creating the parser.out debugging file + global _vf, _vfc + _vf = cStringIO.StringIO() + _vfc = cStringIO.StringIO() + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. 
+# It has a few required attributes: +# +# name - Name of the production (nonterminal) +# prod - A list of symbols making up its production +# number - Production number. +# +# In addition, a few additional attributes are used to help with debugging or +# optimization of table generation. +# +# file - File where production action is defined. +# lineno - Line number where action is defined +# func - Action function +# prec - Precedence level +# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' +# then lr_next refers to 'E -> E PLUS . E' +# lr_index - LR item index (location of the ".") in the prod list. +# len - Length of the production (number of symbols on right hand side) +# ----------------------------------------------------------------------------- + +class Production: + def __init__(self,**kw): + for k,v in kw.items(): + setattr(self,k,v) + self.lr_index = -1 + self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure + self.usyms = [ ] + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> <empty>" % self.name + return s + + def __repr__(self): + return str(self) + + # Compute lr_items from the production + def lr_item(self,n): + if n > len(self.prod): return None + p = Production() + p.name = self.name + p.prod = list(self.prod) + p.number = self.number + p.lr_index = n + p.prod.insert(n,".") + p.prod = tuple(p.prod) + p.len = len(p.prod) + p.usyms = self.usyms + + # Precompute list of productions immediately following + try: + p.lrafter = Prodnames[p.prod[n+1]] + except (IndexError,KeyError),e: + p.lrafter = [] + try: + p.lrbefore = p.prod[n-1] + except IndexError: + p.lrbefore = None + + return p + +class MiniProduction: + pass + +# Utility function +def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + +# ----------------------------------------------------------------------------- +# add_production() +# +# Given an action function, 
this function assembles a production rule. +# The production rule is assumed to be found in the function's docstring. +# This rule has the general syntax: +# +# name1 ::= production1 +# | production2 +# | production3 +# ... +# | productionn +# name2 ::= production1 +# | production2 +# ... +# ----------------------------------------------------------------------------- + +def add_production(f,file,line,prodname,syms): + + if Terminals.has_key(prodname): + print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname) + return -1 + if prodname == 'error': + print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname) + return -1 + + if not is_identifier(prodname): + print "%s:%d: Illegal rule name '%s'" % (file,line,prodname) + return -1 + + for s in syms: + if not is_identifier(s) and s != '%prec': + print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname) + return -1 + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if Prodmap.has_key(map): + m = Prodmap[map] + print "%s:%d: Duplicate rule %s." % (file,line, m) + print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line) + return -1 + + p = Production() + p.name = prodname + p.prod = syms + p.file = file + p.line = line + p.func = f + p.number = len(Productions) + + + Productions.append(p) + Prodmap[map] = p + if not Nonterminals.has_key(prodname): + Nonterminals[prodname] = [ ] + + # Add all terminals to Terminals + i = 0 + while i < len(p.prod): + t = p.prod[i] + if t == '%prec': + try: + precname = p.prod[i+1] + except IndexError: + print "%s:%d: Syntax error. Nothing follows %%prec." 
% (p.file,p.line) + return -1 + + prec = Precedence.get(precname,None) + if not prec: + print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname) + return -1 + else: + p.prec = prec + del p.prod[i] + del p.prod[i] + continue + + if Terminals.has_key(t): + Terminals[t].append(p.number) + # Is a terminal. We'll assign a precedence to p based on this + if not hasattr(p,"prec"): + p.prec = Precedence.get(t,('right',0)) + else: + if not Nonterminals.has_key(t): + Nonterminals[t] = [ ] + Nonterminals[t].append(p.number) + i += 1 + + if not hasattr(p,"prec"): + p.prec = ('right',0) + + # Set final length of productions + p.len = len(p.prod) + p.prod = tuple(p.prod) + + # Calculate unique syms in the production + p.usyms = [ ] + for s in p.prod: + if s not in p.usyms: + p.usyms.append(s) + + # Add to the global productions list + try: + Prodnames[p.name].append(p) + except KeyError: + Prodnames[p.name] = [ p ] + return 0 + +# Given a raw rule function, this function rips out its doc string +# and adds rules to the grammar + +def add_function(f): + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + error = 0 + + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + return -1 + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + return -1 + + if f.__doc__: + # Split the doc string into lines + pstrings = f.__doc__.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + print "%s:%d: Misplaced '|'." 
% (file,dline) + return -1 + prodname = lastp + if len(p) > 1: + syms = p[1:] + else: + syms = [ ] + else: + prodname = p[0] + lastp = prodname + assign = p[1] + if len(p) > 2: + syms = p[2:] + else: + syms = [ ] + if assign != ':' and assign != '::=': + print "%s:%d: Syntax error. Expected ':'" % (file,dline) + return -1 + e = add_production(f,file,dline,prodname,syms) + error += e + except StandardError: + print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps) + error -= 1 + else: + print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__) + return error + + +# Cycle checking code (Michael Dyck) + +def compute_reachable(): + ''' + Find each symbol that can be reached from the start symbol. + Print a warning for any nonterminals that can't be reached. + (Unused terminals have already had their warning.) + ''' + Reachable = { } + for s in Terminals.keys() + Nonterminals.keys(): + Reachable[s] = 0 + + mark_reachable_from( Productions[0].prod[0], Reachable ) + + for s in Nonterminals.keys(): + if not Reachable[s]: + print "yacc: Symbol '%s' is unreachable." % s + +def mark_reachable_from(s, Reachable): + ''' + Mark all symbols that are reachable from symbol s. + ''' + if Reachable[s]: + # We've already reached symbol s. + return + Reachable[s] = 1 + for p in Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r, Reachable) + +# ----------------------------------------------------------------------------- +# compute_terminates() +# +# This function looks at the various parsing rules and tries to detect +# infinite recursion cycles (grammar rules where there is no possible way +# to derive a string of only terminals). +# ----------------------------------------------------------------------------- +def compute_terminates(): + ''' + Raise an error for any symbols that don't terminate. 
+ ''' + Terminates = {} + + # Terminals: + for t in Terminals.keys(): + Terminates[t] = 1 + + Terminates['$'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in Nonterminals.keys(): + Terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not Terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not Terminates[n]: + Terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + some_error = 0 + for (s,terminates) in Terminates.items(): + if not terminates: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + print "yacc: Infinite recursion detected for symbol '%s'." % s + some_error = 1 + + return some_error + +# ----------------------------------------------------------------------------- +# verify_productions() +# +# This function examines all of the supplied rules to see if they seem valid. +# ----------------------------------------------------------------------------- +def verify_productions(cycle_check=1): + error = 0 + for p in Productions: + if not p: continue + + for s in p.prod: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." 
% (p.file,p.line,s) + error = 1 + continue + + unused_tok = 0 + # Now verify all of the tokens + if yaccdebug: + _vf.write("Unused terminals:\n\n") + for s,v in Terminals.items(): + if s != 'error' and not v: + print "yacc: Warning. Token '%s' defined, but not used." % s + if yaccdebug: _vf.write(" %s\n"% s) + unused_tok += 1 + + # Print out all of the productions + if yaccdebug: + _vf.write("\nGrammar\n\n") + for i in range(1,len(Productions)): + _vf.write("Rule %-5d %s\n" % (i, Productions[i])) + + unused_prod = 0 + # Verify the use of all productions + for s,v in Nonterminals.items(): + if not v: + p = Prodnames[s][0] + print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s) + unused_prod += 1 + + + if unused_tok == 1: + print "yacc: Warning. There is 1 unused token." + if unused_tok > 1: + print "yacc: Warning. There are %d unused tokens." % unused_tok + + if unused_prod == 1: + print "yacc: Warning. There is 1 unused rule." + if unused_prod > 1: + print "yacc: Warning. There are %d unused rules." % unused_prod + + if yaccdebug: + _vf.write("\nTerminals, with rules where they appear\n\n") + ks = Terminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) + _vf.write("\nNonterminals, with rules where they appear\n\n") + ks = Nonterminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) + + if (cycle_check): + compute_reachable() + error += compute_terminates() +# error += check_cycles() + return error + +# ----------------------------------------------------------------------------- +# build_lritems() +# +# This function walks the list of productions and builds a complete set of the +# LR items. The LR items are stored in two ways: First, they are uniquely +# numbered and placed in the list _lritems. Second, a linked list of LR items +# is built for each production. 
For example: +# +# E -> E PLUS E +# +# Creates the list +# +# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] +# ----------------------------------------------------------------------------- + +def build_lritems(): + for p in Productions: + lastlri = p + lri = p.lr_item(0) + i = 0 + while 1: + lri = p.lr_item(i) + lastlri.lr_next = lri + if not lri: break + lri.lr_num = len(LRitems) + LRitems.append(lri) + lastlri = lri + i += 1 + + # In order for the rest of the parser generator to work, we need to + # guarantee that no more lritems are generated. Therefore, we nuke + # the p.lr_item method. (Only used in debugging) + # Production.lr_item = None + +# ----------------------------------------------------------------------------- +# add_precedence() +# +# Given a list of precedence rules, add to the precedence table. +# ----------------------------------------------------------------------------- + +def add_precedence(plist): + plevel = 0 + error = 0 + for p in plist: + plevel += 1 + try: + prec = p[0] + terms = p[1:] + if prec != 'left' and prec != 'right' and prec != 'nonassoc': + print "yacc: Invalid precedence '%s'" % prec + return -1 + for t in terms: + if Precedence.has_key(t): + print "yacc: Precedence already specified for terminal '%s'" % t + error += 1 + continue + Precedence[t] = (prec,plevel) + except: + print "yacc: Invalid precedence table." + error += 1 + + return error + +# ----------------------------------------------------------------------------- +# augment_grammar() +# +# Compute the augmented grammar. This is just a rule S' -> start where start +# is the starting symbol. 
+# ----------------------------------------------------------------------------- + +def augment_grammar(start=None): + if not start: + start = Productions[1].name + Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) + Productions[0].usyms = [ start ] + Nonterminals[start].append(0) + + +# ------------------------------------------------------------------------- +# first() +# +# Compute the value of FIRST1(beta) where beta is a tuple of symbols. +# +# During execution of compute_first1, the result may be incomplete. +# Afterward (e.g., when called from compute_follow()), it will be complete. +# ------------------------------------------------------------------------- +def first(beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [ ] + for x in beta: + x_produces_empty = 0 + + # Add all the non-<empty> symbols of First[x] to the result. + for f in First[x]: + if f == '<empty>': + x_produces_empty = 1 + else: + if f not in result: result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('<empty>') + + return result + + +# FOLLOW(x) +# Given a non-terminal. This function computes the set of all symbols +# that might follow it. Dragon book, p. 189. + +def compute_follow(start=None): + # Add '$' to the follow list of the start symbol + for k in Nonterminals.keys(): + Follow[k] = [ ] + + if not start: + start = Productions[1].name + + Follow[start] = [ '$' ] + + while 1: + didadd = 0 + for p in Productions[1:]: + # Here is the production set + for i in range(len(p.prod)): + B = p.prod[i] + if Nonterminals.has_key(B): + # Okay. 
We got a non-terminal in a production + fst = first(p.prod[i+1:]) + hasempty = 0 + for f in fst: + if f != '<empty>' and f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if f == '<empty>': + hasempty = 1 + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in Follow[p.name]: + if f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if not didadd: break + + if 0 and yaccdebug: + _vf.write('\nFollow:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) + +# ------------------------------------------------------------------------- +# compute_first1() +# +# Compute the value of FIRST1(X) for all symbols +# ------------------------------------------------------------------------- +def compute_first1(): + + # Terminals: + for t in Terminals.keys(): + First[t] = [t] + + First['$'] = ['$'] + First['#'] = ['#'] # what's this for? + + # Nonterminals: + + # Initialize to the empty set: + for n in Nonterminals.keys(): + First[n] = [] + + # Then propagate symbols until no change: + while 1: + some_change = 0 + for n in Nonterminals.keys(): + for p in Prodnames[n]: + for f in first(p.prod): + if f not in First[n]: + First[n].append( f ) + some_change = 1 + if not some_change: + break + + if 0 and yaccdebug: + _vf.write('\nFirst:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % + (k, " ".join([str(s) for s in First[k]]))) + +# ----------------------------------------------------------------------------- +# === SLR Generation === +# +# The following functions are used to construct SLR (Simple LR) parsing tables +# as described on p.221-229 of the dragon book. 
+# ----------------------------------------------------------------------------- + +# Global variables for the LR parsing engine +def lr_init_vars(): + global _lr_action, _lr_goto, _lr_method + global _lr_goto_cache + + _lr_action = { } # Action table + _lr_goto = { } # Goto table + _lr_method = "Unknown" # LR method used + _lr_goto_cache = { } + +# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. +# prodlist is a list of productions. + +_add_count = 0 # Counter used to detect cycles + +def lr0_closure(I): + global _add_count + + _add_count += 1 + prodlist = Productions + + # Add everything in I to J + J = I[:] + didadd = 1 + while didadd: + didadd = 0 + for j in J: + for x in j.lrafter: + if x.lr0_added == _add_count: continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = _add_count + didadd = 1 + + return J + +# Compute the LR(0) goto function goto(I,X) where I is a set +# of LR(0) items and X is a grammar symbol. This function is written +# in a way that guarantees uniqueness of the generated goto sets +# (i.e. the same goto set will never be returned as two different Python +# objects). With uniqueness, we can later do fast set comparisons using +# id(obj) instead of element-wise comparison. 
def lr0_goto(I,x):
    """Return the LR(0) goto set for item list I on grammar symbol x.

    Results are memoized in _lr_goto_cache so the same goto set always
    comes back as the same list object (callers compare sets by id()).
    A falsy value (None or an empty list) is returned when no item of I
    has a successor item whose 'lrbefore' is x.
    """
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result.  The per-symbol cache is a chain of nested dicts
    # keyed by id() of each successor item; the finished set is stored
    # under the '$' key of the last dict reached.

    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    g = s.get('$',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$'] = g
        else:
            s['$'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g

# Compute the kernel of a set of LR(0) items
def lr0_kernel(I):
    """Return the items of I named "S'", with lr_index > 0, or with len == 0."""
    KI = [ ]
    for p in I:
        if p.name == "S'" or p.lr_index > 0 or p.len == 0:
            KI.append(p)

    return KI

# Maps id(item set) -> state number; filled in by lr0_items()
_lr0_cidhash = { }

# Compute the LR(0) sets of item function
def lr0_items():
    """Return the list C of LR(0) item sets; the list index is the state number.

    Also records id(set) -> index in the module-level _lr0_cidhash.
    """
    C = [ lr0_closure([Productions[0].lr_next]) ]
    i = 0
    for I in C:
        _lr0_cidhash[id(I)] = i
        i += 1

    # Loop over the items in C and each grammar symbols.  C grows while
    # we walk it, hence the explicit index.
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = { }
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms.keys():
            g = lr0_goto(I,x)
            if not g: continue
            if id(g) in _lr0_cidhash: continue
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C

# -----------------------------------------------------------------------------
# slr_parse_table()
#
# This function constructs an SLR table.
# -----------------------------------------------------------------------------
def slr_parse_table():
    """Fill the global _lr_action and _lr_goto tables using the SLR method.

    Action values: positive = shift to that state, negative = reduce by
    that rule number, 0 = accept, None = error entry written for a
    'nonassoc' conflict.  Conflicts are logged to _vf/_vfc and a summary
    count is printed.

    Raises YaccError if a StandardError occurs while processing an item.
    """
    global _lr_method
    goto = _lr_goto           # Goto array
    action = _lr_action       # Action array
    actionp = { }             # Action production array (temporary)

    _lr_method = "SLR"

    n_srconflict = 0
    n_rrconflict = 0

    if yaccdebug:
        _vf.write("\n\nParsing method: SLR\n\n")

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = lr0_items()

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]              # List of actions

        if yaccdebug:
            _vf.write("\nstate %d\n\n" % st)
            for p in I:
                _vf.write(" (%d) %s\n" % (p.number, str(p)))
            _vf.write("\n")

        for p in I:
            try:
                if p.prod[-1] == ".":
                    if p.name == "S'":
                        # Start symbol. Accept!
                        action[st,"$"] = 0
                        actionp[st,"$"] = p
                    else:
                        # We are at the end of a production. Reduce!
                        for a in Follow[p.name]:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[actionp[st,a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
                                            n_srconflict += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                            n_srconflict +=1
                                elif r < 0:
                                    # Reduce/reduce conflict. In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                    n_rrconflict += 1
                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
                                    _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
                                else:
                                    print("Unknown conflict in state %d" % st)
                            else:
                                action[st,a] = -p.number
                                actionp[st,a] = p
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in Terminals:
                        g = lr0_goto(I,a)
                        j = _lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        print("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[actionp[st,a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
                                        # We decide to shift here... highest precedence to shift
                                        action[st,a] = j
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            n_srconflict +=1
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)

                                else:
                                    print("Unknown conflict in state %d" % st)
                            else:
                                action[st,a] = j
                                actionp[st,a] = p

            except StandardError:
                # The old three-argument raise passed the exception instance
                # where a traceback is required, which itself failed with
                # TypeError.  Fold the cause into the message instead.
                e = sys.exc_info()[1]
                raise YaccError("Hosed in slr_parse_table: %s" % str(e))

        # Print the actions associated with each terminal
        if yaccdebug:
            # Actions that made it into the table ...
            for a,p,m in actlist:
                if (st,a) in action:
                    if p is actionp[st,a]:
                        _vf.write(" %-15s %s\n" % (a,m))
            _vf.write("\n")
            # ... followed by the ones that lost a conflict.
            for a,p,m in actlist:
                if (st,a) in action:
                    if p is not actionp[st,a]:
                        _vf.write(" ! %-15s [ %s ]\n" % (a,m))

        # Construct the goto table for this state
        if yaccdebug:
            _vf.write("\n")
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if s in Nonterminals:
                    nkeys[s] = None
        for n in nkeys.keys():
            g = lr0_goto(I,n)
            j = _lr0_cidhash.get(id(g),-1)
            if j >= 0:
                goto[st,n] = j
                if yaccdebug:
                    _vf.write(" %-15s shift and go to state %d\n" % (n,j))

        st += 1

    if n_srconflict == 1:
        print("yacc: %d shift/reduce conflict" % n_srconflict)
    if n_srconflict > 1:
        print("yacc: %d shift/reduce conflicts" % n_srconflict)
    if n_rrconflict == 1:
        print("yacc: %d reduce/reduce conflict" % n_rrconflict)
    if n_rrconflict > 1:
        print("yacc: %d reduce/reduce conflicts" % n_rrconflict)


# -----------------------------------------------------------------------------
#                          ==== LALR(1) Parsing ====
# **** UNFINISHED! 6/16/01
# -----------------------------------------------------------------------------


# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where
# p is a LR0 item and a is a terminal

_lr1_add_count = 0

def lr1_closure(I):
    """Return the LR(1) closure of I, a list of (item, terminal) pairs.

    Part of the unfinished LALR(1) support.  Relies on the lr1_after,
    lr1_beta and lr_added attributes that lalr_parse_table() attaches.
    NOTE(review): lr1_beta + (a,) needs lr1_beta to be a tuple -- confirm
    that p.prod is a tuple before relying on this.
    """
    global _lr1_add_count

    _lr1_add_count += 1

    J = I[:]

    # Loop over items (p,a) in I.  J grows while we walk it.
    ji = 0
    while ji < len(J):
        p,a = J[ji]
        #  p = [ A -> alpha . B beta]

        #  For each production B -> gamma
        for B in p.lr1_after:
            f = tuple(p.lr1_beta + (a,))

            # For each terminal b in first(Beta a)
            for b in first(f):
                # Check if (B -> . gamma, b) is in J
                # Only way this can happen is if the add count mismatches
                pn = B.lr_next
                if pn.lr_added.get(b,0) == _lr1_add_count: continue
                pn.lr_added[b] = _lr1_add_count
                J.append((pn,b))
        ji += 1

    return J

def lalr_parse_table():
    """Unfinished LALR(1) builder: annotates LRitems and prints the kernels.

    No action/goto entries are produced.
    """

    # Compute some lr1 information about all of the productions
    for p in LRitems:
        try:
            after = p.prod[p.lr_index + 1]
            p.lr1_after = Prodnames[after]
            p.lr1_beta = p.prod[p.lr_index + 2:]
        except LookupError:
            # Dot at the end of the rule, or the next symbol has no
            # productions of its own.
            p.lr1_after = [ ]
            p.lr1_beta = [ ]
        p.lr_added = { }

    # Compute the LR(0) items
    C = lr0_items()
    CK = []
    for I in C:
        CK.append(lr0_kernel(I))

    print(CK)

# -----------------------------------------------------------------------------
#                          ==== LR Utility functions ====
# -----------------------------------------------------------------------------

# Write one (state,symbol)->value table to f as Python source that rebuilds
# a dict called 'name'.  With 'smaller' true the entries are grouped by
# symbol to shrink the generated file.
def _lr_write_table(f, name, table, smaller):
    if smaller:
        # Factor out names to try and make smaller
        items = { }

        for k,v in table.items():
            i = items.get(k[1])
            if not i:
                i = ([],[])
                items[k[1]] = i
            i[0].append(k[0])
            i[1].append(v)

        f.write("\n%s_items = {" % name)
        for k,v in items.items():
            f.write("%r:([" % k)
            for i in v[0]:
                f.write("%r," % i)
            f.write("],[")
            for i in v[1]:
                f.write("%r," % i)

            f.write("]),")
        f.write("}\n")

        # Code in the generated module that expands the grouped form.
        f.write("""
%(name)s = { }
for _k, _v in %(name)s_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        %(name)s[(_x,_k)] = _y
del %(name)s_items
""" % {'name': name})

    else:
        f.write("\n%s = { " % name)
        for k,v in table.items():
            f.write("(%r,%r):%r," % (k[0],k[1],v))
        f.write("}\n")

# -----------------------------------------------------------------------------
# _lr_write_tables()
#
# This function writes the LR parsing tables to a file
# -----------------------------------------------------------------------------

def lr_write_tables(modulename=tab_module):
    """Write the parsing tables to '<modulename>.py'.

    The generated module defines _lr_method, _lr_signature, _lr_action,
    _lr_goto and _lr_productions.  If the file cannot be written the
    error is printed and the function returns without raising.
    """
    filename = modulename + ".py"
    try:
        f = open(filename,"w")
        try:
            f.write("""
# %s
# This file is automatically generated. Do not edit.

_lr_method = %s

_lr_signature = %s
""" % (filename, repr(_lr_method), repr(Signature.digest())))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            _lr_write_table(f, "_lr_action", _lr_action, smaller)
            _lr_write_table(f, "_lr_goto", _lr_goto, smaller)

            # Write production table
            f.write("_lr_productions = [\n")
            for p in Productions:
                if p:
                    if (p.func):
                        f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
                    else:
                        f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
                else:
                    f.write(" None,\n")
            f.write("]\n")
        finally:
            # Close the file even when a write fails part-way through.
            f.close()

    except IOError:
        e = sys.exc_info()[1]
        print("Unable to create '%s'" % filename)
        print(e)
        return

def lr_read_tables(module=tab_module,optimize=0):
    """Load previously generated tables from the module named 'module'.

    Returns 1 and sets the _lr_* globals when the module imports and
    either 'optimize' is true or its stored signature equals the current
    grammar Signature.  Returns 0 otherwise, including when the import
    fails or an expected attribute is missing.
    """
    global _lr_action, _lr_goto, _lr_productions, _lr_method
    try:
        # NOTE: 'module' is interpolated into an import statement, so it
        # must be a trusted module name.
        exec("import %s as parsetab" % module)

        if (optimize) or (Signature.digest() == parsetab._lr_signature):
            _lr_action = parsetab._lr_action
            _lr_goto = parsetab._lr_goto
            _lr_productions = parsetab._lr_productions
            _lr_method = parsetab._lr_method
            return 1
        else:
            return 0

    except (ImportError,AttributeError):
        return 0

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build the parser module
# -----------------------------------------------------------------------------

def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
    """Build a parser from the grammar rules found in a module.

    method          -- 'SLR' builds the tables; 'LALR1' runs the unfinished
                       lalr_parse_table() and returns None; anything else
                       raises YaccError.
    debug           -- stored in the global yaccdebug; when true the debug
                       log is written to debug_file after table generation.
    module          -- module object whose namespace holds 'tokens',
                       'precedence', 'require', 'p_error' and the p_*
                       rule functions.  When false, the caller's globals
                       are used instead.
    tabmodule       -- name of the table module to read and write.
    start           -- start symbol, passed on to augment_grammar() and
                       compute_follow().
    check_recursion -- passed to verify_productions() as cycle_check.
    optimize        -- when true and the table module loads, the grammar
                       is not re-read and the signature is not checked;
                       the construction globals are also kept alive.

    Returns the Parser object and rebinds the module-level parse() to its
    bound parse method.  Raises ValueError if 'module' is not a module and
    YaccError for grammar specification errors.
    """
    global yaccdebug, Errorfunc, parse
    yaccdebug = debug

    initialize_vars()
    files = { }
    error = 0

    # Add starting symbol to signature
    if start:
        Signature.update(start)

    # Try to figure out what module we are working with
    if module:
        # User supplied a module object.
        if not isinstance(module, types.ModuleType):
            raise ValueError("Expected a module")

        ldict = module.__dict__

    else:
        # No module given. We might be able to get information from the caller.
        # Throw an exception and unwind the traceback to get the globals

        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back           # Walk out to our calling function
            ldict = f.f_globals    # Grab its globals dictionary

    # If running in optimized mode, trust the stored tables and rebuild
    # only the minimal production records the parser needs.

    if (optimize and lr_read_tables(tabmodule,1)):
        # Read parse table
        del Productions[:]
        for p in _lr_productions:
            if not p:
                Productions.append(None)
            else:
                # Tuple layout matches what lr_write_tables() emits:
                # (name, len, function name, file, line)
                m = MiniProduction()
                m.name = p[0]
                m.len = p[1]
                m.file = p[3]
                m.line = p[4]
                if p[2]:
                    m.func = ldict[p[2]]
                Productions.append(m)

    else:
        # Get the tokens map
        tokens = ldict.get("tokens",None)

        if not tokens:
            raise YaccError("module does not define a list 'tokens'")
        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
            raise YaccError("tokens must be a list or tuple.")

        # Check to see if a requires dictionary is defined.
        requires = ldict.get("require",None)
        if requires:
            if not (isinstance(requires,types.DictType)):
                raise YaccError("require must be a dictionary.")

            for r,v in requires.items():
                try:
                    if not (isinstance(v,types.ListType)):
                        raise TypeError
                    v1 = [x.split(".") for x in v]
                    Requires[r] = v1
                except StandardError:
                    print("Invalid specification for rule '%s' in require. Expected a list of strings" % r)


        # Build the dictionary of terminals. We a record a 0 in the
        # dictionary to track whether or not a terminal is actually
        # used in the grammar

        if 'error' in tokens:
            print("yacc: Illegal token 'error'. Is a reserved word.")
            raise YaccError("Illegal token name")

        for n in tokens:
            if n in Terminals:
                print("yacc: Warning. Token '%s' multiply defined." % n)
            Terminals[n] = [ ]

        Terminals['error'] = [ ]

        # Get the precedence map (if any)
        prec = ldict.get("precedence",None)
        if prec:
            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
                raise YaccError("precedence must be a list or tuple.")
            add_precedence(prec)
            Signature.update(repr(prec))

        for n in tokens:
            if n not in Precedence:
                Precedence[n] = ('right',0)         # Default, right associative, 0 precedence

        # Look for error handler
        ef = ldict.get('p_error',None)
        if ef:
            if not isinstance(ef,types.FunctionType):
                raise YaccError("'p_error' defined, but is not a function.")
            eline = ef.func_code.co_firstlineno
            efile = ef.func_code.co_filename
            files[efile] = None

            if (ef.func_code.co_argcount != 1):
                raise YaccError("%s:%d: p_error() requires 1 argument." % (efile,eline))
            Errorfunc = ef
        else:
            print("yacc: Warning. no p_error() function is defined.")

        # Get the list of built-in functions with p_ prefix
        symbols = [ldict[f] for f in ldict.keys()
               if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
                   and ldict[f].__name__ != 'p_error')]

        # Check for non-empty symbols
        if len(symbols) == 0:
            raise YaccError("no rules of the form p_rulename are defined.")

        # Sort the symbols by line number
        symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))

        # Add all of the symbols to the grammar
        for f in symbols:
            if (add_function(f)) < 0:
                error += 1
            else:
                files[f.func_code.co_filename] = None

        # Make a signature of the docstrings
        for f in symbols:
            if f.__doc__:
                Signature.update(f.__doc__)

        lr_init_vars()

        if error:
            raise YaccError("Unable to construct parser.")

        # Regenerate the tables only when the stored ones are missing
        # or were built from a different grammar.
        if not lr_read_tables(tabmodule):

            # Validate files
            for filename in files.keys():
                if not validate_file(filename):
                    error = 1

            # Validate dictionary
            validate_dict(ldict)

            if start and start not in Prodnames:
                raise YaccError("Bad starting symbol '%s'" % start)

            augment_grammar(start)
            error = verify_productions(cycle_check=check_recursion)

            if error:
                raise YaccError("Unable to construct parser.")

            build_lritems()
            compute_first1()
            compute_follow(start)

            if method == 'SLR':
                slr_parse_table()
            elif method == 'LALR1':
                lalr_parse_table()
                return
            else:
                raise YaccError("Unknown parsing method '%s'" % method)

            lr_write_tables(tabmodule)

            if yaccdebug:
                try:
                    f = open(debug_file,"w")
                    try:
                        f.write(_vfc.getvalue())
                        f.write("\n\n")
                        f.write(_vf.getvalue())
                    finally:
                        f.close()
                except IOError:
                    e = sys.exc_info()[1]
                    print("yacc: can't create '%s' %s" % (debug_file, e))

    # Made it here. Create a parser object and set up its internal state.
    # Set global parse() method to bound method of parser object.

    p = Parser("xyzzy")
    p.productions = Productions
    p.errorfunc = Errorfunc
    p.action = _lr_action
    p.goto = _lr_goto
    p.method = _lr_method
    p.require = Requires

    parse = p.parse

    # Clean up all of the globals we created
    if (not optimize):
        yacc_cleanup()
    return p

# yacc_cleanup function.
Delete all of the global variables +# used during table construction + +def yacc_cleanup(): + global _lr_action, _lr_goto, _lr_method, _lr_goto_cache + del _lr_action, _lr_goto, _lr_method, _lr_goto_cache + + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + del Productions, Prodnames, Prodmap, Terminals + del Nonterminals, First, Follow, Precedence, LRitems + del Errorfunc, Signature, Requires + + global _vf, _vfc + del _vf, _vfc + + +# Stub that raises an error if parsing is attempted without first calling yacc() +def parse(*args,**kwargs): + raise YaccError, "yacc: No parser built with yacc()" + |