From 4710c53dcad1ebf3755f3efb9e80ac24bd72a9b2 Mon Sep 17 00:00:00 2001 From: darylm503 Date: Mon, 16 Apr 2012 22:12:42 +0000 Subject: AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python 2.7.3 made them unavailable from the python.org web site. These files are a subset of the python-2.7.2.tgz distribution from python.org. Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution. Signed-off-by: daryl.mcdaniel@intel.com git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524 --- .../Python/Python-2.7.2/Tools/webchecker/README | 23 + .../Python-2.7.2/Tools/webchecker/tktools.py | 366 +++++++++ .../Python/Python-2.7.2/Tools/webchecker/wcgui.py | 456 +++++++++++ .../Python/Python-2.7.2/Tools/webchecker/wcmac.py | 7 + .../Python-2.7.2/Tools/webchecker/webchecker.py | 892 +++++++++++++++++++++ .../Python-2.7.2/Tools/webchecker/websucker.py | 123 +++ .../Python/Python-2.7.2/Tools/webchecker/wsgui.py | 240 ++++++ 7 files changed, 2107 insertions(+) create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/README create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/tktools.py create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcgui.py create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcmac.py create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/webchecker.py create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/websucker.py create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wsgui.py (limited to 'AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker') diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/README b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/README new file mode 100644 index 0000000000..542081c7bc --- 
/dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/README @@ -0,0 +1,23 @@ +Webchecker +---------- + +This is a simple web tree checker, useful to find bad links in a web +tree. It currently checks links pointing within the same subweb for +validity. The main program is "webchecker.py". See its doc string +(or invoke it with the option "-?") for more defails. + +History: + +- Jan 1997. First release. The module robotparser.py was written by +Skip Montanaro; the rest is original work by Guido van Rossum. + +- May 1999. Sam Bayer contributed a new version, wcnew.py, which +supports checking internal links (#spam fragments in URLs) and some +other options. + +- Nov 1999. Sam Bayer contributed patches to reintegrate wcnew.py +into webchecker.py, and corresponding mods to wcgui.py and +websucker.py. + +- Mar 2004. Chris Herborth contributed a patch to let webchecker.py +handle XHTML's 'id' attribute. diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/tktools.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/tktools.py new file mode 100644 index 0000000000..af6437a9c4 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/tktools.py @@ -0,0 +1,366 @@ +"""Assorted Tk-related subroutines used in Grail.""" + + +from types import * +from Tkinter import * + +def _clear_entry_widget(event): + try: + widget = event.widget + widget.delete(0, INSERT) + except: pass +def install_keybindings(root): + root.bind_class('Entry', '', _clear_entry_widget) + + +def make_toplevel(master, title=None, class_=None): + """Create a Toplevel widget. + + This is a shortcut for a Toplevel() instantiation plus calls to + set the title and icon name of the widget. 
+ + """ + + if class_: + widget = Toplevel(master, class_=class_) + else: + widget = Toplevel(master) + if title: + widget.title(title) + widget.iconname(title) + return widget + +def set_transient(widget, master, relx=0.5, rely=0.3, expose=1): + """Make an existing toplevel widget transient for a master. + + The widget must exist but should not yet have been placed; in + other words, this should be called after creating all the + subwidget but before letting the user interact. + """ + + widget.withdraw() # Remain invisible while we figure out the geometry + widget.transient(master) + widget.update_idletasks() # Actualize geometry information + if master.winfo_ismapped(): + m_width = master.winfo_width() + m_height = master.winfo_height() + m_x = master.winfo_rootx() + m_y = master.winfo_rooty() + else: + m_width = master.winfo_screenwidth() + m_height = master.winfo_screenheight() + m_x = m_y = 0 + w_width = widget.winfo_reqwidth() + w_height = widget.winfo_reqheight() + x = m_x + (m_width - w_width) * relx + y = m_y + (m_height - w_height) * rely + widget.geometry("+%d+%d" % (x, y)) + if expose: + widget.deiconify() # Become visible at the desired location + return widget + + +def make_scrollbars(parent, hbar, vbar, pack=1, class_=None, name=None, + takefocus=0): + + """Subroutine to create a frame with scrollbars. + + This is used by make_text_box and similar routines. + + Note: the caller is responsible for setting the x/y scroll command + properties (e.g. by calling set_scroll_commands()). + + Return a tuple containing the hbar, the vbar, and the frame, where + hbar and vbar are None if not requested. 
+ + """ + if class_: + if name: frame = Frame(parent, class_=class_, name=name) + else: frame = Frame(parent, class_=class_) + else: + if name: frame = Frame(parent, name=name) + else: frame = Frame(parent) + + if pack: + frame.pack(fill=BOTH, expand=1) + + corner = None + if vbar: + if not hbar: + vbar = Scrollbar(frame, takefocus=takefocus) + vbar.pack(fill=Y, side=RIGHT) + else: + vbarframe = Frame(frame, borderwidth=0) + vbarframe.pack(fill=Y, side=RIGHT) + vbar = Scrollbar(frame, name="vbar", takefocus=takefocus) + vbar.pack(in_=vbarframe, expand=1, fill=Y, side=TOP) + sbwidth = vbar.winfo_reqwidth() + corner = Frame(vbarframe, width=sbwidth, height=sbwidth) + corner.propagate(0) + corner.pack(side=BOTTOM) + else: + vbar = None + + if hbar: + hbar = Scrollbar(frame, orient=HORIZONTAL, name="hbar", + takefocus=takefocus) + hbar.pack(fill=X, side=BOTTOM) + else: + hbar = None + + return hbar, vbar, frame + + +def set_scroll_commands(widget, hbar, vbar): + + """Link a scrollable widget to its scroll bars. + + The scroll bars may be empty. + + """ + + if vbar: + widget['yscrollcommand'] = (vbar, 'set') + vbar['command'] = (widget, 'yview') + + if hbar: + widget['xscrollcommand'] = (hbar, 'set') + hbar['command'] = (widget, 'xview') + + widget.vbar = vbar + widget.hbar = hbar + + +def make_text_box(parent, width=0, height=0, hbar=0, vbar=1, + fill=BOTH, expand=1, wrap=WORD, pack=1, + class_=None, name=None, takefocus=None): + + """Subroutine to create a text box. + + Create: + - a both-ways filling and expanding frame, containing: + - a text widget on the left, and + - possibly a vertical scroll bar on the right. + - possibly a horizonta; scroll bar at the bottom. + + Return the text widget and the frame widget. 
+ + """ + hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack, + class_=class_, name=name, + takefocus=takefocus) + + widget = Text(frame, wrap=wrap, name="text") + if width: widget.config(width=width) + if height: widget.config(height=height) + widget.pack(expand=expand, fill=fill, side=LEFT) + + set_scroll_commands(widget, hbar, vbar) + + return widget, frame + + +def make_list_box(parent, width=0, height=0, hbar=0, vbar=1, + fill=BOTH, expand=1, pack=1, class_=None, name=None, + takefocus=None): + + """Subroutine to create a list box. + + Like make_text_box(). + """ + hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack, + class_=class_, name=name, + takefocus=takefocus) + + widget = Listbox(frame, name="listbox") + if width: widget.config(width=width) + if height: widget.config(height=height) + widget.pack(expand=expand, fill=fill, side=LEFT) + + set_scroll_commands(widget, hbar, vbar) + + return widget, frame + + +def make_canvas(parent, width=0, height=0, hbar=1, vbar=1, + fill=BOTH, expand=1, pack=1, class_=None, name=None, + takefocus=None): + + """Subroutine to create a canvas. + + Like make_text_box(). + + """ + + hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack, + class_=class_, name=name, + takefocus=takefocus) + + widget = Canvas(frame, scrollregion=(0, 0, width, height), name="canvas") + if width: widget.config(width=width) + if height: widget.config(height=height) + widget.pack(expand=expand, fill=fill, side=LEFT) + + set_scroll_commands(widget, hbar, vbar) + + return widget, frame + + + +def make_form_entry(parent, label, borderwidth=None): + + """Subroutine to create a form entry. + + Create: + - a horizontally filling and expanding frame, containing: + - a label on the left, and + - a text entry on the right. + + Return the entry widget and the frame widget. 
+ + """ + + frame = Frame(parent) + frame.pack(fill=X) + + label = Label(frame, text=label) + label.pack(side=LEFT) + + if borderwidth is None: + entry = Entry(frame, relief=SUNKEN) + else: + entry = Entry(frame, relief=SUNKEN, borderwidth=borderwidth) + entry.pack(side=LEFT, fill=X, expand=1) + + return entry, frame + +# This is a slightly modified version of the function above. This +# version does the proper alighnment of labels with their fields. It +# should probably eventually replace make_form_entry altogether. +# +# The one annoying bug is that the text entry field should be +# expandable while still aligning the colons. This doesn't work yet. +# +def make_labeled_form_entry(parent, label, entrywidth=20, entryheight=1, + labelwidth=0, borderwidth=None, + takefocus=None): + """Subroutine to create a form entry. + + Create: + - a horizontally filling and expanding frame, containing: + - a label on the left, and + - a text entry on the right. + + Return the entry widget and the frame widget. 
+ """ + if label and label[-1] != ':': label = label + ':' + + frame = Frame(parent) + + label = Label(frame, text=label, width=labelwidth, anchor=E) + label.pack(side=LEFT) + if entryheight == 1: + if borderwidth is None: + entry = Entry(frame, relief=SUNKEN, width=entrywidth) + else: + entry = Entry(frame, relief=SUNKEN, width=entrywidth, + borderwidth=borderwidth) + entry.pack(side=RIGHT, expand=1, fill=X) + frame.pack(fill=X) + else: + entry = make_text_box(frame, entrywidth, entryheight, 1, 1, + takefocus=takefocus) + frame.pack(fill=BOTH, expand=1) + + return entry, frame, label + + +def make_double_frame(master=None, class_=None, name=None, relief=RAISED, + borderwidth=1): + """Create a pair of frames suitable for 'hosting' a dialog.""" + if name: + if class_: frame = Frame(master, class_=class_, name=name) + else: frame = Frame(master, name=name) + else: + if class_: frame = Frame(master, class_=class_) + else: frame = Frame(master) + top = Frame(frame, name="topframe", relief=relief, + borderwidth=borderwidth) + bottom = Frame(frame, name="bottomframe") + bottom.pack(fill=X, padx='1m', pady='1m', side=BOTTOM) + top.pack(expand=1, fill=BOTH, padx='1m', pady='1m') + frame.pack(expand=1, fill=BOTH) + top = Frame(top) + top.pack(expand=1, fill=BOTH, padx='2m', pady='2m') + + return frame, top, bottom + + +def make_group_frame(master, name=None, label=None, fill=Y, + side=None, expand=None, font=None): + """Create nested frames with a border and optional label. + + The outer frame is only used to provide the decorative border, to + control packing, and to host the label. The inner frame is packed + to fill the outer frame and should be used as the parent of all + sub-widgets. Only the inner frame is returned. 
+ + """ + font = font or "-*-helvetica-medium-r-normal-*-*-100-*-*-*-*-*-*" + outer = Frame(master, borderwidth=2, relief=GROOVE) + outer.pack(expand=expand, fill=fill, side=side) + if label: + Label(outer, text=label, font=font, anchor=W).pack(fill=X) + inner = Frame(master, borderwidth='1m', name=name) + inner.pack(expand=1, fill=BOTH, in_=outer) + inner.forget = outer.forget + return inner + + +def unify_button_widths(*buttons): + """Make buttons passed in all have the same width. + + Works for labels and other widgets with the 'text' option. + + """ + wid = 0 + for btn in buttons: + wid = max(wid, len(btn["text"])) + for btn in buttons: + btn["width"] = wid + + +def flatten(msg): + """Turn a list or tuple into a single string -- recursively.""" + t = type(msg) + if t in (ListType, TupleType): + msg = ' '.join(map(flatten, msg)) + elif t is ClassType: + msg = msg.__name__ + else: + msg = str(msg) + return msg + + +def boolean(s): + """Test whether a string is a Tk boolean, without error checking.""" + if s.lower() in ('', '0', 'no', 'off', 'false'): return 0 + else: return 1 + + +def test(): + """Test make_text_box(), make_form_entry(), flatten(), boolean().""" + import sys + root = Tk() + entry, eframe = make_form_entry(root, 'Boolean:') + text, tframe = make_text_box(root) + def enter(event, entry=entry, text=text): + s = boolean(entry.get()) and '\nyes' or '\nno' + text.insert('end', s) + entry.bind('', enter) + entry.insert(END, flatten(sys.argv)) + root.mainloop() + + +if __name__ == '__main__': + test() diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcgui.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcgui.py new file mode 100644 index 0000000000..03004855bc --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcgui.py @@ -0,0 +1,456 @@ +#! /usr/bin/env python + +"""GUI interface to webchecker. + +This works as a Grail applet too! E.g. + + + +Checkpoints are not (yet??? ever???) supported. 
+ +User interface: + +Enter a root to check in the text entry box. To enter more than one root, +enter them one at a time and press <Return> for each one. + +Command buttons Start, Stop and "Check one" govern the checking process in +the obvious way. Start and "Check one" also enter the root from the text +entry box if one is present. There's also a check box (enabled by default) +to decide whether actually to follow external links (since this can slow +the checking down considerably). Finally there's a Quit button. + +A series of checkbuttons determines whether the corresponding output panel +is shown. List panels are also automatically shown or hidden when their +status changes between empty and non-empty. There are six panels: + +Log -- raw output from the checker (-v, -q affect this) +To check -- links discovered but not yet checked +Checked -- links that have been checked +Bad links -- links that failed upon checking +Errors -- pages containing at least one bad link +Details -- details about one URL; double click on a URL in any of + the above list panels (not in Log) will show details + for that URL + +Use your window manager's Close command to quit. + +Command line options: + +-m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d) +-q -- quiet operation (also suppresses external links report) +-v -- verbose operation; repeating -v will increase verbosity +-t root -- specify root dir which should be treated as internal (can repeat) +-a -- don't check name anchors + +Command line arguments: + +rooturl -- URL to start checking + (default %(DEFROOT)s) + +XXX The command line options (-m, -q, -v) should be GUI accessible. + +XXX The roots should be visible as a list (?). + +XXX The multipanel user interface is clumsy.
+ +""" + +# ' Emacs bait + + +import sys +import getopt +from Tkinter import * +import tktools +import webchecker + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], 't:m:qva') + except getopt.error, msg: + sys.stdout = sys.stderr + print msg + print __doc__%vars(webchecker) + sys.exit(2) + webchecker.verbose = webchecker.VERBOSE + webchecker.nonames = webchecker.NONAMES + webchecker.maxpage = webchecker.MAXPAGE + extra_roots = [] + for o, a in opts: + if o == '-m': + webchecker.maxpage = int(a) + if o == '-q': + webchecker.verbose = 0 + if o == '-v': + webchecker.verbose = webchecker.verbose + 1 + if o == '-t': + extra_roots.append(a) + if o == '-a': + webchecker.nonames = not webchecker.nonames + root = Tk(className='Webchecker') + root.protocol("WM_DELETE_WINDOW", root.quit) + c = CheckerWindow(root) + c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage, + nonames=webchecker.nonames) + if args: + for arg in args[:-1]: + c.addroot(arg) + c.suggestroot(args[-1]) + # Usually conditioned on whether external links + # will be checked, but since that's not a command + # line option, just toss them in. + for url_root in extra_roots: + # Make sure it's terminated by a slash, + # so that addroot doesn't discard the last + # directory component. 
+ if url_root[-1] != "/": + url_root = url_root + "/" + c.addroot(url_root, add_to_do = 0) + root.mainloop() + + +class CheckerWindow(webchecker.Checker): + + def __init__(self, parent, root=webchecker.DEFROOT): + self.__parent = parent + + self.__topcontrols = Frame(parent) + self.__topcontrols.pack(side=TOP, fill=X) + self.__label = Label(self.__topcontrols, text="Root URL:") + self.__label.pack(side=LEFT) + self.__rootentry = Entry(self.__topcontrols, width=60) + self.__rootentry.pack(side=LEFT) + self.__rootentry.bind('', self.enterroot) + self.__rootentry.focus_set() + + self.__controls = Frame(parent) + self.__controls.pack(side=TOP, fill=X) + self.__running = 0 + self.__start = Button(self.__controls, text="Run", command=self.start) + self.__start.pack(side=LEFT) + self.__stop = Button(self.__controls, text="Stop", command=self.stop, + state=DISABLED) + self.__stop.pack(side=LEFT) + self.__step = Button(self.__controls, text="Check one", + command=self.step) + self.__step.pack(side=LEFT) + self.__cv = BooleanVar(parent) + self.__cv.set(self.checkext) + self.__checkext = Checkbutton(self.__controls, variable=self.__cv, + command=self.update_checkext, + text="Check nonlocal links",) + self.__checkext.pack(side=LEFT) + self.__reset = Button(self.__controls, text="Start over", command=self.reset) + self.__reset.pack(side=LEFT) + if __name__ == '__main__': # No Quit button under Grail! 
+ self.__quit = Button(self.__controls, text="Quit", + command=self.__parent.quit) + self.__quit.pack(side=RIGHT) + + self.__status = Label(parent, text="Status: initial", anchor=W) + self.__status.pack(side=TOP, fill=X) + self.__checking = Label(parent, text="Idle", anchor=W) + self.__checking.pack(side=TOP, fill=X) + self.__mp = mp = MultiPanel(parent) + sys.stdout = self.__log = LogPanel(mp, "Log") + self.__todo = ListPanel(mp, "To check", self, self.showinfo) + self.__done = ListPanel(mp, "Checked", self, self.showinfo) + self.__bad = ListPanel(mp, "Bad links", self, self.showinfo) + self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo) + self.__details = LogPanel(mp, "Details") + self.root_seed = None + webchecker.Checker.__init__(self) + if root: + root = str(root).strip() + if root: + self.suggestroot(root) + self.newstatus() + + def reset(self): + webchecker.Checker.reset(self) + for p in self.__todo, self.__done, self.__bad, self.__errors: + p.clear() + if self.root_seed: + self.suggestroot(self.root_seed) + + def suggestroot(self, root): + self.__rootentry.delete(0, END) + self.__rootentry.insert(END, root) + self.__rootentry.select_range(0, END) + self.root_seed = root + + def enterroot(self, event=None): + root = self.__rootentry.get() + root = root.strip() + if root: + self.__checking.config(text="Adding root "+root) + self.__checking.update_idletasks() + self.addroot(root) + self.__checking.config(text="Idle") + try: + i = self.__todo.items.index(root) + except (ValueError, IndexError): + pass + else: + self.__todo.list.select_clear(0, END) + self.__todo.list.select_set(i) + self.__todo.list.yview(i) + self.__rootentry.delete(0, END) + + def start(self): + self.__start.config(state=DISABLED, relief=SUNKEN) + self.__stop.config(state=NORMAL) + self.__step.config(state=DISABLED) + self.enterroot() + self.__running = 1 + self.go() + + def stop(self): + self.__stop.config(state=DISABLED, relief=SUNKEN) + self.__running = 0 + + def 
step(self): + self.__start.config(state=DISABLED) + self.__step.config(state=DISABLED, relief=SUNKEN) + self.enterroot() + self.__running = 0 + self.dosomething() + + def go(self): + if self.__running: + self.__parent.after_idle(self.dosomething) + else: + self.__checking.config(text="Idle") + self.__start.config(state=NORMAL, relief=RAISED) + self.__stop.config(state=DISABLED, relief=RAISED) + self.__step.config(state=NORMAL, relief=RAISED) + + __busy = 0 + + def dosomething(self): + if self.__busy: return + self.__busy = 1 + if self.todo: + l = self.__todo.selectedindices() + if l: + i = l[0] + else: + i = 0 + self.__todo.list.select_set(i) + self.__todo.list.yview(i) + url = self.__todo.items[i] + self.__checking.config(text="Checking "+self.format_url(url)) + self.__parent.update() + self.dopage(url) + else: + self.stop() + self.__busy = 0 + self.go() + + def showinfo(self, url): + d = self.__details + d.clear() + d.put("URL: %s\n" % self.format_url(url)) + if self.bad.has_key(url): + d.put("Error: %s\n" % str(self.bad[url])) + if url in self.roots: + d.put("Note: This is a root URL\n") + if self.done.has_key(url): + d.put("Status: checked\n") + o = self.done[url] + elif self.todo.has_key(url): + d.put("Status: to check\n") + o = self.todo[url] + else: + d.put("Status: unknown (!)\n") + o = [] + if (not url[1]) and self.errors.has_key(url[0]): + d.put("Bad links from this page:\n") + for triple in self.errors[url[0]]: + link, rawlink, msg = triple + d.put(" HREF %s" % self.format_url(link)) + if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink) + d.put("\n") + d.put(" error %s\n" % str(msg)) + self.__mp.showpanel("Details") + for source, rawlink in o: + d.put("Origin: %s" % source) + if rawlink != self.format_url(url): + d.put(" (%s)" % rawlink) + d.put("\n") + d.text.yview("1.0") + + def setbad(self, url, msg): + webchecker.Checker.setbad(self, url, msg) + self.__bad.insert(url) + self.newstatus() + + def setgood(self, url): + 
webchecker.Checker.setgood(self, url) + self.__bad.remove(url) + self.newstatus() + + def newlink(self, url, origin): + webchecker.Checker.newlink(self, url, origin) + if self.done.has_key(url): + self.__done.insert(url) + elif self.todo.has_key(url): + self.__todo.insert(url) + self.newstatus() + + def markdone(self, url): + webchecker.Checker.markdone(self, url) + self.__done.insert(url) + self.__todo.remove(url) + self.newstatus() + + def seterror(self, url, triple): + webchecker.Checker.seterror(self, url, triple) + self.__errors.insert((url, '')) + self.newstatus() + + def newstatus(self): + self.__status.config(text="Status: "+self.status()) + self.__parent.update() + + def update_checkext(self): + self.checkext = self.__cv.get() + + +class ListPanel: + + def __init__(self, mp, name, checker, showinfo=None): + self.mp = mp + self.name = name + self.showinfo = showinfo + self.checker = checker + self.panel = mp.addpanel(name) + self.list, self.frame = tktools.make_list_box( + self.panel, width=60, height=5) + self.list.config(exportselection=0) + if showinfo: + self.list.bind('', self.doubleclick) + self.items = [] + + def clear(self): + self.items = [] + self.list.delete(0, END) + self.mp.hidepanel(self.name) + + def doubleclick(self, event): + l = self.selectedindices() + if l: + self.showinfo(self.items[l[0]]) + + def selectedindices(self): + l = self.list.curselection() + if not l: return [] + return map(int, l) + + def insert(self, url): + if url not in self.items: + if not self.items: + self.mp.showpanel(self.name) + # (I tried sorting alphabetically, but the display is too jumpy) + i = len(self.items) + self.list.insert(i, self.checker.format_url(url)) + self.list.yview(i) + self.items.insert(i, url) + + def remove(self, url): + try: + i = self.items.index(url) + except (ValueError, IndexError): + pass + else: + was_selected = i in self.selectedindices() + self.list.delete(i) + del self.items[i] + if not self.items: + self.mp.hidepanel(self.name) + elif 
was_selected: + if i >= len(self.items): + i = len(self.items) - 1 + self.list.select_set(i) + + +class LogPanel: + + def __init__(self, mp, name): + self.mp = mp + self.name = name + self.panel = mp.addpanel(name) + self.text, self.frame = tktools.make_text_box(self.panel, height=10) + self.text.config(wrap=NONE) + + def clear(self): + self.text.delete("1.0", END) + self.text.yview("1.0") + + def put(self, s): + self.text.insert(END, s) + if '\n' in s: + self.text.yview(END) + + def write(self, s): + self.text.insert(END, s) + if '\n' in s: + self.text.yview(END) + self.panel.update() + + +class MultiPanel: + + def __init__(self, parent): + self.parent = parent + self.frame = Frame(self.parent) + self.frame.pack(expand=1, fill=BOTH) + self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED) + self.topframe.pack(fill=X) + self.botframe = Frame(self.frame) + self.botframe.pack(expand=1, fill=BOTH) + self.panelnames = [] + self.panels = {} + + def addpanel(self, name, on=0): + v = StringVar(self.parent) + if on: + v.set(name) + else: + v.set("") + check = Checkbutton(self.topframe, text=name, + offvalue="", onvalue=name, variable=v, + command=self.checkpanel) + check.pack(side=LEFT) + panel = Frame(self.botframe) + label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W) + label.pack(side=TOP, fill=X) + t = v, check, panel + self.panelnames.append(name) + self.panels[name] = t + if on: + panel.pack(expand=1, fill=BOTH) + return panel + + def showpanel(self, name): + v, check, panel = self.panels[name] + v.set(name) + panel.pack(expand=1, fill=BOTH) + + def hidepanel(self, name): + v, check, panel = self.panels[name] + v.set("") + panel.pack_forget() + + def checkpanel(self): + for name in self.panelnames: + v, check, panel = self.panels[name] + panel.pack_forget() + for name in self.panelnames: + v, check, panel = self.panels[name] + if v.get(): + panel.pack(expand=1, fill=BOTH) + + +if __name__ == '__main__': + main() diff --git 
a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcmac.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcmac.py new file mode 100644 index 0000000000..efab29d6cb --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wcmac.py @@ -0,0 +1,7 @@ +import webchecker, sys +webchecker.DEFROOT = "http://www.python.org/python/" +webchecker.MAXPAGE = 50000 +webchecker.verbose = 2 +sys.argv.append('-x') +webchecker.main() +raw_input("\nCR to exit: ") diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/webchecker.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/webchecker.py new file mode 100644 index 0000000000..9a0a6e63e7 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/webchecker.py @@ -0,0 +1,892 @@ +#! /usr/bin/env python + +# Original code by Guido van Rossum; extensive changes by Sam Bayer, +# including code to check URL fragments. + +"""Web tree checker. + +This utility is handy to check a subweb of the world-wide web for +errors. A subweb is specified by giving one or more ``root URLs''; a +page belongs to the subweb if one of the root URLs is an initial +prefix of it. + +File URL extension: + +In order to easy the checking of subwebs via the local file system, +the interpretation of ``file:'' URLs is extended to mimic the behavior +of your average HTTP daemon: if a directory pathname is given, the +file index.html in that directory is returned if it exists, otherwise +a directory listing is returned. Now, you can point webchecker to the +document tree in the local file system of your HTTP daemon, and have +most of it checked. In fact the default works this way if your local +web tree is located at /usr/local/etc/httpd/htdpcs (the default for +the NCSA HTTP daemon and probably others). + +Report printed: + +When done, it reports pages with bad links within the subweb. When +interrupted, it reports for the pages that it has checked already. 
+ +In verbose mode, additional messages are printed during the +information gathering phase. By default, it prints a summary of its +work status every 50 URLs (adjustable with the -r option), and it +reports errors as they are encountered. Use the -q option to disable +this output. + +Checkpoint feature: + +Whether interrupted or not, it dumps its state (a Python pickle) to a +checkpoint file and the -R option allows it to restart from the +checkpoint (assuming that the pages on the subweb that were already +processed haven't changed). Even when it has run till completion, -R +can still be useful -- it will print the reports again, and -Rq prints +the errors only. In this case, the checkpoint file is not written +again. The checkpoint file can be set with the -d option. + +The checkpoint file is written as a Python pickle. Remember that +Python's pickle module is currently quite slow. Give it the time it +needs to load and save the checkpoint file. When interrupted while +writing the checkpoint file, the old checkpoint file is not +overwritten, but all work done in the current run is lost. + +Miscellaneous: + +- You may find the (Tk-based) GUI version easier to use. See wcgui.py. + +- Webchecker honors the "robots.txt" convention. Thanks to Skip +Montanaro for his robotparser.py module (included in this directory)! +The agent name is hardwired to "webchecker". URLs that are disallowed +by the robots.txt file are reported as external URLs. + +- Because the SGML parser is a bit slow, very large SGML files are +skipped. The size limit can be set with the -m option. + +- When the server or protocol does not tell us a file's type, we guess +it based on the URL's suffix. The mimetypes.py module (also in this +directory) has a built-in table mapping most currently known suffixes, +and in addition attempts to read the mime.types configuration files in +the default locations of Netscape and the NCSA HTTP daemon. + +- We follow links indicated by <A>, <FRAME> and <IMG> tags.
We also +honor the tag. + +- We now check internal NAME anchor links, as well as toplevel links. + +- Checking external links is now done by default; use -x to *disable* +this feature. External links are now checked during normal +processing. (XXX The status of a checked link could be categorized +better. Later...) + +- If external links are not checked, you can use the -t flag to +provide specific overrides to -x. + +Usage: webchecker.py [option] ... [rooturl] ... + +Options: + +-R -- restart from checkpoint file +-d file -- checkpoint filename (default %(DUMPFILE)s) +-m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d) +-n -- reports only, no checking (use with -R) +-q -- quiet operation (also suppresses external links report) +-r number -- number of links processed per round (default %(ROUNDSIZE)d) +-t root -- specify root dir which should be treated as internal (can repeat) +-v -- verbose operation; repeating -v will increase verbosity +-x -- don't check external links (these are often slow to check) +-a -- don't check name anchors + +Arguments: + +rooturl -- URL to start checking + (default %(DEFROOT)s) + +""" + + +__version__ = "$Revision$" + + +import sys +import os +from types import * +import StringIO +import getopt +import pickle + +import urllib +import urlparse +import sgmllib +import cgi + +import mimetypes +import robotparser + +# Extract real version number if necessary +if __version__[0] == '$': + _v = __version__.split() + if len(_v) == 3: + __version__ = _v[1] + + +# Tunable parameters +DEFROOT = "file:/usr/local/etc/httpd/htdocs/" # Default root URL +CHECKEXT = 1 # Check external references (1 deep) +VERBOSE = 1 # Verbosity level (0-3) +MAXPAGE = 150000 # Ignore files bigger than this +ROUNDSIZE = 50 # Number of links processed per round +DUMPFILE = "@webchecker.pickle" # Pickled checkpoint +AGENTNAME = "webchecker" # Agent name for robots.txt parser +NONAMES = 0 # Force name anchor checking + + +# Global variables + + +def 
main(): + checkext = CHECKEXT + verbose = VERBOSE + maxpage = MAXPAGE + roundsize = ROUNDSIZE + dumpfile = DUMPFILE + restart = 0 + norun = 0 + + try: + opts, args = getopt.getopt(sys.argv[1:], 'Rd:m:nqr:t:vxa') + except getopt.error, msg: + sys.stdout = sys.stderr + print msg + print __doc__%globals() + sys.exit(2) + + # The extra_roots variable collects extra roots. + extra_roots = [] + nonames = NONAMES + + for o, a in opts: + if o == '-R': + restart = 1 + if o == '-d': + dumpfile = a + if o == '-m': + maxpage = int(a) + if o == '-n': + norun = 1 + if o == '-q': + verbose = 0 + if o == '-r': + roundsize = int(a) + if o == '-t': + extra_roots.append(a) + if o == '-a': + nonames = not nonames + if o == '-v': + verbose = verbose + 1 + if o == '-x': + checkext = not checkext + + if verbose > 0: + print AGENTNAME, "version", __version__ + + if restart: + c = load_pickle(dumpfile=dumpfile, verbose=verbose) + else: + c = Checker() + + c.setflags(checkext=checkext, verbose=verbose, + maxpage=maxpage, roundsize=roundsize, + nonames=nonames + ) + + if not restart and not args: + args.append(DEFROOT) + + for arg in args: + c.addroot(arg) + + # The -t flag is only needed if external links are not to be + # checked. So -t values are ignored unless -x was specified. + if not checkext: + for root in extra_roots: + # Make sure it's terminated by a slash, + # so that addroot doesn't discard the last + # directory component. + if root[-1] != "/": + root = root + "/" + c.addroot(root, add_to_do = 0) + + try: + + if not norun: + try: + c.run() + except KeyboardInterrupt: + if verbose > 0: + print "[run interrupted]" + + try: + c.report() + except KeyboardInterrupt: + if verbose > 0: + print "[report interrupted]" + + finally: + if c.save_pickle(dumpfile): + if dumpfile == DUMPFILE: + print "Use ``%s -R'' to restart." % sys.argv[0] + else: + print "Use ``%s -R -d %s'' to restart." 
% (sys.argv[0], + dumpfile) + + +def load_pickle(dumpfile=DUMPFILE, verbose=VERBOSE): + if verbose > 0: + print "Loading checkpoint from %s ..." % dumpfile + f = open(dumpfile, "rb") + c = pickle.load(f) + f.close() + if verbose > 0: + print "Done." + print "Root:", "\n ".join(c.roots) + return c + + +class Checker: + + checkext = CHECKEXT + verbose = VERBOSE + maxpage = MAXPAGE + roundsize = ROUNDSIZE + nonames = NONAMES + + validflags = tuple(dir()) + + def __init__(self): + self.reset() + + def setflags(self, **kw): + for key in kw.keys(): + if key not in self.validflags: + raise NameError, "invalid keyword argument: %s" % str(key) + for key, value in kw.items(): + setattr(self, key, value) + + def reset(self): + self.roots = [] + self.todo = {} + self.done = {} + self.bad = {} + + # Add a name table, so that the name URLs can be checked. Also + # serves as an implicit cache for which URLs are done. + self.name_table = {} + + self.round = 0 + # The following are not pickled: + self.robots = {} + self.errors = {} + self.urlopener = MyURLopener() + self.changed = 0 + + def note(self, level, format, *args): + if self.verbose > level: + if args: + format = format%args + self.message(format) + + def message(self, format, *args): + if args: + format = format%args + print format + + def __getstate__(self): + return (self.roots, self.todo, self.done, self.bad, self.round) + + def __setstate__(self, state): + self.reset() + (self.roots, self.todo, self.done, self.bad, self.round) = state + for root in self.roots: + self.addrobot(root) + for url in self.bad.keys(): + self.markerror(url) + + def addroot(self, root, add_to_do = 1): + if root not in self.roots: + troot = root + scheme, netloc, path, params, query, fragment = \ + urlparse.urlparse(root) + i = path.rfind("/") + 1 + if 0 < i < len(path): + path = path[:i] + troot = urlparse.urlunparse((scheme, netloc, path, + params, query, fragment)) + self.roots.append(troot) + self.addrobot(root) + if add_to_do: + 
self.newlink((root, ""), ("", root)) + + def addrobot(self, root): + root = urlparse.urljoin(root, "/") + if self.robots.has_key(root): return + url = urlparse.urljoin(root, "/robots.txt") + self.robots[root] = rp = robotparser.RobotFileParser() + self.note(2, "Parsing %s", url) + rp.debug = self.verbose > 3 + rp.set_url(url) + try: + rp.read() + except (OSError, IOError), msg: + self.note(1, "I/O error parsing %s: %s", url, msg) + + def run(self): + while self.todo: + self.round = self.round + 1 + self.note(0, "\nRound %d (%s)\n", self.round, self.status()) + urls = self.todo.keys() + urls.sort() + del urls[self.roundsize:] + for url in urls: + self.dopage(url) + + def status(self): + return "%d total, %d to do, %d done, %d bad" % ( + len(self.todo)+len(self.done), + len(self.todo), len(self.done), + len(self.bad)) + + def report(self): + self.message("") + if not self.todo: s = "Final" + else: s = "Interim" + self.message("%s Report (%s)", s, self.status()) + self.report_errors() + + def report_errors(self): + if not self.bad: + self.message("\nNo errors") + return + self.message("\nError Report:") + sources = self.errors.keys() + sources.sort() + for source in sources: + triples = self.errors[source] + self.message("") + if len(triples) > 1: + self.message("%d Errors in %s", len(triples), source) + else: + self.message("Error in %s", source) + # Call self.format_url() instead of referring + # to the URL directly, since the URLs in these + # triples is now a (URL, fragment) pair. The value + # of the "source" variable comes from the list of + # origins, and is a URL, not a pair. + for url, rawlink, msg in triples: + if rawlink != self.format_url(url): s = " (%s)" % rawlink + else: s = "" + self.message(" HREF %s%s\n msg %s", + self.format_url(url), s, msg) + + def dopage(self, url_pair): + + # All printing of URLs uses format_url(); argument changed to + # url_pair for clarity. 
+ if self.verbose > 1: + if self.verbose > 2: + self.show("Check ", self.format_url(url_pair), + " from", self.todo[url_pair]) + else: + self.message("Check %s", self.format_url(url_pair)) + url, local_fragment = url_pair + if local_fragment and self.nonames: + self.markdone(url_pair) + return + try: + page = self.getpage(url_pair) + except sgmllib.SGMLParseError, msg: + msg = self.sanitize(msg) + self.note(0, "Error parsing %s: %s", + self.format_url(url_pair), msg) + # Dont actually mark the URL as bad - it exists, just + # we can't parse it! + page = None + if page: + # Store the page which corresponds to this URL. + self.name_table[url] = page + # If there is a fragment in this url_pair, and it's not + # in the list of names for the page, call setbad(), since + # it's a missing anchor. + if local_fragment and local_fragment not in page.getnames(): + self.setbad(url_pair, ("Missing name anchor `%s'" % local_fragment)) + for info in page.getlinkinfos(): + # getlinkinfos() now returns the fragment as well, + # and we store that fragment here in the "todo" dictionary. + link, rawlink, fragment = info + # However, we don't want the fragment as the origin, since + # the origin is logically a page. + origin = url, rawlink + self.newlink((link, fragment), origin) + else: + # If no page has been created yet, we want to + # record that fact. + self.name_table[url_pair[0]] = None + self.markdone(url_pair) + + def newlink(self, url, origin): + if self.done.has_key(url): + self.newdonelink(url, origin) + else: + self.newtodolink(url, origin) + + def newdonelink(self, url, origin): + if origin not in self.done[url]: + self.done[url].append(origin) + + # Call self.format_url(), since the URL here + # is now a (URL, fragment) pair. + self.note(3, " Done link %s", self.format_url(url)) + + # Make sure that if it's bad, that the origin gets added. 
+ if self.bad.has_key(url): + source, rawlink = origin + triple = url, rawlink, self.bad[url] + self.seterror(source, triple) + + def newtodolink(self, url, origin): + # Call self.format_url(), since the URL here + # is now a (URL, fragment) pair. + if self.todo.has_key(url): + if origin not in self.todo[url]: + self.todo[url].append(origin) + self.note(3, " Seen todo link %s", self.format_url(url)) + else: + self.todo[url] = [origin] + self.note(3, " New todo link %s", self.format_url(url)) + + def format_url(self, url): + link, fragment = url + if fragment: return link + "#" + fragment + else: return link + + def markdone(self, url): + self.done[url] = self.todo[url] + del self.todo[url] + self.changed = 1 + + def inroots(self, url): + for root in self.roots: + if url[:len(root)] == root: + return self.isallowed(root, url) + return 0 + + def isallowed(self, root, url): + root = urlparse.urljoin(root, "/") + return self.robots[root].can_fetch(AGENTNAME, url) + + def getpage(self, url_pair): + # Incoming argument name is a (URL, fragment) pair. + # The page may have been cached in the name_table variable. + url, fragment = url_pair + if self.name_table.has_key(url): + return self.name_table[url] + + scheme, path = urllib.splittype(url) + if scheme in ('mailto', 'news', 'javascript', 'telnet'): + self.note(1, " Not checking %s URL" % scheme) + return None + isint = self.inroots(url) + + # Ensure that openpage gets the URL pair to + # print out its error message and record the error pair + # correctly. 
+ if not isint: + if not self.checkext: + self.note(1, " Not checking ext link") + return None + f = self.openpage(url_pair) + if f: + self.safeclose(f) + return None + text, nurl = self.readhtml(url_pair) + + if nurl != url: + self.note(1, " Redirected to %s", nurl) + url = nurl + if text: + return Page(text, url, maxpage=self.maxpage, checker=self) + + # These next three functions take (URL, fragment) pairs as + # arguments, so that openpage() receives the appropriate tuple to + # record error messages. + def readhtml(self, url_pair): + url, fragment = url_pair + text = None + f, url = self.openhtml(url_pair) + if f: + text = f.read() + f.close() + return text, url + + def openhtml(self, url_pair): + url, fragment = url_pair + f = self.openpage(url_pair) + if f: + url = f.geturl() + info = f.info() + if not self.checkforhtml(info, url): + self.safeclose(f) + f = None + return f, url + + def openpage(self, url_pair): + url, fragment = url_pair + try: + return self.urlopener.open(url) + except (OSError, IOError), msg: + msg = self.sanitize(msg) + self.note(0, "Error %s", msg) + if self.verbose > 0: + self.show(" HREF ", url, " from", self.todo[url_pair]) + self.setbad(url_pair, msg) + return None + + def checkforhtml(self, info, url): + if info.has_key('content-type'): + ctype = cgi.parse_header(info['content-type'])[0].lower() + if ';' in ctype: + # handle content-type: text/html; charset=iso8859-1 : + ctype = ctype.split(';', 1)[0].strip() + else: + if url[-1:] == "/": + return 1 + ctype, encoding = mimetypes.guess_type(url) + if ctype == 'text/html': + return 1 + else: + self.note(1, " Not HTML, mime type %s", ctype) + return 0 + + def setgood(self, url): + if self.bad.has_key(url): + del self.bad[url] + self.changed = 1 + self.note(0, "(Clear previously seen error)") + + def setbad(self, url, msg): + if self.bad.has_key(url) and self.bad[url] == msg: + self.note(0, "(Seen this error before)") + return + self.bad[url] = msg + self.changed = 1 + 
self.markerror(url) + + def markerror(self, url): + try: + origins = self.todo[url] + except KeyError: + origins = self.done[url] + for source, rawlink in origins: + triple = url, rawlink, self.bad[url] + self.seterror(source, triple) + + def seterror(self, url, triple): + try: + # Because of the way the URLs are now processed, I need to + # check to make sure the URL hasn't been entered in the + # error list. The first element of the triple here is a + # (URL, fragment) pair, but the URL key is not, since it's + # from the list of origins. + if triple not in self.errors[url]: + self.errors[url].append(triple) + except KeyError: + self.errors[url] = [triple] + + # The following used to be toplevel functions; they have been + # changed into methods so they can be overridden in subclasses. + + def show(self, p1, link, p2, origins): + self.message("%s %s", p1, link) + i = 0 + for source, rawlink in origins: + i = i+1 + if i == 2: + p2 = ' '*len(p2) + if rawlink != link: s = " (%s)" % rawlink + else: s = "" + self.message("%s %s%s", p2, source, s) + + def sanitize(self, msg): + if isinstance(IOError, ClassType) and isinstance(msg, IOError): + # Do the other branch recursively + msg.args = self.sanitize(msg.args) + elif isinstance(msg, TupleType): + if len(msg) >= 4 and msg[0] == 'http error' and \ + isinstance(msg[3], InstanceType): + # Remove the Message instance -- it may contain + # a file object which prevents pickling. + msg = msg[:3] + msg[4:] + return msg + + def safeclose(self, f): + try: + url = f.geturl() + except AttributeError: + pass + else: + if url[:4] == 'ftp:' or url[:7] == 'file://': + # Apparently ftp connections don't like to be closed + # prematurely... 
+ text = f.read() + f.close() + + def save_pickle(self, dumpfile=DUMPFILE): + if not self.changed: + self.note(0, "\nNo need to save checkpoint") + elif not dumpfile: + self.note(0, "No dumpfile, won't save checkpoint") + else: + self.note(0, "\nSaving checkpoint to %s ...", dumpfile) + newfile = dumpfile + ".new" + f = open(newfile, "wb") + pickle.dump(self, f) + f.close() + try: + os.unlink(dumpfile) + except os.error: + pass + os.rename(newfile, dumpfile) + self.note(0, "Done.") + return 1 + + +class Page: + + def __init__(self, text, url, verbose=VERBOSE, maxpage=MAXPAGE, checker=None): + self.text = text + self.url = url + self.verbose = verbose + self.maxpage = maxpage + self.checker = checker + + # The parsing of the page is done in the __init__() routine in + # order to initialize the list of names the file + # contains. Stored the parser in an instance variable. Passed + # the URL to MyHTMLParser(). + size = len(self.text) + if size > self.maxpage: + self.note(0, "Skip huge file %s (%.0f Kbytes)", self.url, (size*0.001)) + self.parser = None + return + self.checker.note(2, " Parsing %s (%d bytes)", self.url, size) + self.parser = MyHTMLParser(url, verbose=self.verbose, + checker=self.checker) + self.parser.feed(self.text) + self.parser.close() + + def note(self, level, msg, *args): + if self.checker: + apply(self.checker.note, (level, msg) + args) + else: + if self.verbose >= level: + if args: + msg = msg%args + print msg + + # Method to retrieve names. + def getnames(self): + if self.parser: + return self.parser.names + else: + return [] + + def getlinkinfos(self): + # File reading is done in __init__() routine. Store parser in + # local variable to indicate success of parsing. + + # If no parser was stored, fail. + if not self.parser: return [] + + rawlinks = self.parser.getlinks() + base = urlparse.urljoin(self.url, self.parser.getbase() or "") + infos = [] + for rawlink in rawlinks: + t = urlparse.urlparse(rawlink) + # DON'T DISCARD THE FRAGMENT! 
Instead, include + # it in the tuples which are returned. See Checker.dopage(). + fragment = t[-1] + t = t[:-1] + ('',) + rawlink = urlparse.urlunparse(t) + link = urlparse.urljoin(base, rawlink) + infos.append((link, rawlink, fragment)) + + return infos + + +class MyStringIO(StringIO.StringIO): + + def __init__(self, url, info): + self.__url = url + self.__info = info + StringIO.StringIO.__init__(self) + + def info(self): + return self.__info + + def geturl(self): + return self.__url + + +class MyURLopener(urllib.FancyURLopener): + + http_error_default = urllib.URLopener.http_error_default + + def __init__(*args): + self = args[0] + apply(urllib.FancyURLopener.__init__, args) + self.addheaders = [ + ('User-agent', 'Python-webchecker/%s' % __version__), + ] + + def http_error_401(self, url, fp, errcode, errmsg, headers): + return None + + def open_file(self, url): + path = urllib.url2pathname(urllib.unquote(url)) + if os.path.isdir(path): + if path[-1] != os.sep: + url = url + '/' + indexpath = os.path.join(path, "index.html") + if os.path.exists(indexpath): + return self.open_file(url + "index.html") + try: + names = os.listdir(path) + except os.error, msg: + exc_type, exc_value, exc_tb = sys.exc_info() + raise IOError, msg, exc_tb + names.sort() + s = MyStringIO("file:"+url, {'content-type': 'text/html'}) + s.write('\n' % + urllib.quote(os.path.join(path, ""))) + for name in names: + q = urllib.quote(name) + s.write('%s\n' % (q, q)) + s.seek(0) + return s + return urllib.FancyURLopener.open_file(self, url) + + +class MyHTMLParser(sgmllib.SGMLParser): + + def __init__(self, url, verbose=VERBOSE, checker=None): + self.myverbose = verbose # now unused + self.checker = checker + self.base = None + self.links = {} + self.names = [] + self.url = url + sgmllib.SGMLParser.__init__(self) + + def check_name_id(self, attributes): + """ Check the name or id attributes on an element. 
+ """ + # We must rescue the NAME or id (name is deprecated in XHTML) + # attributes from the anchor, in order to + # cache the internal anchors which are made + # available in the page. + for name, value in attributes: + if name == "name" or name == "id": + if value in self.names: + self.checker.message("WARNING: duplicate ID name %s in %s", + value, self.url) + else: self.names.append(value) + break + + def unknown_starttag(self, tag, attributes): + """ In XHTML, you can have id attributes on any element. + """ + self.check_name_id(attributes) + + def start_a(self, attributes): + self.link_attr(attributes, 'href') + self.check_name_id(attributes) + + def end_a(self): pass + + def do_area(self, attributes): + self.link_attr(attributes, 'href') + self.check_name_id(attributes) + + def do_body(self, attributes): + self.link_attr(attributes, 'background', 'bgsound') + self.check_name_id(attributes) + + def do_img(self, attributes): + self.link_attr(attributes, 'src', 'lowsrc') + self.check_name_id(attributes) + + def do_frame(self, attributes): + self.link_attr(attributes, 'src', 'longdesc') + self.check_name_id(attributes) + + def do_iframe(self, attributes): + self.link_attr(attributes, 'src', 'longdesc') + self.check_name_id(attributes) + + def do_link(self, attributes): + for name, value in attributes: + if name == "rel": + parts = value.lower().split() + if ( parts == ["stylesheet"] + or parts == ["alternate", "stylesheet"]): + self.link_attr(attributes, "href") + break + self.check_name_id(attributes) + + def do_object(self, attributes): + self.link_attr(attributes, 'data', 'usemap') + self.check_name_id(attributes) + + def do_script(self, attributes): + self.link_attr(attributes, 'src') + self.check_name_id(attributes) + + def do_table(self, attributes): + self.link_attr(attributes, 'background') + self.check_name_id(attributes) + + def do_td(self, attributes): + self.link_attr(attributes, 'background') + self.check_name_id(attributes) + + def do_th(self, 
attributes): + self.link_attr(attributes, 'background') + self.check_name_id(attributes) + + def do_tr(self, attributes): + self.link_attr(attributes, 'background') + self.check_name_id(attributes) + + def link_attr(self, attributes, *args): + for name, value in attributes: + if name in args: + if value: value = value.strip() + if value: self.links[value] = None + + def do_base(self, attributes): + for name, value in attributes: + if name == 'href': + if value: value = value.strip() + if value: + if self.checker: + self.checker.note(1, " Base %s", value) + self.base = value + self.check_name_id(attributes) + + def getlinks(self): + return self.links.keys() + + def getbase(self): + return self.base + + +if __name__ == '__main__': + main() diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/websucker.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/websucker.py new file mode 100644 index 0000000000..9e4fd292c8 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/websucker.py @@ -0,0 +1,123 @@ +#! /usr/bin/env python + +"""A variant on webchecker that creates a mirror copy of a remote site.""" + +__version__ = "$Revision$" + +import os +import sys +import urllib +import getopt + +import webchecker + +# Extract real version number if necessary +if __version__[0] == '$': + _v = __version__.split() + if len(_v) == 3: + __version__ = _v[1] + +def main(): + verbose = webchecker.VERBOSE + try: + opts, args = getopt.getopt(sys.argv[1:], "qv") + except getopt.error, msg: + print msg + print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..." + return 2 + for o, a in opts: + if o == "-q": + verbose = 0 + if o == "-v": + verbose = verbose + 1 + c = Sucker() + c.setflags(verbose=verbose) + c.urlopener.addheaders = [ + ('User-agent', 'websucker/%s' % __version__), + ] + for arg in args: + print "Adding root", arg + c.addroot(arg) + print "Run..." 
+ c.run() + +class Sucker(webchecker.Checker): + + checkext = 0 + nonames = 1 + + # SAM 11/13/99: in general, URLs are now URL pairs. + # Since we've suppressed name anchor checking, + # we can ignore the second dimension. + + def readhtml(self, url_pair): + url = url_pair[0] + text = None + path = self.savefilename(url) + try: + f = open(path, "rb") + except IOError: + f = self.openpage(url_pair) + if f: + info = f.info() + nurl = f.geturl() + if nurl != url: + url = nurl + path = self.savefilename(url) + text = f.read() + f.close() + self.savefile(text, path) + if not self.checkforhtml(info, url): + text = None + else: + if self.checkforhtml({}, url): + text = f.read() + f.close() + return text, url + + def savefile(self, text, path): + dir, base = os.path.split(path) + makedirs(dir) + try: + f = open(path, "wb") + f.write(text) + f.close() + self.message("saved %s", path) + except IOError, msg: + self.message("didn't save %s: %s", path, str(msg)) + + def savefilename(self, url): + type, rest = urllib.splittype(url) + host, path = urllib.splithost(rest) + path = path.lstrip("/") + user, host = urllib.splituser(host) + host, port = urllib.splitnport(host) + host = host.lower() + if not path or path[-1] == "/": + path = path + "index.html" + if os.sep != "/": + path = os.sep.join(path.split("/")) + path = os.path.join(host, path) + return path + +def makedirs(dir): + if not dir: + return + if os.path.exists(dir): + if not os.path.isdir(dir): + try: + os.rename(dir, dir + ".bak") + os.mkdir(dir) + os.rename(dir + ".bak", os.path.join(dir, "index.html")) + except os.error: + pass + return + head, tail = os.path.split(dir) + if not tail: + print "Huh? 
Don't know how to make dir", dir + return + makedirs(head) + os.mkdir(dir, 0777) + +if __name__ == '__main__': + sys.exit(main() or 0) diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wsgui.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wsgui.py new file mode 100644 index 0000000000..09fac2f3d9 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/wsgui.py @@ -0,0 +1,240 @@ +#! /usr/bin/env python + +"""Tkinter-based GUI for websucker. + +Easy use: type or paste source URL and destination directory in +their respective text boxes, click GO or hit return, and presto. +""" + +from Tkinter import * +import websucker +import os +import threading +import Queue +import time + +VERBOSE = 2 + + +try: + class Canceled(Exception): + "Exception used to cancel run()." +except (NameError, TypeError): + Canceled = __name__ + ".Canceled" + + +class SuckerThread(websucker.Sucker): + + stopit = 0 + savedir = None + rootdir = None + + def __init__(self, msgq): + self.msgq = msgq + websucker.Sucker.__init__(self) + self.setflags(verbose=VERBOSE) + self.urlopener.addheaders = [ + ('User-agent', 'websucker/%s' % websucker.__version__), + ] + + def message(self, format, *args): + if args: + format = format%args + ##print format + self.msgq.put(format) + + def run1(self, url): + try: + try: + self.reset() + self.addroot(url) + self.run() + except Canceled: + self.message("[canceled]") + else: + self.message("[done]") + finally: + self.msgq.put(None) + + def savefile(self, text, path): + if self.stopit: + raise Canceled + websucker.Sucker.savefile(self, text, path) + + def getpage(self, url): + if self.stopit: + raise Canceled + return websucker.Sucker.getpage(self, url) + + def savefilename(self, url): + path = websucker.Sucker.savefilename(self, url) + if self.savedir: + n = len(self.rootdir) + if path[:n] == self.rootdir: + path = path[n:] + while path[:1] == os.sep: + path = path[1:] + path = os.path.join(self.savedir, path) + 
return path + + def XXXaddrobot(self, *args): + pass + + def XXXisallowed(self, *args): + return 1 + + +class App: + + sucker = None + msgq = None + + def __init__(self, top): + self.top = top + top.columnconfigure(99, weight=1) + self.url_label = Label(top, text="URL:") + self.url_label.grid(row=0, column=0, sticky='e') + self.url_entry = Entry(top, width=60, exportselection=0) + self.url_entry.grid(row=0, column=1, sticky='we', + columnspan=99) + self.url_entry.focus_set() + self.url_entry.bind("", self.go) + self.dir_label = Label(top, text="Directory:") + self.dir_label.grid(row=1, column=0, sticky='e') + self.dir_entry = Entry(top) + self.dir_entry.grid(row=1, column=1, sticky='we', + columnspan=99) + self.go_button = Button(top, text="Go", command=self.go) + self.go_button.grid(row=2, column=1, sticky='w') + self.cancel_button = Button(top, text="Cancel", + command=self.cancel, + state=DISABLED) + self.cancel_button.grid(row=2, column=2, sticky='w') + self.auto_button = Button(top, text="Paste+Go", + command=self.auto) + self.auto_button.grid(row=2, column=3, sticky='w') + self.status_label = Label(top, text="[idle]") + self.status_label.grid(row=2, column=4, sticky='w') + self.top.update_idletasks() + self.top.grid_propagate(0) + + def message(self, text, *args): + if args: + text = text % args + self.status_label.config(text=text) + + def check_msgq(self): + while not self.msgq.empty(): + msg = self.msgq.get() + if msg is None: + self.go_button.configure(state=NORMAL) + self.auto_button.configure(state=NORMAL) + self.cancel_button.configure(state=DISABLED) + if self.sucker: + self.sucker.stopit = 0 + self.top.bell() + else: + self.message(msg) + self.top.after(100, self.check_msgq) + + def go(self, event=None): + if not self.msgq: + self.msgq = Queue.Queue(0) + self.check_msgq() + if not self.sucker: + self.sucker = SuckerThread(self.msgq) + if self.sucker.stopit: + return + self.url_entry.selection_range(0, END) + url = self.url_entry.get() + url = 
url.strip() + if not url: + self.top.bell() + self.message("[Error: No URL entered]") + return + self.rooturl = url + dir = self.dir_entry.get().strip() + if not dir: + self.sucker.savedir = None + else: + self.sucker.savedir = dir + self.sucker.rootdir = os.path.dirname( + websucker.Sucker.savefilename(self.sucker, url)) + self.go_button.configure(state=DISABLED) + self.auto_button.configure(state=DISABLED) + self.cancel_button.configure(state=NORMAL) + self.message( '[running...]') + self.sucker.stopit = 0 + t = threading.Thread(target=self.sucker.run1, args=(url,)) + t.start() + + def cancel(self): + if self.sucker: + self.sucker.stopit = 1 + self.message("[canceling...]") + + def auto(self): + tries = ['PRIMARY', 'CLIPBOARD'] + text = "" + for t in tries: + try: + text = self.top.selection_get(selection=t) + except TclError: + continue + text = text.strip() + if text: + break + if not text: + self.top.bell() + self.message("[Error: clipboard is empty]") + return + self.url_entry.delete(0, END) + self.url_entry.insert(0, text) + self.go() + + +class AppArray: + + def __init__(self, top=None): + if not top: + top = Tk() + top.title("websucker GUI") + top.iconname("wsgui") + top.wm_protocol('WM_DELETE_WINDOW', self.exit) + self.top = top + self.appframe = Frame(self.top) + self.appframe.pack(fill='both') + self.applist = [] + self.exit_button = Button(top, text="Exit", command=self.exit) + self.exit_button.pack(side=RIGHT) + self.new_button = Button(top, text="New", command=self.addsucker) + self.new_button.pack(side=LEFT) + self.addsucker() + ##self.applist[0].url_entry.insert(END, "http://www.python.org/doc/essays/") + + def addsucker(self): + self.top.geometry("") + frame = Frame(self.appframe, borderwidth=2, relief=GROOVE) + frame.pack(fill='x') + app = App(frame) + self.applist.append(app) + + done = 0 + + def mainloop(self): + while not self.done: + time.sleep(0.1) + self.top.update() + + def exit(self): + for app in self.applist: + app.cancel() + 
app.message("[exiting...]") + self.done = 1 + + +def main(): + AppArray().mainloop() + +if __name__ == '__main__': + main() -- cgit v1.2.3