Attachment 'HTML2MoinMoin.py'

Download

   1 #!/usr/local/bin/python
   2 """
   3 Usage:
   4   moinconvert URL
   5 
   6 Retrieves the given URL and converts it to MoinMoin markup. The result is
   7 written to stdout.
   8 """
   9 
  10 import htmlentitydefs, sys
  11 
  12 from HTMLParser import HTMLParser
  13 
  14 class HTML2MoinMoin(HTMLParser):
  15 
  16     start_tags = {
  17         "a"     : " [%(0)s ",
  18         "b"     : "'''",
  19         "em"    : "''",
  20         "tt"    : "{{{",
  21         "pre"   : "\n{{{",
  22         "p"     : "\n\n",
  23         "br"    : "\n\n",
  24         "h1"    : "\n\n= ",
  25         "h2"    : "\n\n== ",
  26         "h3"    : "\n\n=== ",
  27         "h4"    : "\n\n==== ",
  28         "h5"    : "\n\n===== ",
  29         "title" : "TITLE: ",
  30         "table" : "\n",
  31         "tr"    : "",
  32         "td"    : "||"
  33         }
  34 
  35     end_tags = {
  36         "a"     : ']',
  37         "b"     : "'''",
  38         "em"    : "''",
  39         "tt"    : "}}}",
  40         "pre"   : "}}}\n",
  41         "p"     : "",
  42         "h1"    : " =\n\n",
  43         "h2"    : " ==\n\n",
  44         "h3"    : " ===\n\n",
  45         "h4"    : " ====\n\n",
  46         "h5"    : " =====\n\n",
  47         "table" : "\n", 
  48         "tr"    : "||\n",
  49         "dt"    : ":: "
  50         }
  51 
  52     def __init__(self):
  53         HTMLParser.__init__(self)
  54         self.output = sys.stdout
  55         self.list_mode = []
  56         self.preformatted = False
  57         self.verbose = 0
  58 
  59     def write(self, text):
  60         self.output.write(text)
  61 
  62     def do_ul_start(self, attrs, tag):
  63         self.list_mode.append("*")
  64 
  65     def do_ol_start(self, attrs, tag):
  66         self.list_mode.append("1.")
  67 
  68     def do_dl_start(self, attrs, tag):
  69         self.list_mode.append("")
  70 
  71     def do_ul_end(self, tag):
  72         self.list_mode = self.list_mode[:-1]
  73 
  74     do_ol_end = do_ul_end
  75     do_dl_end = do_ul_end
  76 
  77     def do_li_start(self, args, tag):
  78         self.write("\n" + " " * len(self.list_mode) + self.list_mode[-1])
  79 
  80     def do_dt_start(self, args, tag):
  81         self.write("\n" + " " * len(self.list_mode) + self.list_mode[-1])
  82 
  83     def do_pre_start(self, args, tag):
  84         self.preformatted = True
  85         self.write(self.start_tags["pre"])
  86 
  87     def do_pre_end(self, tag):
  88         self.preformatted = False
  89         self.write(self.end_tags["pre"])
  90 
  91     def handle_starttag(self, tag, attrs):
  92         if hasattr(self, "do_%s_start" % tag):
  93             getattr(self, "do_%s_start" % tag)(attrs, tag)
  94         elif self.start_tags.has_key(tag):
  95             attr_dict = {}
  96             i = 0
  97             for a in attrs:
  98                 attr_dict[a[0]] = a[1]
  99                 attr_dict[str(i)] = a[1]
 100                 i += 1
 101             self.write(self.start_tags[tag] % attr_dict)            
 102         else:
 103             self.do_default_start(attrs, tag)
 104 
 105     def handle_endtag(self, tag):
 106         if hasattr(self, "do_%s_end" % tag):
 107             getattr(self, "do_%s_end" % tag)(tag)
 108         elif self.end_tags.has_key(tag):
 109             self.write(self.end_tags[tag])
 110         else:
 111             self.do_default_end(tag)
 112 
 113 
 114     def handle_data(self, data):
 115         if self.preformatted:
 116             self.write(data)
 117         else:
 118             self.write(data.replace("\n", " "))
 119 
 120     def handle_charref(self, name):
 121         self.write(name)
 122 
 123     def handle_entityref(self, name):
 124         if htmlentitydefs.entitydefs.has_key(name):
 125             self.write(htmlentitydefs.entitydefs[name])
 126         else:
 127             self.write("&" + name)
 128 
 129     def do_default_start(self, attrs, tag):
 130         if self.verbose:
 131             print "Encountered the beginning of a %s tag" % tag
 132             print "Attribs: %s" % attrs
 133             
 134     def do_default_end(self, tag):
 135         if self.verbose:
 136             print "Encountered the end of a %s tag" % tag
 137 
 138 
 139 def main():
 140     import urllib
 141     htmldata = urllib.urlopen(sys.argv[1]).read()
 142 
 143     p = HTML2MoinMoin()
 144     p.feed(htmldata)
 145     p.close()
 146 
 147 
 148 if __name__ == "__main__":
 149     main()

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2003-12-07 18:15:54, 3.7 KB) [[attachment:HTML2MoinMoin.py]]
  • [get | view] (2003-12-07 18:15:54, 1.2 KB) [[attachment:gettiki_html.py]]
  • [get | view] (2003-12-07 18:15:54, 7.7 KB) [[attachment:tikihtml2moinmoin.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.