Attachment 'HTML2MoinMoin.py'
Download 1 #!/usr/local/bin/python
2 """
3 Usage:
4 moinconvert URL
5
6 Retrieves the given URL and converts it to MoinMoin markup. The result is
7 written to stdout.
8 """
9
10 import htmlentitydefs, sys
11
12 from HTMLParser import HTMLParser
13
class HTML2MoinMoin(HTMLParser):
    """Convert an HTML stream to MoinMoin wiki markup.

    Feed HTML with ``feed()`` and finish with ``close()``; the converted
    text is written incrementally to ``self.output`` (``sys.stdout`` unless
    reassigned).

    Tag dispatch, in order of precedence:

    1. a method named ``do_<tag>_start`` / ``do_<tag>_end`` if one exists;
    2. a template from ``start_tags`` / ``end_tags``;
    3. ``do_default_start`` / ``do_default_end``, which only report the
       tag when ``self.verbose`` is true.
    """

    # Markup written when a tag opens.  Templates are %-formatted with a
    # dict holding each attribute under its own name and under its
    # position ("0", "1", ...).
    start_tags = {
        # The link target is taken from ``href`` by name; attribute order
        # in the source HTML must not matter.
        "a" : " [%(href)s ",
        "b" : "'''",
        "em" : "''",
        "tt" : "{{{",
        "pre" : "\n{{{",
        "p" : "\n\n",
        "br" : "\n\n",
        "h1" : "\n\n= ",
        "h2" : "\n\n== ",
        "h3" : "\n\n=== ",
        "h4" : "\n\n==== ",
        "h5" : "\n\n===== ",
        "title" : "TITLE: ",
        "table" : "\n",
        "tr" : "",
        "td" : "||"
    }

    # Markup written when a tag closes (plain strings, no formatting).
    end_tags = {
        "a" : ']',
        "b" : "'''",
        "em" : "''",
        "tt" : "}}}",
        "pre" : "}}}\n",
        "p" : "",
        "h1" : " =\n\n",
        "h2" : " ==\n\n",
        "h3" : " ===\n\n",
        "h4" : " ====\n\n",
        "h5" : " =====\n\n",
        "table" : "\n",
        "tr" : "||\n",
        "dt" : ":: "
    }

    def __init__(self):
        # Explicit base-class call rather than super(): the file targets
        # the Python 2 HTMLParser module.
        HTMLParser.__init__(self)
        self.output = sys.stdout    # any object with a write() method
        self.list_mode = []         # marker of every open list, outermost first
        self.open_links = []        # per open <a>: True if "[" was written
        self.preformatted = False   # True while inside <pre>
        self.verbose = 0            # report unhandled tags when true

    def write(self, text):
        """Write ``text`` to the current output object."""
        self.output.write(text)

    def do_ul_start(self, attrs, tag):
        """Open a bullet list."""
        self.list_mode.append("*")

    def do_ol_start(self, attrs, tag):
        """Open a numbered list."""
        self.list_mode.append("1.")

    def do_dl_start(self, attrs, tag):
        """Open a definition list (its items carry no marker)."""
        self.list_mode.append("")

    def do_ul_end(self, tag):
        """Close the innermost open list; a stray end tag is ignored."""
        del self.list_mode[-1:]

    do_ol_end = do_ul_end
    do_dl_end = do_ul_end

    def _write_item_prefix(self, fallback_marker):
        """Start a new list line: newline, one space per nesting level, marker.

        ``fallback_marker`` is used at depth 1 when the item appears
        outside any list, instead of failing on the empty ``list_mode``.
        """
        if self.list_mode:
            depth = len(self.list_mode)
            marker = self.list_mode[-1]
        else:
            depth = 1
            marker = fallback_marker
        if marker:
            # Keep the marker separate from the item text ("1. item", not
            # "1.item").
            marker += " "
        self.write("\n" + " " * depth + marker)

    def do_li_start(self, args, tag):
        """Begin a list item using the marker of the innermost list."""
        self._write_item_prefix("*")

    def do_dt_start(self, args, tag):
        """Begin a definition term; ``end_tags["dt"]`` appends the ``::``."""
        self._write_item_prefix("")

    def do_a_start(self, attrs, tag):
        """Open a link, but only for anchors that carry an ``href``.

        Anchors without a target (e.g. ``<a name="top">``) produce no
        markup; their text is passed through as plain data.
        """
        attr_dict = dict(attrs)
        # A valueless attribute is reported with the value None.
        has_target = attr_dict.get("href") is not None
        self.open_links.append(has_target)
        if has_target:
            self.write(self.start_tags["a"] % attr_dict)

    def do_a_end(self, tag):
        """Close a link only if the matching start tag opened one."""
        if self.open_links and self.open_links.pop():
            self.write(self.end_tags["a"])

    def do_pre_start(self, args, tag):
        """Enter preformatted mode so newlines in the data are kept."""
        self.preformatted = True
        self.write(self.start_tags["pre"])

    def do_pre_end(self, tag):
        """Leave preformatted mode."""
        self.preformatted = False
        self.write(self.end_tags["pre"])

    def handle_starttag(self, tag, attrs):
        """Dispatch a start tag (see the class docstring for the order)."""
        handler = getattr(self, "do_%s_start" % tag, None)
        if handler is not None:
            handler(attrs, tag)
        elif tag in self.start_tags:
            attr_dict = {}
            for index, (key, value) in enumerate(attrs):
                attr_dict[key] = value
                attr_dict[str(index)] = value
            self.write(self.start_tags[tag] % attr_dict)
        else:
            self.do_default_start(attrs, tag)

    def handle_endtag(self, tag):
        """Dispatch an end tag (see the class docstring for the order)."""
        handler = getattr(self, "do_%s_end" % tag, None)
        if handler is not None:
            handler(tag)
        elif tag in self.end_tags:
            self.write(self.end_tags[tag])
        else:
            self.do_default_end(tag)

    def handle_data(self, data):
        """Write text; newlines become spaces except inside ``<pre>``."""
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

    def handle_charref(self, name):
        """Handle a numeric character reference such as ``&#65;`` or ``&#x41;``.

        ``name`` is the text between ``&#`` and ``;``.  Printable ASCII is
        written as the character itself; anything else (including a
        malformed number) is written back as the complete reference so no
        information is lost and no output-encoding decision is needed.
        """
        try:
            if name[:1] in ("x", "X"):
                code = int(name[1:], 16)
            else:
                code = int(name)
        except ValueError:
            code = -1
        if 0x20 <= code < 0x7f:
            self.write(chr(code))
        else:
            self.write("&#%s;" % name)

    def handle_entityref(self, name):
        """Handle a named entity such as ``&amp;``.

        Known names are replaced by their ``htmlentitydefs`` definition;
        unknown ones are written back as a complete ``&name;`` reference
        so they do not run into the following text.
        """
        if name in htmlentitydefs.entitydefs:
            self.write(htmlentitydefs.entitydefs[name])
        else:
            self.write("&" + name + ";")

    def do_default_start(self, attrs, tag):
        """Report an unhandled start tag when ``self.verbose`` is set."""
        if self.verbose:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Encountered the beginning of a %s tag" % tag)
            print("Attribs: %s" % attrs)

    def do_default_end(self, tag):
        """Report an unhandled end tag when ``self.verbose`` is set."""
        if self.verbose:
            print("Encountered the end of a %s tag" % tag)
137
138
def main(argv=None):
    """Fetch the URL given on the command line and print it as MoinMoin markup.

    argv -- argument list in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv`` itself.

    Exits with status 2 after writing the usage text to stderr when no URL
    argument is present.  Arguments after the URL are ignored.
    """
    import urllib

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # The module docstring doubles as the usage message; fall back to
        # a literal in case the module has no docstring.
        sys.stderr.write(__doc__ or "Usage: moinconvert URL\n")
        sys.exit(2)

    response = urllib.urlopen(argv[1])
    try:
        htmldata = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    p = HTML2MoinMoin()
    p.feed(htmldata)
    p.close()


if __name__ == "__main__":
    main()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.