sakana2moin.py

Abstract

Author: DanSandler
Usage: sakana2moin.py <sakana-files-dir> <moin-files-dir>

Convert pages from the Sakana wiki syntax to MoinMoin markup. Requires shell access to the Moin installation, because it actually creates a pile of files which need to be moved into <moin>/data/text.

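For Sakana pages with comments, the output MoinMoin page will have the comments appended to its body under a "Comments" heading; each comment is followed by a `-- SomeName at TIME` signature, and consecutive comments are separated by horizontal rules. Special (user) pages have the (user) part stripped from their names.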

Limitations/Bugs

About Sakana

Sakana (魚 -- Japanese for "fish") is a Python wiki system written (as a "hello world", no less!) by Brian Swetland. It has some really interesting features, including:

Of course, Brian's not maintaining it anymore, and the installation is pretty fragile (as seems to be the case with all wikis that run their own HTTPd).

Implementation

   1 #!/usr/bin/env python
   2 # sakana2moin.py
   3 # by dan sandler <dan.sandler /at/ gmail DOTCOM>
   4 # converts sakana wikipages into moin wikipage files
   5 # usage: sakana2moin.py <srcdir> <destdir>
   6 # version: 0.1
   7 
   8 import sys, os, re, time
   9 
  10 from MoinMoin import wikiutil
  11 
  12 class Translator:
  13     indentstring = '    '
  14     indentlength = len(indentstring)
  15     def __init__(self,OUT):
  16         self._out = OUT
  17         self._inlist = 0
  18         self._indent = ""
  19         self._newline = 1
  20         self._preformatted = 0
  21     def indent(self):
  22         self._indent = self._indent + Translator.indentstring
  23     def outdent(self):
  24         self._indent = self._indent[Translator.indentlength:]
  25     def output(self, s):
  26         if self._newline:
  27             self._out.write(self._indent)
  28             self._newline = 0
  29         if re.search('\n', s):
  30             self._newline = 1
  31         self._out.write(s)
  32     def translate(self, IN):
  33         self._in = IN
  34         while 1:
  35             line = self._in.readline()
  36             if line == '': break
  37             if self._preformatted:
  38                 line = re.sub(r'\n+$', '', line)
  39             else:
  40                 line = line.strip()
  41 
  42             # full-line macros
  43             m = re.search(r'{list}', line)
  44             if m:
  45                 self._inlist += 1
  46                 self.indent() ; continue
  47 
  48             m = re.search(r'{/list}', line)
  49             if m:
  50                 self._inlist -= 1
  51                 self.outdent() ; continue
  52 
  53             # full-line macros with possible internal text
  54 
  55             postfix = '\n'
  56             m = re.search(r'^@\s*(.*)$', line)
  57             if m:
  58                 line = m.group(1)
  59                 postfix = ' =\n'
  60                 self.output("= ")
  61 
  62             m = re.search(r'^-\s*(.*)$', line)
  63             if m:
  64                 line = m.group(1)
  65                 if self._inlist:
  66                     self.output("* ")
  67                 else:
  68                     # I feel like sakana's - operator is more like a H3 than a
  69                     # H2 ... is that just me?
  70                     postfix = " ===\n"
  71                     self.output("=== ")
  72 
  73             # inline formatters
  74 
  75             chunked = re.split(r'({[^}]*})', line)
  76             for chunk in chunked:
  77                 if len(chunk) > 0 \
  78                 and chunk[0] == '{' and chunk[-1] == '}':
  79                     tag = chunk[1:-1]
  80 
  81                     if tag in ('code','text'):
  82                         self.output("{{{\n")
  83                         self._preformatted += 1
  84                         continue
  85 
  86                     if tag in ('/code','/text'):
  87                         self.output("}}}\n")
  88                         self._preformatted -= 1
  89                         continue
  90 
  91                     m = re.search(r'^(part|quote)(.*)$', tag)
  92                     if m:
  93                         if m.group(1) == "quote" and m.group(2).startswith("|"):
  94                             pagename = m.group(2)[1:]
  95                             if pagename.startswith("(user)"):
  96                                 pagename = pagename[len("(user)"):]
  97                             self.output("""''Quote from ["%s"]:''"""
  98                                 % pagename)
  99                         self.indent()
 100                         self.output("\n") #start the indent
 101                         continue
 102 
 103                     if tag in ('/part','/quote'):
 104                         self.outdent()
 105                         continue
 106 
 107                     if tag == 'b' or tag == 'strong' \
 108                     or tag == '/b' or tag == '/strong':
 109                         self.output("'''")
 110                         continue
 111                     
 112                     if tag == 'i' or tag == 'em' \
 113                     or tag == '/i' or tag == '/em':
 114                         self.output("''")
 115                         continue
 116                     
 117                     if tag == 'tt' or tag == '/tt':
 118                         self.output("`")
 119                         if tag == 'tt':
 120                             self._preformatted += 1
 121                         else: self._preformatted -= 1
 122                         continue
 123                     
 124                     if tag == 'hr':
 125                         self.output("----")
 126                         continue
 127                     
 128                     if tag == 'br':
 129                         self.output("[[BR]]")
 130                         continue
 131 
 132                     m = re.search(r'^verb\|(.*)$', tag)
 133                     if m:
 134                         self.output("[[HTML(%s)]]" % m.group(1))
 135                         continue
 136 
 137                     m = re.search(r'^(eroom|change|bug)\|(.*)$', tag)
 138                     if m:
 139                         if m.group(1) == "eroom":
 140                             func = "ERoom"
 141                         elif m.group(1) == "change":
 142                             func = "Change"
 143                         elif m.group(1) == "bug":
 144                             func="Bug"
 145 
 146                         m2 = re.search(r'^(.*)\|(.*)', m.group(2))
 147                         if m2:
 148                             page = m2.group(1)
 149                             name = ',' + m2.group(2)
 150                         else:
 151                             page = m.group(2)
 152                             name = ''
 153                         self.output("[[%s(%s%s)]]" % (func, page, name))
 154                         continue
 155 
 156                     m = re.search(r'^page\|(.*)$', tag)
 157                     if m:
 158                         m2 = re.search(r'^(.*)\|(.*)', m.group(1))
 159                         if m2:
 160                             page = m2.group(1)
 161                             name = m2.group(2)
 162                         else:
 163                             page = m.group(1)
 164                             name = ''
 165                         self.output("[:%s:%s]" % (page, name))
 166                         continue
 167 
 168                     m = re.search(r'^link\|(.*)$', tag)
 169                     if m:
 170                         m2 = re.search(r'^(.*)\|(.*)', m.group(1))
 171                         if m2:
 172                             page = m2.group(1)
 173                             name = ' ' + m2.group(2)
 174                         else:
 175                             page = m.group(1)
 176                             name = ''
 177                         self.output("[%s%s]" % (page, name))
 178                         continue
 179 
 180                     self.output('`[Unknown macro: %s]`' % chunk)
 181 
 182                 else:
 183                     # blocks of text
 184                     # must substitute [OtherPage]
 185                     pagelink_chunks = re.split(r'(\[[^\]]*\])', chunk)
 186                     for subchunk in pagelink_chunks:
 187                         if len(subchunk) > 2 \
 188                         and subchunk[0] == '[' and subchunk[-1] == ']':
 189                             # OK, special pages!
 190                             page = subchunk[1:-1]
 191                             m = re.match(r'\(user\)(.*)$', page)
 192                             if m: page = m.group(1) #users are regular pages
 193                             self.output('["%s"]'% page)
 194                         else:
 195                             # finally, text with no tags or ANYTHING.
 196                             self.output(re.sub(r'\\', '', subchunk))
 197 
 198             self.output(postfix)
 199 
 200 def dehexify(s):
 201     name=""
 202     for i in range(0,len(s),2):
 203         name += ("%c" % int(s[i:i+2], 16))
 204     return name
 205 
 206 def hexify(name):
 207     s=''
 208     for c in name:
 209         s += ("%x" % ord(c))
 210     return s
 211 
 212 class Snip:
 213     def __init__(self, srcdir, name):
 214         self.srcdir = srcdir
 215         self.name = name
 216         filepath = os.path.join(srcdir, hexify(name))
 217         self.info = eval(open(filepath+':meta').read())
 218         if not self.name == self.info["name"]:
 219             raise "names not consistent: '%s', '%s'" \
 220                 % (self.name, self.info['name'])
 221         self.datafile = filepath+':data'
 222 
 223     def appendToMoinStream(self, outfile):
 224         t = Translator(outfile)
 225         text = open(self.datafile)
 226         t.translate(text)
 227 
 228         # now, append any notes
 229         
 230         try:
 231             emitted_hdr = 0
 232             for note in self.info["notes"]:
 233                 noteSnip = Snip(self.srcdir, note)
 234                 print "    note: %s <%s>" % (self.name, self.datafile)
 235                 if not emitted_hdr:
 236                     outfile.write("== Comments ==\n")
 237                     emitted_hdr = 1
 238                 else:
 239                     outfile.write("----\n")
 240                 noteSnip.appendToMoinStream(outfile)
 241                 outfile.write("""''  -- ["%s"] at %s''\n""" % (
 242                     noteSnip.info["created_by"][len("(user)"):],
 243                     time.strftime("%X %x",
 244                         time.localtime(noteSnip.info['created_at']))
 245                     ))
 246         except KeyError: pass
 247 
 248     def toMoin(self, destdir):
 249         moin_name = self.name
 250         if moin_name.startswith("(user)"):
 251             moin_name = moin_name[len("(user)"):]
 252 
 253         # MoinMoin has its own bizarre ideas about how to encode file names
 254         moin_name = wikiutil.quoteFilename(moin_name)
 255 
 256         print "page: %s <%s>" % (self.name, self.datafile)
 257 
 258         self.appendToMoinStream(open(os.path.join(destdir, moin_name),'w'))
 259 
 260 if __name__ == '__main__':
 261     if len(sys.argv) < 3:
 262         print "usage: sakana2moin.py <srcdir> <destdir>"
 263         sys.exit(1)
 264 
 265     srcdir = sys.argv[1]
 266     destdir = sys.argv[2]
 267 
 268     srcfiles = os.listdir(srcdir)
 269     for fn in srcfiles:
 270         if fn.endswith(":data"):
 271             try:
 272                 #fp = open(os.path.join(srcdir,fn[:-5]+':meta'))
 273                 #text = fp.read()
 274                 #exec("info = " + text) # eww! thanks, swetland
 275                 #pagename = info["name"]
 276     
 277                 fn = fn[:-5]
 278                 pagename = dehexify(fn)
 279 
 280                 if pagename.startswith("(note)"):
 281                     # it will be processed with its parent page
 282                     continue
 283                 elif pagename.startswith("(meta)") \
 284                 or pagename.startswith("(topic)"):
 285                     print "warning: skipping '%s' (can't handle meta/topic snips)" % pagename
 286                     continue
 287 
 288                 Snip(srcdir, pagename).toMoin(destdir)
 289 
 290             finally:
 291                 pass
 292 
 293 # vim: ft=python ts=4 sts=4 sw=4 expandtab:
sakana2moin.py

MoinMoin: SakanaMoinMoinConverter (last edited 2007-10-29 19:15:51 by localhost)