Attachment 'moin_export.py'

Download

   1 # -*- coding: iso-8859-1 -*-
   2 """
   3 MoinMoin - Dump a MoinMoin wiki to static pages
   4 
   5 Copyright (c) 2002, 2003 by Jürgen Hermann <jh@web.de>
   6 All rights reserved, see COPYING for details.
   7 
   8 This program is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2 of the License, or
  11 (at your option) any later version.
  12 
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17 
  18     ORS modifications:
  19         12.01.04 RS upgrade to 1.1., mark all previous ORS changes
  20         20.01.04 RS repair indent errors
  21         29.06.04 RS handle KeyboardInterrupt
  22         07.09.05 RS upgrade to 1.3.5
  23         24.09.05 RS use StringIO to buffer output, enable postprocessing
  24 """
  25 __version__ = "20040329"
  26 
  27 # use this if your moin installation is not in sys.path:
  28 #############################################################################
  29 ### Helpers
  30 #############################################################################
  31 
  32 ##HTML_SUFFIX = ".html"  # perhaps put this in config.py as html_suffix?
  33 
  34 logo_html = '<img src="moinmoin.png">'
  35 
  36 url_prefix = "."
  37 
  38 page_template = u'''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  39 <html>
  40 <head>
  41 <meta http-equiv="content-type" content="text/html; charset=%(charset)s">
  42 <title>%(pagename)s</title>
  43 <link rel="stylesheet" type="text/css" media="all" charset="utf-8" href="%(theme)s/css/common.css">
  44 <link rel="stylesheet" type="text/css" media="screen" charset="utf-8" href="%(theme)s/css/screen.css">
  45 <link rel="stylesheet" type="text/css" media="print" charset="utf-8" href="%(theme)s/css/print.css">
  46 </head>
  47 <body>
  48 <table>
  49 <tr>
  50 <td>
  51 %(logo_html)s
  52 </td>
  53 <td>
  54 %(navibar_html)s
  55 </td>
  56 </tr>
  57 </table>
  58 <hr>
  59 %(pagehtml)s
  60 <hr>
  61 %(timestamp)s
  62 </body>
  63 </html>
  64 '''
  65 
  66 SUFFIX={'html':".html",'plain':".txt",'word':".docdmp",'hhk':".hhk",'hhc':".hhc",'tal':".html",'xxml':".xml",'pdf':".pdfdmp"}
  67 #############################################################################
  68 ### Main program
  69 #############################################################################
  70 import sys,re,string
  71 import os, time, StringIO, codecs, shutil
  72 from MoinMoin import config, search, wikiutil, Page
  73 from MoinMoin.scripts import _util
  74 #from MoinMoin.util import pysupport
  75 from MoinMoin.request_ors import RequestExportCLI
  76 
  77 class MoinExporter(_util.Script):
  78     def __init__(self):
  79         _util.Script.__init__(self, __name__, "[options] <target-directory>")
  80         self.pageoptions={}
  81         self.exp_format="html"
  82 #        self.request=None
  83         # --config=DIR            
  84         self.parser.add_option(
  85             "--config", metavar="DIR", dest="configdir",
  86             help="Path to wikiconfig.py (or its directory)"
  87         )
  88 
  89         # --wiki=URL             
  90         self.parser.add_option(
  91             "--wiki", metavar="WIKIURL", dest="wiki_url",
  92             help="URL of wiki to dump (e.g. moinmaster.wikiwikiweb.de)"
  93         )
  94 
  95         # --page=NAME             
  96         self.parser.add_option(
  97             "--page", metavar="NAME", dest="page",
  98             help="Dump a single page (with possibly broken links)"
  99         )
 100         
 101 #RS additional options
 102         # --pattern=REGEX             
 103         self.parser.add_option(
 104             "--pattern", metavar="PATTERN", dest="pattern",
 105             help="Dump all pages with this pattern"
 106         )
 107         # --format=FORMAT             
 108         self.parser.add_option(
 109             "--format", metavar="FORMAT", dest="exportformat",
 110             help="Use export format"
 111         )
 112 #RS end
 113 #SYNC0
 114 #RS refactored: separated code parts from parsing parts
 115     def setOutputDir(self,outdir):
 116         """
 117            set and create output dir
 118         """
 119         self.outputdir = os.path.abspath(outdir)
 120         if not os.path.isdir(self.outputdir):
 121             try:
 122                 os.mkdir(self.outputdir)
 123                 _util.log("Created output directory '%s'!" % self.outputdir)
 124             except OSError:
 125                 _util.fatal("Cannot create output directory '%s'!" % self.outputdir)
 126         self.request.export_dir=self.outputdir
 127 
 128     def setFormatter(self,exp_format="html"):
 129         """
 130            load formatter
 131         """
 132         from MoinMoin import wikiutil
 133         # try to load the formatter
 134         mimetype = u"text/%s" % exp_format
 135 ##        print "format set to %s" % mimetype
 136 #RS extension handling
 137         self.ext=SUFFIX.get(string.split(mimetype,"/")[1],'.html')
 138 #        qfn_file = lambda pagename, qfn=wikiutil.quoteWikiname: qfn(pagename) + self.ext
 139 
 140         #modify behavior of wikiutil.quoteWikinameURL to add the extension 
 141         wikiutil.quoteWikinameURL = lambda pagename, qfn=wikiutil.quoteWikinameFS: (qfn(pagename)) + self.ext
 142 #RS stop
 143 
 144         Formatter=None
 145         Formatter = wikiutil.importPlugin(self.request.cfg, "formatter",
 146             mimetype.translate({ord(u'/'): u'_', ord(u'.'): u'_'}), "Formatter")
 147 #        Formatter = pysupport.importName("MoinMoin.formatter." +
 148 #            string.translate(mimetype, string.maketrans('/.', '__')), "Formatter")
 149         if Formatter is None:
 150             # default to plain text formatter
 151             sys.stderr.write("\nfallback to text/html!")
 152             del Formatter
 153             mimetype = "text/html"
 154             from MoinMoin.formatter.text_html import Formatter
 155     
 156 #        self.pageoptions["formatter"]=Formatter(self.request,targetdir=self.outputdir)
 157 #        self.pageoptions["formatter"]=Formatter(self.request)
 158 #        self.pageoptions["export_mode"]=1
 159 #        self.formatter=self.pageoptions["formatter"]
 160         sys.stderr.write("\nformatter:%s" % str(Formatter))
 161         self.formatter=Formatter(self.request,targetdir=self.outputdir)
 162         self.pageoptions["formatter"]=self.formatter
 163     
 164         self.exp_format=exp_format
 165         sys.stderr.write("\nself.formatter:%s" % str(self.formatter))
 166     
 167     def write(self, *data):
 168         """ Write to output stream.
 169         """
 170         for piece in data:
 171             self.out.write(piece)
 172 
 173 
 174 
 175     def dumpPage(self,pagename,targetfile):
 176         """
 177            export the single page
 178         """
 179         from MoinMoin import Page
 180 #RS attachment handling
 181         from MoinMoin.action import AttachFile
 182 #RS end
 183         _util.log('Writing "%s"...' % targetfile)
 184 #RS output
 185         self.request.write('Writing page "%s" to "%s"...' % (pagename,targetfile))
 186 #        return
 187 #RS end
 188         self.request.pragma={}
 189 
 190         errcnt=0
 191         request=self.request
 192         errlog=self.errlog
 193         outputdir=self.outputdir
 194         filepath = os.path.join(self.outputdir, targetfile)
 195 #        self.out = codecs.open(filepath, 'w', config.charset)
 196         self.out=StringIO.StringIO()
 197         self.result=""
 198 #        self.normwrite=self.request.write
 199         try:
 200 #RS options
 201 #            page = Page.Page(self.request,pagename,**self.pageoptions)
 202             page = Page.Page(request, pagename,**self.pageoptions)
 203 #RS end
 204 #    
 205             try:
 206                 request.reset()
 207 #                out = StringIO.StringIO()
 208                 request.redirect(self.out)
 209                 page.send_page(request, count_hit=0, content_only=0)
 210 #                pagehtml = out.getvalue()
 211                 request.exportAttachments(pagename)
 212                 request.redirect()
 213             except KeyboardInterrupt:
 214                 print >>sys.stderr, "*** keyboard interrupt,terminating while writing page %s!" % pagename
 215                 print >>self.errlog, "~" * 78
 216                 request.redirect()
 217                 self.errlog.close()
 218                 sys.exit(1)
 219             except:
 220                 errcnt = errcnt + 1
 221                 print >>sys.stderr, "*** Caught exception while writing page!"
 222                 print >>errlog, "~" * 78
 223                 print >>errlog, targetfile # page filename
 224                 import traceback
 225                 traceback.print_exc(None, errlog)
 226                 request.redirect()
 227 
 228 #
 229 #            sys.stdout = self.out
 230 #            self.request.write=self.write
 231 #            try:
 232 #                page.send_page(self.request)
 233 #RS handle KeyboardInterrupt
 234 ##            except KeyboardInterrupt:
 235 ##                print >>sys.stderr, "*** keyboard interrupt,terminating while writing page %s!" % pagename
 236 ##                print >>self.errlog, "~" * 78
 237 ##                self.out.close()
 238 ##                sys.stdout = sys.__stdout__
 239 ##                self.request.write=self.normwrite
 240 ##                self.errlog.close()
 241 ##                sys.exit(1)
 242 ###RS end            
 243 ##            except:
 244 ##                errcnt = errcnt + 1
 245 ##                print >>sys.stderr, "*** Caught exception while writing page %s!" % pagename
 246 ##                print >>self.errlog, "~" * 78
 247 ##                import traceback
 248 ##                traceback.print_exc(None, self.errlog)
 249 #RS word cleanup
 250 #                Formatter=self.pageoptions.get("formatter",None)
 251                 Formatter=self.formatter
 252                 if Formatter!=None:
 253                     if vars(Formatter).has_key('word_host'):
 254                         if Formatter.word_host!=None:
 255                             Formatter.word_host.Quit()
 256         finally:
 257 #            timestamp = time.strftime("%Y-%m-%d %H:%M")
 258 #            filepath = os.path.join(outputdir, file)
 259 #            fileout = codecs.open(filepath, 'w', config.charset)
 260 #            fileout.write(page_template % {
 261 ##                'charset': config.charset,
 262 ##                'pagename': pagename,
 263 ##                'pagehtml': pagehtml,
 264 ##                'logo_html': logo_html,
 265 ##                'navibar_html': navibar_html,
 266 ##                'timestamp': timestamp,
 267 ##                'theme': request.cfg.theme_default,
 268 #            })
 269 #            fileout.close()
 270 
 271             self.result=self.out.getvalue()
 272             self.out.close()
 273 ##            sys.stdout = sys.__stdout__
 274 ##            self.request.write=self.normwrite
 275 #RS attachment
 276 #            AttachFile.dump_filelist(self.request, pagename,os.path.dirname(self.outputdir))
 277 #RS end
 278 
 279 #RS end
 280 #        if errcnt:
 281 #            print >>sys.stderr, "*** %d error(s) occurred, see '%s'!" % (errcnt, errfile)
 282         self.outf = codecs.open(filepath, 'w', config.charset)
 283         self.outf.write(self.result)
 284         self.outf.close()
 285         return self.result
 286 
 287     def postprocessPage(self,pagename,file,output):
 288         """
 289         process the dumpPage result
 290         """
 291         if self.exp_format=="pdf":
 292             pass
 293             # output is a string with embedded tags, this could be processed similar to the odyssee sample
 294             # just line-by line and handling paging
 295             ptitle = wikiutil.quoteWikinameFS(pname)
 296             pfile = ptitle + ".pdf"
 297             pfilepath = os.path.abspath(os.path.join(exporter.outputdir, pfile))
 298             pauthor = exporter.request.user.name
 299         #fuer testzwecke den outputstring ueberschreiben
 300             ##    output ="""
 301             ##Provided by The Internet Classics Archive.
 302             ##See bottom for copyright. Available online at
 303             ##    http://classics.mit.edu//Homer/odyssey.html
 304             ##
 305             ##The Odyssey
 306             ##By Homer
 307             ##
 308             ##
 309             ##Translated by Samuel Butler
 310             ##
 311             ##----------------------------------------------------------------------
 312             ##
 313             ##BOOK I
 314             output ="""
 315             BOOK 1
 316          Das <sup>ist</sup> ein <sub>kleiner</sub> Teststring.
 317             Er geht <u>ueber</u> <i>mehrere</i> <b>Zeilen</b>
 318 
 319         --achtung, hier sollte nicht zu lesen sein, dass ein zweiter abschnitt beginnt!!--
 320             BOOK 2
 321             
 322             und das ist auch ganz gut so"""
 323             from MoinMoin.formatter.text_pdf import parseOdyssey
 324             parseOdyssey(output,pfilepath, ptitle, pauthor)
 325             
 326             #    pfile = wikiutil.quoteFilename(pname) + ".X.pdf"
 327             #    pfilepath = os.path.abspath(os.path.join(exporter.outputdir, pfile))
 328             #    canv = canvas.Canvas(pfilepath, invariant=1)
 329             #    canv.setPageCompression(1)
 330             #    drawPageFrame(canv,pagename)
 331 
 332                 #do some title page stuff
 333             #    canv.setFont("Times-Bold", 36)
 334             #    canv.drawCentredString(0.5 * A4[0], 7 * inch, pname)
 335 
 336             #    canv.setFont("Times-Bold", 18)
 337             #    canv.drawCentredString(0.5 * A4[0], 5 * inch, "Translated by %s" % exporter.request.user.name)
 338 
 339             #    canv.setFont("Times-Bold", 12)
 340             #    tx = canv.beginText(left_margin, 3 * inch)
 341             #    canv.showPage()
 342             #    canv.setFont('Times-Roman', 12)
 343             #    tx = canv.beginText(left_margin, top_margin - 0.5*inch)
 344             #    data=output.split("\n")
 345             #    verbose=1
 346             #    for line in data:
 347                     #this just does it the fast way...
 348             #        tx.textLine(line)
 349                     #this forces it to do text metrics, which would be the slow
 350                     #part if we were wrappng paragraphs.
 351                     #canv.textOut(line)
 352                     #canv.textLine('')
 353 
 354                     #page breaking
 355             #        y = tx.getY()   #get y coordinate
 356             #        if y < bottom_margin + 0.5*inch:
 357             #            canv.drawText(tx)
 358             #            canv.showPage()
 359             #            drawPageFrame(canv,pagename)
 360             #            canv.setFont('Times-Roman', 12)
 361             #            tx = canv.beginText(left_margin, top_margin - 0.5*inch)
 362 
 363                         #page
 364             #            pg = canv.getPageNumber()
 365             #            if verbose and pg % 10 == 0:
 366             #                print 'formatted page %d' % canv.getPageNumber()
 367             #
 368             #    if tx:
 369             #        canv.drawText(tx)
 370             #        canv.showPage()
 371             #        drawPageFrame(canv,pagename)
 372             #
 373             #    if verbose:
 374             #        print 'about to write to disk...'
 375             #
 376             #    canv.save()
 377         return
 378 
 379 
 380 
 381     def mainloop(self):
 382         """ moin-exporter main code.
 383         """
 384 
 385         if len(sys.argv) == 1:
 386             self.parser.print_help()
 387             sys.exit(1)
 388 
 389         if len(self.args) != 1:
 390             self.parser.error("incorrect number of arguments")
 391 
 392         #
 393 ##SYNC1
 394         #
 395         # Load the configuration
 396         #
 397         configdir = self.options.configdir
 398 #RS additional options
 399         if self.options.pattern:
 400             page_pattern = self.options.pattern
 401         else:
 402             page_pattern=".*"
 403        
 404         if self.options.exportformat:
 405             exp_format = self.options.exportformat
 406         else:
 407             exp_format = "html"
 408 
 409         
 410 
 411 ##        print "reading config from", configdir
 412         if configdir:
 413             if os.path.isfile(configdir):
 414                 configdir = os.path.dirname(configdir)
 415 ##            print "configdir now ",configdir    
 416             if not os.path.isdir(configdir):
 417                 _util.fatal("Bad path given to --config parameter")
 418             configdir = os.path.abspath(configdir)
 419 ##            print "configdir now ",configdir    
 420             sys.path[0:0] = [configdir]
 421             os.chdir(configdir)
 422 ##        print "importing config from", configdir
 423 ##        try:
 424 ##            from MoinMoin import config
 425 ##        except:
 426 ####            print "CFG ERR"
 427 ##            raise
 428 ##        if config.default_config:
 429 ##            _util.fatal("You have to be in the directory containing moin_config.py, "
 430 ##                "or use the --config option!")
 431 
 432         # fix some values so we get relative paths in output html
 433         # XXX maybe this should be configurable
 434 #        config.url_prefix = "../wiki"
 435 #        config.css_url    = "../wiki/css/moinmoin.css"
 436 #RS output
 437 ##        print "dumping wiki site:",config.sitename," using pattern:",page_pattern
 438 #RS end
 439 
 440         #
 441         # Dump the wiki
 442         
 443         sys.stderr.write("\n\nself.options.wiki_url:%s" % self.options.wiki_url)
 444 #        request = RequestExportCLI({'url':self.options.wiki_url})
 445         request = RequestExportCLI(self.options.wiki_url)
 446         request.form = request.args = request.setup_args()
 447         # fix url_prefix so we get relative paths in output html
 448         request.cfg.url_prefix = url_prefix
 449         self.request=request
 450         self.cfg=request.cfg
 451         # Prepare output directory
 452         #
 453 #RS refactored code
 454         self.setOutputDir(self.args[0])
 455 #RS end
 456         #
 457 #RS user handling
 458         from MoinMoin import user
 459         from MoinMoin import wikiutil
 460 #RS stop
 461 #        self.request = cgimain.createRequest()
 462 #RS continue
 463 #        admin=user.getUserId('RobertSeeger')
 464 #        self.request.user=user.User(self.request,admin)
 465 #        self.request.user.show_topbottom=0
 466         
 467 #RS end
 468 
 469 
 470 #    
 471 #SYNC3
 472 
 473 #        import cgi
 474 #        self.request.form = cgi.FieldStorage(environ = {'QUERY_STRING': 'action=print&export=1'})
 475 #RS refactored code
 476         self.setFormatter(exp_format)
 477         if exp_format in ["word","pdf"]:    
 478             self.request.cfg.default_markup="wiki_word"
 479 #RS end
 480 
 481 
 482 ##        print "Filtering from %s pages" % (len(all_pages))
 483         if self.options.page:
 484             pages = [self.options.page]
 485         else:
 486 #RS continue
 487 
 488 ##            pages = list(wikiutil.getPageList(config.text_dir))
 489 #RS filter pagelist by pattern
 490             query = search.QueryParser(regex=1, titlesearch=1,
 491                                        case=1).parse_query(page_pattern)
 492             results = search.searchPages(self.request, query)
 493             results.sortByPagename()
 494 #            pages= results.pageList(self.request, self.formatter)
 495             pages= [(hit.page_name) for hit in results.hits]
 496 
 497 #RS stop
 498 ##            all_pages = list(wikiutil.getPageList(config.text_dir))
 499 ##            if page_pattern:
 500 ##                try:
 501 ##                    needle_re = re.compile(page_pattern, re.IGNORECASE)
 502 ##                except re.error, e:
 503 ##                    print ('%s' %
 504 ##                    _("ERROR in pattern regex '%s'") % (inc_pattern,), e)
 505 ##                pages = filter(needle_re.search, all_pages)
 506 ##            else:
 507 ##                pages=all_pages
 508 #RS end
 509 #        pages.sort()
 510 ##        print "Dumping %s pages" % (len(pages))
 511         
 512         errfile = os.path.join(self.outputdir, 'error.log')
 513         self.errlog = open(errfile, 'w')
 514         errcnt = 0
 515 
 516         for pagename in pages:
 517 #RS refactored code
 518 #RS extension handling
 519 #            file = wikiutil.quoteFilename(pagename) + self.ext
 520             file = wikiutil.quoteWikinameURL(pagename) # we have the same name in URL and FS
 521             _util.log('Writing "%s"...' % file)
 522 #RS end
 523             output=self.dumpPage(pagename,file)
 524             self.postprocessPage(pagename,file,output)
 525 
 526 #RS disabled, HTML only and nonsense for our purpose
 527 #RS end
 528         self.errlog.close()
 529         if errcnt:
 530             print >>sys.stderr, "*** %d error(s) occurred, see '%s'!" % (errcnt, errfile)
 531 
 532 def run():
 533     print "this is run from ROBERTS FARM"
 534 
 535     MoinExporter().run()
 536 
# Start the export only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
 539 
 540 """
 541 
 542 
 543 
 544 
 545 
 546 """

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2005-10-24 16:41:36, 3.6 KB) [[attachment:FrontPage.pdf]]
  • [get | view] (2005-10-20 23:45:56, 124.5 KB) [[attachment:SyntaxReference.doc]]
  • [get | view] (2005-10-20 23:27:53, 19.0 KB) [[attachment:moin_export.py]]
  • [get | view] (2005-10-24 14:06:32, 3.9 KB) [[attachment:request_ors.py]]
  • [get | view] (2005-10-27 14:27:18, 81.6 KB) [[attachment:text_pdf.py]]
  • [get | view] (2005-10-20 23:29:45, 66.2 KB) [[attachment:text_word.py]]
  • [get | view] (2005-11-03 17:30:17, 53.9 KB) [[attachment:wiki_word.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.