MoinMoin Exporter Package - Work in Progress

I do not have the time to do this the proper way: isolating the minimum necessary core patches, testing them extensively, writing unit tests, documenting everything, doing i18n...

However, as some people have asked for more info, I have at least created some patch files for all the affected modules and decided to publish the whole thing as-is.

Patches to core MoinMoin 1.3.5

I put them here as code snippets rather than attachments, so I can comment on questions later on.

MoinMoin.Page

I have reduced this to the core patches relevant for the exporter feature; however, I did not test this reduced patch intensively (my work environment contains a lot of other patches in this file). The reasons are commented in the patch. The patches mostly serve three purposes: to correct the calling sequence for formatter functions (which is wrong in the original, but that does not matter for the default formatters), to avoid a parser reset, and to use the text/html formatter even if it is not set as default_formatter.

--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\Page.py      Sun Jul 24 13:06:26 2005
+++ E:\moin13\patch\Page.py     Mon Oct 24 15:02:58 2005
@@ -1,13 +1,15 @@
 # -*- coding: iso-8859-1 -*-
 """
     MoinMoin - Page class
 
     @copyright: 2000-2004 by Jürgen Hermann <jh@web.de>
     @license: GNU GPL, see COPYING for details.
+    contains core patches by RobertSeeger (#RS) for the MoinExporter feature
+     
 """
 
 import StringIO, os, re, urllib, random, codecs
 
 from MoinMoin import config, caching, user, util, wikiutil
 from MoinMoin.logfile import eventlog
 from MoinMoin.util import filesys, web
@@ -964,17 +967,23 @@
             verb, args = (line[1:]+' ').split(' ', 1)
             verb = verb.lower()
             args = args.strip()
 
             # check the PIs
             if verb == "format":
                 # markup format
-                pi_format, pi_formatargs = (args+' ').split(' ',1)
-                pi_format = pi_format.lower()
-                pi_formatargs = pi_formatargs.strip()
+#RS for some exporter formats we use a modified wiki parser 'wiki_word'
+#   and we do have to ignore a page format instruction 'wiki' in this case
+#   to keep using our modified parser
+                m_pi_format, m_pi_formatargs = (args+' ').split(' ',1)
+                if m_pi_format!="wiki":
+                    pi_format=m_pi_format
+                    pi_format = pi_format.lower()
+                    pi_formatargs = m_pi_formatargs.strip()
+#RS end
             elif verb == "refresh":
                 if self.cfg.refresh:
                     try:
                         mindelay, targetallowed = self.cfg.refresh
                         args = args.split()
                         if len(args) >= 1:
                             delay = max(int(args[0]), mindelay)
@@ -1118,14 +1128,25 @@
                     request.write(''.join(pi_formtext))
+#RS extensions for export mode
+            else:
+#nearly same as default formatter but used in export
+                if self.formatter.mimetype=="text/html":
+                    title = self.split_title(request)
+                    wikiutil.send_title(request, title,  page=self, link='', msg='',
+                                        pagename=self.page_name, print_mode=1,
+                                        media='print', pi_refresh=None,
+                                        allow_doubleclick=0,trail=None
+                                        )
+#RS end
 
         # try to load the parser
         Parser = wikiutil.importPlugin(self.request.cfg, "parser",
                                        self.pi_format, "Parser")
         if Parser is None:
             # default to plain text formatter (i.e. show the page source)
             del Parser
@@ -1157,23 +1178,38 @@
             if getattr(request, 'footnotes', None):
                 from MoinMoin.macro.FootNote import emit_footnotes
                 request.write(emit_footnotes(request, self.formatter))
 
         # end wiki content div
         request.write(self.formatter.endContent())
 
+#RS for the exporter, self.formatter.endDocument() must be the last activity
+#   as some formatters really have to close the document.
+#   also, we should only do self.formatter.endDocument() if we also did
+#   self.formatter.startDocument()
+
+#        doc_trailer = self.formatter.endDocument()
+#
         # end document output
-        doc_trailer = self.formatter.endDocument()
         if not content_only:
             # send the page footer
             if self.default_formatter:
                 wikiutil.send_footer(request, self.page_name, print_mode=print_mode)
-
-            request.write(doc_trailer)
-
+                doc_trailer = self.formatter.endDocument()
+                request.write(doc_trailer)
+            elif self.formatter.mimetype=="text/html":
+                wikiutil.send_footer(request, self.page_name, print_mode=print_mode)
+                doc_trailer = self.formatter.endDocument()
+                request.write(doc_trailer)
+            else:
+                doc_trailer = self.formatter.endDocument()
+                request.write(doc_trailer)
+                
+#RS end
+        
         # cache the pagelinks
         if do_cache and self.default_formatter and page_exists:
             cache = caching.CacheEntry(request, self, 'pagelinks')
             if cache.needsUpdate(self._text_filename()):
                 links = self.formatter.pagelinks
                 cache.update('\n'.join(links) + '\n', True)

MoinMoin.parser.wiki

This is not so important; I don't even recall exactly why I did this, but it should not hurt.

--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\parser\wiki.py       Sat Jul 30 14:51:12 2005
+++ E:\moin13\patch\parser\wiki.py      Mon Oct 24 15:22:46 2005
@@ -1,13 +1,14 @@
 # -*- coding: iso-8859-1 -*-
 """
     MoinMoin - MoinMoin Wiki Markup Parser
 
     @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de>
     @license: GNU GPL, see COPYING for details.
+    contains core patches by RobertSeeger (#RS) for the MoinExporter feature
 """
 
 import os, re
 from MoinMoin import config, wikimacro, wikiutil
 from MoinMoin.Page import Page
 from MoinMoin.util import web
 
@@ -526,15 +528,17 @@
         """Handle definition lists."""
         result = []
         self._close_item(result)
         #self.inhibit_p = 1
         self.in_dd = 1
         result.extend([
             self.formatter.definition_term(1),
-            self.formatter.text(match[1:-3]),
+#RS ignore character formatting inside DT
+            self.formatter.text(match[1:-3].replace("'","")),
+#RS end
             self.formatter.definition_term(0),
             self.formatter.definition_desc(1),
             ## CHANGE: no automatic paragraph
             ##self.formatter.paragraph(1)
         ])
         return ''.join(result)

MoinMoin.formatter.base

I introduced a formatter property self.mimetype to be able to recognize a loaded formatter (e.g. in some of my macros I use the formatter in a different way depending on its mimetype). In addition, I defined a function pure, to be called by my modified parser if it finds some "pure" text or characters, as well as a function nbsp to replace the hard-coded use of &nbsp; in cases where the output format is not HTML or similar. I did this back in 1.1, when "pure" text was just ignored by the default wiki parser and formatter; most likely this can be done better or is not even needed in 1.3.5.

--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\formatter\base.py    Tue Jul 26 20:46:52 2005
+++ E:\moin13\patch\formatter\base.py   Mon Oct 24 15:51:58 2005
@@ -1,13 +1,14 @@
 # -*- coding: iso-8859-1 -*-
 """
     MoinMoin - Formatter Base Class
 
     @copyright: 2000 - 2004 by Jürgen Hermann <jh@web.de>
     @license: GNU GPL, see COPYING for details.
+    contains core patches by RobertSeeger (#RS) for the MoinExporter feature
 """
 
 from MoinMoin import wikiutil
 import re, types
 
 class FormatterBase:
     """ This defines the output interface used all over the rest of the code.
@@ -18,15 +19,17 @@
     """
 
     hardspace = ' '
 
     def __init__(self, request, **kw):
         self.request = request
         self._ = request.getText
-
+#RS additional property "mimetype"
+        self.mimetype="text/base"
+#RS end
         self._store_pagelinks = kw.get('store_pagelinks', 0)
         self._terse = kw.get('terse', 0)
         self.pagelinks = []
         self.in_p = 0
         self.in_pre = 0
         self._highlight_re = None
         self._base_depth = 0
@@ -107,14 +110,27 @@
         return u'<img%s>' % attrstr
 
     def smiley(self, text):
         return text
 
     # Text and Text Attributes ########################################### 
     
+#RS additional handler for pure and nbsp
+#may be obsolete as text is now properly used by parser??
+    def pure(self, text):
+        """
+        this handles the "not in any markup" case
+        used in formatters with "side effects"
+        """
+        return self._text(text)
+
+    def nbsp(self):
+        return self.hardspace
+
+#RS end
     def text(self, text):
         if not self._highlight_re:
             return self._text(text)
             
         result = []
         lastpos = 0
         match = self._highlight_re.search(text)

MoinMoin.formatter.text_html

This redefines endDocument() to properly close the HTML tags in export mode (at least in 1.1, Moin did not do this properly in the page handling or formatter calls).

--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\formatter\text_html.py       Tue Jul 26 20:46:52 2005
+++ E:\moin13\patch\formatter\text_html.py      Mon Oct 24 15:53:43 2005
@@ -1,13 +1,14 @@
 # -*- coding: iso-8859-1 -*-
 """
     MoinMoin - "text/html+css" Formatter
 
     @copyright: 2000 - 2004 by Jürgen Hermann <jh@web.de>
     @license: GNU GPL, see COPYING for details.
+    contains core patches by RobertSeeger (#RS) for the MoinExporter feature
 """
 
 from MoinMoin.formatter.base import FormatterBase
 from MoinMoin import wikiutil, i18n, config
 from MoinMoin.Page import Page
 
 class Formatter(FormatterBase):
@@ -15,14 +16,17 @@
         Send HTML data.
     """
 
     hardspace = '&nbsp;'
 
     def __init__(self, request, **kw):
         apply(FormatterBase.__init__, (self, request), kw)
+#RS additional property "mimetype"
+        self.mimetype="text/html"
+#RS end
 
         # inline tags stack. When an inline tag is called, it goes into
         # the stack. When a block element starts, all inline tags in
         # the stack are closed.
         self._inlineStack = []
 
         self._in_li = 0
@@ -144,14 +148,22 @@
             # The code that calls us should keep correct calling order.
             if tag in self._inlineStack:
                 self._inlineStack.remove(tag)
             return '</%s>' % tag
 
 
     # Public methods ###################################################
+    def startDocument(self, pagename):
+        return ""
+
+    def endDocument(self):
+        if self.request.export_mode:
+            return "\n<!-- EOD -->\n</body>\n</html>\n"
+        return ""
+
 
     def startContent(self, content_id='content', **kwargs):
         """ Start page content div """
 
         # Setup id
         if content_id!='content':
             aid = 'top_%s' % (content_id,)
@@ -733,10 +745,15 @@
                 attrs = self._checkTableAttr(attrs, '')
             return self.open(tag, newline=1, attr=attrs)
         return self.close(tag)
 
     def escapedText(self, text):
         return wikiutil.escape(text)
 
+#RS additional handler for nbsp
+    def nbsp(self):
+        return '&nbsp;'
+
+#RS end
+
     def rawHTML(self, markup):
         return markup
-

Additions to the configuration

    #the physical path to your /wiki url, needed to export smiley images etc.
    url_prefix_dir = 'E:\\moin13\\share\\moin\\htdocs\\'

You also need to have a way to configure the target export directory - at the moment I have not bothered to integrate this into the config, but rather define it in the used action or command batch.

Additional files

MoinMoin.request_ors

I have derived from the core request classes to encapsulate some general patches and enhancements. I did this only for the CGI (needed if the exporter is called via action) and CLI (needed for batch use of the exporter), you may have to derive for your type of server. The most relevant addition is the attachment export (I have seen today a patch to moin_dump.py on MoinMoinPatch that is quite similar in its effect).

MoinMoin.parser.wiki_word

I had to create a modified wiki parser that is only used in the exporter context and only for the text_word and text_pdf formatters. Basically, it also parses for unformatted text words and characters, which the normal parser-formatter combination does not need.

The Exporter Files

MoinMoin.scripts.moin_export

This is based on moin_dump.py but has been completely refactored into a class and enhanced to fit my purpose.

The new formatters

MoinMoin.formatter.text_word

The implementation is nearly finished for all the main markup features (however migration to 1.3.5 broke some tables etc.), see an export of page SyntaxReference: SyntaxReference.doc

You need to have Microsoft Word (2000) on your MoinMoin server and should have generated a python wrapper with makepy (oh yes, and you need Mark Hammond's win32 package (win32all build 163), of course). Due to some weird exceptions I have decided to serve the Word control with Visible=1; this means that you will watch Word "typing in" your target document!

If you have a different server environment (e.g. Unix) then you could still use this on a special Windows client machine with Microsoft Word and python with a special setting:

MoinMoin.formatter.text_pdf

This is still in its early stages and will be finalized by Alexander Bormann in a few weeks. It needs the reportlab package. Here is a preview version that may break e.g. on some macro calls, but it renders something simple like FrontPage (see FrontPage.pdf):

Usage Scenarios

Exporting from a command line batch

This can be used to batch export multiple pages matching PAGEPATTERN - be aware that Word may be slow, though ;-().

rem Batch export of wiki pages through MoinMoin.scripts.moin_export.
rem NOTE(review): GATEWAY_INTERFACE is presumably set so that MoinMoin
rem takes its CGI code path when run from the command line - confirm.
set GATEWAY_INTERFACE=CGI/1.1
rem DUMPNAME is only used to build the output directory name below.
set DUMPNAME=word
rem No trailing blank here: "set" stores everything after "=" verbatim,
rem so a trailing space would become part of the path.
set PYTHONPATH=E:\moin13\Lib\site-packages
set TARGETROOT=E:\moin13
set WIKIURL="http://localhost/moin13/"
set DUMPPATH=%TARGETROOT%\dump_%DUMPNAME%
rem Pattern selecting the pages to export.
set PAGEPATTERN=SyntaxReference

rem Wrap the pattern in quotes (presumably so that it is passed to the
rem script as one argument even if it contains blanks).
set PAGEPATTERN="%PAGEPATTERN%"

rem Run the exporter; stdout goes to dump.log, stderr to dumperr.log.
E:\python23\python.exe -c "from MoinMoin.scripts.moin_export import run; run()" --wiki=%WIKIURL% --format=word --pattern=%PAGEPATTERN% %DUMPPATH% 1>dump.log 2>dumperr.log
pause

Exporting from an Action

To use this, you may have to modify the server to use a different request from request_ors.py, in my case I modified the moin.cgi (or you could try to create such a request inside the action?):

from MoinMoin.request_ors import RequestORSCGI

# Create the CGI request from request_ors instead of the stock request class,
# so that the exporter enhancements are available to actions.
request = RequestORSCGI()
# Disable the more critical request_ors modifications (author's wording;
# which modifications this flag covers is not shown here).
request.ors_mod_active=0

Here is a test action that can be applied to a single page to trigger the export of this page (in Word format, but for PDF it looks very similar). We will later provide an action with a proper user dialog (select format, output name ...maybe even return a link to the created file). Right now, the output directory is hardcoded and you must pick up the result file from the server.

   # Imports
   import string, time, sys, os, copy
   from MoinMoin import user, webapi, wikiutil
   from MoinMoin.PageEditor import PageEditor
   from MoinMoin.scripts.moin_export import MoinExporter


   def execute(pagename, request):
       """ Test action: export one page with MoinExporter, then show it again.

       The export format ("word") and the output directory are hard-coded.
       The result file has to be picked up from the server.

       @param pagename: name of the page to export
       @param request: the current request; a shallow copy of it is handed
                       to the exporter
       @return: whatever page.send_page() returns for the confirmation view
       """
       _ = request.getText
       page = PageEditor(request, pagename)

       # alternatives: "html", "plain", "pdf"
       exp_format = "word"

       # The exporter overrides wikiutil.quoteWikinameURL and we switch the
       # default markup below, so remember both in order to restore them.
       qfn_orig = wikiutil.quoteWikinameURL
       markup_orig = request.cfg.default_markup

       try:
           exporter = MoinExporter()
           # Use a copy because the request gets modified.
           # NOTE(review): copy.copy() is shallow, so request.user, request.form
           # and request.cfg are shared with the original request and the
           # changes below are visible there as well - confirm this is intended.
           exporter.request = copy.copy(request)
           # set target output directory
           exporter.setOutputDir(r"E:\moin13\dump_word")
           # redirect the request to action=print and indicate "export" mode
           exporter.request.user.show_topbottom = 0
           exporter.request.form['action'] = 'print'
           exporter.request.form['export'] = '1'
           # assign the export formatter
           exporter.setFormatter(exp_format)
           # use the special parser for some export formats
           if exp_format in ("word", "pdf"):
               request.cfg.default_markup = "wiki_word"

           # This file receives the plain output of the formatter; the "real"
           # target file is different for word and pdf, as they create it as a
           # "side effect".
           filename = wikiutil.quoteWikinameFS(pagename) + exporter.ext

           # Error log (right now the same file is used for every export -
           # this may be bad).
           errfile = os.path.join(exporter.outputdir, 'error.log')
           exporter.errlog = open(errfile, 'w')
           try:
               # Dump the page using the formatter. For debugging, send
               # '<pre>%s</pre>' % output as msg instead of the message below.
               output = exporter.dumpPage(pagename, filename)
           finally:
               # close the log even if the export failed
               exporter.errlog.close()
       finally:
           # Always undo the global changes, otherwise a failed export would
           # leave the wiki with the exporter's settings.
           wikiutil.quoteWikinameURL = qfn_orig
           request.cfg.default_markup = markup_orig

       # Translate the format string first and interpolate afterwards, so the
       # message can be found in the translation catalog. The page name comes
       # from the request and ends up in HTML, so escape it.
       return page.send_page(request,
           msg='<strong>%s</strong>' % (
               _('Export of page %s completed.') % wikiutil.escape(pagename)))

MoinMoin: RobertSeeger/MoinExporter (last edited 2009-02-18 15:15:20 by DennisBenzinger)