Attachment 'replace_in_wiki_LDAP.py'

Download

   1 #!/bin/env python
   2 """
   3     WikiRpc Search-and-replace tool for MoinMoin 
   4 
   5     Copyright 2008 Remco Boerma <r.boerma@drenthecollege.nl>
   6 
   7     Purpose: 
   8         Using this script you can search-and-replace in multiple wiki pages at once. 
   9         {{{ preformatted }}}, `text` and = level 1 titles = are preserved at will.
  10 
  11         There are several parameters to be used, some of them set using environment
  12         variables ('username' and 'password' for example). While sometimes one wants 
  13         to change every occurrence of a needle, at times it's very important not to
  14         change just everything. The user is asked at run time whether safe replacement
  15         should be used or not. 
  16 
  17     Configuration:
  18         Environment variables: 
  19             username :: the username to be used for xmlrpc 
  20             password :: the password to be used for xmlrpc <optional>
  21             moin_url :: the hostname of your moinmoin server
  22 
  23         Hardcoded:
  24             MOIN_URL :: set this to your site's hostname if you don't want to use 
  25                         the environment variable
  26             RPC_URL  :: normally set using your MOIN_URL, but this allows for 
  27                         complex urls
  28             several regular expressions used in get_save_regions ::
  29                         regular expressions used to test what lines not to touch
  30 
  31     Usage: 
  32         1. Set the username and moin_url environment variables. If you like, 
  33         set the password variable as well (or retype this at every execution). 
  34         2. start replace_in_wiki.py 
  35         3. enter the expression to select the pages to apply this search-and-replace on
  36         4. enter the regular expression (FORCED) to search for in the pages
  37         5. enter the replacement expressions (can use \1 and \2 etc)
  38         6. enter yes, y or simply hit enter to enable SaveReplacer (safe replace, which
  39            keeps {{{preformatted}}} `text` = intact = ) or enter anything else to replace
  40            every occurrence per page.
  41         7. Examine the result of the diff comparison of the old and new content and decide:
  42         8. Either accept the change (yes, y, or simply enter) and upload this new 
  43            page to the server, or enter any other value (and enter) to not upload the
  44            new version. 
  45         
  46     Notes: 
  47         1. The regular expression search is performed with re.MULTILINE and re.DOTALL 
  48            as default options: this allows for multiline matches, as well as using ^ and
  49            $ in your expression on a line basis. 
  50 
  51 
  52     Verified to work with: 
  53       * Python 2.5
  54       * MoinMoin 1.5.6 (using plain http authentication)
  55 
  56     @copyright: GPL v2
  57 
  58 """
  59 import xmlrpclib
  60 import getpass,os,re,difflib,sys
  61 import time
  62 
  63 
  64 MOIN_URL  = os.environ.get('moin_url','beohome/wiki')
  65 RPC_URL   = "http://%s/?action=xmlrpc2" % MOIN_URL
  66 
  67 # Number of seconds to sleep to not trigger surge protection 
  68 # on the server (503 SERVICE UNAVAILABLE)
  69 surgesleep = 1
  70 
  71 DEBUG = 0 # use this for SaveReplacer debugging... 
  72 class SaveReplacer(object):
  73     def __init__(self,text):
  74         self.text = text
  75 
  76     def _is_a_save_region(self,start,end):
  77         global DEBUG
  78         if DEBUG>2: print 'Checking %d,%d in %s'%(start,end,self._save_regions)
  79         for save_start,save_end in self._save_regions:
  80             if (start >= save_start and start <= save_end) or \
  81                 (end >= save_start and end <= save_end): 
  82                   return True
  83         else :
  84             return False
  85 
  86     def _do_replace(self,match):
  87         global DEBUG
  88         start = match.start()
  89         end = match.end()
  90         text = match.string
  91         newtext = match.expand(self.replacement)
  92         if not self._is_a_save_region(start,end):
  93             if DEBUG: print 'Changing',`text[start:end]`,'@ %d-%d' % (start,end),'to',`newtext`
  94             return newtext
  95         else:
  96             if DEBUG: print 'PREVENTED',`self.text[start:end]`,'@ %d-%d' % (start,end),'to',`newtext`
  97             return match.string[start:end]
  98 
  99     def get_save_regions(self):
 100         regions  = [match.span() for match in re.finditer(r'^=\s.*?\s+=\s*$',self.text,re.MULTILINE)] # multiline to work per line with ^ and $
 101         regions += [match.span() for match in re.finditer('{{{.*?}}}',self.text,re.DOTALL)] # dotall for . being all including newlines
 102         regions += [match.span() for match in re.finditer('`.*?`',self.text)]
 103         return regions
 104 
 105     def run(self,needle,replacement,options=0):
 106         self.replacement = replacement
 107         self._save_regions = self.get_save_regions()
 108         self.text = needle.sub(self._do_replace,self.text,options)
 109         del self._save_regions
 110         del self.replacement
 111 
 112 if __name__ == '__main__':
 113     assert sys.version_info[0:2]>=(2,5), "\n\nThis script requires python 2.5 or higher .. \n\n(re.finditer() method should accept options...)"
 114     username = os.environ.get('username',None)
 115 
 116     if not username: 
 117         username = getpass.getpass('username: ')
 118     print 'Using username:',username
 119     password = os.environ.get('password',None)
 120     if not password: 
 121         password = getpass.getpass('password for '+username+' : ')
 122 
 123     server = xmlrpclib.ServerProxy(RPC_URL)
 124 
 125     print 'searching...',
 126     pagelist = [ pagename for pagename, junk in server.searchPages(raw_input('Page search:'))]
 127     maxcount = len(pagelist)
 128     print 'Found',maxcount,'pages...'
 129     needle = re.compile(unicode(raw_input('Needle: ')))
 130     replacement = unicode(raw_input('Replace with: '))
 131     wants_safe_replace = raw_input('Safe replace  yes/no [yes]:').lower().strip() in ['yes','y','']
 132 
 133     homewiki = xmlrpclib.ServerProxy(RPC_URL)
 134     auth_token = homewiki.getAuthToken(username, password)
 135     assert auth_token != None
 136     for count,pagename in enumerate(pagelist):
 137 
 138         time.sleep(surgesleep)
 139         print '--[%d/%d]------[%s]----' % (count+1,maxcount,pagename)
 140         text = unicode(server.getPage(pagename))
 141         if wants_safe_replace:
 142             save = SaveReplacer(text)
 143             save.run(needle,replacement,re.MULTILINE + re.DOTALL)
 144             patched = save.text
 145         else:
 146             patched = needle.sub(replacement,text,re.MULTILINE + re.DOTALL)
 147         if patched == text:
 148             print 'NO Changes'
 149             continue 
 150         lines = list(difflib.Differ().compare(text.splitlines(1),patched.splitlines(1)))
 151         for line in lines:
 152             if line[0] in '-+?':
 153                 print line.encode('utf-8'),
 154         update_answer=raw_input('Update wiki? yes/no [yes] ').lower().strip()
 155         if update_answer in ['yes','y','',' ']:
 156             time.sleep(surgesleep)
 157             print 'Updating',
 158 
 159             mc = xmlrpclib.MultiCall(homewiki)
 160             mc.applyAuthToken(auth_token)
 161             mc.putPage(pagename,patched)
 162 
 163             updated = mc()
 164             assert updated[0] == 'SUCCESS'
 165         else:
 166             print 'Skipped'
 167             

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2008-02-22 15:57:42, 6.4 KB) [[attachment:replace_in_wiki.py]]
  • [get | view] (2011-03-01 18:11:30, 6.7 KB) [[attachment:replace_in_wiki_LDAP.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.