Tool to convert multiple files between file encodings known to Python. Useful when translating Hebrew and other RTL languages.

/!\ This script alone is not enough to switch a wiki to UTF-8. A tool for a complete UTF-8 conversion is included in moin--main--1.3.

   1 #!/usr/bin/env python
   2 """ Convert multiple files between file encodings known to Python.
   3 
   4 Expand the file patterns given as command line arguments and save a copy
   5 of each matching file in the output encoding, adding the encoding name to the copy's file name.
   6 
   7 Author: Nir Soffer nirs at freeshell.org"""
   8 
   9 import os.path, sys, glob, operator
  10 import codecs   # does the encoding
  11 
  12 
  13 def main():
  14     # get arguments
  15     patterns = sys.argv[1:]
  16     if not patterns:
  17             sys.exit('Noting to do\nUsage: unicode2utf-8.py [file_pattern...]')
  18 
  19     # expand files - for windows comptibility
  20     # windows shell does not expand * etc.
  21     files = reduce(
  22         operator.add,               # add file lists
  23         map(glob.glob, patterns)    # expand each pattern
  24         ) 
  25 
  26     for path in files:
  27 
  28         # is it a file?
  29         if os.path.isfile(path):
  30             convert(path)
  31         else:
  32             print 'Skipping %s: Not a file' % path
  33 
  34 
  35 def convert(path, inEncoding='utf-16', outEncoding='utf-8'):
  36     """ convert - convert file encoding
  37 
  38     Open the file using codecs module which does the dirty work.
  39     Support all python built in encodings"""
  40 
  41     try:
  42         # read data from the file
  43         inFile = codecs.open(path, 'rb', encoding=inEncoding)
  44         data = inFile.read()
  45         inFile.close()
  46 
  47         # make new name
  48         # insert '.outEncoding' before the extension
  49         split = path.rfind('.')
  50         name, ext = path[:split], path[split:]
  51         newName = name + '.' + outEncoding + ext
  52         
  53         # write to output file
  54         outFile = codecs.open(newName, 'wb', encoding=outEncoding)
  55         outFile.write(data)
  56         outFile.close()
  57 
  58         # log
  59         print 'Converted %s to %s' % (path, newName)
  60 
  61     except (IOError, OSError), why:
  62         print 'Could not convert %s: %s' % (path, str(why))
  63 
  64     
  65 if __name__ == '__main__':  # run the conversion only when executed as a script, not when imported
  66     main()

MoinMoin: Unicode2Utf8 (last edited 2007-10-29 19:13:56 by localhost)