1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 """
   4    URL class
   5 
   6    @copyright: 2006 Thomas Waldmann
   7    @license: GNU GPL, see COPYING for details
   8 """
   9 import posixpath, urllib, urlparse, cgi
  10 
  11 class URL(object):
  12     """ represents an Uniform Resource Locator """
  13     url_coding = 'utf-8' # we use this for en/decoding URLs
  14     default_ports = { 'ftp': 21, 'http': 80, 'https': 443, }
  15 
  16     def __init__(self, url=None, **kw):
  17         """create URL object
  18 
  19            If url is given, it can be:
  20            * str (will be parsed and also stored "as is")
  21            * unicode (will be encoded, then: see str)
  22            * another URL object
  23            
  24            If url is given, it will set the defaults. Those defaults
  25            can be modified by specifying a different value as kw argument.
  26 
  27            If url is not given, you can also give all pieces needed to
  28            make one by kw arguments:
  29            * scheme
  30            * server
  31            * alternatively: host and maybe port
  32            * path
  33            * fragment (e.g. "anchorid")
  34            * and named query args 
  35         """
  36         # some defaults to start with:
  37         self._url = None
  38         self._scheme = 'http'
  39         self._server = ''
  40         self._host = ''
  41         self._port = None
  42         self._path = ''
  43         self._query = ''
  44         self._querydict = {}
  45         self._fragment = ''
  46         if isinstance(url, URL):
  47             # there is an easier way for this:
  48             self._scheme = url.scheme
  49             self._server = url.server
  50             #self._host = url.host
  51             #self._port = url.port
  52             self._path = url.path
  53             self._query = url.query
  54             self._querydict = url.querydict
  55             self._fragment = url.fragment
  56         elif isinstance(url, str):
  57             self.url = url
  58         elif isinstance(url, unicode):
  59             self.url = url.encode(self.url_coding)
  60         elif url is None:
  61             pass
  62         else:
  63             raise AttributeError, "unexpected argument type for url_default"
  64         
  65         # now update what we have with the kw arg stuff:
  66         if kw:
  67             self._update(**kw)
  68 
  69     def _update(self, **kw):
  70         """ update what we have from kw, care for unicode encoding """
  71         for key in ('scheme', 'server', 'host', 'port', 'path', 'fragment',):
  72             if kw.has_key(key):
  73                 value = kw[key]
  74                 del kw[key]
  75                 if isinstance(value, unicode):
  76                     value = value.encode(self.url_coding)
  77                 getattr(self, '_set_%s' % key)(value)
  78         # add what's left to the query dict and remove query arguments that are None:
  79         qd = self.querydict
  80         for key, value in kw.items():
  81             if value is None:
  82                 try:
  83                     del qd[key]
  84                 except KeyError:
  85                     pass
  86             elif isinstance(value, unicode):
  87                 qd[key] = value.encode(self.url_coding)
  88             else:
  89                 qd[key] = str(value)
  90         self.querydict = qd
  91     
  92     def _split(self, url):
  93         """ wrapper around urlparse.urlsplit """
  94         self._scheme, self._server, self._path, self._query, self._fragment = urlparse.urlsplit(url)
  95         # we also split the server part into host and port:
  96         host_port = self._server.split(':', 1) # rsplit would be better, but is 2.4 only
  97         self._host = host_port[0]
  98         try:
  99             self._port = int(host_port[1])
 100         except (ValueError, IndexError), err:
 101             self._port = URL.default_ports.get(self._scheme)
 102 
 103     def __str__(self):
 104         return self.url
 105 
 106     def __repr__(self):
 107         return '<URL "%s">' % self
 108     
 109     def _join_host_port(self):
 110         """ join host and port components to make a server string """
 111         port = self._port
 112         def_port = URL.default_ports.get(self._scheme)
 113         if port == def_port or port is None:
 114             self._server = self._host
 115         else:
 116             self._server = "%s:%d" % (self._host, port)
 117     
 118     def _get_scheme(self):
 119         if not hasattr(self, '_scheme'):
 120             self._split(self._url)
 121         return self._scheme
 122     def _set_scheme(self, v):
 123         if v != self._scheme:
 124             self._scheme = v
 125             del self.url
 126     def _del_scheme(self):
 127         try:
 128             del self._scheme
 129         except AttributeError:
 130             pass
 131 
 132     def _get_server(self):
 133         if not hasattr(self, '_server'):
 134             self._split(self._url)
 135         return self._server
 136     def _set_server(self, v):
 137         if v != self._server:
 138             self._server = v
 139             del self.host
 140             del self.port
 141             del self.url
 142     def _del_server(self):
 143         try:
 144             del self._server
 145         except AttributeError:
 146             pass
 147 
 148     def _get_host(self):
 149         if not hasattr(self, '_host'):
 150             self._split(self._url)
 151         return self._host
 152     def _set_host(self, v):
 153         if v != self._host:
 154             self._host = v
 155             self._join_host_port()
 156             del self.url
 157     def _del_host(self):
 158         try:
 159             del self._host
 160         except AttributeError:
 161             pass
 162 
 163     def _get_port(self):
 164         if not hasattr(self, '_port'):
 165             self._split(self._url)
 166         return self._port
 167     def _set_port(self, v):
 168         if v != self._port:
 169             self._port = v
 170             self._join_host_port()
 171             del self.url
 172     def _del_port(self):
 173         try:
 174             del self._port
 175         except AttributeError:
 176             pass
 177 
 178     def _get_path(self):
 179         if not hasattr(self, '_path'):
 180             self._split(self._url)
 181         return self._path
 182     def _set_path(self, v):
 183         if v != self._path:
 184             self._path = v
 185             del self.url
 186     def _del_path(self):
 187         try:
 188             del self._path
 189         except AttributeError:
 190             pass
 191 
 192     def _get_query(self):
 193         if not hasattr(self, '_query'):
 194             self._split(self._url)
 195         return self._query
 196     def _set_query(self, v):
 197         if v != self._query:
 198             self._query = v
 199             del self.querydict
 200             del self.url
 201     def _del_query(self):
 202         try:
 203             del self._query
 204         except AttributeError:
 205             pass
 206 
 207     def _get_querydict(self):
 208         if not hasattr(self, '_querydict'):
 209             self._querydict = cgi.parse_qs(self.query)
 210         return self._querydict
 211     def _set_querydict(self, v):
 212         self._querydict = v
 213         self._query = urllib.urlencode(self._querydict)
 214         del self.url
 215     def _del_querydict(self):
 216         try:
 217             del self._querydict
 218         except AttributeError:
 219             pass
 220     
 221     def _get_fragment(self):
 222         if not hasattr(self, '_fragment'):
 223             self._split(self._url)
 224         return self._fragment
 225     def _set_fragment(self, v):
 226         if v != self._fragment:
 227             self._fragment = v
 228             del self.url
 229     def _del_fragment(self):
 230         try:
 231             del self._fragment
 232         except AttributeError:
 233             pass
 234 
 235     def _get_url(self):
 236         if not hasattr(self, '_url') or self._url is None:
 237             self._url = urlparse.urlunsplit((self.scheme, self.server, self.path, self.query, self.fragment))
 238         return self._url
 239     def _set_url(self, v):
 240         if v != self._url:
 241             self._url = v
 242             del self.scheme
 243             del self.host
 244             del self.port
 245             del self.server
 246             del self.path
 247             del self.query
 248             del self.querydict
 249             del self.fragment
 250     def _del_url(self):
 251         try:
 252             del self._url
 253         except AttributeError:
 254             pass
 255 
 256     def _get_local(self):
 257         if not hasattr(self, '_local'):
 258             self._local = urlparse.urlunsplit(('', '', self.path, self.query, self.fragment))
 259         return self._local
 260     
 261     scheme = property(_get_scheme, _set_scheme, _del_scheme)
 262     server = property(_get_server, _set_server, _del_server)
 263     host = property(_get_host, _set_host, _del_host)
 264     port = property(_get_port, _set_port, _del_port)
 265     path = property(_get_path, _set_path, _del_path)
 266     query = property(_get_query, _set_query, _del_query)
 267     querydict = property(_get_querydict, _set_querydict, _del_querydict)
 268     fragment = property(_get_fragment, _set_fragment, _del_fragment)
 269     url = property(_get_url, _set_url, _del_url)
 270     local = property(_get_local)
 271 
 272 if __name__ == '__main__':
 273     curr = URL('http://www.google.de/asdfasdf/asdfasdf?asdfasdf=adsfsadf/')
 274     assert str(curr) == 'http://www.google.de/asdfasdf/asdfasdf?asdfasdf=adsfsadf/'
 275     assert curr.scheme == 'http'
 276     assert curr.server == 'www.google.de'
 277     assert (curr.host, curr.port) == ('www.google.de', 80)
 278     assert curr.path == '/asdfasdf/asdfasdf'
 279     assert curr.query == 'asdfasdf=adsfsadf/'
 280     assert curr.fragment == ''
 281     print "splitting assertions successful"
 282 
 283     curr = URL(scheme='http', server='wikiwikiweb.de', path='/FrontPage', action="diff", rev1=2, rev2=3, fragment='line-123')
 284     assert str(curr) == 'http://wikiwikiweb.de/FrontPage?action=diff&rev1=2&rev2=3#line-123'
 285     assert (curr.host, curr.port) == ('wikiwikiweb.de', 80)
 286     print "joining assertions successful"
 287    
 288     curr = URL("http://wikiwikiweb.de/FrontPage")
 289     u = URL(curr, scheme="https")
 290     assert str(u) == 'https://wikiwikiweb.de/FrontPage'
 291     u = URL(curr, action="raw", rev=42)
 292     assert str(u) == 'http://wikiwikiweb.de/FrontPage?action=raw&rev=42'
 293     u = URL(curr, action="AttachFile", do="get", file=u"Übler Dübel.doc")
 294     assert str(u) == 'http://wikiwikiweb.de/FrontPage?action=AttachFile&do=get&rev=42&file=%C3%9Cbler+D%C3%BCbel.doc'
 295     u = URL(u, action=None, do=None, rev=None, file=None)
 296     assert str(u) == 'http://wikiwikiweb.de/FrontPage'
 297     print "updating assertions successful"
 298     
 299     #these still fail:
 300     u = URL(curr, port=42, path="/wiki/img/moinmoin.png")
 301     print u
 302     assert str(u) == 'http://wikiwikiweb.de:42/wiki/img/moinmoin.png'
# url.py
#
# MoinMoin: UrlClass (last edited 2007-10-29 19:19:46 by localhost)