Attachment 'xapian_re_search_opt.patch'

Download

   1 diff -r 9fbcd746f135 MoinMoin/search/Xapian/indexing.py
   2 --- a/MoinMoin/search/Xapian/indexing.py	Fri Mar 12 13:52:00 2010 +0100
   3 +++ b/MoinMoin/search/Xapian/indexing.py	Sun Mar 14 09:25:38 2010 +0100
   4 @@ -97,6 +97,7 @@
   5          self.add_field_action('title', INDEX_FREETEXT, weight=100)
   6          self.add_field_action('title', STORE_CONTENT)
   7          self.add_field_action('content', INDEX_FREETEXT, spell=True)
   8 +        self.add_field_action('content', STORE_CONTENT)
   9          self.add_field_action('domain', INDEX_EXACT)
  10          self.add_field_action('domain', STORE_CONTENT)
  11          self.add_field_action('lang', INDEX_EXACT)
  12 @@ -297,7 +298,7 @@
  13          @param page: the page instance
  14          """
  15          body = page.get_raw_body()
  16 -
  17 +        # ToDo check category regex below
  18          prev, next = (0, 1)
  19          pos = 0
  20          while next:
  21 diff -r 9fbcd746f135 MoinMoin/search/queryparser/expressions.py
  22 --- a/MoinMoin/search/queryparser/expressions.py	Fri Mar 12 13:52:00 2010 +0100
  23 +++ b/MoinMoin/search/queryparser/expressions.py	Sun Mar 14 09:25:38 2010 +0100
  24 @@ -140,7 +140,32 @@
  25                  if field_to_check in data:
  26                      for term in data[field_to_check]:
  27                          if self.search_re.match(term):
  28 -                            queries.append(connection.query_field(field_to_check, term))
  29 +                            # To speed up searching, we try to minimize the length of regex search queries.
  30 +                            # For some search patterns the query string can be shortened,
  31 +                            # e.g. if someone searches for .* then anything in the term matches,
  32 +                            # so the very start of the term is enough for showing results.
  33 +                            # For a search pattern like .*text.* we replace each .* with .\w* and
  34 +                            # search for (?ims)(?P<value>.\w*text.\w*). If the result of this search
  35 +                            # also matches the original search pattern, we use this result
  36 +                            # as the query string.
  37 +                            pattern = self.search_re.pattern
  38 +                            if pattern.startswith('.*') and pattern.endswith('.*'):
  39 +                                new_pattern = pattern.replace('.*', '.\w*')
  40 +                                embedded_rawstr = "(?ims)(?P<value>%s)" %  new_pattern
  41 +                                match_obj = re.search(embedded_rawstr, term)
  42 +                                if match_obj:
  43 +                                    value = match_obj.groups('value')[0]
  44 +                                    if self.search_re.match(value):
  45 +                                        queries.append(connection.query_field(field_to_check, value))
  46 +                                        break
  47 +                            elif pattern == '.*':
  48 +                                queries.append(connection.query_field(field_to_check, term[0]))
  49 +                                break
  50 +                            # queries.append becomes very slow if too much content
  51 +                            # is appended and your system can run out of memory.
  52 +                            max_len = 3000
  53 +                            queries.append(connection.query_field(field_to_check, term[:max_len]))
  54 +                            break
  55              else:
  56                  # Check all fields
  57                  for field, terms in data.iteritems():

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2010-03-05 13:13:55, 50.0 KB) [[attachment:index.py.adraw]]
  • [get | view] (2010-03-14 08:36:05, 3.3 KB) [[attachment:xapian_re_search_opt.patch]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.