Package translate :: Package search :: Package indexing
[hide private]
[frames] | no frames]

Source Code for Package translate.search.indexing

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright 2008 Zuza Software Foundation 
  4  # 
  5  # This file is part of translate. 
  6  # 
  7  # translate is free software; you can redistribute it and/or modify 
  8  # it under the terms of the GNU General Public License as published by 
  9  # the Free Software Foundation; either version 2 of the License, or 
 10  # (at your option) any later version. 
 11  # 
 12  # translate is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details. 
 16  # 
 17  # You should have received a copy of the GNU General Public License 
 18  # along with translate; if not, write to the Free Software 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 20  # 
 21   
 22   
 23  """ 
 24  interface for different indexing engines for the translate toolkit 
 25   
 26  """ 
 27   
 28  __revision__ = "$Id: __init__.py 15615 2010-08-22 21:13:42Z dwaynebailey $" 
 29   
 30  import os 
 31  import shutil 
 32  import logging 
 33   
 34  import CommonIndexer 
 35   
 36  """ TODO for indexing engines: 
 37      * get rid of jToolkit.glock dependency 
 38      * add partial matching at the beginning of a term 
 39      * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/ 
 40      * do unittests for PyLucene v1.x 
 41      """ 
 42   
 43   
def _get_available_indexers():
    """get a list of the available supported indexing engines

    search through the directory of this package for modules that provide
    classes derived from the CommonIndexer.CommonDatabase class

    A module is only considered if it is a readable ".py" file, if it can be
    imported without an ImportError and if its module level function
    "is_available" returns a true value.

    @return: the indexer classes that were found, in order of the file names
        of their modules
    @rtype: list of subclasses of L{CommonIndexer.CommonDatabase}
    """
    result = []
    # get the package directory
    indexer_dir = os.path.dirname(os.path.abspath(__file__))
    # os.listdir() returns bare file names, while __file__ is a path that may
    # also point to a compiled file (".pyc") - thus we compare module names
    # instead of file names when looking for ourself
    own_name = os.path.splitext(os.path.basename(__file__))[0]
    # sort the files in the directory by name - this makes it deterministic,
    # which indexing engine is chosen in case of multiple possibilities
    for mod_file in sorted(os.listdir(indexer_dir)):
        mod_path = os.path.join(indexer_dir, mod_file)
        if not (mod_file.endswith(".py") and os.path.isfile(mod_path)
                and os.access(mod_path, os.R_OK)):
            # no file / wrong extension / not readable -> skip it
            continue
        # strip the ".py" suffix
        mod_name = mod_file[:-3]
        if mod_name == own_name:
            # we should not import ourself
            continue
        # TODO - debug: "[Indexer]: trying to import indexing engines from '%s'" % mod_path
        try:
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # maybe it is unusable or dependencies are missing
            continue
        # the module function "is_available" must return "True"
        if not (hasattr(module, "is_available")
                and callable(module.is_available)
                and module.is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                # this rarely happens: e.g. for 'item' being 'None'
                continue
            try:
                is_indexer = issubclass(element, CommonIndexer.CommonDatabase)
            except TypeError:
                # 'element' is not a class
                continue
            # the class must inherit CommonDatabase (without being the same)
            if is_indexer and element is not CommonIndexer.CommonDatabase:
                # TODO: debug - "[Indexer]: indexing engine found in '%s': %s" % (mod_path, element)
                # the interface is ok
                result.append(element)
    return result
96 97
def _sort_indexers_by_preference(indexer_classes, pref_order):
    """reorder a list of indexer classes according to a list of preferences

    Every entry of the preference list is compared with the last dotted
    component of the module name of each class (e.g.: XapianIndexer or
    PyLuceneIndexer). Matching classes are moved to the front in the order of
    the preference list. All other classes follow in their original order.
    The given list is not modified.

    @param indexer_classes: the list of all available indexer classes
    @type indexer_classes: list of CommonIndexer.CommonDatabase classes
    @param pref_order: names of the preferred indexer modules
    @type pref_order: list of str
    @return: sorted list of indexer classes
    @rtype: list of CommonIndexer.CommonDatabase classes
    """
    def module_tail(indexer_class):
        # the name of the module that defines the class - without packages
        return os.path.basename(indexer_class.__module__).split(".")[-1]

    # work on a copy - the caller's list stays untouched
    remaining = indexer_classes[:]
    ordered = []
    for wanted in pref_order:
        chosen = [candidate for candidate in remaining
                  if module_tail(candidate) == wanted]
        # everything that was chosen moves from 'remaining' to 'ordered'
        ordered.extend(chosen)
        for candidate in chosen:
            remaining.remove(candidate)
    # the classes without a matching preference go to the end
    return ordered + remaining
# the indexer classes that are usable in this environment - the lookup runs
# once, when this module is imported
_AVAILABLE_INDEXERS = _get_available_indexers()

# True if at least one indexing engine was found - use this to check whether
# indexing support is available
HAVE_INDEXER = bool(_AVAILABLE_INDEXERS)
def get_indexer(basedir, preference=None):
    """return an appropriate indexer for the given directory

    If the directory does not exist, then the first indexer (by order of
    preference) is instantiated for it. If it exists, then every indexer is
    tried with "create_allowed=False" and the first one that opens the
    location is returned. If none of them succeeds, then the whole directory
    is removed and the first indexer is instantiated for it.

    Exceptions raised while instantiating the first indexer for a missing or
    removed directory are not caught here.

    @raise IndexError: there is no indexing engine available

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexer modules (see
        L{_sort_indexers_by_preference}); None means no preference
    @type preference: list of str
    @return: an instance of the most appropriate indexer class
    @rtype: instance of a subclass of L{CommonIndexer.CommonDatabase}
    """
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    if preference is None:
        preference = []
    # the preferred indexing engines come first
    candidates = _sort_indexers_by_preference(_AVAILABLE_INDEXERS, preference)
    if not os.path.exists(basedir):
        # nothing to reuse - the first indexing engine creates the database
        return candidates[0](basedir)
    for engine in candidates:
        try:
            # open without creating anything: a database of another indexing
            # engine could exist at this location
            return engine(basedir, create_allowed=False)
        except (ValueError, OSError):
            # invalid type of database or some other error - try the next
            pass
    # no indexing engine was able to open the location - remove the whole
    # base directory and start from scratch
    shutil.rmtree(basedir, ignore_errors=True)
    logging.info("Deleting invalid indexing directory '%s'", basedir)
    return candidates[0](basedir)
if __name__ == "__main__":
    # show all supported indexing engines (with fulfilled requirements)
    for ONE_INDEX in _AVAILABLE_INDEXERS:
        # the parenthesized single-argument form prints the same text under
        # Python 2 and is also valid syntax under Python 3
        print(ONE_INDEX)