Package translate :: Package storage :: Module statsdb
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.statsdb

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21   
 22  """Module to provide a cache of statistics in a database. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  try: 
 30      from sqlite3 import dbapi2 
 31  except ImportError: 
 32      from pysqlite2 import dbapi2 
 33  import os.path 
 34  import re 
 35  import sys 
 36  import stat 
 37  import thread 
 38  from UserDict import UserDict 
 39   
 40  from translate import __version__ as toolkitversion 
 41  from translate.lang.common import Common 
 42  from translate.misc.multistring import multistring 
 43  from translate.storage import factory 
 44  from translate.storage.workflow import StateEnum 
 45   
 46  kdepluralre = re.compile("^_n: ") 
 47  brtagre = re.compile("<br\s*?/?>") 
 48  xmltagre = re.compile("<[^>]+>") 
 49  numberre = re.compile("\\D\\.\\D") 
 50   
 51  extended_state_strings = { 
 52      StateEnum.EMPTY: "empty", 
 53      StateEnum.NEEDS_WORK: "needs-work", 
 54      StateEnum.REJECTED: "rejected", 
 55      StateEnum.NEEDS_REVIEW: "needs-review", 
 56      StateEnum.UNREVIEWED: "unreviewed", 
 57      StateEnum.FINAL: "final", 
 58      } 
 59   
 60  UNTRANSLATED = StateEnum.EMPTY 
 61  FUZZY = StateEnum.NEEDS_WORK 
 62  TRANSLATED = StateEnum.UNREVIEWED 
 63   
 64  state_strings = { 
 65      UNTRANSLATED: "untranslated", 
 66      FUZZY: "fuzzy", 
 67      TRANSLATED: "translated", 
 68  } 


def wordcount(string):
    """Return the number of words in C{string}.

    Before counting, the KDE plural marker is removed, C{<br>} tags become
    newlines, all other tags are dropped and a full stop between two
    non-digits is replaced by a space.  The words themselves are split by
    L{Common.words}.
    """
    # TODO: po class should understand KDE style plurals
    cleaned = kdepluralre.sub("", string)
    for pattern, replacement in ((brtagre, "\n"), (xmltagre, ""), (numberre, " ")):
        cleaned = pattern.sub(replacement, cleaned)
    # TODO: This should still use the correct language to count in the target
    # language
    return len(Common.words(cleaned))


def wordsinunit(unit):
    """Return C{(sourcewords, targetwords)} for the unit.

    Every plural form of a multistring is counted.  The target is only
    counted when the unit is translated; otherwise the target count is 0.
    """

    def plural_forms(text):
        # A multistring carries all of its plural forms in .strings; a plain
        # (or missing) string is treated as a single form.
        if isinstance(text, multistring):
            return text.strings
        return [text or ""]

    sourcewords = sum(wordcount(form) for form in plural_forms(unit.source))
    if not unit.istranslated():
        return sourcewords, 0
    targetwords = sum(wordcount(form) for form in plural_forms(unit.target))
    return sourcewords, targetwords


class Record(UserDict):
    """A dictionary of named values supporting element-wise C{+} and C{-}.

    C{record_keys} names the stored values and fixes their order for
    L{to_tuple}.  C{compute_derived_values} is called with the record every
    time its values have been (re)built, so that it can add or refresh
    entries that are computed from the stored ones.
    """

    def __init__(self, record_keys, record_values=None, compute_derived_values=lambda x: x):
        """
        @param record_keys: the names of the stored values, in order.
        @param record_values: the initial values, in the order of
            C{record_keys}.  When C{None}, every value starts at 0.
        @param compute_derived_values: callable taking the record, used to
            fill in derived entries.  Defaults to doing nothing.
        """
        if record_values is None:
            record_values = (0 for _i in record_keys)
        self.record_keys = record_keys
        self.data = dict(zip(record_keys, record_values))
        self._compute_derived_values = compute_derived_values
        self._compute_derived_values(self)

    def to_tuple(self):
        """Return the stored (non-derived) values in C{record_keys} order."""
        return tuple(self[key] for key in self.record_keys)

    def _combine(self, other, operation):
        """Return a new record holding C{operation(self[k], other[k])} for
        every key of this record, with its derived values recomputed."""
        # The result keeps the derivation callback so that it stays
        # consistent when it is combined further.
        result = Record(self.record_keys,
                        compute_derived_values=self._compute_derived_values)
        for key in self.keys():
            result[key] = operation(self[key], other[key])
        # Derived entries must be recomputed on the *result*; combining the
        # operands' derived entries does not generally give the right value.
        result._compute_derived_values(result)
        return result

    def __add__(self, other):
        """Return a new record with the element-wise sum of both records."""
        return self._combine(other, lambda left, right: left + right)

    def __sub__(self, other):
        """Return a new record with the element-wise difference."""
        return self._combine(other, lambda left, right: left - right)

    def as_string_for_db(self):
        """Return the stored values as a comma separated string of reprs."""
        return ",".join([repr(x) for x in self.to_tuple()])


def transaction(f):
    """Modifies f to commit database changes if it executes without exceptions.
    Otherwise it rolls back the database.

    ALL publicly accessible methods in StatsCache MUST be decorated with this
    decorator.

    @param f: a method whose first argument is an object with a C{con}
        attribute holding the database connection.
    @return: the wrapping function, carrying the name and docstring of C{f}.
    """

    def decorated_f(self, *args, **kwargs):
        try:
            result = f(self, *args, **kwargs)
            self.con.commit()
            return result
        except:
            # If ANY exception is raised, we're left in an
            # uncertain state and we MUST roll back any changes to avoid getting
            # stuck in an inconsistent state.  The bare except is deliberate:
            # the exception is always re-raised below.
            if self.con:
                self.con.rollback()
            raise

    # Keep the wrapped function's identity so that documentation tools and
    # tracebacks show the real method rather than "decorated_f".
    decorated_f.__name__ = getattr(f, "__name__", decorated_f.__name__)
    decorated_f.__doc__ = getattr(f, "__doc__", None)
    return decorated_f


def statefordb(unit):
    """Returns the numeric database state for the unit.

    A translated unit maps to TRANSLATED, a fuzzy unit that has a target
    maps to FUZZY and everything else maps to UNTRANSLATED.
    """
    state = UNTRANSLATED
    if unit.istranslated():
        state = TRANSLATED
    elif unit.isfuzzy() and unit.target:
        state = FUZZY
    return state


class FileTotals(object):
    """Dictionary-like access to the C{filetotals} table, keyed by fileid.

    Each row is exposed as a L{Record} whose stored values are listed in
    L{keys} and whose derived values ("total", "totalsourcewords" and
    "review") are filled in by L{_compute_derived_values}.
    """

    # Column names of the filetotals table (without fileid), in table order.
    keys = ['translatedsourcewords',
            'fuzzysourcewords',
            'untranslatedsourcewords',
            'translated',
            'fuzzy',
            'untranslated',
            'translatedtargetwords']

    def db_keys(self):
        """Return the column names as a comma separated string for SQL."""
        return ",".join(self.keys)

    def __init__(self, cur):
        """Remember the cursor and create the filetotals table if needed.

        @param cur: the database cursor used for all queries.
        """
        self.cur = cur
        self.cur.execute("""
            CREATE TABLE IF NOT EXISTS filetotals(
                fileid                  INTEGER PRIMARY KEY AUTOINCREMENT,
                translatedsourcewords   INTEGER NOT NULL,
                fuzzysourcewords        INTEGER NOT NULL,
                untranslatedsourcewords INTEGER NOT NULL,
                translated              INTEGER NOT NULL,
                fuzzy                   INTEGER NOT NULL,
                untranslated            INTEGER NOT NULL,
                translatedtargetwords   INTEGER NOT NULL);""")

    @classmethod
    def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
        """Return a record describing a single unit, or an all-zero record.

        @param state_for_db: UNTRANSLATED, FUZZY or TRANSLATED.  When C{None}
            (or any other value) every counter stays 0.
        @param sourcewords: number of source words of the unit.
        @param targetwords: number of target words of the unit; only
            recorded for translated units.
        """
        record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
        if state_for_db is None:
            return record
        # States are plain numbers (they also come back from the database),
        # so they are compared by value rather than by identity.
        if state_for_db == UNTRANSLATED:
            record['untranslated'] = 1
            record['untranslatedsourcewords'] = sourcewords
        elif state_for_db == TRANSLATED:
            record['translated'] = 1
            record['translatedsourcewords'] = sourcewords
            record['translatedtargetwords'] = targetwords
        elif state_for_db == FUZZY:
            record['fuzzy'] = 1
            record['fuzzysourcewords'] = sourcewords
        # The derived values were computed while all counters were still 0;
        # refresh them now that the counters are set.
        cls._compute_derived_values(record)
        return record

    @classmethod
    def _compute_derived_values(cls, record):
        """Fill in the entries of C{record} that are not stored in the
        database: "total", "totalsourcewords" and "review"."""
        record["total"] = record["untranslated"] + \
                          record["translated"] + \
                          record["fuzzy"]
        record["totalsourcewords"] = record["untranslatedsourcewords"] + \
                                     record["translatedsourcewords"] + \
                                     record["fuzzysourcewords"]
        record["review"] = 0

    def __getitem__(self, fileid):
        """Return the totals of C{fileid} as a L{Record}.

        If the file has no row, the query yields C{None} and the record is
        initialised with zeros.
        """
        result = self.cur.execute("""
            SELECT %(keys)s
            FROM   filetotals
            WHERE  fileid=?;""" % {'keys': self.db_keys()}, (fileid,))
        return Record(FileTotals.keys, result.fetchone(), self._compute_derived_values)

    def __setitem__(self, fileid, record):
        """Insert or replace the row of C{fileid} with the stored values of
        C{record}."""
        # Bind the values as parameters instead of formatting their repr()
        # into the statement: repr() of a Python 2 long ("5L") is not valid
        # SQL.
        values = (fileid,) + record.to_tuple()
        placeholders = ",".join(["?"] * len(values))
        self.cur.execute("""
            INSERT OR REPLACE into filetotals
            VALUES (%s);
        """ % placeholders, values)

    def __delitem__(self, fileid):
        """Delete the row of C{fileid}, if any."""
        self.cur.execute("""
            DELETE FROM filetotals
            WHERE fileid=?;
        """, (fileid,))


def emptyfiletotals():
    """Returns a fresh file totals record with all statistics initialised
    to 0."""
    totals = FileTotals.new_record()
    return totals


def emptyfilechecks():
    """Returns a new, empty dictionary for collecting check failures."""
    return dict()


def emptyfilestats():
    """Returns a new dictionary mapping each state name (and "total") to
    its own empty list."""
    names = ("total", "translated", "fuzzy", "untranslated")
    return dict((name, []) for name in names)


def emptyunitstats():
    """Returns a new dictionary with an empty list for each per-unit word
    count."""
    names = ("sourcewordcount", "targetwordcount")
    return dict((name, []) for name in names)


# We allow the caller to specify which value to return when errors_return_empty
# is True. We do this, since Pootle wants None to be returned when it calls
# get_mod_info directly, whereas we want an integer to be returned for
# uses of get_mod_info within this module.
# TODO: Get rid of empty_return when Pootle code is improved to not require
# this.
# NOTE(review): the signature below takes neither errors_return_empty nor
# empty_return, so the note above looks out of date - confirm and remove.


def get_mod_info(file_path):
    """Returns the (modification time, size) pair of the file at
    C{file_path}, as reported by os.stat.

    Raises OSError (from os.stat) if the path cannot be examined, and fails
    an assertion if the path is a directory.
    """
    info = os.stat(file_path)
    is_directory = stat.S_ISDIR(info.st_mode)
    assert not is_directory
    return (info.st_mtime, info.st_size)


def suggestion_extension():
    """Returns the extension (including the extension separator) of files
    that hold pending suggestions."""
    return "".join([os.path.extsep, 'pending'])


def suggestion_filename(filename):
    """Returns the name of the suggestion file that belongs to C{filename}:
    the filename with the suggestion extension appended."""
    extension = suggestion_extension()
    return filename + extension


# ALL PUBLICLY ACCESSIBLE METHODS MUST BE DECORATED WITH THE transaction DECORATOR.
# NOTE(review): several public query methods below are not decorated
# themselves; they do call decorated helpers. Confirm whether that is intended.
class StatsCache(object):
    """An object instantiated as a singleton for each statsfile that provides
    access to the database cache from a pool of StatsCache objects.

    The pool is keyed by thread identifier and then by the real path of the
    statistics file, so each thread gets its own connection per file.
    """
    _caches = {}
    defaultfile = None
    con = None
    """This cache's connection"""
    cur = None
    """The current cursor"""

    def __new__(cls, statsfile=None):
        """Return the cache of the calling thread for C{statsfile}, creating
        and registering it first if necessary.

        @param statsfile: path of the database file.  When empty, a
            C{stats.db} in a per-user directory is used (the directory is
            created if it does not exist).
        """
        current_thread = thread.get_ident()

        def make_database(statsfile):

            def connect(cache):
                cache.con = dbapi2.connect(statsfile)
                cache.cur = cache.con.cursor()

            def clear_old_data(cache):
                """Delete the database file if it was written by an older
                toolkit build.  Returns True if the file was deleted."""
                try:
                    cache.cur.execute("""SELECT toolkitbuild FROM files""")
                    val = cache.cur.fetchone()
                    # If the database is empty, we have no idea whether its layout
                    # is correct, so we might as well delete it.
                    if val is None or val[0] < toolkitversion.build:
                        cache.con.close()
                        os.unlink(statsfile)
                        return True
                    return False
                except dbapi2.OperationalError:
                    # Raised e.g. when the files table does not exist yet.
                    return False

            cache = cls._caches.setdefault(current_thread, {})[statsfile] = object.__new__(cls)
            connect(cache)
            if clear_old_data(cache):
                # The file was removed; connect again to start a fresh one.
                connect(cache)
            cache.create()
            return cache

        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)
        # First see if a cache for this file already exists:
        if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
            return cls._caches[current_thread][statsfile]
        # No existing cache. Let's build a new one and keep a copy
        return make_database(statsfile)

    @transaction
    def create(self):
        """Create all tables and indexes."""
        self.file_totals = FileTotals(self.cur)

        self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            st_mtime INTEGER NOT NULL,
            st_size INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""")

        self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            e_state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""")

    @transaction
    def _getfileid(self, filename, check_mod_info=True, store=None):
        """return fileid representing the given file in the statscache.

        if file not in cache or has been updated since last record
        update, recalculate stats.

        optional argument store can be used to avoid unnecessary
        reparsing of already loaded translation files.

        store can be a TranslationFile object or a callback that returns one.

        if check_mod_info is False and the file is already known, only its
        recorded modification info is refreshed and nothing is recalculated.
        """
        if isinstance(filename, str):
            filename = unicode(filename, sys.getfilesystemencoding())
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
                WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mod_info = get_mod_info(realpath)
        if filerow:
            fileid = filerow[0]
            if not check_mod_info:
                # Update the mod_info of the file
                self.cur.execute("""UPDATE files
                        SET st_mtime=?, st_size=?
                        WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
                return fileid
            if (filerow[1], filerow[2]) == mod_info:
                return fileid

        # The file is not in the db, or it changed on disk: (re)cache it.
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(realpath)

        return self._cachestore(store, realpath, mod_info)

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before.

        Returns the stored configid, or None if the configuration is new.
        """
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        return configrow[0]

    @transaction
    def _cacheunitstats(self, units, fileid, unitindex=None, file_totals_record=None):
        """Cache the statistics for the supplied unit(s).

        @param units: the units to cache; untranslatable ones are skipped.
        @param fileid: the id of the file the units belong to.
        @param unitindex: when given, the index to store for the unit(s)
            instead of their position in C{units} (used when recaching a
            single unit).
        @param file_totals_record: the totals to which the units are added
            before being stored for the file.  Defaults to a new all-zero
            record.
        @return: the state string of the first unit when C{unitindex} is
            given, otherwise an empty string.
        """
        # Build the default per call rather than once at definition time.
        if file_totals_record is None:
            file_totals_record = FileTotals.new_record()
        # An index of 0 is a valid unit index, so test against None
        # explicitly rather than relying on truthiness.
        single_unit = unitindex is not None
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                if single_unit:
                    index = unitindex
                state = statefordb(unit)
                # what about plurals in .source and .target?
                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   state,
                                   unit.get_state_id()))
                file_totals_record = file_totals_record + \
                    FileTotals.new_record(state, sourcewords, targetwords)
        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state, e_state)
            values (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.file_totals[fileid] = file_totals_record
        if single_unit:
            return state_strings[statefordb(units[0])]
        return ""

    @transaction
    def _cachestore(self, store, realpath, mod_info):
        """Calculates and caches the statistics of the given store
        unconditionally.

        Returns the (new) fileid of the store.
        """
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
            (realpath, mod_info[0], mod_info[1], toolkitversion.build))
        fileid = self.cur.lastrowid
        self.cur.execute("""DELETE FROM units WHERE
            fileid=?""", (fileid,))
        self._cacheunitstats(store.units, fileid)
        return fileid

    def file_extended_totals(self, filename, store=None):
        """Return, per extended state name, a dictionary with the number of
        units and the summed source and target word counts of the file."""
        stats = {}
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT e_state, COUNT(id), SUM(sourcewords), SUM(targetwords)
                FROM units WHERE fileid=? GROUP BY e_state""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[extended_state_strings[value[0]]] = {
                "units": value[1],
                "sourcewords": value[2],
                "targetwords": value[3],
            }
        return stats

    def filetotals(self, filename, store=None, extended=False):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to cachestore().

        When C{extended} is true, the result of L{file_extended_totals} is
        added under the key "extended".
        """
        stats = self.file_totals[self._getfileid(filename, store=store)]
        if extended:
            stats["extended"] = self.file_extended_totals(filename, store=store)
        return stats

    @transaction
    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for cachestorechecks() and recacheunit()

        @param unitindex: when given, only a single unit is being updated
            and this is the index stored for its failures.
        @return: the names of the failed checks, each prefixed with
            "check-"; "total" is appended when C{unitindex} is given.
        """
        # We always want to store one dummy error to know that we have actually
        # run the checks on this file with the current checker configuration
        dummy = (-1, fileid, configid, "noerror", "")
        unitvalues = [dummy]
        # if we are doing a single unit, we want to return the checknames
        errornames = []
        # An index of 0 is a valid unit index, so test against None
        # explicitly rather than relying on truthiness.
        single_unit = unitindex is not None
        for index, unit in enumerate(units):
            if unit.istranslatable():
                # Correctly assign the unitindex
                if single_unit:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.iteritems():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        checker.setsuggestionstore(None)

        if single_unit:
            # We are only updating a single unit, so we don't want to add an
            # extra noerror-entry
            unitvalues.remove(dummy)
            errornames.append("total")

        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        return errornames

    @transaction
    def _cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally."""
        # Let's purge all previous failures because they will probably just
        # fill up the database without much use.
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def get_unit_stats(self, fileid, unitid):
        """Return the (state, sourcewords, targetwords) row of the unit, or
        an empty list (after printing a warning) if the unit is unknown."""
        values = self.cur.execute("""
            SELECT   state, sourcewords, targetwords
            FROM     units
            WHERE    fileid=? AND unitid=?
        """, (fileid, unitid))
        result = values.fetchone()
        if result is not None:
            return result
        else:
            print >> sys.stderr, """WARNING: Database in inconsistent state.
            fileid %d and unitid %s have no entries in the table units.""" % (fileid, unitid)
            # If values.fetchone() is None, then we return an empty list,
            # to make FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
            # do the right thing.
            return []

    @transaction
    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file totals,
        checks, checker config, etc.

        @return: a list holding the unit's state string followed by the
            names returned by L{_cacheunitschecks}.
        """
        fileid = self._getfileid(filename, check_mod_info=False)
        configid = self._get_config_id(fileid, checker)
        unitid = unit.getid()
        # Take the old contribution of this unit out of the file totals.
        totals_without_unit = self.file_totals[fileid] - \
                                   FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        # get the unit index
        # NOTE(review): fetchone() is None if the unit has no row, in which
        # case the [0] below raises TypeError - confirm whether callers rely
        # on the unit always being cached already.
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
        # remove the current errors
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

    def _checkerrors(self, filename, fileid, configid, checker, store):
        """Return C{(first_row, cursor)} for the stored check results of the
        file, running and caching the checks first if none are stored."""

        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchone(), self.cur

        first, cur = geterrors()
        if first is not None:
            return first, cur

        # This could happen if we haven't done the checks before, or the
        # file changed, or we are using a different configuration
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(filename)

        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        self._cachestorechecks(fileid, store, checker, configid)
        return geterrors()

    def _geterrors(self, filename, fileid, configid, checker, store):
        """Return all stored (name, unitindex) rows of the file as a list."""
        result = []
        first, cur = self._checkerrors(filename, fileid, configid, checker, store)
        result.append(first)
        result.extend(cur.fetchall())
        return result

    @transaction
    def _get_config_id(self, fileid, checker):
        """Return the id of the checker's configuration, storing the
        configuration first if it has not been seen before."""
        configid = self._getstoredcheckerconfig(checker)
        if configid:
            return configid
        self.cur.execute("""INSERT INTO checkerconfigs
            (configid, config) values (NULL, ?);""",
            (str(checker.config.__dict__),))
        return self.cur.lastrowid

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to cachestorechecks().

        @return: a dictionary mapping "check-<name>" to the list of indices
            of the units failing that check.
        """
        fileid = self._getfileid(filename, store=store)
        configid = self._get_config_id(fileid, checker)
        values = self._geterrors(filename, fileid, configid, checker, store)

        errors = emptyfilechecks()
        for value in values:
            # Skip the dummy "noerror" row, which is stored with index -1.
            if value[1] == -1:
                continue
            checkkey = 'check-' + value[0]   # value[0] is the error name
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(value[1])   # value[1] is the unitindex

        return errors

    def file_fails_test(self, filename, checker, name):
        """Return True if any unit of the file fails the check C{name}."""
        fileid = self._getfileid(filename)
        configid = self._get_config_id(fileid, checker)
        # Make sure the checks have been run and cached for this file.
        self._checkerrors(filename, fileid, configid, checker, None)
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors
            WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
        return self.cur.fetchone() is not None

    def filestatestats(self, filename, store=None, extended=False):
        """Return a dictionary of unit stats mapping sets of unit
        indices with those states

        When C{extended} is true, the key "extended" maps each stored
        e_state value to the list of unit indices with that value.
        """
        stats = emptyfilestats()
        if extended:
            stats["extended"] = {}

        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT state, e_state, unitindex
            FROM units WHERE fileid=? ORDER BY unitindex;""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[state_strings[value[0]]].append(value[2])
            if extended:
                if value[1] not in stats["extended"]:
                    stats["extended"][value[1]] = []
                stats["extended"][value[1]].append(value[2])
            stats["total"].append(value[2])
        return stats

    def filestats(self, filename, checker, store=None, extended=False):
        """Return a dictionary of property names mapping sets of unit
        indices with those properties."""
        stats = emptyfilestats()
        stats.update(self.filechecks(filename, checker, store))
        stats.update(self.filestatestats(filename, store, extended=extended))
        return stats

    def unitstats(self, filename, _lang=None, store=None):
        # For now, lang and store are unused. lang will allow the user to
        # base stats information on the given language. See the commented
        # line containing stats.update below.
        """Return a dictionary of property names mapping to arrays which
        map unit indices to property values.

        Please note that this is different from filestats, since filestats
        supplies sets of unit indices with a given property, whereas this
        method supplies arrays which map unit indices to given values."""
        stats = emptyunitstats()

        #stats.update(self.unitchecks(filename, lang, store))
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT
            sourcewords, targetwords
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for sourcecount, targetcount in self.cur.fetchall():
            stats["sourcewordcount"].append(sourcecount)
            stats["targetwordcount"].append(targetcount)

        return stats
