Package translate :: Package storage :: Module benchmark
[hide private]
[frames | no frames]

Source Code for Module translate.storage.benchmark

  1  #!/usr/bin/env python 
  2  # 
  3  # Copyright 2004-2006 Zuza Software Foundation 
  4  # 
  5  # This file is part of translate. 
  6  # 
  7  # translate is free software; you can redistribute it and/or modify 
  8  # it under the terms of the GNU General Public License as published by 
  9  # the Free Software Foundation; either version 2 of the License, or 
 10  # (at your option) any later version. 
 11  # 
 12  # translate is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details. 
 16  # 
 17  # You should have received a copy of the GNU General Public License 
 18  # along with translate; if not, write to the Free Software 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 20   
 21  import cProfile 
 22  import os 
 23  import pstats 
 24  import random 
 25  import sys 
 26   
 27  from translate.storage import factory 
 28   
 29   
class TranslateBenchmarker:
    """class to aid in benchmarking Translate Toolkit stores

    Sample files are generated below ``<test_dir>/benchmark/zxx`` using the
    given store class, and can then be parsed back to measure the store's
    performance.
    """

    def __init__(self, test_dir, storeclass):
        """sets up benchmarking on the test directory

        :param test_dir: directory in which the benchmark tree is created;
            stored as an absolute path.
        :param storeclass: store class used to create and parse the sample
            files. Its first entry in ``Extensions`` is used as the file
            extension of the generated files.
        """
        self.test_dir = os.path.abspath(test_dir)
        self.StoreClass = storeclass
        self.extension = self.StoreClass.Extensions[0]
        self.project_dir = os.path.join(self.test_dir, "benchmark")
        self.file_dir = os.path.join(self.project_dir, "zxx")

    def clear_test_dir(self):
        """removes the test directory and everything below it

        Does nothing when the directory does not exist.
        """
        if os.path.exists(self.test_dir):
            # Walk bottom-up so that every directory is already empty by the
            # time os.rmdir() is called on it.
            for dirpath, subdirs, filenames in os.walk(self.test_dir, topdown=False):
                for name in filenames:
                    os.remove(os.path.join(dirpath, name))
                for name in subdirs:
                    os.rmdir(os.path.join(dirpath, name))
            if os.path.exists(self.test_dir):
                os.rmdir(self.test_dir)
        assert not os.path.exists(self.test_dir)

    def create_sample_files(self, num_dirs, files_per_dir, strings_per_file, source_words_per_string, target_words_per_string):
        """creates sample files for benchmarking

        :param num_dirs: number of directories to fill. When greater than 1,
            the files go into ``sample_<n>`` subdirectories of the file
            directory; when exactly 1 they are written directly into the file
            directory. With 0 no files are written.
        :param files_per_dir: number of files written per directory, named
            ``file_<n>.<extension>``.
        :param strings_per_file: number of units added to each file; also the
            upper bound of the random number used to build each word.
        :param source_words_per_string: number of words in each unit's source.
        :param target_words_per_string: number of words in each unit's target.
        """
        # Creates test_dir, project_dir and file_dir in one go if missing.
        if not os.path.exists(self.file_dir):
            os.makedirs(self.file_dir)
        for dirnum in range(num_dirs):
            if num_dirs > 1:
                dirname = os.path.join(self.file_dir, "sample_%d" % dirnum)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            else:
                dirname = self.file_dir
            for filenum in range(files_per_dir):
                sample_file = self.StoreClass()
                for _ in range(strings_per_file):
                    # Each word is a random number scaled by the word's
                    # position, so the first word is always "word0"/"drow0".
                    source_string = " ".join(
                        "word%d" % (random.randint(0, strings_per_file) * i)
                        for i in range(source_words_per_string))
                    sample_unit = sample_file.addsourceunit(source_string)
                    sample_unit.target = " ".join(
                        "drow%d" % (random.randint(0, strings_per_file) * i)
                        for i in range(target_words_per_string))
                sample_file.savefile(os.path.join(dirname, "file_%d.%s" % (filenum, self.extension)))

    def parse_file(self):
        """parses all the files in the test directory into memory

        Every file below the file directory is opened and handed to the store
        class; the total number of units found is printed.
        """
        count = 0
        for dirpath, subdirs, filenames in os.walk(self.file_dir, topdown=False):
            for name in filenames:
                pofilename = os.path.join(dirpath, name)
                # Close each file once its units are counted, so that large
                # benchmarks do not run out of file handles.
                with open(pofilename, 'r') as inputfile:
                    parsedfile = self.StoreClass(inputfile)
                    count += len(parsedfile.units)
        print("counted %d units" % count)
if __name__ == "__main__":
    # The store type to benchmark is the first command line argument and
    # defaults to "po".
    storetype = "po"
    if len(sys.argv) > 1:
        storetype = sys.argv[1]
    if storetype in factory.classes:
        storeclass = factory.classes[storetype]
    else:
        print("StoreClass: '%s' is not a base class that the class factory can load" % storetype)
        # Report the failure to the caller through a non-zero exit status.
        sys.exit(1)
    for sample_file_sizes in [
        # num_dirs, files_per_dir, strings_per_file, source_words_per_string, target_words_per_string
        # (1, 1, 2, 2, 2),
        (1, 1, 10000, 5, 10),  # Create 1 very large file with German-like ratios of source to target
        # (100, 10, 10, 5, 10),  # Create lots of directories and files with smaller than average size
        # (1, 5, 10, 10, 10),
        # (1, 10, 10, 10, 10),
        # (5, 10, 10, 10, 10),
        # (5, 10, 100, 20, 20),
        # (10, 20, 100, 10, 10),
        # (10, 20, 100, 10, 10),
        # (100, 2, 140, 3, 3),  # OpenOffice.org approximate ratios
    ]:
        benchmarker = TranslateBenchmarker("BenchmarkDir", storeclass)
        benchmarker.clear_test_dir()
        benchmarker.create_sample_files(*sample_file_sizes)
        # (method name, argument source text) pairs. Each pair is turned into
        # a statement string for cProfile.run(), which looks up `benchmarker`
        # and `sample_file_sizes` in the __main__ namespace - so this code
        # has to stay at module level.
        methods = [("create_sample_files", "*sample_file_sizes"), ("parse_file", ""), ]
        for methodname, methodparam in methods:
            print("%s %s" % (methodname, "%d dirs, %d files, %d strings, %d/%d words" % sample_file_sizes))
            print("_______________________________________________________")
            # One stats file per method, store type and size combination.
            statsfile = "%s_%s" % (methodname, storetype) + '_%d_%d_%d_%d_%d.stats' % sample_file_sizes
            cProfile.run('benchmarker.%s(%s)' % (methodname, methodparam), statsfile)
            stats = pstats.Stats(statsfile)
            # Show the 20 entries with the highest cumulative time.
            stats.sort_stats('cumulative').print_stats(20)
            print("_______________________________________________________")
        #benchmarker.clear_test_dir()