Package translate :: Package convert :: Module csv2po
[hide private]
[frames | no frames]

Source Code for Module translate.convert.csv2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2003-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """convert Comma-Separated Value (.csv) files to Gettext PO localization files 
 23   
 24  See: http://translate.sourceforge.net/wiki/toolkit/csv2po for examples and 
 25  usage instructions 
 26  """ 
 27   
 28  import sys 
 29   
 30  from translate.misc import sparse 
 31  from translate.storage import po 
 32  from translate.storage import csvl10n 
 33   
 34   
def replacestrings(source, *pairs):
    """apply a series of (old, new) substring replacements to source

    The pairs are applied one after another in the order given, so each
    replacement operates on the result of the previous one.  Returns the
    resulting string; with no pairs, source is returned unchanged.
    """
    result = source
    for pair in pairs:
        old, replacement = pair
        result = result.replace(old, replacement)
    return result
39 40
def quotecsvstr(source):
    """wrap source in double quotes, normalising its backslash escapes first

    The substitutions run in this order (via replacestrings):
    an already escaped quote (backslash + ") is reduced to a bare quote,
    then every double quote is escaped with a backslash, and finally a
    doubled backslash in front of ' or n is reduced to a single backslash.
    """
    substitutions = (
        ('\\"', '"'),
        ('"', '\\"'),
        ("\\\\'", "\\'"),
        ('\\\\n', '\\n'),
    )
    escaped = replacestrings(source, *substitutions)
    return "".join(('"', escaped, '"'))
43 44
def simplify(string):
    """reduce string to just its alphanumeric characters

    Every character for which isalnum() is false (whitespace, punctuation,
    ...) is dropped and the remaining characters are returned as one string.
    csv2po uses the result as a lenient lookup key, so the return value must
    be a real string that compares and hashes by content.
    """
    # filter() only hands back a string on Python 2; joining the result gives
    # a string on every Python version instead of a lazy iterator object.
    return "".join(filter(type(string).isalnum, string))
49 50
class csv2po:
    """a class that takes translations from a .csv file and puts them in a .po file

    With no template po file, every csv unit is converted into a new po unit.
    With a template, each csv unit is looked up in the template (by location,
    then by exact source, then by simplified source) and its translation is
    stored on the unit that was found.
    """

    def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
        """construct the converter...

        templatepo: po store to merge translations into, or None to build a
            new store in convertstore().
        charset: kept on the instance; nothing in this class reads it.
        duplicatestyle: handed to the store's removeduplicates() at the end
            of convertstore().
        """
        self.pofile = templatepo
        self.charset = charset
        self.duplicatestyle = duplicatestyle
        # Always defined, so the counter can be read after any conversion and
        # handlecsvunit() can report problems before convertstore() has run.
        self.unmatched = 0
        self.csvfile = None
        if self.pofile is not None:
            self.makeindex()

    def makeindex(self):
        """makes indexes required for searching...

        commentindex maps the space-joined locations of a unit to that unit
        (keys that occur for more than one unit are removed again),
        sourceindex maps a source string to the last unit seen with it, and
        simpleindex maps the simplified source to a list of candidate units.
        """
        self.commentindex = {}
        self.sourceindex = {}
        self.simpleindex = {}
        self.duplicatecomments = []
        for pounit in self.pofile.units:
            joinedcomment = " ".join(pounit.getlocations())
            source = pounit.source
            # the definitive way to match is by source comment (joinedcomment)
            if joinedcomment in self.commentindex:
                # unless more than one thing matches...
                self.duplicatecomments.append(joinedcomment)
            else:
                self.commentindex[joinedcomment] = pounit
            # do simpler matching in case things have been mangled...
            simpleid = simplify(source)
            # but check for duplicates: a new source with an already known
            # simplified form is kept alongside the earlier candidates
            if simpleid in self.simpleindex and source not in self.sourceindex:
                self.simpleindex[simpleid].append(pounit)
            else:
                self.simpleindex[simpleid] = [pounit]
            # also match by standard msgid
            self.sourceindex[source] = pounit
        # an ambiguous location cannot identify a unit, so drop those keys
        for comment in self.duplicatecomments:
            self.commentindex.pop(comment, None)

    def convertunit(self, csvunit):
        """converts csv unit to po unit

        Returns a new po unit carrying the csv unit's source and target, and
        its location when that is non-empty.
        """
        pounit = po.pounit(encoding="UTF-8")
        if csvunit.location:
            pounit.addlocation(csvunit.location)
        pounit.source = csvunit.source
        pounit.target = csvunit.target
        return pounit

    def _unmatched(self, message):
        """writes message to stderr and counts one more unmatched csv unit"""
        print >> sys.stderr, message
        self.unmatched += 1

    def _findpounit(self, csvunit):
        """returns the template unit that csvunit belongs to

        Tries the location index, then the exact source index, then the
        simplified source index.  Returns None, after reporting the unit as
        unmatched, when nothing or more than one candidate is found.
        """
        location = csvunit.location
        if location.strip() and location in self.commentindex:
            return self.commentindex[location]
        if csvunit.source in self.sourceindex:
            return self.sourceindex[csvunit.source]
        candidates = self.simpleindex.get(simplify(csvunit.source))
        csvfilename = getattr(self.csvfile, "filename", "(unknown)")
        if candidates is None:
            self._unmatched("%s - csv entry not found in pofile:\n location\t%s\n original\t%s\n translation\t%s" % (csvfilename, csvunit.location, csvunit.source, csvunit.target))
            return None
        if len(candidates) > 1:
            matches = "\n ".join(["possible match: " + candidate.source for candidate in candidates])
            self._unmatched("%s - csv entry not found in pofile, multiple matches found:\n location\t%s\n original\t%s\n translation\t%s\n %s" % (csvfilename, csvunit.location, csvunit.source, csvunit.target, matches))
            return None
        return candidates[0]

    def handlecsvunit(self, csvunit):
        """handles reintegrating a csv unit into the .po file

        Stores csvunit.target on the matching template unit.  Units that
        cannot be matched are reported on stderr and counted in
        self.unmatched; nothing is returned either way.
        """
        pounit = self._findpounit(csvunit)
        if pounit is None:
            return
        if not pounit.hasplural():
            pounit.target = csvunit.target
            return
        # we need to work out whether we matched the singular or the plural
        singularid = pounit.source.strings[0]
        pluralid = pounit.source.strings[1]
        if csvunit.source == singularid:
            index = 0
        elif csvunit.source == pluralid:
            index = 1
        else:
            # fall back to the lenient comparison used for the simple index
            simplesource = simplify(csvunit.source)
            if simplesource == simplify(singularid):
                index = 0
            elif simplesource == simplify(pluralid):
                index = 1
            else:
                self._unmatched("couldn't work out singular or plural: %r, %r, %r" %
                                (csvunit.source, singularid, pluralid))
                return
        pounit.msgstr[index] = csvunit.target

    def convertstore(self, thecsvfile):
        """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction

        thecsvfile must provide a units list; its filename attribute, when
        present, is recorded in a developer note on the po header.
        """
        self.csvfile = thecsvfile
        mergemode = self.pofile is not None
        if not mergemode:
            self.pofile = po.pofile()
        if self.pofile.units and self.pofile.units[0].isheader():
            targetheader = self.pofile.units[0]
            self.pofile.updateheader(content_type="text/plain; charset=UTF-8", content_transfer_encoding="8bit")
        else:
            targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
        # same fallback as the stderr diagnostics, so a store without a
        # filename attribute does not abort the conversion
        csvfilename = getattr(self.csvfile, "filename", "(unknown)")
        targetheader.addnote("extracted from %s" % csvfilename, "developer")
        # NOTE: self.charset is not applied here; units are used as provided
        mightbeheader = True
        for csvunit in self.csvfile.units:
            if mightbeheader:
                # ignore typical header strings... (only the first unit is checked)
                mightbeheader = False
                if csvunit.match_header():
                    continue
            # skip a location-less unit that carries po header content
            if not csvunit.location.strip() and "Content-Type:" in csvunit.source:
                continue
            if mergemode:
                self.handlecsvunit(csvunit)
            else:
                self.pofile.addunit(self.convertunit(csvunit))
        self.pofile.removeduplicates(self.duplicatestyle)
        return self.pofile
174 175
def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="msgctxt"):
    """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile

    When templatefile is given it is loaded as a po store and the csv
    translations are merged into it; otherwise a new store is built.
    Returns 0 without writing anything if the resulting store is empty,
    otherwise writes str(store) to outputfile and returns 1.
    """
    csvstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    options = {"charset": charset, "duplicatestyle": duplicatestyle}
    if templatefile is not None:
        converter = csv2po(po.pofile(templatefile), **options)
    else:
        converter = csv2po(**options)
    result = converter.convertstore(csvstore)
    if not result.isempty():
        outputfile.write(str(result))
        return 1
    return 0
189 190
def main(argv=None):
    """command-line entry point: builds the option parser and runs it on argv

    The format keys ("csv", "po"), ("csv", "pot") and ("csv", None) are all
    mapped to ("po", convertcsv); the --charset and --columnorder options are
    passed through to the conversion function.
    """
    from translate.convert import convert
    target = ("po", convertcsv)
    formats = {
        ("csv", "po"): target,
        ("csv", "pot"): target,
        ("csv", None): target,
    }
    parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
    parser.add_option(
        "", "--charset", dest="charset", default=None,
        help="set charset to decode from csv files", metavar="CHARSET")
    parser.add_option(
        "", "--columnorder", dest="columnorder", default=None,
        help="specify the order and position of columns (location,source,target)")
    parser.add_duplicates_option()
    for optionname in ("charset", "columnorder"):
        parser.passthrough.append(optionname)
    parser.run(argv)
# run the command-line converter only when this file is executed as a script
if __name__ == '__main__':
    main()