Package translate :: Package storage :: Module fpo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  from translate.misc.multistring import multistring 
 32  from translate.lang import data 
 33  from translate.storage import pocommon, base, cpo 
 34  import re 
 35  import copy 
 36  import cStringIO 
 37   
 38  lsep = " " 
 39  """Separator for #: entries""" 
 40   
 41  basic_header = r'''msgid "" 
 42  msgstr "" 
 43  "Content-Type: text/plain; charset=UTF-8\n" 
 44  "Content-Transfer-Encoding: 8bit\n" 
 45  ''' 
 46   
def encodingToUse(encoding):
    """Return the encoding to use, falling back to utf-8 when none is given.

    A missing encoding (None) or the gettext template placeholder "CHARSET"
    is replaced by 'utf-8'.  Any other value is returned unchanged; it is
    NOT checked against the codecs known to the Python runtime.

    @param encoding: Encoding name, "CHARSET" or None
    @return: 'utf-8' for None/"CHARSET", otherwise the value passed in
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
53
class pounit(pocommon.pounit):
    """A PO unit that keeps its data in plain Python attributes.

    Conversion to text is delegated to cpo (see __str__).
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit with the given source, no comments and an empty target.

        @param source: The msgid; passed on to pocommon.pounit.__init__
        @param encoding: Encoding name; None/"CHARSET" are replaced by utf-8
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        self.target = u""

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        Nothing is touched unless blankall is true, in which case all the
        comment lists and the msgid comment are reset to empty values.
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Returns the stored source (msgid)."""
        return self._source

    def setsource(self, source):
        """Sets the source; falsy values are stored as an empty unicode string.

        The value is passed through data.forceunicode.  A multistring or
        unicode result is stored as is; anything else is wrapped in a
        multistring.
        """
        self._rich_source = None
#        assert isinstance(source, unicode)
        source = data.forceunicode(source or u"")
        source = source or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            #unicode, list, dict
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        For a plural unit the value is stored as a multistring.  For a
        non-plural unit a one-element list/dict is unwrapped via target[0].

        @raise ValueError: if the unit has no plural but target is a
            list/dict whose length is not 1
        """
        self._rich_target = None
#        assert isinstance(target, unicode)
#        target = data.forceunicode(target)
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                #unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                self._target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        With origin None the translator comments are followed by the
        automatic comments.

        @raise ValueError: if origin is not one of the accepted values
        """
        if origin is None:
            # Join both groups in one go so the last translator line and the
            # first automatic line are separated by a newline as well.
            comments = u"\n".join(self.othercomments + self.automaticcomments)
        elif origin == "translator":
            comments = u"\n".join(self.othercomments)
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"\n".join(self.automaticcomments)
        else:
            raise ValueError("Comment type not valid")
        return comments

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; one trailing newline is dropped and the rest
            is split into lines.  Empty or whitespace-only text is ignored.
        @param origin: "programmer", "developer" or "source code" selects the
            automatic comments; anything else selects the translator comments
        @param position: "append" adds after the existing lines, any other
            value puts the new lines first
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        # Must have a value on every path: it is read below when prepending.
        autocomments = origin in ["programmer", "developer", "source code"]
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        if text.endswith(u'\n'):
            text = text[:-1]
        lines = text.split(u"\n")
        if position == "append":
            commentlist.extend(lines)
        else:
            newcomments = lines + commentlist
            if autocomments:
                self.automaticcomments = newcomments
            else:
                self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy the unit, sharing the attributes named in __shallow__.

        @param memo: The memo dictionary supplied by copy.deepcopy
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that anything referring
        # back to this unit resolves to the copy instead of being copied
        # again.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Returns a deep copy of this unit (see __deepcopy__)."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the source with surrounding whitespace stripped.

        For plural units the stripped lengths of all source strings are
        added together.
        """
        if self.hasplural():
            return len("".join([string.strip() for string in self.source.strings]))
        else:
            return len(self.source.strip())

    def _msgstrlen(self):
        """Length of the target with surrounding whitespace stripped.

        For plural units the stripped lengths of all target strings are
        added together.
        """
        if self.hasplural():
            return len("".join([string.strip() for string in self.target.strings]))
        else:
            return len(self.target.strip())

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        If otherpo is not a pounit of this module, the merge is delegated to
        the parent class.
        """

        def mergelists(list1, list2, split=False):
            """Extend list1 in place with the entries of list2 it lacks."""
            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
#                mergelists(self.msgidcomments, otherpo.msgidcomments) #XXX?
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """True when the unit has an empty id but a non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """True when source, target and msgctxt are all empty.

        A header, or a unit carrying a msgid comment, is never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and len(self._msgctxt) == 0:
            return True
        return False

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
#        raise DeprecationWarning
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop lines that are left with nothing but the "#," marker
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def istranslated(self):
        """True if the parent class reports translated and the unit is not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """True unless the unit is a header, blank or obsolete."""
        return not (self.isheader() or self.isblank() or self.obsolete)

    def isfuzzy(self):
        """True if the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Adds (or, with present=False, removes) the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Returns the obsolete flag."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Not supported: units are not parsed individually.

        @raise DeprecationWarning: always
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return self.sourcecomments

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        The text is split on whitespace and each part is added separately.

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        If text is given (and non-empty) the comment is extracted from it
        instead of being taken from the unit.

        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        else:
            return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        unit_id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            #context
            newunit.msgidcomment = unit._extract_msgidcomments()
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            if unit.isobsolete():
                newunit.makeobsolete()
            newunit.markfuzzy(unit.isfuzzy())
            # only the first format flag found is carried over
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)
    buildfromunit = classmethod(buildfromunit)

414 -class pofile(pocommon.pofile):
415 """A .po file containing various units""" 416 UnitClass = pounit 417
418 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
419 """Construct a pofile, optionally reading in from inputfile. 420 encoding can be specified but otherwise will be read from the PO header""" 421 self.UnitClass = unitclass 422 pocommon.pofile.__init__(self, unitclass=unitclass) 423 self.units = [] 424 self.filename = '' 425 self._encoding = encodingToUse(encoding) 426 if inputfile is not None: 427 self.parse(inputfile)
428
429 - def changeencoding(self, newencoding):
430 """Deprecated: changes the encoding on the file.""" 431 # This should not be here but in poheader. It also shouldn't mangle the 432 # header itself, but use poheader methods. All users are removed, so 433 # we can deprecate after one release. 434 raise DeprecationWarning 435 436 self._encoding = encodingToUse(newencoding) 437 if not self.units: 438 return 439 header = self.header() 440 if not header or header.isblank(): 441 return 442 charsetline = None 443 headerstr = header.target 444 for line in headerstr.split("\n"): 445 if not ":" in line: 446 continue 447 key, value = line.strip().split(":", 1) 448 if key.strip() != "Content-Type": 449 continue 450 charsetline = line 451 if charsetline is None: 452 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 453 else: 454 charset = re.search("charset=([^ ]*)", charsetline) 455 if charset is None: 456 newcharsetline = charsetline 457 if not newcharsetline.strip().endswith(";"): 458 newcharsetline += ";" 459 newcharsetline += " charset=%s" % self._encoding 460 else: 461 charset = charset.group(1) 462 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 463 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 464 header.target = headerstr
465
466 - def _build_self_from_cpo(self):
467 """Builds up this store from the internal cpo store. 468 469 A user must ensure that self._cpo_store already exists, and that it is 470 deleted afterwards.""" 471 for unit in self._cpo_store.units: 472 self.addunit(self.UnitClass.buildfromunit(unit)) 473 self._encoding = self._cpo_store._encoding
474
475 - def _build_cpo_from_self(self):
476 """Builds the internal cpo store from the data in self. 477 478 A user must ensure that self._cpo_store does not exist, and should 479 delete it after using it.""" 480 self._cpo_store = cpo.pofile() 481 for unit in self.units: 482 if not unit.isblank(): 483 self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit)) 484 if not self._cpo_store.header(): 485 #only add a temporary header 486 self._cpo_store.makeheader(charset="utf-8", encoding="8bit")
487 488
489 - def parse(self, input):
490 """Parses the given file or file source string.""" 491 try: 492 if hasattr(input, 'name'): 493 self.filename = input.name 494 elif not getattr(self, 'filename', ''): 495 self.filename = '' 496 tmp_header_added = False 497 # if isinstance(input, str) and '"Content-Type: text/plain; charset=' not in input[:200]: 498 # input = basic_header + input 499 # tmp_header_added = True 500 self._cpo_store = cpo.pofile(input) 501 self._build_self_from_cpo() 502 del self._cpo_store 503 if tmp_header_added: 504 self.units = self.units[1:] 505 except Exception, e: 506 raise base.ParseError(e)
507
508 - def removeduplicates(self, duplicatestyle="merge"):
509 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 510 # TODO: can we handle consecutive calls to removeduplicates()? What 511 # about files already containing msgctxt? - test 512 id_dict = {} 513 uniqueunits = [] 514 # TODO: this is using a list as the pos aren't hashable, but this is slow. 515 # probably not used frequently enough to worry about it, though. 516 markedpos = [] 517 def addcomment(thepo): 518 thepo.msgidcomment = " ".join(thepo.getlocations()) 519 markedpos.append(thepo)
520 for thepo in self.units: 521 id = thepo.getid() 522 if thepo.isheader() and not thepo.getlocations(): 523 # header msgids shouldn't be merged... 524 uniqueunits.append(thepo) 525 elif id in id_dict: 526 if duplicatestyle == "merge": 527 if id: 528 id_dict[id].merge(thepo) 529 else: 530 addcomment(thepo) 531 uniqueunits.append(thepo) 532 elif duplicatestyle == "msgctxt": 533 origpo = id_dict[id] 534 if origpo not in markedpos: 535 origpo._msgctxt += " ".join(origpo.getlocations()) 536 markedpos.append(thepo) 537 thepo._msgctxt += " ".join(thepo.getlocations()) 538 uniqueunits.append(thepo) 539 else: 540 if not id: 541 if duplicatestyle == "merge": 542 addcomment(thepo) 543 else: 544 thepo._msgctxt += u" ".join(thepo.getlocations()) 545 id_dict[id] = thepo 546 uniqueunits.append(thepo) 547 self.units = uniqueunits
548
549 - def __str__(self):
550 """Convert to a string. double check that unicode is handled somehow here""" 551 self._cpo_store = cpo.pofile(encoding=self._encoding) 552 self._build_cpo_from_self() 553 output = str(self._cpo_store) 554 del self._cpo_store 555 return output
556