1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 import copy
26 import cStringIO
27 import re
28 import urllib
29
30 from translate.lang import data
31 from translate.misc.multistring import multistring
32 from translate.misc import quote
33 from translate.misc import textwrap
34 from translate.storage import pocommon, base, poparser
35 from translate.storage.pocommon import encodingToUse
36
37 lsep = "\n#: "
"""Separator for #: entries"""
39
40
41
# Maps each escape sequence as written in a PO file to the character it denotes.
po_unescape_map = {
    "\\r": "\r",
    "\\t": "\t",
    '\\"': '"',
    '\\n': '\n',
    '\\\\': '\\',
}
# Inverse of po_unescape_map: special character -> escape sequence.
po_escape_map = dict((char, escape) for (escape, char) in po_unescape_map.items())
44
45
def escapeforpo(line):
    """Escape a single line of text for use inside a PO string.

    Every character that is a key of po_escape_map is replaced by the
    escape sequence it maps to; all other characters are copied through
    unchanged.  The text is expected to be one line (no embedded newline).

    @param line: unescaped text
    @return: the escaped text
    """
    # Every key of po_escape_map is a single character, so one pass with a
    # per-character lookup yields the same result as locating, de-duplicating
    # and sorting all the special positions before slicing around them.
    return "".join([po_escape_map.get(char, char) for char in line])
64
65
68
69
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns with whitespace left untouched (not
    replaced, tabs not expanded, nothing dropped).  Any piece after the
    first that begins with a space gives that space to the end of the
    preceding piece.

    @param line: the text to wrap
    @return: list of wrapped pieces
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False,
                           expand_tabs=False, drop_whitespace=False)
    # Continuation pieces must not start with a space: move it to the
    # end of the previous piece instead.
    for position in range(1, len(pieces)):
        piece = pieces[position]
        if piece.startswith(' '):
            pieces[position] = piece[1:]
            pieces[position - 1] += ' '
    return pieces
84
85
def quoteforpo(text):
    """Quote the given text for a PO file, returning quoted and escaped lines.

    @param text: the unescaped text, or None
    @return: list of double-quoted, escaped lines; an empty list when text
        is None
    """
    if text is None:
        return []
    quoted = []
    parts = text.split("\n")
    # Text with a newline, or a single line longer than 71 characters, is
    # written in multi-line form.
    if len(parts) > 1 or len(parts[0]) > 71:
        # The empty leader string is left out only when the text is exactly
        # one line followed by a trailing newline.
        if len(parts) != 2 or parts[1]:
            quoted.append('""')
        for part in parts[:-1]:
            wrapped = wrapline(part)
            if len(wrapped) == 0:
                # nothing came back from wrapping: only the newline remains
                quoted.append('"\\n"')
                continue
            for piece in wrapped[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    if parts[-1]:
        for piece in wrapline(parts[-1]):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
108
109
def extractpoline(line):
    """Remove the quotes from a po file line and unescape it.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of quote.extractwithoutquotes
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    result = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)
    return result[0]
117
118
121
122
def is_null(lst):
    """Tell whether a list of quoted PO lines holds no text.

    @param lst: list of quoted lines
    @return: True for an empty list or a list whose only entry is the
        empty quoted string
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
125
126
def extractstr(string):
    """Return the part of string from its first to its last double quote.

    When the string has no double quote at all, the text from the position
    reported by find() onwards is returned with a closing quote appended.

    @param string: text expected to contain a quoted section
    @return: the quoted section, including both quote characters
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    first = string.find('"')
    last = string.rfind('"')
    if last == -1:
        return string[first:] + '"'
    return string[first:last + 1]
134
135
136 -class pounit(pocommon.pounit):
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 __shallow__ = ['_store']
152
def __init__(self, source=None, encoding="UTF-8"):
    """Create a unit whose message parts are all empty.

    @param source: handed on unchanged to pocommon.pounit.__init__
    @param encoding: encoding name, passed through encodingToUse before
        being stored
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Each part gets its own fresh empty list.
    for part in ("prev_msgctxt", "prev_msgid", "prev_msgid_plural",
                 "msgctxt", "msgid", "msgid_pluralcomments",
                 "msgid_plural", "msgstr",
                 "obsoletemsgctxt", "obsoletemsgid",
                 "obsoletemsgid_pluralcomments", "obsoletemsgid_plural",
                 "obsoletemsgstr"):
        setattr(self, part, [])
    pocommon.pounit.__init__(self, source)
171
181
189
190 allcomments = property(_get_all_comments)
191
200
218
222
def setsource(self, source):
    """Set the msgid (and msgid_plural) from the given unescaped value.

    Also resets the cached rich source to None.

    @param source: an unescaped source string.
    """
    self._rich_source = None
    msgid, msgid_plural = self._set_source_vars(source)
    self.msgid = msgid
    self.msgid_plural = msgid_plural
230 source = property(getsource, setsource)
231
def _get_prev_source(self):
    """Return the unescaped previous msgid."""
    previous = self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
    return previous
235
def _set_prev_source(self, source):
    """Set the previous msgid (and previous msgid_plural) from an unescaped value.

    @param source: an unescaped source string.
    """
    prev_msgid, prev_msgid_plural = self._set_source_vars(source)
    self.prev_msgid = prev_msgid
    self.prev_msgid_plural = prev_msgid_plural
242 prev_source = property(_get_prev_source, _set_prev_source)
243
251
def settarget(self, target):
    """Set the msgstr to the given (unescaped) value.

    Byte strings are first decoded with the unit's encoding.  For a unit
    with a plural, a multistring is expanded to its strings and a plain
    string becomes a one-item list.  For a unit without a plural, a
    one-item list or dict is reduced to its item 0.

    A list or dict target is stored as a dict of quoted line lists; any
    other target is stored as a single list of quoted lines.

    @param target: the translation (string, multistring, list or dict)
    @raise ValueError: if the unit has no plural but target is a list or
        dict that does not hold exactly one item
    """
    self._rich_target = None
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if self.hasplural():
        # multistring must be tested before basestring
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) != 1:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        target = target[0]
    if isinstance(target, list):
        self.msgstr = dict([(index, quoteforpo(item)) for index, item in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(key, quoteforpo(item)) for key, item in target.items()])
    else:
        self.msgstr = quoteforpo(target)
276 target = property(gettarget, settarget)
277
def getalttrans(self):
    """Return a list of alternate units.

    The previous msgid and the current msgstr are combined into a single
    alternative unit.  An empty list is returned unless there is a
    previous source and the unit is fuzzy.
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    previous = self.prev_source
    if not (previous and self.isfuzzy()):
        return []
    alternative = type(self)(previous)
    alternative.target = self.target
    # An empty dict is attached as xmlelement; the reason is not visible
    # here (presumably for consumers that expect XLIFF-like units).
    alternative.xmlelement = dict()
    return [alternative]
293
def getnotes(self, origin=None):
    """Return comments based on origin value.

    @param origin: None for translator comments followed by automatic
        comments, "translator" for the translator comments only, or one of
        "programmer", "developer", "source code" for the automatic
        comments only
    @return: the selected comments with their markers removed and the
        final character dropped
    @raise ValueError: for any other origin
    """
    # (comment list, number of leading marker characters to cut off)
    if origin is None:
        selected = [(self.othercomments, 2), (self.automaticcomments, 3)]
    elif origin == "translator":
        selected = [(self.othercomments, 2)]
    elif origin in ["programmer", "developer", "source code"]:
        selected = [(self.automaticcomments, 3)]
    else:
        raise ValueError("Comment type not valid")
    comments = u""
    for commentlist, skip in selected:
        comments += u"".join([comment[skip:] for comment in commentlist])
    # drop the final character (the newline ending the last comment)
    return comments[:-1]
307
def addnote(self, text, origin=None, position="append"):
    """Add a note to the unit; modeled on xliff.py::xliffunit.addnote.

    @param text: the note; nothing is done when it is empty or only
        whitespace
    @param origin: "programmer", "developer" or "source code" stores the
        note as an automatic comment ("#. "); anything else stores it as a
        translator comment ("# ")
    @param position: "append" puts the note after the existing comments,
        "prepend" before them; any other value replaces the existing
        comments with the note
    """
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        existing = self.automaticcomments
        linestart = "#. "
    else:
        existing = self.othercomments
        linestart = "# "
    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        newcomments = existing + newcomments
    elif position == "prepend":
        newcomments = newcomments + existing

    if autocomments:
        self.automaticcomments = newcomments
    else:
        self.othercomments = newcomments
332
def removenotes(self):
    """Remove all the translator's notes (other comments)."""
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    self.othercomments = list()
336
338
def __deepcopy__(self, memo=None):
    """Return a copy of this unit for copy.deepcopy.

    Attributes named in __shallow__ are shared with the original; every
    other instance attribute is deep-copied.

    @param memo: the memo dictionary supplied by copy.deepcopy (maps the
        id of an original object to its copy); a fresh one is used when
        omitted
    @return: the new unit
    """
    if memo is None:
        memo = {}
    # Make an instance to serve as the copy and register it straight away
    # under the original's id, so a reference back to this unit met while
    # copying the members resolves to the copy.
    new_unit = self.__class__()
    memo[id(self)] = new_unit

    shallow = set(self.__shallow__)

    # Deep copies for everything not listed as shallow; memo is passed on
    # so objects shared between members stay shared in the copy.
    for key, value in self.__dict__.items():
        if key not in shallow:
            setattr(new_unit, key, copy.deepcopy(value, memo))

    # Plain references for the shallow members.
    for key in shallow:
        setattr(new_unit, key, getattr(self, key))

    return new_unit
355
def copy(self):
    """Return a deep copy of this unit (made with copy.deepcopy)."""
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    duplicate = copy.deepcopy(self)
    return duplicate
358
364
371
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merge otherpo (with the same msgid) into this unit.

    @param otherpo: the unit to merge in; when it is not a pounit the
        merge is delegated to the parent class
    @param overwrite: replace a non-blank self.msgstr only if True
    @param comments: merge comments only if True
    @param authoritative: when True, automatic comments, msgid comments
        and source comments of otherpo are not merged in
    """
    # NOTE(review): block nesting was reconstructed from a listing without
    # indentation; confirm which calls sit under "if not authoritative".

    def mergelists(list1, list2, split=False):
        """Merge the items of list2 into list1 (modified in place)."""
        # If either list holds unicode, decode the byte strings of both.
        itemtypes = [type(item) for item in list2] + [type(item) for item in list1]
        if unicode in itemtypes:
            for items in (list1, list2):
                for position, item in enumerate(items):
                    if isinstance(item, str):
                        items[position] = item.decode("utf-8")

        # Find out which line ending list1 uses (the last match wins).
        if list1 and list1[0]:
            lineend = ""
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
        else:
            lineend = "\n"

        if split:
            # Compare whitespace-separated entries rather than whole lines.
            entries1 = []
            entries2 = []
            prefix = "#"
            for item in list1:
                entries1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                entries2.extend(item.split()[1:])
                prefix = item.split()[0]
            list1.extend(["%s %s%s" % (prefix, entry, lineend) for entry in entries2 if not entry in entries1])
            return

        # Whole-line merge that conforms to the line ending of list1.
        if list1 == list2:
            return
        for item in list2:
            if lineend:
                item = item.rstrip() + lineend
            # Lines already present are skipped, except very short ones.
            if item not in list1 or len(item) < 5:
                list1.append(item)

    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return

    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)

    if not self.istranslated() or overwrite:
        # Strip an embedded "_: ..." comment from the incoming target.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
        self.target = otherpo.target
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
    elif self.target != otherpo.target:
        self.markfuzzy()
448
450
451
def isheader(self):
    """Tell whether this unit is a header.

    @return: True when msgid is null, msgstr is not null, there are no
        msgid comments and msgctxt is null
    """
    if not is_null(self.msgid):
        return False
    if is_null(self.msgstr):
        return False
    if self.msgidcomments != []:
        return False
    return is_null(self.msgctxt)
456
def isblank(self):
    """Tell whether this unit is blank.

    @return: False for a header or a unit with msgid comments; otherwise
        True when msgid and msgstr both have length 0 and msgctxt is null
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    if self.isheader() or len(self.msgidcomments):
        return False
    no_text = (self._msgidlen() == 0) and (self._msgstrlen() == 0)
    if no_text and is_null(self.msgctxt):
        return True
    return False
463
464
465
466
471
479
493
499
507
510
516
519
def makeobsolete(self):
    """Make this unit obsolete.

    Sets the obsolete flag, moves each non-empty message part into its
    obsolete counterpart and empties the source and automatic comments.
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    self.obsolete = True
    # The context is copied to its obsolete slot but also kept in place.
    if self.msgctxt:
        self.obsoletemsgctxt = self.msgctxt
    # Every other non-empty part is moved and the live part emptied.
    for part in ("msgid", "msgidcomments", "msgid_plural", "msgstr"):
        value = getattr(self, part)
        if value:
            setattr(self, "obsolete" + part, value)
            setattr(self, part, [])
    self.sourcecomments = []
    self.automaticcomments = []
539
def resurrect(self):
    """Make an obsolete unit normal again.

    Clears the obsolete flag and moves every non-empty obsolete part back
    into its live attribute, emptying the obsolete slot afterwards.
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    self.obsolete = False
    if self.obsoletemsgctxt:
        # The context goes back into msgctxt (not msgid, which would leave
        # context text in the source when there is no obsolete msgid).
        self.msgctxt = self.obsoletemsgctxt
        self.obsoletemsgctxt = []
    if self.obsoletemsgid:
        self.msgid = self.obsoletemsgid
        self.obsoletemsgid = []
    if self.obsoletemsgidcomments:
        self.msgidcomments = self.obsoletemsgidcomments
        self.obsoletemsgidcomments = []
    if self.obsoletemsgid_plural:
        self.msgid_plural = self.obsoletemsgid_plural
        self.obsoletemsgid_plural = []
    if self.obsoletemsgstr:
        self.msgstr = self.obsoletemsgstr
        # Empty the real obsolete slot so the translation is not kept twice.
        self.obsoletemsgstr = []
558
def hasplural(self):
    """Return whether this pounit contains plural strings.

    @return: True when msgid_plural holds at least one line
    """
    plural_lines = self.msgid_plural
    return len(plural_lines) > 0
562
565
def _getmsgpartstr(self, partname, partlines, partcomments=""):
    """Render one message part as PO text.

    @param partname: the keyword that starts the part (e.g. "msgid")
    @param partlines: list of quoted lines, or a dict of such lists, in
        which case each entry is rendered as partname[key] in key order
    @param partcomments: quoted comment lines that go with the part
    @return: the rendered text, each line ending in a newline
    """
    # NOTE(review): the def line is absent from the listing; the default
    # of partcomments is reconstructed and should be confirmed.
    if isinstance(partlines, dict):
        rendered = []
        for partkey in sorted(partlines.keys()):
            rendered.append(self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments))
        return "".join(rendered)

    text = partname + " "
    firstrest = 0
    if len(partcomments) > 0:
        if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
            # a blank leader line goes before the comment
            text += partlines[0] + '\n'
            # it is only consumed when more lines follow it
            if len(partlines) > 1:
                firstrest += 1
        else:
            # the comments have to start on a line of their own
            text += '""\n'

        # several comments are folded into a single one
        if len(partcomments) > 1:
            bodies = []
            for comment in partcomments:
                comment = unquotefrompo([comment])
                if comment.startswith("_:"):
                    comment = comment[len("_:"):]
                if comment.endswith("\\n"):
                    comment = comment[:-len("\\n")]
                bodies.append(comment)
            partcomments = quoteforpo("_:%s" % "".join(bodies))

        text += "\n".join(partcomments)
        text = quote.rstripeol(text)
    elif len(partlines) > 0:
        # no comments: the first line sits right after the keyword
        text += partlines[0]
        firstrest = 1
    else:
        text += '""'
    text += '\n'

    # the remaining lines, one per output line
    text += "".join([partline + '\n' for partline in partlines[firstrest:]])
    return text
608
def _encodeifneccessary(self, output):
    """Encode unicode strings and return other strings unchanged.

    @param output: the text to emit
    @return: output encoded with the unit's encoding ("UTF-8" when the
        unit has none, passed through encodingToUse) if it is unicode;
        otherwise output itself
    """
    if not isinstance(output, unicode):
        return output
    encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
    return output.encode(encoding)
615
def __str__(self):
    """Convert the unit to a string, encoding unicode output if necessary."""
    return self._encodeifneccessary(self._getoutput())
620
def _getoutput(self):
    """Return this po element as a string."""

    def add_prev_msgid_lines(lines, prefix, header, var):
        """Append the lines of one previous-message part, if it has any."""
        if len(var) == 0:
            return
        lines.append("%s %s %s\n" % (prefix, header, var[0]))
        lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

    def add_prev_msgid_info(lines, prefix):
        """Append previous msgctxt, msgid and msgid_plural, in that order."""
        for header, var in (('msgctxt', self.prev_msgctxt),
                            ('msgid', self.prev_msgid),
                            ('msgid_plural', self.prev_msgid_plural)):
            add_prev_msgid_lines(lines, prefix, header, var)

    lines = list(self.othercomments)

    if self.isobsolete():
        lines.extend(self.typecomments)
        obsoletelines = []
        add_prev_msgid_info(obsoletelines, prefix="#~|")
        if self.obsoletemsgctxt:
            obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
        obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
        if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
            obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
        obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
        # continuation lines of a multi-line part need the marker as well
        lines.extend([obsoleteline.replace('\n"', '\n#~ "') for obsoleteline in obsoletelines])
        return u"".join(lines)

    # Without a msgid only the comments gathered so far are written,
    # unless the unit is a header or has a context or source comments.
    if is_null(self.msgid):
        if not (self.isheader() or self.getcontext() or self.sourcecomments):
            return u"".join(lines)

    lines.extend(self.automaticcomments)
    lines.extend(self.sourcecomments)
    lines.extend(self.typecomments)
    add_prev_msgid_info(lines, prefix="#|")
    if self.msgctxt:
        lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
    lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
    return u"".join(lines)
668
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit.

    Each source comment has its line ending and first three characters
    removed and is split on whitespace; every resulting entry is passed
    through urllib.unquote_plus.

    @rtype: List
    @return: A list of the locations with '#: ' stripped
    """
    entries = []
    for sourcecomment in self.sourcecomments:
        entries.extend(quote.rstripeol(sourcecomment)[3:].split())
    return [urllib.unquote_plus(entry) for entry in entries]
682
def addlocation(self, location):
    """Add a location to sourcecomments in the PO unit.

    A location that contains a space is passed through urllib.quote_plus
    before it is stored.

    @param location: Text location e.g. 'file.c:23' does not include #:
    @type location: String
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    if " " in location:
        location = urllib.quote_plus(location)
    self.sourcecomments.append("#: %s\n" % location)
693
704
710
711 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
712
def getcontext(self):
    """Get the message context.

    @return: the unquoted msgctxt followed by the extracted msgid comments
    """
    context = unquotefrompo(self.msgctxt)
    msgidcomments = self._extract_msgidcomments()
    return context + msgidcomments
716
def setcontext(self, context):
    """Set the message context.

    @param context: the context text; it is forced to unicode and stored
        in msgctxt as quoted PO lines
    """
    self.msgctxt = quoteforpo(data.forceunicode(context))
720
def getid(self):
    """Return a unique identifier for this unit.

    @return: the source alone when there is no context; otherwise the
        source combined with the context
    """
    context = self.getcontext()
    source = self.source
    if self.msgidcomments:
        # units with msgid comments use the "_: context" + newline form
        return u"_: %s\n%s" % (context, source)
    if context:
        # context and source are joined by the character written as \04
        return u"%s\04%s" % (context, source)
    return source
735
736
737 -class pofile(pocommon.pofile):
738 """A .po file containing various units"""
739 UnitClass = pounit
740
def parse(self, input):
    """Parse the given file or file source string.

    The filename is taken from input.name when input has one; otherwise
    it is set to '' unless a non-empty filename is already present.  Any
    existing units are discarded before parsing.

    @param input: a file-like object, or a str holding the file contents
    """
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    if hasattr(input, 'name'):
        self.filename = input.name
    elif not getattr(self, 'filename', ''):
        self.filename = ''
    # a plain string is wrapped so that it can be read like a file
    if isinstance(input, str):
        input = cStringIO.StringIO(input)

    self.units = []
    state = poparser.ParseState(input, pounit)
    poparser.parse_units(state, self)
754
755
756
def removeduplicates(self, duplicatestyle="merge"):
    """Make sure each msgid is unique; merge comments etc from duplicates into original.

    @param duplicatestyle: "merge" merges a duplicate into the first unit
        with the same id (a duplicate with an empty id is kept and gets a
        msgid comment built from its locations); "msgctxt" keeps the
        duplicates and appends their locations to their msgctxt.  With any
        other value duplicates are dropped.
    """
    # NOTE(review): the def line is absent from the listing; the name and
    # the default of duplicatestyle are reconstructed - confirm.
    id_dict = {}
    uniqueunits = []
    # Units that have already been given a disambiguating marker.  This is
    # a list, so membership tests are linear.
    markedpos = []

    def addcomment(thepo):
        """Give thepo a msgid comment built from its locations."""
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
        markedpos.append(thepo)

    for thepo in self.units:
        unitid = thepo.getid()
        if thepo.isheader() and not thepo.getlocations():
            # a header without locations is always kept as it is
            uniqueunits.append(thepo)
        elif unitid in id_dict:
            if duplicatestyle == "merge":
                if unitid:
                    id_dict[unitid].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = id_dict[unitid]
                if origpo not in markedpos:
                    origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
                    # Record the original itself, so that a third or later
                    # duplicate does not add its locations to it again.
                    markedpos.append(origpo)
                thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
                uniqueunits.append(thepo)
        else:
            if not unitid:
                if duplicatestyle == "merge":
                    addcomment(thepo)
                else:
                    thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
            id_dict[unitid] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits
798
def __str__(self):
    """Convert the file to a string, encoding unicode output.

    The output is encoded with the file's encoding ("UTF-8" when it has
    none).  If that fails, the header is updated to declare UTF-8, the
    file and all its units are switched to UTF-8 and the output is
    generated again and encoded as UTF-8.
    """
    output = self._getoutput()
    if not isinstance(output, unicode):
        return output
    try:
        return output.encode(getattr(self, "_encoding", "UTF-8"))
    except UnicodeEncodeError:
        self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
        self._encoding = "UTF-8"
        for unit in self.units:
            unit._encoding = "UTF-8"
        return self._getoutput().encode("UTF-8")
813
def _getoutput(self):
    """Convert the units back to text.

    @return: the output of every unit, each followed by a newline, with
        trailing whitespace removed and, when anything is left, a single
        newline at the end
    """
    text = u"".join([unit._getoutput() + u"\n" for unit in self.units]).rstrip()
    # non-empty output ends in exactly one newline
    if text:
        text += u"\n"
    return text
825
837
839 """decode any non-unicode strings in lines with self._encoding"""
840 newlines = []
841 for line in lines:
842 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
843 try:
844 line = line.decode(self._encoding)
845 except UnicodeError, e:
846 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
847 newlines.append(line)
848 return newlines
849
def unit_iter(self):
    """Yield every unit that is neither a header nor obsolete."""
    # NOTE(review): the def line is absent from the listing; the name is
    # reconstructed and should be confirmed against callers.
    for unit in self.units:
        if unit.isheader() or unit.isobsolete():
            continue
        yield unit
854