1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Base classes for storage interfaces.
22
23 @organization: Zuza Software Foundation
24 @copyright: 2006-2009 Zuza Software Foundation
25 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
26 """
27
28 import logging
29 try:
30 import cPickle as pickle
31 except ImportError:
32 import pickle
33 from exceptions import NotImplementedError
34
35 import translate.i18n
36 from translate.misc.multistring import multistring
37 from translate.misc.typecheck import accepts, Self, IsOneOf
38 from translate.storage.placeables import StringElem, general, parse as rich_parse
39 from translate.storage.workflow import StateEnum as states
40
41
43 """Forces derived classes to override method."""
44
45 if type(method.im_self) == type(baseclass):
46
47 actualclass = method.im_self
48 else:
49 actualclass = method.im_class
50 if actualclass != baseclass:
51 raise NotImplementedError(
52 "%s does not reimplement %s as required by %s" % \
53 (actualclass.__name__, method.__name__, baseclass.__name__))
54
55
57
59 self.inner_exc = inner_exc
60
62 return repr(self.inner_exc)
63
64
66 """Base class for translation units.
67
68 Our concept of a I{translation unit} is influenced heavily by XLIFF:
69 U{http://www.oasis-open.org/committees/xliff/documents/xliff-specification.htm}
70
71 As such most of the method- and variable names borrows from XLIFF
72 terminology.
73
74 A translation unit consists of the following:
75 - A I{source} string. This is the original translatable text.
76 - A I{target} string. This is the translation of the I{source}.
77 - Zero or more I{notes} on the unit. Notes would typically be some
78 comments from a translator on the unit, or some comments originating
79 from the source code.
80 - Zero or more I{locations}. Locations indicate where in the original
81 source code this unit came from.
82 - Zero or more I{errors}. Some tools (eg. L{pofilter<filters.pofilter>})
83 can run checks on translations and produce error messages.
84
85 @group Source: *source*
86 @group Target: *target*
87 @group Notes: *note*
88 @group Locations: *location*
89 @group Errors: *error*
90 """
91
92 rich_parsers = []
93 """A list of functions to use for parsing a string into a rich string
94 tree."""
95
96
97 S_OBSOLETE = states.OBSOLETE
98 S_EMPTY = states.EMPTY
99 S_NEEDS_WORK = states.NEEDS_WORK
100 S_REJECTED = states.REJECTED
101 S_NEEDS_REVIEW = states.NEEDS_REVIEW
102 S_UNREVIEWED = states.UNREVIEWED
103 S_FINAL = states.FINAL
104
105 STATE = {
106 S_OBSOLETE: (states.OBSOLETE, states.EMPTY),
107 S_EMPTY: (states.EMPTY, states.NEEDS_WORK),
108 S_NEEDS_WORK: (states.NEEDS_WORK, states.REJECTED),
109 S_REJECTED: (states.REJECTED, states.NEEDS_REVIEW),
110 S_NEEDS_REVIEW: (states.NEEDS_REVIEW, states.UNREVIEWED),
111 S_UNREVIEWED: (states.UNREVIEWED, states.FINAL),
112 S_FINAL: (states.FINAL, states.MAX),
113 }
114 """
115 Default supported states:
116 * obsolete: The unit is not to be used.
117 * empty: The unit has not been translated before.
118 * needs work: Some translation has been done, but is not complete.
119 * rejected: The unit has been reviewed, but was rejected.
120 * needs review: The unit has been translated, but review was requested.
121 * unreviewed: The unit has been translated, but not reviewed.
122 * final: The unit is translated, reviewed and accepted.
123 """
124
126 """Constructs a TranslationUnit containing the given source string."""
127 self.notes = ""
128 self._store = None
129 self.source = source
130 self._target = None
131 self._rich_source = None
132 self._rich_target = None
133 self._state_n = 0
134
136 """Compares two TranslationUnits.
137
138 @type other: L{TranslationUnit}
139 @param other: Another L{TranslationUnit}
140 @rtype: Boolean
141 @return: Returns True if the supplied TranslationUnit equals this unit.
142 """
143 return self.source == other.source and self.target == other.target
144
146 """Converts to a string representation that can be parsed back using
147 L{parsestring()}."""
148
149 store = getattr(self, "_store", None)
150 self._store = None
151 dump = pickle.dumps(self)
152 self._store = store
153 return dump
154
def rich_to_multistring(cls, elem_list):
    """Convert a "rich" string tree to a C{multistring}:

        >>> from translate.storage.placeables.interfaces import X
        >>> rich = [StringElem(['foo', X(id='xxx', sub=[' ']), 'bar'])]
        >>> TranslationUnit.rich_to_multistring(rich)
        multistring(u'foo bar')

    @param elem_list: Iterable of rich string trees; every item is
        flattened with C{unicode()}.
    @return: A C{multistring} holding one string per item of C{elem_list}.
    """
    return multistring([unicode(elem) for elem in elem_list])
164 rich_to_multistring = classmethod(rich_to_multistring)
165
def multistring_to_rich(self, mulstring):
    """Convert a multistring to a list of "rich" string trees:

        >>> target = multistring([u'foo', u'bar', u'baz'])
        >>> TranslationUnit.multistring_to_rich(target)
        [<StringElem([<StringElem([u'foo'])>])>,
         <StringElem([<StringElem([u'bar'])>])>,
         <StringElem([<StringElem([u'baz'])>])>]

    @param mulstring: A C{multistring}, or any other value accepted by
        C{rich_parse}.
    @return: A list with one parsed tree per plural form; a value that is
        not a C{multistring} yields a one-element list.
    """
    if isinstance(mulstring, multistring):
        # One tree per plural form, all parsed with this unit's parsers.
        return [rich_parse(s, self.rich_parsers) for s in mulstring.strings]
    return [rich_parse(mulstring, self.rich_parsers)]
178
def setsource(self, source):
    """Sets the source string to the given value.

    The cached rich source is reset to C{None} as part of the assignment.

    @param source: The new source value, stored as C{self._source}.
    """
    self._rich_source = None
    self._source = source
183 source = property(lambda self: self._source, setsource)
184
def settarget(self, target):
    """Sets the target string to the given value.

    The cached rich target is reset to C{None} as part of the assignment.

    @param target: The new target value, stored as C{self._target}.
    """
    self._rich_target = None
    self._target = target
189 target = property(lambda self: self._target, settarget)
190
195
def _set_rich_source(self, value):
    """Set the rich source from a sequence of L{StringElem} trees.

    The flattened C{multistring} form is assigned to C{self.source} when it
    differs from the current source.

    @param value: Non-empty sequence whose first item is a L{StringElem}.
    @raise ValueError: If C{value} has no C{__iter__}, is empty, or its
        first item is not a L{StringElem}.
    """
    if not hasattr(value, '__iter__'):
        raise ValueError('value must be iterable')
    if len(value) < 1:
        raise ValueError('value must have at least one element.')
    if not isinstance(value[0], StringElem):
        raise ValueError('value[0] must be of type StringElem.')
    self._rich_source = list(value)
    multi = self.rich_to_multistring(value)
    # NOTE(review): assigning self.source goes through setsource(), which
    # resets _rich_source to None, so the cache stored above is dropped
    # whenever the source actually changes - confirm this is intended.
    if self.source != multi:
        self.source = multi
207 rich_source = property(_get_rich_source, _set_rich_source)
208 """ @see: rich_to_multistring
209 @see: multistring_to_rich"""
210
215
def _set_rich_target(self, value):
    """Set the rich target from a sequence of L{StringElem} trees.

    The flattened C{multistring} form is always assigned to C{self.target}.

    @param value: Non-empty sequence whose first item is a L{StringElem}.
    @raise ValueError: If C{value} has no C{__iter__}, is empty, or its
        first item is not a L{StringElem}.
    """
    if not hasattr(value, '__iter__'):
        raise ValueError('value must be iterable')
    if len(value) < 1:
        raise ValueError('value must have at least one element.')
    if not isinstance(value[0], StringElem):
        raise ValueError('value[0] must be of type StringElem.')
    self._rich_target = list(value)
    # NOTE(review): assigning self.target goes through settarget(), which
    # resets _rich_target to None, so the cache stored above does not
    # survive this call in the base class - confirm this is intended.
    self.target = self.rich_to_multistring(value)
225 rich_target = property(_get_rich_target, _set_rich_target)
226 """ @see: rich_to_multistring
227 @see: multistring_to_rich"""
228
230 """Returns the length of the target string.
231
232 @note: Plural forms might be combined.
233 @rtype: Integer
234 """
235 length = len(self.target or "")
236 strings = getattr(self.target, "strings", [])
237 if strings:
238 length += sum([len(pluralform) for pluralform in strings[1:]])
239 return length
240
def getid(self):
    """A unique identifier for this unit.

    @rtype: string
    @return: an identifier for this unit that is unique in the store

    Derived classes should override this in a way that guarantees a unique
    identifier for each unit in the store. This base implementation simply
    returns the source.
    """
    return self.source
251
253 """Sets the unique identified for this unit.
254
255 only implemented if format allows ids independant from other
256 unit properties like source or context"""
257 pass
258
def getlocations(self):
    """A list of source code locations.

    This base implementation always returns a new empty list.

    @note: Shouldn't be implemented if the format doesn't support it.
    @rtype: List
    """
    return []
266
268 """Add one location to the list of locations.
269
270 @note: Shouldn't be implemented if the format doesn't support it.
271 """
272 pass
273
def addlocations(self, location):
    """Add a location or a list of locations.

    @param location: A single location, or a C{list} of locations. Only
        an actual C{list} is unpacked; any other value is handed to
        L{addlocation()} as one location.
    @note: Most classes shouldn't need to implement this,
           but should rather implement L{addlocation()}.
    @warning: This method might be removed in future.
    """
    if isinstance(location, list):
        for item in location:
            self.addlocation(item)
    else:
        self.addlocation(location)
286
def getcontext(self):
    """Get the message context.

    @return: The empty string in this base implementation.
    """
    return ""
290
def setcontext(self, context):
    """Set the message context.

    This base implementation ignores C{context} and does nothing.
    """
    pass
294
296 """Returns all notes about this unit.
297
298 It will probably be freeform text or something reasonable that can be
299 synthesised by the format.
300 It should not include location comments (see L{getlocations()}).
301 """
302 return getattr(self, "notes", "")
303
def addnote(self, text, origin=None, position="append"):
    """Adds a note (comment).

    @type text: string
    @param text: Usually just a sentence or two.
    @type origin: string
    @param origin: Specifies who/where the comment comes from.
                   Origin can be one of the following text strings:
                     - 'translator'
                     - 'developer', 'programmer', 'source code' (synonyms)
                   This base implementation does not use it.
    @type position: string
    @param position: With C{"append"} (the default) the text is added on a
                     new line after any existing notes; any other value
                     replaces the existing notes with C{text}.
    """
    if position == "append" and getattr(self, "notes", None):
        self.notes += '\n' + text
    else:
        # Nothing to append to, or a non-append position: overwrite.
        self.notes = text
319
321 """Remove all the translator's notes."""
322 self.notes = u''
323
def adderror(self, errorname, errortext):
    """Adds an error message to this unit.

    This base implementation does nothing with the message.

    @type errorname: string
    @param errorname: A single word to id the error.
    @type errortext: string
    @param errortext: The text describing the error.
    """
    pass
333
335 """Get all error messages.
336
337 @rtype: Dictionary
338 """
339 return {}
340
342 """Marks the unit to indicate whether it needs review.
343
344 @keyword needsreview: Defaults to True.
345 @keyword explanation: Adds an optional explanation as a note.
346 """
347 pass
348
350 """Indicates whether this unit is translated.
351
352 This should be used rather than deducing it from .target,
353 to ensure that other classes can implement more functionality
354 (as XLIFF does).
355 """
356 return bool(self.target) and not self.isfuzzy()
357
def istranslatable(self):
    """Indicates whether this unit can be translated.

    This should be used to distinguish real units for translation from
    header, obsolete, binary or other blank units.

    @rtype: Boolean
    @return: C{True} when the unit has a non-empty source.
    """
    return bool(self.source)
365
def isfuzzy(self):
    """Indicates whether this unit is fuzzy.

    @rtype: Boolean
    @return: Always C{False} in this base implementation.
    """
    return False
369
371 """Marks the unit as fuzzy or not."""
372 pass
373
375 """indicate whether a unit is obsolete"""
376 return False
377
379 """Make a unit obsolete"""
380 pass
381
383 """Indicates whether this unit is a header."""
384 return False
385
387 """Indicates whether this unit needs review."""
388 return False
389
391 """Used to see if this unit has no source or target string.
392
393 @note: This is probably used more to find translatable units,
394 and we might want to move in that direction rather and get rid of this.
395 """
396 return not (self.source or self.target)
397
def hasplural(self):
    """Tells whether or not this specific unit has plural strings.

    @rtype: Boolean
    @return: Always C{False} in this base implementation.
    """
    return False
402
405
408
409 - def merge(self, otherunit, overwrite=False, comments=True,
410 authoritative=False):
414
416 """Iterator that only returns this unit."""
417 yield self
418
420 """This unit in a list."""
421 return [self]
422
def buildfromunit(cls, unit):
    """Build a native unit from a foreign unit, preserving as much
    information as possible.

    @param unit: The unit to convert.
    @return: C{unit.copy()} when C{unit} is exactly of type C{cls} and has
        a callable C{copy}; otherwise a new C{cls} instance carrying the
        source, target, fuzzy flag, locations and notes of C{unit}.
    """
    # Exact type match only: a subclass instance is rebuilt, not copied.
    if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
        return unit.copy()
    newunit = cls(unit.source)
    newunit.target = unit.target
    newunit.markfuzzy(unit.isfuzzy())
    locations = unit.getlocations()
    if locations:
        newunit.addlocations(locations)
    notes = unit.getnotes()
    if notes:
        newunit.addnote(notes)
    return newunit
438 buildfromunit = classmethod(buildfromunit)
439
440 xid = property(lambda self: None, lambda self, value: None)
441 rid = property(lambda self: None, lambda self, value: None)
442
444 if n is None:
445 n = self.get_state_n()
446 for state_id, state_range in self.STATE.iteritems():
447 if state_range[0] <= n < state_range[1]:
448 return state_id
449 raise ValueError('No state containing value %s' % (n))
450
453
455 self._state_n = value
456
458 """Empty method that should be overridden in sub-classes to infer the
459 current state(_n) of the unit from its current state."""
460 pass
461
462
464 """Base class for stores for multiple translation units of type
465 UnitClass."""
466
467 UnitClass = TranslationUnit
468 """The class of units that will be instantiated and used by this class"""
469 Name = "Base translation store"
470 """The human usable name of this store type"""
471 Mimetypes = None
472 """A list of MIME types associated with this store type"""
473 Extensions = None
474 """A list of file extentions associated with this store type"""
475 _binary = False
476 """Indicates whether a file should be accessed as a binary file."""
477 suggestions_in_format = False
478 """Indicates if format can store suggestions and alternative translation
479 for a unit"""
480
489
491 """Gets the source language for this store"""
492 return self.sourcelanguage
493
497
499 """Gets the target language for this store"""
500 return self.targetlanguage
501
505
507 """Gets the project type for this store"""
508 return getattr(self, '_project_style', None)
509
511 """Sets the project type for this store"""
512 self._project_style = project_style
513
def unit_iter(self):
    """Iterator over all the units in this store.

    Yields the items of C{self.units} in order.
    """
    for unit in self.units:
        yield unit
518
520 """Return a list of all units in this store."""
521 return [unit for unit in self.unit_iter()]
522
def addunit(self, unit):
    """Appends the given unit to the object's list of units.

    This method should always be used rather than trying to modify the
    list manually, because it also points the unit's C{_store} attribute
    back at this store.

    @type unit: L{TranslationUnit}
    @param unit: The unit that will be added.
    """
    unit._store = self
    self.units.append(unit)
534
536 """Adds and returns a new unit with the given source string.
537
538 @rtype: L{TranslationUnit}
539 """
540 unit = self.UnitClass(source)
541 self.addunit(unit)
542 return unit
543
545 """find unit with matching id by checking id_index"""
546 self.require_index()
547 return self.id_index.get(id, None)
548
550 """Finds the unit with the given source string.
551
552 @rtype: L{TranslationUnit} or None
553 """
554 if len(getattr(self, "sourceindex", [])):
555 if source in self.sourceindex:
556 return self.sourceindex[source][0]
557 else:
558 for unit in self.units:
559 if unit.source == source:
560 return unit
561 return None
562
564 """Finds the units with the given source string.
565
566 @rtype: L{TranslationUnit} or None
567 """
568 if len(getattr(self, "sourceindex", [])):
569 if source in self.sourceindex:
570 return self.sourceindex[source]
571 else:
572
573
574 result = []
575 for unit in self.units:
576 if unit.source == source:
577 result.append(unit)
578 return result
579 return None
580
582 """Returns the translated string for a given source string.
583
584 @rtype: String or None
585 """
586 unit = self.findunit(source)
587 if unit and unit.target:
588 return unit.target
589 else:
590 return None
591
593 """Remove a unit from source and locaton indexes"""
594
595 def remove_unit(source):
596 if source in self.sourceindex:
597 try:
598 self.sourceindex[source].remove(unit)
599 if len(self.sourceindex[source]) == 0:
600 del(self.sourceindex[source])
601 except ValueError:
602 pass
603
604 if unit.hasplural():
605 for source in unit.source.strings:
606 remove_unit(source)
607 else:
608 remove_unit(unit.source)
609
610 for location in unit.getlocations():
611 if location in self.locationindex \
612 and self.locationindex[location] is not None \
613 and self.locationindex[location] == unit:
614 del(self.locationindex[location])
615
def add_unit_to_index(self, unit):
    """Add a unit to the id, source and location indexes.

    @param unit: The unit to index. It is registered under its id, under
        its source (every plural source string when C{unit.hasplural()}),
        and under each of its locations.
    """
    self.id_index[unit.getid()] = unit

    def insert_unit(source):
        # Several units may share a source, so each entry is a list.
        self.sourceindex.setdefault(source, []).append(unit)

    if unit.hasplural():
        for source in unit.source.strings:
            insert_unit(source)
    else:
        insert_unit(unit.source)

    for location in unit.getlocations():
        if location in self.locationindex:
            # A location that is already indexed is overwritten with None
            # instead of a unit (presumably to mark it as ambiguous).
            self.locationindex[location] = None
        else:
            self.locationindex[location] = unit
639
def makeindex(self):
    """Indexes the items in this store. At least .sourceindex should be
    useful.

    Rebuilds C{locationindex}, C{sourceindex} and C{id_index} from scratch.
    Every unit gets its position in C{self.units} stored as C{unit.index};
    only translatable units are added to the indexes.
    """
    self.locationindex = {}
    self.sourceindex = {}
    self.id_index = {}
    for index, unit in enumerate(self.units):
        unit.index = index
        if unit.istranslatable():
            self.add_unit_to_index(unit)
650
def require_index(self):
    """Make sure the indexes exist.

    L{makeindex()} is called only when this store has no C{id_index}
    attribute yet; an existing index is not rebuilt.
    """
    if not hasattr(self, "id_index"):
        self.makeindex()
655
def getids(self, filename=None):
    """Return the unit ids known to the id index.

    The index is built first if it does not exist yet.

    @param filename: Not used by this implementation.
    @return: The keys of C{self.id_index}.
    """
    self.require_index()
    return self.id_index.keys()
660
662 odict = self.__dict__.copy()
663 odict['fileobj'] = None
664 return odict
665
667 self.__dict__.update(dict)
668 if getattr(self, "filename", False):
669 self.fileobj = open(self.filename)
670
672 """Converts to a string representation that can be parsed back using
673 L{parsestring()}."""
674
675 fileobj = getattr(self, "fileobj", None)
676 self.fileobj = None
677 dump = pickle.dumps(self)
678 self.fileobj = fileobj
679 return dump
680
682 """Returns True if the object doesn't contain any translation units."""
683 if len(self.units) == 0:
684 return True
685 for unit in self.units:
686 if unit.istranslatable():
687 return False
688 return True
689
def _assignname(self):
    """Tries to work out what the name of the filesystem file is and
    assigns it to .filename.

    The name is read from C{self.fileobj.name}, falling back to
    C{self.fileobj.filename}. C{self.filename} is left untouched when
    there is no file object or it exposes no usable name.
    """
    fileobj = getattr(self, "fileobj", None)
    if fileobj:
        filename = getattr(fileobj, "name",
                           getattr(fileobj, "filename", None))
        if filename:
            self.filename = filename
699
def parsestring(cls, storestring):
    """Converts the string representation back to an object.

    @param storestring: The serialised store. A false value (empty string
        or C{None}) gives an unparsed, freshly constructed store.
    @return: A new instance of C{cls}.
    """
    newstore = cls()
    if storestring:
        newstore.parse(storestring)
    return newstore
706 parsestring = classmethod(parsestring)
707
def detect_encoding(self, text, default_encodings=None):
    """Decode C{text}, trying candidate encodings in order.

    NOTE(review): the C{def} line of this method is absent from the
    listing this was restored from; name and signature need confirming.

    With C{self.encoding == 'auto'} the candidates are the encoding
    reported by C{chardet} (when installed and at least 0.48 confident)
    followed by C{default_encodings}. Otherwise only C{self.encoding} is
    tried, and a warning is logged if C{chardet} disagrees with it.

    @param text: The byte string to decode.
    @param default_encodings: Fallback encodings for 'auto' mode;
        defaults to C{['utf-8']}.
    @rtype: tuple
    @return: C{(decoded_text, encoding)} for the first candidate that
        decodes, with 'ascii' reported as 'utf-8'; C{(None, None)} when
        no candidate works.
    """
    if not default_encodings:
        default_encodings = ['utf-8']
    try:
        import chardet
        detected_encoding = chardet.detect(text)
        if detected_encoding['confidence'] < 0.48:
            detected_encoding = None
    except ImportError:
        # chardet is optional; without it only the explicit candidates run.
        detected_encoding = None

    encodings = []
    if self.encoding == 'auto':
        if detected_encoding:
            encodings.append(detected_encoding['encoding'])
        for encoding in default_encodings:
            if encoding not in encodings:
                encodings.append(encoding)
    else:
        encodings.append(self.encoding)
        if detected_encoding and detected_encoding['encoding'] != self.encoding:
            logging.warning("trying to parse %s with encoding: %s but detected encoding is %s",
                            getattr(self, "filename", None), self.encoding,
                            detected_encoding['encoding'])

    r_text = None
    r_encoding = None
    for encoding in encodings:
        try:
            r_text = unicode(text, encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the codec name is unknown to Python; treat it
            # like a failed decode and move on to the next candidate.
            continue
        r_encoding = encoding
        break
    if r_encoding == 'ascii':
        # ASCII is a subset of UTF-8, so report the wider encoding.
        r_encoding = 'utf-8'
    return r_text, r_encoding
744
def parse(self, data):
    """Parser to process the given source string.

    This base implementation expects C{data} to be a pickle of an object
    with a C{units} attribute and takes that attribute over as
    C{self.units}.

    @param data: The pickled store.
    """
    # SECURITY: unpickling can execute arbitrary code. Only feed this
    # data that comes from a trusted source.
    self.units = pickle.loads(data).units
748
def savefile(self, storefile):
    """Writes the string representation to the given file (or filename).

    @param storefile: An open, writable file object, or a filename. A
        filename is opened in 'w' mode, or 'wb' when C{self._binary} is
        set.
    @note: The file is always closed before returning, also when
        serialising or writing fails.
    """
    if isinstance(storefile, basestring):
        mode = 'w'
        if self._binary:
            mode = 'wb'
        storefile = open(storefile, mode)
    self.fileobj = storefile
    try:
        self._assignname()
        storestring = str(self)
        storefile.write(storestring)
    finally:
        # Close on every path so a failed write does not leak the handle.
        storefile.close()
761
763 """Save to the file that data was originally read from, if
764 available."""
765 fileobj = getattr(self, "fileobj", None)
766 mode = 'w'
767 if self._binary:
768 mode = 'wb'
769 if not fileobj:
770 filename = getattr(self, "filename", None)
771 if filename:
772 fileobj = file(filename, mode)
773 else:
774 fileobj.close()
775 filename = getattr(fileobj, "name",
776 getattr(fileobj, "filename", None))
777 if not filename:
778 raise ValueError("No file or filename to save to")
779 fileobj = fileobj.__class__(filename, mode)
780 self.savefile(fileobj)
781
783 """Reads the given file (or opens the given filename) and parses back
784 to an object."""
785 mode = 'r'
786 if cls._binary:
787 mode = 'rb'
788 if isinstance(storefile, basestring):
789 storefile = open(storefile, mode)
790 mode = getattr(storefile, "mode", mode)
791
792 if mode == 1 or "r" in mode:
793 storestring = storefile.read()
794 storefile.close()
795 else:
796 storestring = ""
797 newstore = cls.parsestring(storestring)
798 newstore.fileobj = storefile
799 newstore._assignname()
800 return newstore
801 parsefile = classmethod(parsefile)
802