1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""This module implements the functionality for handling plain text files, or
similar wiki type files.

Supported formats are
- Plain text
- DokuWiki
- MediaWiki
"""
30
31 import re
32
33 from translate.storage import base
34
# Markup rules for each supported wiki flavour.
#
# Every rule is a ``(name, prefix_regex, suffix_regex)`` tuple:
#   - ``prefix_regex`` is applied with ``match()`` to a whole line and has
#     exactly two groups: the leading markup and the remaining text.
#   - ``suffix_regex`` is applied with ``search()`` to that remaining text; the
#     part it matches is the trailing markup/whitespace to split off.
#
# All patterns are raw strings so that ``\s`` reaches the regex engine
# unchanged instead of being treated as an (invalid) string escape.
dokuwiki = [
    (
        "Dokuwiki heading",
        re.compile(r"( ?={2,6}[\s]*)(.+)"),
        re.compile(r"([\s]*={2,6}[\s]*)$"),
    ),
    (
        "Dokuwiki bullet",
        re.compile(r"([\s]{2,}\*[\s]*)(.+)"),
        re.compile(r"[\s]+$"),
    ),
    (
        "Dokuwiki numbered item",
        re.compile(r"([\s]{2,}-[\s]*)(.+)"),
        re.compile(r"[\s]+$"),
    ),
]

mediawiki = [
    (
        "MediaWiki heading",
        re.compile(r"(={1,5}[\s]*)(.+)"),
        re.compile(r"([\s]*={1,5}[\s]*)$"),
    ),
    (
        "MediaWiki bullet",
        re.compile(r"(\*+[\s]*)(.+)"),
        re.compile(r"[\s]+$"),
    ),
    (
        "MediaWiki numbered item",
        re.compile(r"(#+[\s]*)(.+)"),
        re.compile(r"[\s]+$"),
    ),
]

# Maps a flavour name to its rule list; ``None`` and "plain" have no markup
# rules at all.
flavours = {
    "dokuwiki": dokuwiki,
    "mediawiki": mediawiki,
    None: [],
    "plain": [],
}
51
52
53 -class TxtUnit(base.TranslationUnit):
54 """This class represents a block of text from a text file"""
55
56 - def __init__(self, source="", encoding="utf-8"):
64
66 """Convert a txt unit to a string"""
67 string = u"".join([self.pretext, self.source, self.posttext])
68 if isinstance(string, unicode):
69 return string.encode(self.encoding)
70 return string
71
72
79
81 """gets the unquoted source string"""
82 return self._source
83 source = property(getsource, setsource)
84
89
91 """gets the unquoted target string"""
92 return self.source
93 target = property(gettarget, settarget)
94
96 self.location.append(location)
97
100
101
102 -class TxtFile(base.TranslationStore):
103 """This class represents a text file, made up of txtunits"""
104 UnitClass = TxtUnit
105
106 - def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
114
116 """Read in text lines and create txtunits from the blocks of text"""
117 block = []
118 startline = 0
119 pretext = ""
120 posttext = ""
121 if not isinstance(lines, list):
122 lines = lines.split("\n")
123 for linenum in range(len(lines)):
124 line = lines[linenum].rstrip("\r\n")
125 for rule, prere, postre in self.flavour:
126 match = prere.match(line)
127 if match:
128 pretext, source = match.groups()
129 postmatch = postre.search(source)
130 if postmatch:
131 posttext = postmatch.group()
132 source = source[:postmatch.start()]
133 block.append(source)
134 isbreak = True
135 break
136 else:
137 isbreak = not line.strip()
138 if isbreak and block:
139 unit = self.addsourceunit("\n".join(block))
140 unit.addlocation("%s:%d" % (self.filename, startline + 1))
141 unit.pretext = pretext
142 unit.posttext = posttext
143 pretext = ""
144 posttext = ""
145 block = []
146 elif not isbreak:
147 if not block:
148 startline = linenum
149 block.append(line)
150 if block:
151 unit = self.addsourceunit("\n".join(block))
152 unit.addlocation("%s:%d" % (self.filename, startline + 1))
153
159
161 """Convert the units back to blocks"""
162 blocks = [str(unit) for unit in self.units]
163 string = "\n\n".join(blocks)
164 return string
165