Package translate :: Package storage :: Package xml_extract :: Module unit_tree
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xml_extract.unit_tree

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008-2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  from lxml import etree 
 24   
 25  from translate.storage import base, xliff 
 26  from translate.misc.typecheck import accepts, Self, IsOneOf 
 27  from translate.misc.typecheck.typeclasses import Number 
class XPathTree(object):
    """A node of a tree whose edges are labelled with XPath components.

    Each node may hold a translation unit (``unit``) and keeps its child
    nodes in ``children``, a dict keyed by a component such as ``('p', 4)``.
    """

    @accepts(Self(), IsOneOf(base.TranslationUnit, type(None)))
    def __init__(self, unit=None):
        """Create a node holding ``unit`` (or ``None``) with no children."""
        self.unit = unit
        # Maps a component key to the child XPathTree stored under it.
        self.children = {}

    def __eq__(self, other):
        """Return True when ``other`` is an XPathTree with an equal unit
        and equal children (compared recursively through dict equality)."""
        return isinstance(other, XPathTree) and \
            self.unit == other.unit and \
            self.children == other.children

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two structurally equal trees would still compare as unequal with
        ``!=`` because the default identity comparison would be used.
        """
        return not self.__eq__(other)
41
@accepts(unicode)
def _split_xpath_component(xpath_component):
    """Split an xpath component into a (tag, index) tuple.

    >>> _split_xpath_component(u'{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content[0]')
    (u'{urn:oasis:names:tc:opendocument:xmlns:office:1.0}document-content', 0)

    The tag is everything before the last ``[``; the index is the integer
    between the last ``[`` and the last ``]``.

    Raises ValueError when the component has no ``[``, when its last ``]``
    does not come after its last ``[``, or when the bracketed text is not
    an integer.
    """
    lbrac = xpath_component.rfind(u'[')
    rbrac = xpath_component.rfind(u']')
    # rfind returns -1 when a bracket is missing.  Slicing with -1 would
    # silently chop the last character instead of failing (for example
    # u'p[45' would yield ('p', 4)), so reject malformed components here.
    if lbrac == -1 or rbrac < lbrac:
        raise ValueError(
            "Malformed xpath component (expected 'tag[index]'): %r"
            % (xpath_component,))
    tag = xpath_component[:lbrac]
    index = int(xpath_component[lbrac+1:rbrac])
    return tag, index
55
@accepts(unicode)
def _split_xpath(xpath):
    """Parse a '/'-separated 'xpath' string into a reversed list of
    (tag, index) components.

    For example, u'document-content[1]/body[2]/text[3]/p[4]' becomes::

        [('p', 4), ('text', 3), ('body', 2), ('document-content', 1)]

    If the string contains xliff.ID_SEPARATOR, only the part after the
    last separator is parsed.

    The result is reversed so that it can serve as a stack: pop() hands
    back the first component of the path.
    """
    if xliff.ID_SEPARATOR in xpath:
        segments = xpath.split(xliff.ID_SEPARATOR)
    else:
        segments = [xpath]
    # Components are parsed in path order and only then flipped.
    stack = [_split_xpath_component(piece)
             for piece in segments[-1].split(u'/')]
    stack.reverse()
    return stack
@accepts(IsOneOf(etree._Element, XPathTree), [(unicode, Number)], base.TranslationUnit)
def _add_unit_to_tree(node, xpath_components, unit):
    """Store unit in the tree rooted at node, at the position described by
    xpath_components.

    xpath_components is a stack of components (its last element is the
    first step of the path) and is consumed by this function: one
    component is popped per level descended.  Child nodes missing along
    the way are created as empty XPathTree instances.  Once the stack is
    empty, the node that has been reached gets ``node.unit = unit``.

    For example::

        _add_unit_to_tree(node, [('p', 2), ('text', 3), ('body', 2), ('document-content', 1)], unit)

    first follows (creating it if needed) the child keyed
    ('document-content', 1), then ('body', 2), then ('text', 3) and finally
    ('p', 2), and assigns unit to that last node.
    """
    # Nothing left to follow: this node is the destination.
    if not len(xpath_components) > 0:
        node.unit = unit
        return

    step = xpath_components.pop()  # next component, e.g. ('p', 4)
    children = node.children
    # Create the child on demand so the descent can always continue.
    if step not in children:
        children[step] = XPathTree()
    _add_unit_to_tree(children[step], xpath_components, unit)
@accepts(base.TranslationStore)
def build_unit_tree(store):
    """Build an XPathTree from the non-fuzzy units of a translation store.

    Every unit for which isfuzzy() is false is filed under the path given
    by its first location (``unit.getlocations()[0]``), parsed as an XPath:
    walking from the root along the components of that XPath leads to the
    node holding the unit.

    The resulting tree looks something like this::

        root
           `- ('document-content', 1)
              `- ('body', 2)
                 |- ('text', 1)
                 |  `- ('p', 1)
                 |     `- <reference to a unit>
                 |- ('text', 2)
                 |  `- ('p', 1)
                 |     `- <reference to a unit>
                 `- ('text', 3)
                    `- ('p', 1)
                       `- <reference to a unit>
    """
    root = XPathTree()
    for unit in store.units:
        if unit.isfuzzy():
            continue
        # NOTE(review): presumably every non-fuzzy unit has at least one
        # location; an empty list would raise IndexError here - confirm.
        first_location = unit.getlocations()[0]
        _add_unit_to_tree(root, _split_xpath(first_location), unit)
    return root
129