View Javadoc

1   /*
2    $Id: XmlParser.java,v 1.5 2006/01/08 16:30:06 dierk Exp $
3   
4    Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
5   
6    Redistribution and use of this software and associated documentation
7    ("Software"), with or without modification, are permitted provided
8    that the following conditions are met:
9   
10   1. Redistributions of source code must retain copyright
11      statements and notices.  Redistributions must also contain a
12      copy of this document.
13  
14   2. Redistributions in binary form must reproduce the
15      above copyright notice, this list of conditions and the
16      following disclaimer in the documentation and/or other
17      materials provided with the distribution.
18  
19   3. The name "groovy" must not be used to endorse or promote
20      products derived from this Software without prior written
21      permission of The Codehaus.  For written permission,
22      please contact info@codehaus.org.
23  
24   4. Products derived from this Software may not be called "groovy"
25      nor may "groovy" appear in their names without prior written
26      permission of The Codehaus. "groovy" is a registered
27      trademark of The Codehaus.
28  
29   5. Due credit should be given to The Codehaus -
30      http://groovy.codehaus.org/
31  
32   THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
33   ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
34   NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35   FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
36   THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43   OF THE POSSIBILITY OF SUCH DAMAGE.
44  
45   */
46  package groovy.util;
47  
48  import groovy.xml.QName;
49  
50  import java.io.File;
51  import java.io.FileInputStream;
52  import java.io.IOException;
53  import java.io.InputStream;
54  import java.io.Reader;
55  import java.io.StringReader;
56  import java.security.AccessController;
57  import java.security.PrivilegedActionException;
58  import java.security.PrivilegedExceptionAction;
59  import java.util.ArrayList;
60  import java.util.HashMap;
61  import java.util.List;
62  import java.util.Map;
63  
64  import javax.xml.parsers.ParserConfigurationException;
65  import javax.xml.parsers.SAXParser;
66  import javax.xml.parsers.SAXParserFactory;
67  
68  import org.xml.sax.*;
69  
70  /***
71   * A helper class for parsing XML into a tree of Node instances for 
72   * a simple way of processing XML. This parser does not preserve the
73   * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
74   * This parser ignores comments and processing instructions and converts the
75   * XML into a Node for each element in the XML with attributes
76   * and child Nodes and Strings. This simple model is sufficient for
77   * most simple use cases of processing XML.
78   * 
79   * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
80   * @version $Revision: 1.5 $
81   */
82  public class XmlParser implements ContentHandler {
83  
84      private StringBuffer bodyText = new StringBuffer();
85      private List stack = new ArrayList();
86      private Locator locator;
87      private XMLReader reader;
88      private Node parent;
89      private boolean trimWhitespace = true;
90  
91      public XmlParser() throws ParserConfigurationException, SAXException {
92          this(false, true);
93      }
94  
95      public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
96          SAXParserFactory factory = null;
97          try {
98              factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
99                  public Object run() throws ParserConfigurationException {
100                     return SAXParserFactory.newInstance();
101                 }
102             });
103         } catch (PrivilegedActionException pae) {
104             Exception e = pae.getException();
105             if (e instanceof ParserConfigurationException) {
106                 throw (ParserConfigurationException) e;
107             } else {
108                 throw new RuntimeException(e);
109             }
110         }
111         factory.setNamespaceAware(namespaceAware);
112         factory.setValidating(validating);
113 
114         SAXParser parser = factory.newSAXParser();
115         reader = parser.getXMLReader();
116     }
117 
118     public XmlParser(XMLReader reader) {
119         this.reader = reader;
120     }
121 
122     public XmlParser(SAXParser parser) throws SAXException {
123         reader = parser.getXMLReader();
124     }
125 
126 
127     /***
128      * Parses the content of the given file as XML turning it into a tree
129      * of Nodes
130      */
131     public Node parse(File file) throws IOException, SAXException {
132 
133         InputSource input = new InputSource(new FileInputStream(file));
134         input.setSystemId("file://" + file.getAbsolutePath());
135         getXMLReader().parse(input);
136         return parent;
137 
138     }
139 
140     /***
141      * Parse the content of the specified input source into a tree of Nodes.
142      */
143     public Node parse(InputSource input) throws IOException, SAXException {
144         getXMLReader().parse(input);
145         return parent;
146     }
147 
148     /***
149      * Parse the content of the specified input stream into a tree of Nodes.
150      * Note that using this method will not provide the parser with any URI
151      * for which to find DTDs etc
152      */
153     public Node parse(InputStream input) throws IOException, SAXException {
154         InputSource is = new InputSource(input);
155         getXMLReader().parse(is);
156         return parent;
157     }
158 
159     /***
160      * Parse the content of the specified reader into a tree of Nodes.
161      * Note that using this method will not provide the parser with any URI
162      * for which to find DTDs etc
163      */
164     public Node parse(Reader in) throws IOException, SAXException {
165         InputSource is = new InputSource(in);
166         getXMLReader().parse(is);
167         return parent;
168     }
169 
170     /***
171      * Parse the content of the specified URI into a tree of Nodes
172      */
173     public Node parse(String uri) throws IOException, SAXException {
174         InputSource is = new InputSource(uri);
175         getXMLReader().parse(is);
176         return parent;
177     }
178 
179     /***
180      * A helper method to parse the given text as XML
181      * 
182      * @param text
183      * @return
184      */
185     public Node parseText(String text) throws IOException, SAXException {
186         return parse(new StringReader(text));
187     }
188     // Delegated XMLReader methods
189     //------------------------------------------------------------------------
190 
191     /* (non-Javadoc)
192      * @see org.xml.sax.XMLReader#getDTDHandler()
193      */
194     public DTDHandler getDTDHandler() {
195         return this.reader.getDTDHandler();
196     }
197 
198     /* (non-Javadoc)
199      * @see org.xml.sax.XMLReader#getEntityResolver()
200      */
201     public EntityResolver getEntityResolver() {
202         return this.reader.getEntityResolver();
203     }
204 
205     /* (non-Javadoc)
206      * @see org.xml.sax.XMLReader#getErrorHandler()
207      */
208     public ErrorHandler getErrorHandler() {
209         return this.reader.getErrorHandler();
210     }
211 
212     /* (non-Javadoc)
213      * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
214      */
215     public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
216         return this.reader.getFeature(uri);
217     }
218 
219     /* (non-Javadoc)
220      * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
221      */
222     public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
223         return this.reader.getProperty(uri);
224     }
225 
226     /* (non-Javadoc)
227      * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
228      */
229     public void setDTDHandler(final DTDHandler dtdHandler) {
230         this.reader.setDTDHandler(dtdHandler);
231     }
232 
233     /* (non-Javadoc)
234      * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
235      */
236     public void setEntityResolver(final EntityResolver entityResolver) {
237         this.reader.setEntityResolver(entityResolver);
238     }
239 
240     /* (non-Javadoc)
241      * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
242      */
243     public void setErrorHandler(final ErrorHandler errorHandler) {
244         this.reader.setErrorHandler(errorHandler);
245     }
246 
247     /* (non-Javadoc)
248      * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
249      */
250     public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
251         this.reader.setFeature(uri, value);
252     }
253 
254     /* (non-Javadoc)
255      * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
256      */
257     public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
258          this.reader.setProperty(uri, value);
259     }
260 
261     // ContentHandler interface
262     //-------------------------------------------------------------------------                    
263     public void startDocument() throws SAXException {
264         parent = null;
265     }
266 
267     public void endDocument() throws SAXException {
268         stack.clear();
269     }
270 
271     public void startElement(String namespaceURI, String localName, String qName, Attributes list)
272         throws SAXException {
273         addTextToNode();
274 
275         Object name = getElementName(namespaceURI, localName, qName);
276 
277         int size = list.getLength();
278         Map attributes = new HashMap(size);
279         for (int i = 0; i < size; i++) {
280             Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
281             String value = list.getValue(i);
282             attributes.put(attributeName, value);
283         }
284         parent = new Node(parent, name, attributes, new ArrayList());
285         stack.add(parent);
286     }
287 
288     public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
289         addTextToNode();
290 
291         if (!stack.isEmpty()) {
292             stack.remove(stack.size() - 1);
293             if (!stack.isEmpty()) {
294                 parent = (Node) stack.get(stack.size() - 1);
295             }
296         }
297     }
298 
299     public void characters(char buffer[], int start, int length) throws SAXException {
300         bodyText.append(buffer, start, length);
301     }
302 
303     public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
304     }
305 
306     public void endPrefixMapping(String prefix) throws SAXException {
307     }
308 
309     public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
310     }
311 
312     public void processingInstruction(String target, String data) throws SAXException {
313     }
314 
315     public Locator getDocumentLocator() {
316         return locator;
317     }
318 
319     public void setDocumentLocator(Locator locator) {
320         this.locator = locator;
321     }
322 
323     public void skippedEntity(String name) throws SAXException {
324     }
325 
326     // Implementation methods
327     //-------------------------------------------------------------------------           
328     protected XMLReader getXMLReader() {
329         reader.setContentHandler(this);
330         return reader;
331     }
332 
333     protected void addTextToNode() {
334         String text = bodyText.toString();
335         if (trimWhitespace) {
336             text = text.trim();
337         }
338         if (text.length() > 0) {
339             parent.children().add(text);
340         }
341         bodyText = new StringBuffer();
342     }
343 
344     protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
345         String name = localName;
346         if ((name == null) || (name.length() < 1)) {
347             name = qName;
348         }
349         if (namespaceURI == null || namespaceURI.length() <= 0) {
350             return name;
351         }
352         else {
353             return new QName(namespaceURI, name, qName);
354         }
355     }
356 }