View Javadoc

1   /*
2    $Id: XmlParser.java,v 1.4 2004/04/15 17:35:14 jstrachan Exp $
3   
4    Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
5   
6    Redistribution and use of this software and associated documentation
7    ("Software"), with or without modification, are permitted provided
8    that the following conditions are met:
9   
10   1. Redistributions of source code must retain copyright
11      statements and notices.  Redistributions must also contain a
12      copy of this document.
13  
14   2. Redistributions in binary form must reproduce the
15      above copyright notice, this list of conditions and the
16      following disclaimer in the documentation and/or other
17      materials provided with the distribution.
18  
19   3. The name "groovy" must not be used to endorse or promote
20      products derived from this Software without prior written
21      permission of The Codehaus.  For written permission,
22      please contact info@codehaus.org.
23  
24   4. Products derived from this Software may not be called "groovy"
25      nor may "groovy" appear in their names without prior written
26      permission of The Codehaus. "groovy" is a registered
27      trademark of The Codehaus.
28  
29   5. Due credit should be given to The Codehaus -
30      http://groovy.codehaus.org/
31  
32   THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
33   ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
34   NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35   FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
36   THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43   OF THE POSSIBILITY OF SUCH DAMAGE.
44  
45   */
46  package groovy.util;
47  
48  import groovy.xml.QName;
49  
50  import java.io.File;
51  import java.io.FileInputStream;
52  import java.io.IOException;
53  import java.io.InputStream;
54  import java.io.Reader;
55  import java.io.StringReader;
56  import java.security.AccessController;
57  import java.security.PrivilegedActionException;
58  import java.security.PrivilegedExceptionAction;
59  import java.util.ArrayList;
60  import java.util.HashMap;
61  import java.util.List;
62  import java.util.Map;
63  
64  import javax.xml.parsers.ParserConfigurationException;
65  import javax.xml.parsers.SAXParser;
66  import javax.xml.parsers.SAXParserFactory;
67  
68  import org.xml.sax.Attributes;
69  import org.xml.sax.ContentHandler;
70  import org.xml.sax.InputSource;
71  import org.xml.sax.Locator;
72  import org.xml.sax.SAXException;
73  import org.xml.sax.XMLReader;
74  
75  /***
76   * A helper class for parsing XML into a tree of Node instances for 
77   * a simple way of processing XML. This parser does not preserve the
78   * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
79   * This parser ignores comments and processing instructions and converts the
80   * XML into a Node for each element in the XML with attributes
81   * and child Nodes and Strings. This simple model is sufficient for
82   * most simple use cases of processing XML.
83   * 
84   * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
85   * @version $Revision: 1.4 $
86   */
87  public class XmlParser implements ContentHandler {
88  
89      private StringBuffer bodyText = new StringBuffer();
90      private List stack = new ArrayList();
91      private Locator locator;
92      private XMLReader reader;
93      private Node parent;
94      private boolean trimWhitespace = true;
95  
96      public XmlParser() throws ParserConfigurationException, SAXException {
97          this(false, true);
98      }
99  
100     public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
101         SAXParserFactory factory = null;
102     	try {
103 			factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
104 				public Object run() throws ParserConfigurationException {
105 					return SAXParserFactory.newInstance();
106 				}
107 			});
108     	} catch (PrivilegedActionException pae) {
109     		Exception e = pae.getException();
110     		if (e instanceof ParserConfigurationException) {
111     			throw (ParserConfigurationException) e;
112     		} else {
113     			throw new RuntimeException(e);
114     		}
115     	}
116         factory.setNamespaceAware(namespaceAware);
117         factory.setValidating(validating);
118 
119         SAXParser parser = factory.newSAXParser();
120         reader = parser.getXMLReader();
121     }
122 
123     public XmlParser(XMLReader reader) {
124         this.reader = reader;
125     }
126 
127     public XmlParser(SAXParser parser) throws SAXException {
128         reader = parser.getXMLReader();
129     }
130 
131     
132     /***
133      * Parses the content of the given file as XML turning it into a tree
134      * of Nodes
135      */
136     public Node parse(File file) throws IOException, SAXException {
137 
138         InputSource input = new InputSource(new FileInputStream(file));
139         input.setSystemId("file://" + file.getAbsolutePath());
140         getXMLReader().parse(input);
141         return parent;
142 
143     }
144 
145     /***
146      * Parse the content of the specified input source into a tree of Nodes.
147      */
148     public Node parse(InputSource input) throws IOException, SAXException {
149         getXMLReader().parse(input);
150         return parent;
151     }
152 
153     /***
154      * Parse the content of the specified input stream into a tree of Nodes.
155      * Note that using this method will not provide the parser with any URI
156      * for which to find DTDs etc
157      */
158     public Node parse(InputStream input) throws IOException, SAXException {
159         InputSource is = new InputSource(input);
160         getXMLReader().parse(is);
161         return parent;
162     }
163 
164     /***
165      * Parse the content of the specified reader into a tree of Nodes.
166      * Note that using this method will not provide the parser with any URI
167      * for which to find DTDs etc
168      */
169     public Node parse(Reader in) throws IOException, SAXException {
170         InputSource is = new InputSource(in);
171         getXMLReader().parse(is);
172         return parent;
173     }
174 
175     /***
176      * Parse the content of the specified URI into a tree of Nodes
177      */
178     public Node parse(String uri) throws IOException, SAXException {
179         InputSource is = new InputSource(uri);
180         getXMLReader().parse(is);
181         return parent;
182     }
183 
184     /***
185      * A helper method to parse the given text as XML
186      * 
187      * @param text
188      * @return
189      */
190     public Node parseText(String text) throws IOException, SAXException {
191         return parse(new StringReader(text));
192     }
193     
194 
195     // ContentHandler interface
196     //-------------------------------------------------------------------------                    
197     public void startDocument() throws SAXException {
198         parent = null;
199     }
200 
201     public void endDocument() throws SAXException {
202         stack.clear();
203     }
204 
205     public void startElement(String namespaceURI, String localName, String qName, Attributes list)
206         throws SAXException {
207         addTextToNode();
208         
209         Object name = getElementName(namespaceURI, localName, qName);
210 
211         int size = list.getLength();
212         Map attributes = new HashMap(size);
213         for (int i = 0; i < size; i++) {
214             Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
215             String value = list.getValue(i);
216             attributes.put(attributeName, value);
217         }
218         parent = new Node(parent, name, attributes, new ArrayList());
219         stack.add(parent);
220     }
221 
222     public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
223         addTextToNode();
224         
225         if (!stack.isEmpty()) {
226             stack.remove(stack.size() - 1);
227             if (!stack.isEmpty()) {
228                 parent = (Node) stack.get(stack.size() - 1);
229             }
230         }
231     }
232 
233     public void characters(char buffer[], int start, int length) throws SAXException {
234         bodyText.append(buffer, start, length);
235     }
236 
237     public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
238     }
239 
240     public void endPrefixMapping(String prefix) throws SAXException {
241     }
242 
243     public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
244     }
245 
246     public void processingInstruction(String target, String data) throws SAXException {
247     }
248 
249     public Locator getDocumentLocator() {
250         return locator;
251     }
252 
253     public void setDocumentLocator(Locator locator) {
254         this.locator = locator;
255     }
256 
257     public void skippedEntity(String name) throws SAXException {
258     }
259 
260     // Implementation methods
261     //-------------------------------------------------------------------------           
262     protected XMLReader getXMLReader() {
263         reader.setContentHandler(this);
264         return reader;
265     }
266 
267     protected void addTextToNode() {
268         String text = bodyText.toString();
269         if (trimWhitespace) {
270             text = text.trim();
271         }
272         if (text.length() > 0) {
273             parent.children().add(text);
274         }
275         bodyText = new StringBuffer();
276     }
277 
278     protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
279         String name = localName;
280         if ((name == null) || (name.length() < 1)) {
281             name = qName;
282         }
283         if (namespaceURI == null || namespaceURI.length() <= 0) {
284             return name;
285         }
286         else {
287             return new QName(namespaceURI, name, qName);
288         }
289     }
290 }