View Javadoc

1   /*
2    * Copyright 2005 John G. Wilson
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  
18  package groovy.util;
19  
20  import groovy.util.slurpersupport.GPathResult;
21  import groovy.util.slurpersupport.Node;
22  import groovy.util.slurpersupport.NodeChild;
23  
24  import java.io.File;
25  import java.io.FileInputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.Reader;
29  import java.io.StringReader;
30  import java.net.URL;
31  import java.security.AccessController;
32  import java.security.PrivilegedActionException;
33  import java.security.PrivilegedExceptionAction;
34  import java.util.HashMap;
35  import java.util.Hashtable;
36  import java.util.Map;
37  import java.util.Stack;
38  
39  import javax.xml.parsers.ParserConfigurationException;
40  import javax.xml.parsers.SAXParser;
41  import javax.xml.parsers.SAXParserFactory;
42  
43  import org.xml.sax.Attributes;
44  import org.xml.sax.DTDHandler;
45  import org.xml.sax.EntityResolver;
46  import org.xml.sax.ErrorHandler;
47  import org.xml.sax.InputSource;
48  import org.xml.sax.SAXException;
49  import org.xml.sax.SAXNotRecognizedException;
50  import org.xml.sax.SAXNotSupportedException;
51  import org.xml.sax.XMLReader;
52  import org.xml.sax.helpers.DefaultHandler;
53  
54  /***
55   * @author John Wilson
56   *
57   */
58  
59  public class XmlSlurper extends DefaultHandler {
60    private final XMLReader reader;
61    private Node currentNode = null;
62    private final Stack stack = new Stack();
63    private final StringBuffer charBuffer = new StringBuffer();
64    private final Map namespaceTagHints = new Hashtable();
65  
66    public XmlSlurper() throws ParserConfigurationException, SAXException {
67      this(false, true);
68    }
69    
70    public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
71    SAXParserFactory factory = null;
72      
73      try {
74        factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
75          public Object run() throws ParserConfigurationException {
76            return SAXParserFactory.newInstance();
77          }
78        });
79      } catch (final PrivilegedActionException pae) {
80        final Exception e = pae.getException();
81        
82        if (e instanceof ParserConfigurationException) {
83          throw (ParserConfigurationException) e;
84        } else {
85          throw new RuntimeException(e);
86        }
87      }
88      factory.setNamespaceAware(namespaceAware);
89      factory.setValidating(validating);
90      
91      final SAXParser parser = factory.newSAXParser();
92      this.reader = parser.getXMLReader();
93    }
94    
95    public XmlSlurper(final XMLReader reader) {
96      this.reader = reader;
97    }
98    
99    public XmlSlurper(final SAXParser parser) throws SAXException {
100     this(parser.getXMLReader());
101   }
102   
103   /***
104    * @return The GPathResult instance created by consuming a stream of SAX events
105    * Note if one of the parse methods has been called then this returns null
106    * Note if this is called more than once all calls after the first will return null
107    *
108    */
109   public GPathResult getDocument() {
110     try {
111       return new NodeChild(this.currentNode, null, this.namespaceTagHints);
112     } finally {
113       this.currentNode = null;
114     }
115   }
116   
117   /***
118    * Parse the content of the specified input source into a GPathResult object
119    * 
120    * @param input
121    * @return An object which supports GPath expressions
122    * @throws IOException
123    * @throws SAXException
124    */
125   public GPathResult parse(final InputSource input) throws IOException, SAXException {
126     this.reader.setContentHandler(this);
127     this.reader.parse(input);
128     
129     return getDocument();
130     
131   }
132   
133   /***
134    * Parses the content of the given file as XML turning it into a GPathResult object
135    * 
136    * @param file
137    * @return An object which supports GPath expressions
138    * @throws IOException
139    * @throws SAXException
140    */
141   public GPathResult parse(final File file) throws IOException, SAXException {
142   final InputSource input = new InputSource(new FileInputStream(file));
143     
144     input.setSystemId("file://" + file.getAbsolutePath());
145     
146     return parse(input);
147     
148   }
149   
150   /***
151    * Parse the content of the specified input stream into an GPathResult Object.
152    * Note that using this method will not provide the parser with any URI
153    * for which to find DTDs etc
154    * 
155    * @param input
156    * @return An object which supports GPath expressions
157    * @throws IOException
158    * @throws SAXException
159    */
160   public GPathResult parse(final InputStream input) throws IOException, SAXException {
161     return parse(new InputSource(input));
162   }
163   
164   /***
165    * Parse the content of the specified reader into a GPathResult Object.
166    * Note that using this method will not provide the parser with any URI
167    * for which to find DTDs etc
168    * 
169    * @param in
170    * @return An object which supports GPath expressions
171    * @throws IOException
172    * @throws SAXException
173    */
174   public GPathResult parse(final Reader in) throws IOException, SAXException {
175     return parse(new InputSource(in));
176   }
177   
178   /***
179    * Parse the content of the specified URI into a GPathResult Object
180    * 
181    * @param uri
182    * @return An object which supports GPath expressions
183    * @throws IOException
184    * @throws SAXException
185    */
186   public GPathResult parse(final String uri) throws IOException, SAXException {
187     return parse(new InputSource(uri));
188   }
189   
190   /***
191    * A helper method to parse the given text as XML
192    * 
193    * @param text
194    * @return An object which supports GPath expressions
195    */
196   public GPathResult parseText(final String text) throws IOException, SAXException {
197     return parse(new StringReader(text));
198   }
199   
200   // Delegated XMLReader methods
201   //------------------------------------------------------------------------
202 
203   /* (non-Javadoc)
204    * @see org.xml.sax.XMLReader#getDTDHandler()
205    */
206   public DTDHandler getDTDHandler() {
207       return this.reader.getDTDHandler();
208   }
209 
210   /* (non-Javadoc)
211    * @see org.xml.sax.XMLReader#getEntityResolver()
212    */
213   public EntityResolver getEntityResolver() {
214       return this.reader.getEntityResolver();
215   }
216 
217   /* (non-Javadoc)
218    * @see org.xml.sax.XMLReader#getErrorHandler()
219    */
220   public ErrorHandler getErrorHandler() {
221       return this.reader.getErrorHandler();
222   }
223 
224   /* (non-Javadoc)
225    * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
226    */
227   public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
228       return this.reader.getFeature(uri);
229   }
230 
231   /* (non-Javadoc)
232    * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
233    */
234   public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
235       return this.reader.getProperty(uri);
236   }
237 
238   /* (non-Javadoc)
239    * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
240    */
241   public void setDTDHandler(final DTDHandler dtdHandler) {
242       this.reader.setDTDHandler(dtdHandler);
243   }
244 
245   /* (non-Javadoc)
246    * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
247    */
248   public void setEntityResolver(final EntityResolver entityResolver) {
249       this.reader.setEntityResolver(entityResolver);
250   }
251 
252   /***
253    * Resolves entities against using the suppied URL as the base for relative URLs
254    * 
255    * @param base
256    * The URL used to resolve relative URLs
257    */
258   public void setEntityBaseUrl(final URL base) {
259       this.reader.setEntityResolver(new EntityResolver() {
260           public InputSource resolveEntity(final String publicId, final String systemId) throws IOException {
261               return new InputSource(new URL(base, systemId).openStream());
262           }
263       });
264   }
265 
266   /* (non-Javadoc)
267    * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
268    */
269   public void setErrorHandler(final ErrorHandler errorHandler) {
270       this.reader.setErrorHandler(errorHandler);
271   }
272 
273   /* (non-Javadoc)
274    * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
275    */
276   public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
277       this.reader.setFeature(uri, value);
278   }
279 
280   /* (non-Javadoc)
281    * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
282    */
283   public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
284        this.reader.setProperty(uri, value);
285   }
286   
287   
288   // ContentHandler interface
289   //-------------------------------------------------------------------------                    
290   
291   /* (non-Javadoc)
292    * @see org.xml.sax.ContentHandler#startDocument()
293    */
294   public void startDocument() throws SAXException {
295     this.currentNode = null;
296     this.charBuffer.setLength(0);
297   }
298   
299   /* (non-Javadoc)
300    * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
301    */
302   public void startPrefixMapping(final String tag, final String uri) throws SAXException {
303     this.namespaceTagHints.put(tag, uri);
304   }
305 
306   /* (non-Javadoc)
307    * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
308    */
309   public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
310     addNonWhitespaceCdata();
311     
312     final Map attributes = new HashMap();
313     final Map attributeNamespaces = new HashMap();
314     
315     for (int i = atts.getLength() - 1; i != -1; i--) {
316       if (atts.getURI(i).length() == 0) {
317         attributes.put(atts.getQName(i), atts.getValue(i));
318       } else {
319         attributes.put(atts.getLocalName(i), atts.getValue(i));
320         attributeNamespaces.put(atts.getLocalName(i), atts.getURI(i));
321       }
322       
323     }
324     
325     final Node newElement;
326     
327     if (namespaceURI.length() == 0){
328       newElement = new Node(this.currentNode, qName, attributes, attributeNamespaces, namespaceURI);
329     } else {
330       newElement = new Node(this.currentNode, localName, attributes, attributeNamespaces, namespaceURI);
331     }
332     
333     if (this.currentNode != null) {
334       this.currentNode.addChild(newElement);
335     }
336     
337     this.stack.push(this.currentNode);
338     this.currentNode = newElement;
339   }
340   
341   /* (non-Javadoc)
342    * @see org.xml.sax.ContentHandler#characters(char[], int, int)
343    */
344   public void characters(final char[] ch, final int start, final int length) throws SAXException {
345     this.charBuffer.append(ch, start, length);
346   }
347   
348   /* (non-Javadoc)
349    * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
350    */
351   public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
352     addNonWhitespaceCdata();
353     
354     final Object oldCurrentNode = this.stack.pop();
355     
356     if (oldCurrentNode != null) {
357       this.currentNode = (Node)oldCurrentNode;
358     }
359   }
360   
361   /* (non-Javadoc)
362    * @see org.xml.sax.ContentHandler#endDocument()
363    */
364   public void endDocument() throws SAXException {
365   }
366   
367   // Implementation methods
368   //-------------------------------------------------------------------------           
369   
370   /***
371    * 
372    */
373   private void addNonWhitespaceCdata() {
374     if (this.charBuffer.length() != 0) {
375       //
376       // This element is preceeded by CDATA if it's not whitespace add it to the body
377       // Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
378       // but for the sort of work I'm doing ignoring the whitespace is preferable
379       //
380       final String cdata = this.charBuffer.toString();
381       
382       this.charBuffer.setLength(0);
383       if (cdata.trim().length() != 0) {
384         this.currentNode.addChild(cdata);
385       }
386     }   
387   }
388 }