View Javadoc

1   package org.codehaus.groovy.sandbox.util;
2   import groovy.lang.Closure;
3   import groovy.lang.GroovyObject;
4   import groovy.lang.GroovyObjectSupport;
5   import groovy.lang.Writable;
6   
7   import java.io.File;
8   import java.io.FileInputStream;
9   import java.io.IOException;
10  import java.io.InputStream;
11  import java.io.Reader;
12  import java.io.StringReader;
13  import java.io.Writer;
14  import java.security.AccessController;
15  import java.security.PrivilegedActionException;
16  import java.security.PrivilegedExceptionAction;
17  import java.util.HashMap;
18  import java.util.Iterator;
19  import java.util.LinkedList;
20  import java.util.List;
21  import java.util.Map;
22  
23  import javax.xml.parsers.ParserConfigurationException;
24  import javax.xml.parsers.SAXParser;
25  import javax.xml.parsers.SAXParserFactory;
26  
27  import org.codehaus.groovy.sandbox.markup.Buildable;
28  import org.xml.sax.Attributes;
29  import org.xml.sax.InputSource;
30  import org.xml.sax.SAXException;
31  import org.xml.sax.XMLReader;
32  import org.xml.sax.helpers.DefaultHandler;
33  
34  
35  public class XmlSlurper extends DefaultHandler {
36      private final XMLReader reader;
37  	private List result = null;
38  	private List body = null;
39  	private final StringBuffer charBuffer = new StringBuffer();
40  
41      public XmlSlurper() throws ParserConfigurationException, SAXException {
42          this(false, true);
43      }
44  
45      public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
46          SAXParserFactory factory = null;
47          
48  	    	try {
49  				factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
50  					public Object run() throws ParserConfigurationException {
51  						return SAXParserFactory.newInstance();
52  					}
53  				});
54  	    	} catch (final PrivilegedActionException pae) {
55  	    	final Exception e = pae.getException();
56  	    		
57  	    		if (e instanceof ParserConfigurationException) {
58  	    			throw (ParserConfigurationException) e;
59  	    		} else {
60  	    			throw new RuntimeException(e);
61  	    		}
62  	    	}
63          factory.setNamespaceAware(namespaceAware);
64          factory.setValidating(validating);
65  
66          final SAXParser parser = factory.newSAXParser();
67          this.reader = parser.getXMLReader();
68      }
69  
70      public XmlSlurper(final XMLReader reader) {
71          this.reader = reader;
72      }
73  
74      public XmlSlurper(final SAXParser parser) throws SAXException {
75          this(parser.getXMLReader());
76      }
77  
78      /***
79       * Parse the content of the specified input source into a List
80       */
81      public XmlList parse(final InputSource input) throws IOException, SAXException {
82      		this.reader.setContentHandler(this);
83      		this.reader.parse(input);
84          
85          return (XmlList)this.result.get(0);
86      }
87      
88      /***
89       * Parses the content of the given file as XML turning it into a List
90       */
91      public XmlList parse(final File file) throws IOException, SAXException {
92      final InputSource input = new InputSource(new FileInputStream(file));
93      
94          input.setSystemId("file://" + file.getAbsolutePath());
95          
96          return parse(input);
97  
98      }
99  
100     /***
101      * Parse the content of the specified input stream into a List.
102      * Note that using this method will not provide the parser with any URI
103      * for which to find DTDs etc
104      */
105     public XmlList parse(final InputStream input) throws IOException, SAXException {
106         return parse(new InputSource(input));
107     }
108 
109     /***
110      * Parse the content of the specified reader into a List.
111      * Note that using this method will not provide the parser with any URI
112      * for which to find DTDs etc
113      */
114     public XmlList parse(final Reader in) throws IOException, SAXException {
115         return parse(new InputSource(in));
116     }
117 
118     /***
119      * Parse the content of the specified URI into a List
120      */
121     public XmlList parse(final String uri) throws IOException, SAXException {
122         return parse(new InputSource(uri));
123     }
124 
125     /***
126      * A helper method to parse the given text as XML
127      * 
128      * @param text
129      * @return
130      */
131     public XmlList parseText(final String text) throws IOException, SAXException {
132         return parse(new StringReader(text));
133     }
134     
135 
136     // ContentHandler interface
137     //-------------------------------------------------------------------------                    
138 	
139 	/* (non-Javadoc)
140 	 * @see org.xml.sax.ContentHandler#startDocument()
141 	 */
142 	public void startDocument() throws SAXException {
143 		this.result = null;
144 		this.body = new LinkedList();
145 		this.charBuffer.setLength(0);
146 	}
147 	
148 	/* (non-Javadoc)
149 	 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
150 	 */
151 	public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
152 		addNonWhitespaceCdata();
153 		
154 		final Map attributes = new HashMap();
155 		
156 		for (int i = atts.getLength() - 1; i != -1; i--) {
157 			if (atts.getURI(i).length() == 0) {
158 				attributes.put(atts.getQName(i), atts.getValue(i));
159 			} else {
160 				//
161 				// Note this is strictly incorrect the name is really localname + URI
162 				// We need to figure out what to do with paramenters in namespaces
163 				//
164 				attributes.put(atts.getLocalName(i), atts.getValue(i));
165 			}
166 			
167 		}
168 		
169 		final List newBody = new LinkedList();
170 
171 		newBody.add(attributes);
172 		
173 		newBody.add(this.body);
174 
175 		this.body = newBody;
176 	}
177 
178 	/* (non-Javadoc)
179 	 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
180 	 */
181 	public void characters(final char[] ch, final int start, final int length) throws SAXException {
182 		this.charBuffer.append(ch, start, length);
183 	}
184 	
185 	/* (non-Javadoc)
186 	 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
187 	 */
188 	public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
189 		addNonWhitespaceCdata();
190 		
191 		final List children = this.body;
192 		
193 		final Map attributes = (Map)this.body.remove(0);
194 		
195 		this.body = (List)this.body.remove(0);
196 		
197 		if (namespaceURI.length() == 0) {
198 			this.body.add(new XmlList(qName, attributes, children, namespaceURI));
199 		} else {
200 			this.body.add(new XmlList(localName, attributes, children, namespaceURI));
201 		}
202 	}
203 	
204 	/* (non-Javadoc)
205 	 * @see org.xml.sax.ContentHandler#endDocument()
206 	 */
207 	public void endDocument() throws SAXException {
208 		this.result = this.body;
209 		this.body = null;
210 	}
211 
212     // Implementation methods
213     //-------------------------------------------------------------------------           
214 
215 	/***
216 	 * 
217 	 */
218 	private void addNonWhitespaceCdata() {
219 		if (this.charBuffer.length() != 0) {
220 			//
221 			// This element is preceeded by CDATA if it's not whitespace add it to the body
222 			// Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
223 			// but for the sort of work I'm doing ignoring the whitespace is preferable
224 			//
225 			final String cdata = this.charBuffer.toString();
226 			
227 			this.charBuffer.setLength(0);
228 			if (cdata.trim().length() != 0) {
229 				this.body.add(cdata);
230 			}
231 		}		
232 	}
233 }
234 
235 class XmlList extends GroovyObjectSupport implements Writable, Buildable {
236 	final String name;
237 	final Map attributes;
238 	final Object[] children;
239 	final String namespaceURI;
240 	
241     public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) {
242         super();
243         
244         this.name = name;
245         this.attributes = attributes;
246         this.children = body.toArray();
247         this.namespaceURI = namespaceURI;
248     }
249     
250     public Object getProperty(final String elementName) {
251 	    	if (elementName.startsWith("@")) {
252 	    		return this.attributes.get(elementName.substring(1));
253 	    	} else {
254 	    	final int indexOfFirst = getNextXmlElement(elementName, -1);
255 	    	
256 	    		if (indexOfFirst == -1) { // no elements match the element name
257     				return new ElementCollection() {
258         				protected ElementCollection getResult(final String property) {
259         					return this;
260         				}
261 
262 	    	    	    		/***
263 	    	    	    		 * 
264 	    	    	    		 * Used by the Invoker when it wants to iterate over this object
265 	    	    	    		 * 
266 	    	    	    		 * @return
267 	    	    	    		 */
268 	    	    	    		public ElementIterator iterator() {
269 	    	    	    			return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
270 	    	    	    				{
271 	    	    	    					findNextChild();		// set up the element indexes
272 	    	    	    				}
273 	    	    	    				
274 	    	        				protected void findNextChild() {
275 	    	        					this.nextParentElements[0] = -1;
276 	    	        				}
277 	    	    	    			};
278 	    	    	    		}
279     				};
280 	    		}
281 	    		
282     			if (getNextXmlElement(elementName, indexOfFirst) == -1) {	// one element matches the element name
283     				return this.children[indexOfFirst];
284     			} else {		// > 1 element matches the element name
285 	    	    		return new ElementCollection() {
286 	        				protected ElementCollection getResult(final String property) {
287 	        					return new ComplexElementCollection(new XmlList[]{XmlList.this},
288 	    							     						new int[] {indexOfFirst},
289 	    														new String[] {elementName},
290 	    														property);
291 	        				}
292 	
293 	    	    	    		/***
294 	    	    	    		 * 
295 	    	    	    		 * Used by the Invoker when it wants to iterate over this object
296 	    	    	    		 * 
297 	    	    	    		 * @return
298 	    	    	    		 */
299 	    	    	    		public ElementIterator iterator() {
300 	    	    	    			return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
301 	    	        				protected void findNextChild() {
302 	    	        					this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
303 	    	        				}
304 	    	    	    			};
305 	    	    	    		}
306 	    	    	    };
307     			}
308 	    	}
309     }
310     
311     public Object getAt(final int index) {
312     		if (index == 0) {
313     			return this;
314     		} else {
315     			throw new ArrayIndexOutOfBoundsException(index);
316     		}
317     	}
318     
319     public int size() {
320     		return 1;
321     }
322 
323     public Object invokeMethod(final String name, final Object args) {
324 		if ("attributes".equals(name)) {
325 			return this.attributes;
326 		} else if ("name".equals(name)) {
327 			return this.name;
328 		} else if ("children".equals(name)) {
329 			return this.children;
330 		} else if ("contents".equals(name)) {
331 			return new Buildable() {
332 				public void build(GroovyObject builder) {
333 					buildChildren(builder);
334 				}
335 			};
336 		} else if ("text".equals(name)) {
337 			return text();
338 		} else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
339 			return getProperty((String)((Object[])args)[0]);
340 		} else if ("depthFirst".equals(name)) {
341 			//
342 			// TODO: replace this with an iterator
343 			//
344 			
345 			return new GroovyObjectSupport() {
346 				public Object invokeMethod(final String name, final Object args) {
347 					if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
348 						return getProperty((String)((Object[])args)[0]);
349 					} else {
350 						return XmlList.this.invokeMethod(name, args);
351 					}
352 				}
353 				
354 				public Object getProperty(final String property) {
355 					if (property.startsWith("@")) {
356 						return XmlList.this.getProperty(property);
357 					} else {
358 					final List result = new LinkedList();
359 
360 						depthFirstGetProperty(property, XmlList.this.children, result);
361 						
362 						return result;
363 					}
364 				}
365 				
366 				private void depthFirstGetProperty(final String property, final Object[] contents, final List result) {
367 			    		for (int i = 0; i != contents.length; i++) {
368 			    		final Object item = contents[i];
369 			    		
370 			    			if (item instanceof XmlList) {
371 			    				if (((XmlList)item).name.equals(property)) {
372 			    					result.add(item);
373 			    				}
374 			    				
375 			    				depthFirstGetProperty(property, ((XmlList)item).children, result);
376 			    			}
377 					}
378 				}
379 			};
380     		} else {
381     			return getMetaClass().invokeMethod(this, name, args);
382     		}
383     }
384     
385 	/* (non-Javadoc)
386 	 * @see groovy.lang.Writable#writeTo(java.io.Writer)
387 	 */
388 	public Writer writeTo(Writer out) throws IOException {
389 
390 		for (int i = 0; i != this.children.length; i++) {
391 		final Object child = this.children[i];
392 		
393 			if (child instanceof String) {
394 				out.write((String)child);
395 			} else {
396 				((XmlList)child).writeTo(out);
397 			}
398 		}
399 		
400 		return out;
401 	}
402     
403 	/* (non-Javadoc)
404 	 * @see org.codehaus.groovy.sandbox.markup.Buildable#build(groovy.lang.GroovyObject)
405 	 */
406 	public void build(final GroovyObject builder) {
407 		// TODO handle Namespaces
408 	final Closure rest = new Closure(null) {
409 		public Object doCall(final Object o) {
410 			buildChildren(builder);
411 			
412 			return null;
413 		}
414 	};
415 
416 		builder.invokeMethod(this.name, new Object[]{this.attributes, rest});
417 		
418 	}
419 	
420 	public String toString() {
421 		return text();
422 	}
423 	
424 	private String text() {
425 	final StringBuffer buff = new StringBuffer();
426 
427 		for (int i = 0; i != this.children.length; i++) {
428 		final Object child = this.children[i];
429 		
430 			if (child instanceof String) {
431 				buff.append(child);
432 			} else {
433 				buff.append(((XmlList)child).text());
434 			}
435 		}	
436 	
437 		return buff.toString();
438 	}
439 	
440 	private void buildChildren(final GroovyObject builder) {
441 		for (int i = 0; i != this.children.length; i++) {
442 			if (this.children[i] instanceof Buildable) {
443 				((Buildable)this.children[i]).build(builder);
444 			} else {
445 				builder.getProperty("mkp");
446 				builder.invokeMethod("yield", new Object[]{this.children[i]});
447 			}
448 		}
449 	}
450 
451     	protected int getNextXmlElement(final String name, final int lastFound) {
452     		for (int i = lastFound + 1; i < this.children.length; i++) {
453 	    	final Object item = this.children[i];
454 	    		
455 	    		if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
456 	    			return i;
457 	    		}
458 	    	}
459     		
460     		return -1;
461     	}
462 }
463 
464 abstract class ElementIterator implements Iterator {
465 	protected final XmlList[] parents;
466 	protected final int[] nextParentElements;
467 	
468 	protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
469 		this.parents = new XmlList[parents.length];
470 		System.arraycopy(parents, 0, this.parents, 0, parents.length);
471 		
472 		this.nextParentElements = new int[nextParentElements.length];
473 		System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
474 	}
475 	
476 	/* (non-Javadoc)
477 	 * @see java.util.Iterator#hasNext()
478 	 */
479 	public boolean hasNext() {
480 		return this.nextParentElements[0] != -1;
481 	}
482 	
483 	/* (non-Javadoc)
484 	 * @see java.util.Iterator#next()
485 	 */
486 	public Object next() {
487 	final Object result = this.parents[0].children[this.nextParentElements[0]];
488 			
489 		findNextChild();
490 	
491 		return result;
492 	}
493 	
494 	/* (non-Javadoc)
495 	 * @see java.util.Iterator#remove()
496 	 */
497 	public void remove() {
498 		throw new UnsupportedOperationException();
499 	}
500 	
501 	protected abstract void findNextChild();
502 }
503 
504 abstract class ElementCollection extends GroovyObjectSupport {
505 	private int count = -1;
506 	
507 	public abstract ElementIterator iterator();
508 	
509 	/* (non-Javadoc)
510 	 * @see groovy.lang.GroovyObject#getProperty(java.lang.String)
511 	 */
512 	public Object getProperty(final String property) {
513 	final ElementCollection result = getResult(property);
514 	final Iterator iterator = result.iterator();
515 
516 		if (iterator.hasNext()) {				
517 			//
518 			// See if there's only one available
519 			//
520 			final Object first = iterator.next();
521 			
522 			if (!iterator.hasNext()) {
523 				return first;
524 			}
525 		}
526 		
527 		return result;
528 	}
529 	
530 	protected abstract ElementCollection getResult(String property);
531     
532     public synchronized Object getAt(int index) {
533 	    	if (index >= 0) {
534 		final Iterator iter = iterator();
535 		
536 			while (iter.hasNext()) {
537 				if (index-- == 0) {
538 					return iter.next();
539 				} else {
540 					iter.next();
541 				}
542 			}
543 	    	}
544 	    	
545 	    	throw new ArrayIndexOutOfBoundsException(index);
546     }
547     
548 	public synchronized int size() {
549 		if (this.count == -1) {
550 		final Iterator iter = iterator();
551 		
552 			this.count = 0;
553 			
554 			while (iter.hasNext()) {
555 				this.count++;
556 				iter.next();
557 			}
558 		}
559 		return this.count;
560 	}
561 }
562 
563 class ComplexElementCollection extends ElementCollection {
564 	private final XmlList[] parents;
565 	private final int[] nextParentElements;
566 	private final String[] parentElementNames;
567 	
568 	public ComplexElementCollection(final XmlList[] parents,
569               				  	  final int[] nextParentElements,
570 								  final String[] parentElementNames,
571 								  final String childElementName)
572 	{
573 		this.parents = new XmlList[parents.length + 1];
574 		this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
575 		System.arraycopy(parents, 0, this.parents, 1, parents.length);
576 		
577 		this.nextParentElements = new int[nextParentElements.length + 1];
578 		this.nextParentElements[0] = -1;	
579 		System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
580 		
581 		this.parentElementNames = new String[parentElementNames.length + 1];
582 		this.parentElementNames[0] = childElementName;
583 		System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
584 		
585 		//
586 		// Use the iterator to get the index of the first element
587 		//
588 		
589 		final ElementIterator iter = this.iterator();
590 		
591 		iter.findNextChild();
592 		
593 		this.nextParentElements[0] = iter.nextParentElements[0];
594 	}
595 	
596 	protected ElementCollection getResult(final String property) {
597 		return new ComplexElementCollection(this.parents,
598 				   							this.nextParentElements,
599 											this.parentElementNames,
600 											property);
601 	}
602 	
603 	/***
604 	 * 
605 	 * Used by the Invoker when it wants to iterate over this object
606 	 * 
607 	 * @return
608 	 */
609 	public ElementIterator iterator() {
610 		return new ElementIterator(this.parents, this.nextParentElements) {
611 						protected void findNextChild() {	
612 							this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
613 							
614 							while (this.nextParentElements[0] == -1) {
615 								this.parents[0] = findNextParent(1);
616 								
617 								if (this.parents[0] == null) {
618 									return;
619 								} else {
620 									this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
621 								}
622 							}
623 						}
624 						
625 						private XmlList findNextParent(final int i) {
626 							if (i == this.nextParentElements.length) return null;
627 							
628 							this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
629 							
630 							while (this.nextParentElements[i] == -1) {
631 								this.parents[i] = findNextParent(i + 1);
632 								
633 								if (this.parents[i] == null) {
634 									return null;
635 								} else {
636 									this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
637 								}
638 							}
639 						
640 							return (XmlList)this.parents[i].children[this.nextParentElements[i]];
641 						}
642 		};
643 	}
644 }