Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.0"
62 __versionTime__ = "28 May 2008 10:05"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy,sys
68 import warnings
69 import re
70 import sre_constants
71 import xml.sax.saxutils
72
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock',
92 ]
93
94
95 """
96 Detect if we are running version 3.X and make appropriate changes
97 Robert A. Clark
98 """
# Record whether the interpreter is Python 3.x and pick the matching
# "largest int" constant.  Python 3 also gets a basestring stand-in so the
# isinstance(..., basestring) checks in this module keep working.
_PY3K = sys.version_info[0] > 2
if not _PY3K:
    _MAX_INT = sys.maxint
else:
    _MAX_INT = sys.maxsize
    basestring = str
106
107 if not _PY3K:
109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
111 then < returns the unicode object | encodes it with the default encoding | ... >.
112 """
113 try:
114
115
116 return str(obj)
117
118 except UnicodeEncodeError:
119
120
121
122
123
124 return unicode(obj)
125
126
127
128
129
130
131 else:
132 _ustr = str
133
135 return dict( [(c,0) for c in strg] )
136
137
140
# Character-set constants built from the string module.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    # the Python 2 branch reads the older string.lowercase/uppercase names
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every character of string.printable that is not in string.whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
150
152 """base exception class for all parsing runtime exceptions"""
153 __slots__ = ( "loc","msg","pstr","parserElement" )
154
155
156 - def __init__( self, pstr, loc=0, msg=None, elem=None ):
165
167 """supported attributes by name are:
168 - lineno - returns the line number of the exception text
169 - col - returns the column number of the exception text
170 - line - returns the line containing the exception text
171 """
172 if( aname == "lineno" ):
173 return lineno( self.loc, self.pstr )
174 elif( aname in ("col", "column") ):
175 return col( self.loc, self.pstr )
176 elif( aname == "line" ):
177 return line( self.loc, self.pstr )
178 else:
179 raise AttributeError(aname)
180
182 return "%s (at char %d), (line:%d, col:%d)" % \
183 ( self.msg, self.loc, self.lineno, self.column )
196
198 """exception thrown when parse expressions don't match class;
199 supported attributes by name are:
200 - lineno - returns the line number of the exception text
201 - col - returns the column number of the exception text
202 - line - returns the line containing the exception text
203 """
204 pass
205
207 """user-throwable exception thrown when inconsistent parse content
208 is found; stops all parsing immediately"""
209 pass
210
212 """just like ParseFatalException, but thrown internally when an
213 ErrorStop indicates that parsing is to stop immediately because
214 an unbacktrackable syntax error has been found"""
218
219
220
221
222
223
224
225
226
227
228
229
230
231
233 """exception thrown by validate() if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Store the given list of parse elements on the exception.

    parseElementList is kept unchanged as self.parseElementTrace.
    """
    self.parseElementTrace = parseElementList
236
238 return "RecursiveGrammarException: %s" % self.parseElementTrace
239
246 return repr(self.tup)
247
249 """Structured parse results, to provide multiple means of access to the parsed data:
250 - as a list (len(results))
251 - by list index (results[0], results[1], etc.)
252 - by attribute (results.<resultsName>)
253 """
254 __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
def __new__(cls, toklist, name=None, asList=True, modal=True ):
    """Allocate a new instance, or hand back toklist when it already is one.

    A freshly allocated object is flagged with __doinit = True; an object
    passed through unchanged keeps whatever flag it already carries.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
261
262
263
def __init__( self, toklist, name=None, asList=True, modal=True ):
    """Set up token storage and, when a name is given, file toklist under it.

    toklist -- a list (shallow-copied) or any other value (wrapped in a
               one-item list)
    name    -- optional results name; an int name is converted with _ustr
    asList  -- if true, the named entry is a ParseResults (a copy of toklist,
               or one built from toklist[0]) stored with offset -1;
               otherwise toklist[0] (or toklist itself) is stored directly
    modal   -- if false, name is recorded in __accumNames
    """
    if self.__doinit:
        # one-time setup; the flag is cleared so it never runs twice
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__tokdict = dict()
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        else:
            self.__toklist = [toklist]

    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name,int):
        name = _ustr(name)
    self.__name = name

    # nothing to register for an empty token value
    if toklist in (None,'',[]):
        return

    if isinstance(toklist,basestring):
        toklist = [ toklist ]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError,TypeError):
            self[name] = toklist
        return

    if isinstance(toklist,ParseResults):
        nested = toklist.copy()
    else:
        nested = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(nested,-1)
    self[name].__name = name
299
301 if isinstance( i, (int,slice) ):
302 return self.__toklist[i]
303 else:
304 if i not in self.__accumNames:
305 return self.__tokdict[i][-1][0]
306 else:
307 return ParseResults([ v[0] for v in self.__tokdict[i] ])
308
310 if isinstance(v,_ParseResultsWithOffset):
311 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
312 sub = v[0]
313 elif isinstance(k,int):
314 self.__toklist[k] = v
315 sub = v
316 else:
317 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
318 sub = v
319 if isinstance(sub,ParseResults):
320 sub.__parent = wkref(self)
321
323 if isinstance(i,(int,slice)):
324 mylen = len( self.__toklist )
325 del self.__toklist[i]
326
327
328 if isinstance(i, int):
329 if i < 0:
330 i += mylen
331 i = slice(i, i+1)
332
333 removed = list(range(*i.indices(mylen)))
334 removed.reverse()
335
336 for name in self.__tokdict:
337 occurrences = self.__tokdict[name]
338 for j in removed:
339 for k, (value, position) in enumerate(occurrences):
340 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
341 else:
342 del self.__tokdict[i]
343
345 return k in self.__tokdict
346
def __len__( self ):
    """Return the number of entries in the token list."""
    tokens = self.__toklist
    return len( tokens )
def __bool__(self):
    """Return True when the token list holds at least one entry."""
    return 0 < len( self.__toklist )
# Python 2 spelling of the truth-value hook
__nonzero__ = __bool__
def __iter__( self ):
    """Return an iterator over the token list."""
    tokens = self.__toklist
    return iter( tokens )
def __reversed__( self ):
    """Return an iterator over the token list, last entry first."""
    backwards = reversed(self.__toklist)
    return iter( backwards )
353 """Returns all named result keys."""
354 return self.__tokdict.keys()
355
def pop( self, index=-1 ):
    """Remove the item at index and return it (the last item by default).

    index is used both to read the item (self[index]) and to delete it
    (del self[index]), so it may be a numeric position or a dict-style key.
    """
    removed = self[index]
    del self[index]
    return removed
362
def get(self, key, defaultValue=None):
    """Look up key, falling back to defaultValue when it is absent.

    Returns self[key] if key is in self; otherwise returns defaultValue,
    which is None unless the caller supplies one.
    """
    if key not in self:
        return defaultValue
    return self[key]
371
def insert( self, index, insStr ):
    """Insert insStr into the token list at index and renumber named results.

    After the list insert, every offset stored in the token dictionary that
    is greater than index is moved up by one, so the stored offsets keep
    pointing at the same tokens.
    """
    self.__toklist.insert(index, insStr)

    # The fix-up previously compared against an undefined name ("j"), which
    # raised NameError as soon as any named result existed; the insertion
    # point is index.
    # NOTE(review): an entry whose offset equals index is left unchanged,
    # keeping the strict '>' of the original expression - confirm intended.
    for name in self.__tokdict:
        occurrences = self.__tokdict[name]
        for k, (value, position) in enumerate(occurrences):
            occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
379
381 """Returns all named result keys and values as a list of tuples."""
382 return [(k,self[k]) for k in self.__tokdict]
383
385 """Returns all named result values."""
386 return [ v[-1][0] for v in self.__tokdict.values() ]
387
389 if name not in self.__slots__:
390 if name in self.__tokdict:
391 if name not in self.__accumNames:
392 return self.__tokdict[name][-1][0]
393 else:
394 return ParseResults([ v[0] for v in self.__tokdict[name] ])
395 else:
396 return ""
397 return None
398
400 ret = self.copy()
401 ret += other
402 return ret
403
405 if other.__tokdict:
406 offset = len(self.__toklist)
407 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
408 otheritems = other.__tokdict.items()
409 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
410 for (k,vlist) in otheritems for v in vlist]
411 for k,v in otherdictitems:
412 self[k] = v
413 if isinstance(v[0],ParseResults):
414 v[0].__parent = wkref(self)
415 self.__toklist += other.__toklist
416 self.__accumNames.update( other.__accumNames )
417 del other
418 return self
419
421 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
422
424 out = "["
425 sep = ""
426 for i in self.__toklist:
427 if isinstance(i, ParseResults):
428 out += sep + _ustr(i)
429 else:
430 out += sep + repr(i)
431 sep = ", "
432 out += "]"
433 return out
434
436 out = []
437 for item in self.__toklist:
438 if out and sep:
439 out.append(sep)
440 if isinstance( item, ParseResults ):
441 out += item._asStringList()
442 else:
443 out.append( _ustr(item) )
444 return out
445
447 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
448 out = []
449 for res in self.__toklist:
450 if isinstance(res,ParseResults):
451 out.append( res.asList() )
452 else:
453 out.append( res )
454 return out
455
457 """Returns the named parse results as dictionary."""
458 return dict( self.items() )
459
461 """Returns a new copy of a ParseResults object."""
462 ret = ParseResults( self.__toklist )
463 ret.__tokdict = self.__tokdict.copy()
464 ret.__parent = self.__parent
465 ret.__accumNames.update( self.__accumNames )
466 ret.__name = self.__name
467 return ret
468
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Render the parse results as an XML string.

    doctag         -- tag for the enclosing element; when None the results'
                      own name is used, and failing that "ITEM"
    namedItemsOnly -- if true, tokens without a results name are left out
                      (and "" is returned when the enclosing tag is unnamed)
    indent         -- prefix written before the enclosing element's tags
    formatted      -- if false, newlines and indentation are dropped
    """
    # token offset -> results name, later entries overriding earlier ones
    nameAtOffset = {}
    for key, entries in self.__tokdict.items():
        for entry in entries:
            nameAtOffset[entry[1]] = key

    if formatted:
        newline = "\n"
        childIndent = indent + " "
    else:
        newline = ""
        indent = ""
        childIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # value handed down to nested results as their namedItemsOnly argument
    childNamedOnly = namedItemsOnly and doctag is None

    pieces = [ newline, indent, "<", selfTag, ">" ]
    for pos, tok in enumerate(self.__toklist):
        tag = nameAtOffset.get(pos)
        if isinstance(tok,ParseResults):
            # nested results render themselves, tagged with their name if any
            pieces.append( tok.asXML(tag, childNamedOnly, childIndent, formatted) )
            continue
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        body = xml.sax.saxutils.escape(_ustr(tok))
        pieces.extend( [ newline, childIndent, "<", tag, ">",
                         body,
                         "</", tag, ">" ] )

    pieces.extend( [ newline, indent, "</", selfTag, ">" ] )
    return "".join(pieces)
528
530 for k,vlist in self.__tokdict.items():
531 for v,loc in vlist:
532 if sub is v:
533 return k
534 return None
535
537 """Returns the results name for this token expression."""
538 if self.__name:
539 return self.__name
540 elif self.__parent:
541 par = self.__parent()
542 if par:
543 return par.__lookup(self)
544 else:
545 return None
546 elif (len(self) == 1 and
547 len(self.__tokdict) == 1 and
548 self.__tokdict.values()[0][0][1] in (0,-1)):
549 return self.__tokdict.keys()[0]
550 else:
551 return None
552
def dump(self,indent='',depth=0):
    """Build a diagnostic listing of these results as one string.

    The first line is indent followed by the results as a list; each named
    result then follows on its own line as "- name: value", in sorted order.
    A named value that is itself a ParseResults with keys is dumped
    recursively with depth+1.

    indent -- prefix for every line, so the text can be embedded in a
              larger display
    depth  -- nesting level, used to pad the "- name:" lines
    """
    pieces = [ indent+_ustr(self.asList()) ]
    named = self.items()
    named.sort()
    for key, val in named:
        # pieces already holds the list line, so a separator is always needed
        pieces.append('\n')
        pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
        if isinstance(val,ParseResults) and val.keys():
            pieces.append( val.dump(indent,depth+1) )
        else:
            pieces.append(_ustr(val))
    return "".join(pieces)
576
577
579 return ( self.__toklist,
580 ( self.__tokdict.copy(),
581 self.__parent is not None and self.__parent() or None,
582 self.__accumNames,
583 self.__name ) )
584
586 self.__toklist = state[0]
587 self.__tokdict, \
588 par, \
589 inAccumNames, \
590 self.__name = state[1]
591 self.__accumNames = {}
592 self.__accumNames.update(inAccumNames)
593 if par is not None:
594 self.__parent = wkref(par)
595 else:
596 self.__parent = None
597
598
600 """Returns current column within a string, counting newlines as line separators.
601 The first column is number 1.
602
603 Note: the default parsing behavior is to expand tabs in the input string
604 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
605 on parsing strings containing <TAB>s, and suggested methods to maintain a
606 consistent view of the parsed string, the parse location, and line and column
607 positions within the parsed string.
608 """
609 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
610
612 """Returns current line number within a string, counting newlines as line separators.
613 The first line is number 1.
614
615 Note: the default parsing behavior is to expand tabs in the input string
616 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
617 on parsing strings containing <TAB>s, and suggested methods to maintain a
618 consistent view of the parsed string, the parse location, and line and column
619 positions within the parsed string.
620 """
621 return strg.count("\n",0,loc) + 1
622
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    loc  -- character offset into strg
    strg -- the text being examined

    The returned text runs from just after the last newline before loc up
    to (not including) the first newline at or after loc.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a real match (a newline
    # at the very start of strg), so the test must be >= 0 rather than > 0.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    return strg[lastCR+1:]
632
634 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
635
637 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
638
640 print ("Exception raised:" + _ustr(exc))
641
643 """'Do-nothing' debug action, to suppress debugging output during parsing."""
644 pass
645
647 """Abstract base level parser element class."""
648 DEFAULT_WHITE_CHARS = " \n\t\r"
649
654 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
655
657 self.parseAction = list()
658 self.failAction = None
659
660 self.strRepr = None
661 self.resultsName = None
662 self.saveAsList = savelist
663 self.skipWhitespace = True
664 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
665 self.copyDefaultWhiteChars = True
666 self.mayReturnEmpty = False
667 self.keepTabs = False
668 self.ignoreExprs = list()
669 self.debug = False
670 self.streamlined = False
671 self.mayIndexError = True
672 self.errmsg = ""
673 self.modalResults = True
674 self.debugActions = ( None, None, None )
675 self.re = None
676 self.callPreparse = True
677 self.callDuringTry = False
678
680 """Make a copy of this ParserElement. Useful for defining different parse actions
681 for the same parsing pattern, using copies of the original parse element."""
682 cpy = copy.copy( self )
683 cpy.parseAction = self.parseAction[:]
684 cpy.ignoreExprs = self.ignoreExprs[:]
685 if self.copyDefaultWhiteChars:
686 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
687 return cpy
688
690 """Define name for this expression, for use in debugging."""
691 self.name = name
692 self.errmsg = "Expected " + self.name
693 if hasattr(self,"exception"):
694 self.exception.msg = self.errmsg
695 return self
696
698 """Define name for referencing matching tokens as a nested attribute
699 of the returned parse results.
700 NOTE: this returns a *copy* of the original ParserElement object;
701 this is so that the client can define a basic element, such as an
702 integer, and reference it in multiple places with different names.
703 """
704 newself = self.copy()
705 newself.resultsName = name
706 newself.modalResults = not listAllMatches
707 return newself
708
710 """Method to invoke the Python pdb debugger when this element is
711 about to be parsed. Set breakFlag to True to enable, False to
712 disable.
713 """
714 if breakFlag:
715 _parseMethod = self._parse
716 def breaker(instring, loc, doActions=True, callPreParse=True):
717 import pdb
718 pdb.set_trace()
719 _parseMethod( instring, loc, doActions, callPreParse )
720 breaker._originalParseMethod = _parseMethod
721 self._parse = breaker
722 else:
723 if hasattr(self._parse,"_originalParseMethod"):
724 self._parse = self._parse._originalParseMethod
725 return self
726
728 """Internal method used to decorate parse actions that take fewer than 3 arguments,
729 so that all parse actions can be called as f(s,l,t)."""
730 STAR_ARGS = 4
731
732 try:
733 restore = None
734 if isinstance(f,type):
735 restore = f
736 f = f.__init__
737 if not _PY3K:
738 codeObj = f.func_code
739 else:
740 codeObj = f.code
741 if codeObj.co_flags & STAR_ARGS:
742 return f
743 numargs = codeObj.co_argcount
744 if not _PY3K:
745 if hasattr(f,"im_self"):
746 numargs -= 1
747 else:
748 if hasattr(f,"__self__"):
749 numargs -= 1
750 if restore:
751 f = restore
752 except AttributeError:
753 try:
754 if not _PY3K:
755 call_im_func_code = f.__call__.im_func.func_code
756 else:
757 call_im_func_code = f.__code__
758
759
760
761 if call_im_func_code.co_flags & STAR_ARGS:
762 return f
763 numargs = call_im_func_code.co_argcount
764 if not _PY3K:
765 if hasattr(f.__call__,"im_self"):
766 numargs -= 1
767 else:
768 if hasattr(f.__call__,"__self__"):
769 numargs -= 0
770 except AttributeError:
771 if not _PY3K:
772 call_func_code = f.__call__.func_code
773 else:
774 call_func_code = f.__call__.__code__
775
776 if call_func_code.co_flags & STAR_ARGS:
777 return f
778 numargs = call_func_code.co_argcount
779 if not _PY3K:
780 if hasattr(f.__call__,"im_self"):
781 numargs -= 1
782 else:
783 if hasattr(f.__call__,"__self__"):
784 numargs -= 1
785
786
787
788 if numargs == 3:
789 return f
790 else:
791 if numargs > 3:
792 def tmp(s,l,t):
793 return f(f.__call__.__self__, s,l,t)
794 if numargs == 2:
795 def tmp(s,l,t):
796 return f(l,t)
797 elif numargs == 1:
798 def tmp(s,l,t):
799 return f(t)
800 else:
801 def tmp(s,l,t):
802 return f()
803 try:
804 tmp.__name__ = f.__name__
805 except (AttributeError,TypeError):
806
807 pass
808 try:
809 tmp.__doc__ = f.__doc__
810 except (AttributeError,TypeError):
811
812 pass
813 try:
814 tmp.__dict__.update(f.__dict__)
815 except (AttributeError,TypeError):
816
817 pass
818 return tmp
819 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
820
822 """Define action to perform when successfully matching parse element definition.
823 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
824 fn(loc,toks), fn(toks), or just fn(), where:
825 - s = the original string being parsed (see note below)
826 - loc = the location of the matching substring
827 - toks = a list of the matched tokens, packaged as a ParseResults object
828 If the functions in fns modify the tokens, they can return them as the return
829 value from fn, and the modified list of tokens will replace the original.
830 Otherwise, fn does not need to return any value.
831
832 Note: the default parsing behavior is to expand tabs in the input string
833 before starting the parsing process. See L{I{parseString}<parseString>} for more information
834 on parsing strings containing <TAB>s, and suggested methods to maintain a
835 consistent view of the parsed string, the parse location, and line and column
836 positions within the parsed string.
837 """
838 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
839 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
840 return self
841
843 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
844 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
845 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
846 return self
847
849 """Define action to perform if parsing fails at this expression.
850 Fail action fn is a callable function that takes the arguments
851 fn(s,loc,expr,err) where:
852 - s = string being parsed
853 - loc = location where expression match was attempted and failed
854 - expr = the parse expression that failed
855 - err = the exception thrown
856 The function returns no value. It may throw ParseFatalException
857 if it is desired to stop parsing immediately."""
858 self.failAction = fn
859 return self
860
862 exprsFound = True
863 while exprsFound:
864 exprsFound = False
865 for e in self.ignoreExprs:
866 try:
867 while 1:
868 loc,dummy = e._parse( instring, loc )
869 exprsFound = True
870 except ParseException:
871 pass
872 return loc
873
875 if self.ignoreExprs:
876 loc = self._skipIgnorables( instring, loc )
877
878 if self.skipWhitespace:
879 wt = self.whiteChars
880 instrlen = len(instring)
881 while loc < instrlen and instring[loc] in wt:
882 loc += 1
883
884 return loc
885
886 - def parseImpl( self, instring, loc, doActions=True ):
888
889 - def postParse( self, instring, loc, tokenlist ):
891
892
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc in instring without using the parse cache.

    Steps: optional preParse, parseImpl, postParse, wrapping the tokens in a
    ParseResults, then running the parse actions (when doActions or
    self.callDuringTry is set).  A parse action that returns a non-None
    value replaces the current results.

    instring     -- the text being parsed
    loc          -- offset at which to attempt the match
    doActions    -- passed to parseImpl; also enables the parse actions
    callPreParse -- preParse runs only if this and self.callPreparse are true

    Returns (loc, retTokens): the offset after the match and the results.
    Raises ParseException if parseImpl raises IndexError (on the fast path
    only when it is guarded, see below); any ParseBaseException is re-raised
    after the debug and fail actions, when present, have been called.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slower path: the start and any failure are reported to the hooks
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # hooks and parse actions are given the incoming loc, not preloc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() is used instead of "except X, err" (Python 2
            # only) or "except X as err" (not accepted by older Python 2),
            # so this module parses on every interpreter it targets.
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # the IndexError guard is only installed when it may be needed
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                # same cross-version capture as above
                err = sys.exc_info()[1]
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
964
970
971
972
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around _parseNoCache.

    Results and ParseBaseException failures are both stored in
    ParserElement._exprArgCache under the key
    (self, instring, loc, callPreParse, doActions).

    Returns (loc, tokens) like _parseNoCache.  A cached exception is raised
    again; a cached success is returned with a copy of the stored tokens.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # Return a copy: a caller that mutates the returned tokens would
        # otherwise alter the cached entry seen by later lookups.
        return (value[0],value[1].copy())
    try:
        value = self._parseNoCache( instring, loc, doActions, callPreParse )
    except ParseBaseException:
        # sys.exc_info() instead of the Python-2-only "except X, pe" form,
        # so the module also parses on Python 3
        ParserElement._exprArgCache[ lookup ] = sys.exc_info()[1]
        raise
    ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
    return value
988
989 _parse = _parseNoCache
990
991
992 _exprArgCache = {}
995 resetCache = staticmethod(resetCache)
996
997 _packratEnabled = False
999 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1000 Repeated parse attempts at the same string location (which happens
1001 often in many complex grammars) can immediately return a cached value,
1002 instead of re-executing parsing/validating code. Memoizing is done of
1003 both valid results and parsing exceptions.
1004
1005 This speedup may break existing programs that use parse actions that
1006 have side-effects. For this reason, packrat parsing is disabled when
1007 you first import pyparsing. To activate the packrat feature, your
1008 program must call the class method ParserElement.enablePackrat(). If
1009 your program uses psyco to "compile as you go", you must call
1010 enablePackrat before calling psyco.full(). If you do not do this,
1011 Python will crash. For best results, call enablePackrat() immediately
1012 after importing pyparsing.
1013 """
1014 if not ParserElement._packratEnabled:
1015 ParserElement._packratEnabled = True
1016 ParserElement._parse = ParserElement._parseCache
1017 enablePackrat = staticmethod(enablePackrat)
1018
1020 """Execute the parse expression with the given string.
1021 This is the main interface to the client code, once the complete
1022 expression has been built.
1023
1024 If you want the grammar to require that the entire input string be
1025 successfully parsed, then set parseAll to True (equivalent to ending
1026 the grammar with StringEnd()).
1027
1028 Note: parseString implicitly calls expandtabs() on the input string,
1029 in order to report proper column numbers in parse actions.
1030 If the input string contains tabs and
1031 the grammar uses parse actions that use the loc argument to index into the
1032 string being parsed, you can ensure you have a consistent view of the input
1033 string by:
1034 - calling parseWithTabs on your grammar before calling parseString
1035 (see L{I{parseWithTabs}<parseWithTabs>})
1036 - define your parse action using the full (s,loc,toks) signature, and
1037 reference the input string using the parse action's s argument
1038 - explicitly expand the tabs in your input string before calling
1039 parseString
1040 """
1041 ParserElement.resetCache()
1042 if not self.streamlined:
1043 self.streamline()
1044
1045 for e in self.ignoreExprs:
1046 e.streamline()
1047 if not self.keepTabs:
1048 instring = instring.expandtabs()
1049 loc, tokens = self._parse( instring, 0 )
1050 if parseAll:
1051 StringEnd()._parse( instring, loc )
1052 return tokens
1053
1055 """Scan the input string for expression matches. Each match will return the
1056 matching tokens, start location, and end location. May be called with optional
1057 maxMatches argument, to clip scanning after 'n' matches are found.
1058
1059 Note that the start and end locations are reported relative to the string
1060 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1061 strings with embedded tabs."""
1062 if not self.streamlined:
1063 self.streamline()
1064 for e in self.ignoreExprs:
1065 e.streamline()
1066
1067 if not self.keepTabs:
1068 instring = _ustr(instring).expandtabs()
1069 instrlen = len(instring)
1070 loc = 0
1071 preparseFn = self.preParse
1072 parseFn = self._parse
1073 ParserElement.resetCache()
1074 matches = 0
1075 while loc <= instrlen and matches < maxMatches:
1076 try:
1077 preloc = preparseFn( instring, loc )
1078 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1079 except ParseException:
1080 loc = preloc+1
1081 else:
1082 matches += 1
1083 yield tokens, preloc, nextLoc
1084 loc = nextLoc
1085
1110
1112 """Another extension to scanString, simplifying the access to the tokens found
1113 to match the given parse expression. May be called with optional
1114 maxMatches argument, to clip searching after 'n' matches are found.
1115 """
1116 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1117
1119 """Implementation of + operator - returns And"""
1120 if isinstance( other, basestring ):
1121 other = Literal( other )
1122 if not isinstance( other, ParserElement ):
1123 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1124 SyntaxWarning, stacklevel=2)
1125 return None
1126 return And( [ self, other ] )
1127
1129 """Implementation of + operator when left operand is not a ParserElement"""
1130 if isinstance( other, basestring ):
1131 other = Literal( other )
1132 if not isinstance( other, ParserElement ):
1133 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1134 SyntaxWarning, stacklevel=2)
1135 return None
1136 return other + self
1137
1139 """Implementation of - operator, returns And with error stop"""
1140 if isinstance( other, basestring ):
1141 other = Literal( other )
1142 if not isinstance( other, ParserElement ):
1143 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1144 SyntaxWarning, stacklevel=2)
1145 return None
1146 return And( [ self, And._ErrorStop(), other ] )
1147
1149 """Implementation of - operator when left operand is not a ParserElement"""
1150 if isinstance( other, basestring ):
1151 other = Literal( other )
1152 if not isinstance( other, ParserElement ):
1153 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1154 SyntaxWarning, stacklevel=2)
1155 return None
1156 return other - self
1157
1159 if isinstance(other,int):
1160 minElements, optElements = other,0
1161 elif isinstance(other,tuple):
1162 if len(other)==0:
1163 other = (None,None)
1164 elif len(other)==1:
1165 other = (other[0],None)
1166 if len(other)==2:
1167 if other[0] is None:
1168 other = (0, other[1])
1169 if isinstance(other[0],int) and other[1] is None:
1170 if other[0] == 0:
1171 return ZeroOrMore(self)
1172 if other[0] == 1:
1173 return OneOrMore(self)
1174 else:
1175 return self*other[0] + ZeroOrMore(self)
1176 elif isinstance(other[0],int) and isinstance(other[1],int):
1177 minElements, optElements = other
1178 optElements -= minElements
1179 else:
1180 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1181 else:
1182 raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
1183 else:
1184 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1185
1186 if minElements < 0:
1187 raise ValueError("cannot multiply ParserElement by negative value")
1188 if optElements < 0:
1189 raise ValueError("second tuple value must be greater or equal to first tuple value")
1190 if minElements == optElements == 0:
1191 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1192
1193 if (optElements):
1194 def makeOptionalList(n):
1195 if n>1:
1196 return Optional(self + makeOptionalList(n-1))
1197 else:
1198 return Optional(self)
1199 if minElements:
1200 if minElements == 1:
1201 ret = self + makeOptionalList(optElements)
1202 else:
1203 ret = And([self]*minElements) + makeOptionalList(optElements)
1204 else:
1205 ret = makeOptionalList(optElements)
1206 else:
1207 if minElements == 1:
1208 ret = self
1209 else:
1210 ret = And([self]*minElements)
1211 return ret
1212
1215
1217 """Implementation of | operator - returns MatchFirst"""
1218 if isinstance( other, basestring ):
1219 other = Literal( other )
1220 if not isinstance( other, ParserElement ):
1221 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1222 SyntaxWarning, stacklevel=2)
1223 return None
1224 return MatchFirst( [ self, other ] )
1225
1227 """Implementation of | operator when left operand is not a ParserElement"""
1228 if isinstance( other, basestring ):
1229 other = Literal( other )
1230 if not isinstance( other, ParserElement ):
1231 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1232 SyntaxWarning, stacklevel=2)
1233 return None
1234 return other | self
1235
1237 """Implementation of ^ operator - returns Or"""
1238 if isinstance( other, basestring ):
1239 other = Literal( other )
1240 if not isinstance( other, ParserElement ):
1241 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1242 SyntaxWarning, stacklevel=2)
1243 return None
1244 return Or( [ self, other ] )
1245
1247 """Implementation of ^ operator when left operand is not a ParserElement"""
1248 if isinstance( other, basestring ):
1249 other = Literal( other )
1250 if not isinstance( other, ParserElement ):
1251 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1252 SyntaxWarning, stacklevel=2)
1253 return None
1254 return other ^ self
1255
1257 """Implementation of & operator - returns Each"""
1258 if isinstance( other, basestring ):
1259 other = Literal( other )
1260 if not isinstance( other, ParserElement ):
1261 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1262 SyntaxWarning, stacklevel=2)
1263 return None
1264 return Each( [ self, other ] )
1265
1267 """Implementation of & operator when left operand is not a ParserElement"""
1268 if isinstance( other, basestring ):
1269 other = Literal( other )
1270 if not isinstance( other, ParserElement ):
1271 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1272 SyntaxWarning, stacklevel=2)
1273 return None
1274 return other & self
1275
1277 """Implementation of ~ operator - returns NotAny"""
1278 return NotAny( self )
1279
1281 """Shortcut for setResultsName, with listAllMatches=default::
1282 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1283 could be written as::
1284 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1285 """
1286 return self.setResultsName(name)
1287
1289 """Suppresses the output of this ParserElement; useful to keep punctuation from
1290 cluttering up returned output.
1291 """
1292 return Suppress( self )
1293
1295 """Disables the skipping of whitespace before matching the characters in the
1296 ParserElement's defined pattern. This is normally only used internally by
1297 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1298 """
1299 self.skipWhitespace = False
1300 return self
1301
1303 """Overrides the default whitespace chars
1304 """
1305 self.skipWhitespace = True
1306 self.whiteChars = chars
1307 self.copyDefaultWhiteChars = False
1308 return self
1309
1311 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1312 Must be called before parseString when the input grammar contains elements that
1313 match <TAB> characters."""
1314 self.keepTabs = True
1315 return self
1316
1318 """Define expression to be ignored (e.g., comments) while doing pattern
1319 matching; may be called repeatedly, to define multiple comment or other
1320 ignorable patterns.
1321 """
1322 if isinstance( other, Suppress ):
1323 if other not in self.ignoreExprs:
1324 self.ignoreExprs.append( other )
1325 else:
1326 self.ignoreExprs.append( Suppress( other ) )
1327 return self
1328
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       reporting through the three given callbacks.  Any callback passed as
       None (or any other false value) is replaced by the corresponding
       module-level default debug action.  Returns self, to allow chaining."""
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = ( startAction, successAction, exceptionAction )
    self.debug = True
    return self
1336
1338 """Enable display of debugging messages while doing pattern matching.
1339 Set flag to True to enable, False to disable."""
1340 if flag:
1341 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1342 else:
1343 self.debug = False
1344 return self
1345
1348
1351
1353 self.streamlined = True
1354 self.strRepr = None
1355 return self
1356
1359
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions."""
    # validateTrace is accepted for signature compatibility with subclasses;
    # this base implementation always starts the recursion check from an
    # empty path.
    recursionPath = []
    self.checkRecursion( recursionPath )
1363
1365 """Execute the parse expression on the given file or filename.
1366 If a filename is specified (instead of a file object),
1367 the entire file is opened, read, and closed before parsing.
1368 """
1369 try:
1370 file_contents = file_or_filename.read()
1371 except AttributeError:
1372 f = open(file_or_filename, "rb")
1373 file_contents = f.read()
1374 f.close()
1375 return self.parseString(file_contents)
1376
1379
1381 if aname == "myException":
1382 self.myException = ret = self.getException();
1383 return ret;
1384 else:
1385 raise AttributeError("no such attribute " + aname)
1386
1396
1398 return hash(id(self))
1399
1401 return self == other
1402
1403
1404 -class Token(ParserElement):
1405 """Abstract ParserElement subclass, for defining atomic matching patterns."""
1408
1409
1411 s = super(Token,self).setName(name)
1412 self.errmsg = "Expected " + self.name
1413
1414 return s
1415
1416
1418 """An empty token, will always match."""
1420 super(Empty,self).__init__()
1421 self.name = "Empty"
1422 self.mayReturnEmpty = True
1423 self.mayIndexError = False
1424
1425
1427 """A token that will never match."""
1429 super(NoMatch,self).__init__()
1430 self.name = "NoMatch"
1431 self.mayReturnEmpty = True
1432 self.mayIndexError = False
1433 self.errmsg = "Unmatchable token"
1434
1435
def parseImpl( self, instring, loc, doActions=True ):
    """Never match: stamp the element's reusable exception object with the
       current location and input string, then raise it.  doActions is
       accepted for interface compatibility and is not used."""
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1441
1442
1444 """Token to exactly match a specified string."""
1446 super(Literal,self).__init__()
1447 self.match = matchString
1448 self.matchLen = len(matchString)
1449 try:
1450 self.firstMatchChar = matchString[0]
1451 except IndexError:
1452 warnings.warn("null string passed to Literal; use Empty() instead",
1453 SyntaxWarning, stacklevel=2)
1454 self.__class__ = Empty
1455 self.name = '"%s"' % _ustr(self.match)
1456 self.errmsg = "Expected " + self.name
1457 self.mayReturnEmpty = False
1458
1459 self.mayIndexError = False
1460
1461
1462
1463
1464
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal string at loc.

       Returns (loc advanced past the literal, the literal text) on success;
       otherwise raises the element's reusable exception, stamped with the
       failing location and the input string.  doActions is not used."""
    # cheap single-character comparison first; only fall back to the full
    # startswith() test for literals longer than one character
    matched = instring[loc] == self.firstMatchChar
    if matched and self.matchLen != 1:
        matched = instring.startswith(self.match,loc)
    if matched:
        return loc+self.matchLen, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1474 _L = Literal
1475
1477 """Token to exactly match a specified string as a keyword, that is, it must be
1478 immediately followed by a non-keyword character. Compare with Literal::
1479 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1480 Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
1481 Accepts two optional constructor arguments in addition to the keyword string:
1482 identChars is a string of characters that would be valid identifier characters,
1483 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
1484 matching, default is False.
1485 """
1486 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1487
1489 super(Keyword,self).__init__()
1490 self.match = matchString
1491 self.matchLen = len(matchString)
1492 try:
1493 self.firstMatchChar = matchString[0]
1494 except IndexError:
1495 warnings.warn("null string passed to Keyword; use Empty() instead",
1496 SyntaxWarning, stacklevel=2)
1497 self.name = '"%s"' % self.match
1498 self.errmsg = "Expected " + self.name
1499 self.mayReturnEmpty = False
1500
1501 self.mayIndexError = False
1502 self.caseless = caseless
1503 if caseless:
1504 self.caselessmatch = matchString.upper()
1505 identChars = identChars.upper()
1506 self.identChars = _str2dict(identChars)
1507
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc, requiring that it is neither preceded nor
       followed by one of the identifier characters.

       Returns (loc advanced past the keyword, the keyword as given to the
       constructor) on success; otherwise raises the element's reusable
       exception stamped with loc and the input string.  doActions is not
       used."""
    endloc = loc + self.matchLen
    identChars = self.identChars
    if self.caseless:
        found = instring[ loc:endloc ].upper() == self.caselessmatch
        # the character right after the keyword must not be an identifier char
        if found and endloc < len(instring):
            found = instring[endloc].upper() not in identChars
        # nor may the character right before it
        if found and loc != 0:
            found = instring[loc-1].upper() not in identChars
    else:
        found = instring[loc] == self.firstMatchChar
        if found and self.matchLen != 1:
            found = instring.startswith(self.match,loc)
        if found and endloc < len(instring):
            found = instring[endloc] not in identChars
        if found and loc != 0:
            found = instring[loc-1] not in identChars

    if found:
        return endloc, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1525
1530
1535 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1536
1537
1539 """Token to match a specified string, ignoring case of letters.
1540 Note: the matched results will always be in the case of the given
1541 match string, NOT the case of the input text.
1542 """
1544 super(CaselessLiteral,self).__init__( matchString.upper() )
1545
1546 self.returnString = matchString
1547 self.name = "'%s'" % self.returnString
1548 self.errmsg = "Expected " + self.name
1549
1550
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal at loc without regard to case.

       Returns (loc advanced past the literal, the literal in the case given
       to the constructor) on success; otherwise raises the element's reusable
       exception stamped with loc and the input string.  doActions is not
       used."""
    endloc = loc + self.matchLen
    # self.match holds the upper-cased literal, so compare upper-cased input
    if instring[ loc:endloc ].upper() != self.match:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return endloc, self.returnString
1559
1563
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc without regard to case, requiring that the
       character following it (if any) is not an identifier character.

       Returns (loc advanced past the keyword, the keyword as given to the
       constructor) on success; otherwise raises the element's reusable
       exception stamped with loc and the input string.  doActions is not
       used."""
    endloc = loc + self.matchLen
    found = instring[ loc:endloc ].upper() == self.caselessmatch
    # only the trailing boundary is checked here
    if found and endloc < len(instring):
        found = instring[endloc].upper() not in self.identChars
    if found:
        return endloc, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1573
1575 """Token for matching words composed of allowed character sets.
1576 Defined with string containing all allowed initial characters,
1577 an optional string containing allowed body characters (if omitted,
1578 defaults to the initial character set), and an optional minimum,
1579 maximum, and/or exact length. The default value for min is 1 (a
1580 minimum value < 1 is not valid); the default values for max and exact
1581 are 0, meaning no maximum or exact length restriction.
1582 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
    """Define a word token.

       Parameters:
        - initChars - string of characters allowed as the first character
        - bodyChars - string of characters allowed after the first character
          (default=None, or empty => same as initChars)
        - min - minimum word length; must be >= 1 (default=1)
        - max - maximum word length; 0 means no maximum (default=0)
        - exact - exact word length, overriding min and max; 0 means no
          exact length (default=0)
        - asKeyword - if True, the word must not be adjacent to other body
          characters (default=False)

       Raises ValueError if min < 1.
    """
    super(Word,self).__init__()
    self.initCharsOrig = initChars
    self.initChars = _str2dict(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = _str2dict(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = _str2dict(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.asKeyword = asKeyword

    # When there are no length restrictions and no spaces in the character
    # sets, precompile an equivalent regular expression; parseImpl uses it
    # in place of the character-by-character scan.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # A single initial character can be emitted as an escaped literal.
            # This must test the *initial* set: emitting a multi-character
            # initial set as a literal would demand that whole sequence
            # instead of any one of its characters.
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # fall back to the manual scan if the generated pattern is not
            # accepted by the re module
            self.re = None
1633
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at loc.

       Uses the precompiled regular expression when one is available,
       otherwise scans character by character.  Returns (loc advanced past
       the word, the word text) on success; otherwise raises the element's
       reusable exception, stamped with the location where matching stopped
       and the input string.  doActions is not used."""
    if self.re:
        found = self.re.match(instring,loc)
        if found:
            return found.end(), found.group()
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    if instring[ loc ] not in self.initChars:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    start = loc
    bodychars = self.bodyChars
    instrlen = len(instring)
    # never scan past the maximum word length or the end of the input
    stop = min( start + self.maxLen, instrlen )
    loc += 1
    while loc < stop and instring[loc] in bodychars:
        loc += 1

    # is the character just past the scanned text still a body character?
    runsOn = loc < instrlen and instring[loc] in bodychars
    tooShort = loc - start < self.minLen
    tooLong = self.maxSpecified and runsOn
    notDelimited = self.asKeyword and \
                   ( (start>0 and instring[start-1] in bodychars) or runsOn )

    if tooShort or tooLong or notDelimited:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
1678
1680 try:
1681 return super(Word,self).__str__()
1682 except:
1683 pass
1684
1685
1686 if self.strRepr is None:
1687
1688 def charsAsStr(s):
1689 if len(s)>4:
1690 return s[:4]+"..."
1691 else:
1692 return s
1693
1694 if ( self.initCharsOrig != self.bodyCharsOrig ):
1695 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1696 else:
1697 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1698
1699 return self.strRepr
1700
1701
1703 """Token for matching strings that match a given regular expression.
1704 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1705 """
def __init__( self, pattern, flags=0):
    """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
    super(Regex,self).__init__()

    if not len(pattern):
        warnings.warn("null string passed to Regex; use Empty() instead",
                SyntaxWarning, stacklevel=2)

    self.pattern = pattern
    self.flags = flags

    try:
        compiled = re.compile(self.pattern, self.flags)
    except sre_constants.error:
        # report which pattern was rejected, then let the original
        # compile error propagate to the caller
        warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
            SyntaxWarning, stacklevel=2)
        raise
    self.re = compiled
    self.reString = self.pattern

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1730
def parseImpl( self, instring, loc, doActions=True ):
    """Match the compiled regular expression at loc.

       Returns (end of the match, ParseResults holding the matched text) on
       success, with every named group of the pattern also stored in the
       results under its group name.  Otherwise raises the element's reusable
       exception stamped with loc and the input string.  doActions is not
       used."""
    found = self.re.match(instring,loc)
    if not found:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    tokens = ParseResults(found.group())
    # expose named groups as results names
    for groupName, groupText in found.groupdict().items():
        tokens[groupName] = groupText
    return found.end(), tokens
1746
1748 try:
1749 return super(Regex,self).__str__()
1750 except:
1751 pass
1752
1753 if self.strRepr is None:
1754 self.strRepr = "Re:(%s)" % repr(self.pattern)
1755
1756 return self.strRepr
1757
1758
1760 """Token for matching strings that are delimited by quoting characters.
1761 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       Raises SyntaxError if quoteChar or endQuoteChar is empty after
       stripping surrounding whitespace.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is not significant in the delimiters
    quoteChar = quoteChar.strip()
    if not len(quoteChar):
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not len(endQuoteChar):
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # Characters that may never appear as ordinary content: the first
    # character of the closing quote, the escape character (if any) and,
    # for single-line strings, the end-of-line characters.
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        eolChars = ''
    else:
        self.flags = 0
        eolChars = r'\n\r'
    escCharRange = ''
    if escChar is not None:
        escCharRange = _escapeRegexRangeChars(escChar) or ''

    # each entry is one alternative for a unit of quoted content
    alternatives = [ r'[^%s%s%s]' % ( _escapeRegexRangeChars(self.endQuoteChar[0]),
                                      eolChars,
                                      escCharRange ) ]
    # a proper prefix of a multi-character closing quote, followed by a
    # character that breaks the closing quote, is still content
    for i in range(len(self.endQuoteChar)-1,0,-1):
        alternatives.append( "(?:%s[^%s])" % ( re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]) ) )
    if escQuote:
        alternatives.append( r'(?:%s)' % re.escape(escQuote) )
    if escChar:
        alternatives.append( r'(?:%s.)' % re.escape(escChar) )
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"

    self.pattern = ( re.escape(self.quoteChar) + '(?:' +
                     '|'.join(alternatives) +
                     ')*' + re.escape(self.endQuoteChar) )

    try:
        compiled = re.compile(self.pattern, self.flags)
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise
    self.re = compiled
    self.reString = self.pattern

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1835
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at loc.

       Returns (end of the match, matched text) on success.  When
       unquoteResults is set, the delimiters are stripped, escape sequences
       are replaced by the escaped character, and escQuote sequences are
       replaced by the end quote string.  Otherwise raises the element's
       reusable exception stamped with loc and the input string.
       doActions is not used."""
    # cheap first-character test before running the regular expression
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        # ret is a slice of the regex match text, so it is always a string

        # replace escaped characters (raw string, so that \g reaches
        # re.sub unchanged rather than being an invalid string escape)
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

        # replace escaped quotes
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1862
1864 try:
1865 return super(QuotedString,self).__str__()
1866 except:
1867 pass
1868
1869 if self.strRepr is None:
1870 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1871
1872 return self.strRepr
1873
1874
1876 """Token for matching words composed of characters *not* in a given set.
1877 Defined with string containing all disallowed characters, and an optional
1878 minimum, maximum, and/or exact length. The default value for min is 1 (a
1879 minimum value < 1 is not valid); the default values for max and exact
1880 are 0, meaning no maximum or exact length restriction.
1881 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a token matching a run of characters that are not in notChars.

       Parameters:
        - notChars - string of characters that end the run
        - min - minimum run length; must be >= 1 (default=1)
        - max - maximum run length; 0 means no maximum (default=0)
        - exact - exact run length, overriding min and max; 0 means no
          exact length (default=0)

       Raises ValueError if min < 1.
    """
    super(CharsNotIn,self).__init__()
    # whitespace is ordinary content for this token unless listed in notChars
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length fixes both bounds
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )

    self.mayIndexError = False
1906
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters, none of which is in notChars, at loc.

       Returns (loc advanced past the run, the run text) on success;
       otherwise raises the element's reusable exception, stamped with the
       location where matching stopped and the input string.  doActions is
       not used."""
    excluded = self.notChars
    start = loc
    if instring[start] in excluded:
        failure = self.myException
        failure.loc = start
        failure.pstr = instring
        raise failure

    # never scan past the maximum run length or the end of the input
    stop = min( start+self.maxLen, len(instring) )
    loc = start + 1
    while loc < stop and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
1931
1933 try:
1934 return super(CharsNotIn, self).__str__()
1935 except:
1936 pass
1937
1938 if self.strRepr is None:
1939 if len(self.notChars) > 4:
1940 self.strRepr = "!W:(%s...)" % self.notChars[:4]
1941 else:
1942 self.strRepr = "!W:(%s)" % self.notChars
1943
1944 return self.strRepr
1945
1947 """Special matching class for matching whitespace. Normally, whitespace is ignored
1948 by pyparsing grammars. This class is included when some whitespace structures
1949 are significant. Define with a string containing the whitespace characters to be
1950 matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments,
1951 as defined for the Word class."""
1952 whiteStrs = {
1953 " " : "<SPC>",
1954 "\t": "<TAB>",
1955 "\n": "<LF>",
1956 "\r": "<CR>",
1957 "\f": "<FF>",
1958 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Define a token matching a run of the whitespace characters in ws.

       Parameters:
        - ws - string of whitespace characters to be matched
          (default=" \\t\\r\\n")
        - min - minimum run length (default=1)
        - max - maximum run length; 0 means no maximum (default=0)
        - exact - exact run length, overriding min and max; 0 means no
          exact length (default=0)
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # the characters being matched must not also be skipped as leading
    # whitespace before this token
    skippable = [ c for c in self.whiteChars if c not in self.matchWhite ]
    self.setWhitespaceChars( "".join(skippable) )
    #~ self.leaveWhitespace()
    self.name = "".join([ White.whiteStrs[c] for c in self.matchWhite ])
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    self.minLen = min
    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        # an exact length fixes both bounds
        self.minLen = exact
        self.maxLen = exact
1979
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of the configured whitespace characters at loc.

       Returns (loc advanced past the run, the run text) on success;
       otherwise raises the element's reusable exception, stamped with the
       location where matching stopped and the input string.  doActions is
       not used."""
    start = loc
    if instring[ start ] not in self.matchWhite:
        failure = self.myException
        failure.loc = start
        failure.pstr = instring
        raise failure

    # never scan past the maximum run length or the end of the input
    stop = min( start + self.maxLen, len(instring) )
    loc = start + 1
    while loc < stop and instring[loc] in self.matchWhite:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
2002
2003
2006 super(_PositionToken,self).__init__()
2007 self.name=self.__class__.__name__
2008 self.mayReturnEmpty = True
2009 self.mayIndexError = False
2010
2012 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2016
2018 if col(loc,instring) != self.col:
2019 instrlen = len(instring)
2020 if self.ignoreExprs:
2021 loc = self._skipIgnorables( instring, loc )
2022 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2023 loc += 1
2024 return loc
2025
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from loc to the configured column.

       Returns (location of the target column, the text skipped to reach
       it).  Raises ParseException if loc is already past the target column.
       doActions is not used."""
    currentCol = col( loc, instring )
    if currentCol > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    targetLoc = loc + (self.col - currentCol)
    return targetLoc, instring[ loc:targetLoc ]
2033
2035 """Matches if current position is at the beginning of a line within the parse string"""
2040
2041
2047
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, if loc is at the beginning of a line.

       loc qualifies when it is 0, when it is where preParse lands starting
       from the beginning of the input, or when the preceding character is a
       newline.  Returns (loc, []) on success; otherwise raises the element's
       reusable exception stamped with loc and the input string.  doActions
       is not used."""
    atLineStart = ( loc == 0 or
                    loc == self.preParse( instring, 0 ) or
                    instring[loc-1] == "\n" )
    if atLineStart:
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2058
2060 """Matches if current position is at the end of a line within the parse string"""
2065
2066
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed if loc is at the end of a line.

       Returns (loc+1, "\\n") when the character at loc is a newline, and
       (loc+1, []) when loc is exactly at the end of the input.  In every
       other case raises the element's reusable exception stamped with loc
       and the input string.  doActions is not used."""
    instrlen = len(instring)
    if loc < instrlen and instring[loc] == "\n":
        return loc+1, "\n"
    if loc == instrlen:
        return loc+1, []

    # a non-newline character, or a location beyond the end of the input
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2084
2086 """Matches if current position is at the beginning of the parse string"""
2090
2091
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, if loc is at the beginning of the input.

       loc qualifies when it is 0 or when it is where preParse lands starting
       from the beginning of the input.  Returns (loc, []) on success;
       otherwise raises the element's reusable exception stamped with loc and
       the input string.  doActions is not used."""
    if loc == 0 or loc == self.preParse( instring, 0 ):
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2102
2104 """Matches if current position is at the end of the parse string"""
2108
2109
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed if loc is at or beyond the end of the input.

       Returns (loc+1, []) when loc is exactly the end of the input and
       (loc, []) when loc is already past it.  Raises the element's reusable
       exception, stamped with loc and the input string, when input remains
       at loc.  doActions is not used."""
    instrlen = len(instring)
    if loc < instrlen:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == instrlen:
        return loc+1, []
    # loc > instrlen: nothing further to step over
    return loc, []
2126
2128 """Matches if the current position is at the beginning of a Word, and
2129 is not preceded by any character in a given set of wordChars
2130 (default=printables). To emulate the \b behavior of regular expressions,
2131 use WordStart(alphanums). WordStart will also match at the beginning of
2132 the string being parsed, or at the beginning of a line.
2133 """
2135 super(WordStart,self).__init__()
2136 self.wordChars = _str2dict(wordChars)
2137 self.errmsg = "Not at the start of a word"
2138
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, if loc is at the start of a word.

       Location 0 always qualifies.  Elsewhere the preceding character must
       not be a word character and the character at loc must be one.
       Returns (loc, []) on success; otherwise raises the element's reusable
       exception stamped with loc and the input string.  doActions is not
       used."""
    if loc == 0:
        return loc, []

    wordChars = self.wordChars
    if instring[loc-1] in wordChars or instring[loc] not in wordChars:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2148
2150 """Matches if the current position is at the end of a Word, and
2151 is not followed by any character in a given set of wordChars
2152 (default=printables). To emulate the \b behavior of regular expressions,
2153 use WordEnd(alphanums). WordEnd will also match at the end of
2154 the string being parsed, or at the end of a line.
2155 """
2157 super(WordEnd,self).__init__()
2158 self.wordChars = _str2dict(wordChars)
2159 self.skipWhitespace = False
2160 self.errmsg = "Not at the end of a word"
2161
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, if loc is at the end of a word.

       A location at or beyond the end of the input always qualifies.
       Elsewhere the character at loc must not be a word character and the
       preceding character must be one.  Returns (loc, []) on success;
       otherwise raises the element's reusable exception stamped with loc and
       the input string.  doActions is not used."""
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # At loc 0 there is no preceding character, so no word can end
        # there; test for it explicitly, because instring[loc-1] would wrap
        # around to the last character of the input.
        if (instring[loc] in self.wordChars or
            loc == 0 or
            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
    return loc, []
2173
2174
2176 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions.

       exprs may be a list of expressions (kept as the same list object), a
       string (wrapped in a Literal), or a single expression (wrapped in a
       one-element list).  savelist is passed through to the base class."""
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, list ):
        contained = exprs
    elif isinstance( exprs, basestring ):
        contained = [ Literal( exprs ) ]
    else:
        contained = [ exprs ]
    self.exprs = contained
    self.callPreparse = False
2186
2188 return self.exprs[i]
2189
2191 self.exprs.append( other )
2192 self.strRepr = None
2193 return self
2194
2196 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
2197 all contained expressions."""
2198 self.skipWhitespace = False
2199 self.exprs = [ e.copy() for e in self.exprs ]
2200 for e in self.exprs:
2201 e.leaveWhitespace()
2202 return self
2203
2205 if isinstance( other, Suppress ):
2206 if other not in self.ignoreExprs:
2207 super( ParseExpression, self).ignore( other )
2208 for e in self.exprs:
2209 e.ignore( self.ignoreExprs[-1] )
2210 else:
2211 super( ParseExpression, self).ignore( other )
2212 for e in self.exprs:
2213 e.ignore( self.ignoreExprs[-1] )
2214 return self
2215
2217 try:
2218 return super(ParseExpression,self).__str__()
2219 except:
2220 pass
2221
2222 if self.strRepr is None:
2223 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2224 return self.strRepr
2225
2227 super(ParseExpression,self).streamline()
2228
2229 for e in self.exprs:
2230 e.streamline()
2231
2232
2233
2234
2235 if ( len(self.exprs) == 2 ):
2236 other = self.exprs[0]
2237 if ( isinstance( other, self.__class__ ) and
2238 not(other.parseAction) and
2239 other.resultsName is None and
2240 not other.debug ):
2241 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2242 self.strRepr = None
2243 self.mayReturnEmpty |= other.mayReturnEmpty
2244 self.mayIndexError |= other.mayIndexError
2245
2246 other = self.exprs[-1]
2247 if ( isinstance( other, self.__class__ ) and
2248 not(other.parseAction) and
2249 other.resultsName is None and
2250 not other.debug ):
2251 self.exprs = self.exprs[:-1] + other.exprs[:]
2252 self.strRepr = None
2253 self.mayReturnEmpty |= other.mayReturnEmpty
2254 self.mayIndexError |= other.mayIndexError
2255
2256 return self
2257
2261
def validate( self, validateTrace=[] ):
    """Validate each contained expression, then check this one for recursion.

    validateTrace is not modified; every contained expression receives a
    new list consisting of validateTrace followed by self.
    """
    extendedTrace = validateTrace[:] + [ self ]
    for subExpr in self.exprs:
        subExpr.validate( extendedTrace )
    self.checkRecursion( [] )
2267
2268 -class And(ParseExpression):
2269 """Requires all given ParseExpressions to be found in the given order.
2270 Expressions may be separated by whitespace.
2271 May be constructed using the '+' operator.
2272 """
2273
2276 return And._ErrorStop.instance
2277 _ErrorStop.instance = Empty()
2278 _ErrorStop.instance.leaveWhitespace()
2279
def __init__( self, exprs, savelist = True ):
    """Build an And from the given expressions.

    Parameters:
     - exprs - the expression(s) to match in sequence, passed on to the
       ParseExpression constructor, which stores them in self.exprs
     - savelist - (default=True) - passed through to the base class
       constructor

    mayReturnEmpty is True only if every contained expression may return
    empty.  The whitespace settings are copied from the first contained
    expression, if there is one.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    # Use the normalized self.exprs rather than the raw exprs argument:
    # the argument may be a plain string or a single expression, in which
    # case exprs[0] is not the first contained expression.  An empty
    # expression list keeps the whitespace settings from the base class.
    if self.exprs:
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2290
def parseImpl( self, instring, loc, doActions=True ):
    """Match every contained expression in order, accumulating the tokens.

    The first expression is parsed with callPreParse=False.  After the
    And._ErrorStop marker has been passed, a ParseBaseException or
    IndexError raised by a later expression is re-raised as a
    ParseSyntaxException.  Returns the location after the last expression
    together with the accumulated tokens.
    """
    head = self.exprs[0]
    loc, resultlist = head._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for subExpr in self.exprs[1:]:
        if subExpr is And._ErrorStop.instance:
            # the marker itself is not parsed, it only switches error handling
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, subTokens = subExpr._parse( instring, loc, doActions )
        else:
            try:
                loc, subTokens = subExpr._parse( instring, loc, doActions )
            except ParseBaseException:
                raise ParseSyntaxException( sys.exc_info()[1] )
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if subTokens or subTokens.keys():
            resultlist += subTokens
    return loc, resultlist
2312
2314 if isinstance( other, basestring ):
2315 other = Literal( other )
2316 return self.append( other )
2317
2319 subRecCheckList = parseElementList[:] + [ self ]
2320 for e in self.exprs:
2321 e.checkRecursion( subRecCheckList )
2322 if not e.mayReturnEmpty:
2323 break
2324
2326 if hasattr(self,"name"):
2327 return self.name
2328
2329 if self.strRepr is None:
2330 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2331
2332 return self.strRepr
2333
2334
2335 -class Or(ParseExpression):
2336 """Requires that at least one ParseExpression is found.
2337 If two expressions match, the expression that matches the longest string will be used.
2338 May be constructed using the '^' operator.
2339 """
def __init__( self, exprs, savelist = False ):
    """Build an Or from the given alternatives.

    mayReturnEmpty is True if at least one alternative may return empty,
    and False otherwise (including when there are no alternatives).
    """
    super(Or,self).__init__(exprs, savelist)
    for alt in self.exprs:
        if alt.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
    else:
        self.mayReturnEmpty = False
2347
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and parse with the one that matches furthest.

    Each alternative is first probed with tryParse; the alternative whose
    probe ends at the greatest location is then parsed for real (the
    earliest one wins a tie).  If nothing matches, the exception that
    occurred at the greatest location is raised, or a ParseException
    reporting that there are no alternatives if no exception was recorded.
    """
    bestMatchLoc = -1
    bestMatchExpr = None
    furthestErrLoc = -1
    furthestErr = None
    for alt in self.exprs:
        try:
            endLoc = alt.tryParse( instring, loc )
        except ParseException:
            err = sys.exc_info()[1]
            if err.loc > furthestErrLoc:
                furthestErr = err
                furthestErrLoc = err.loc
        except IndexError:
            if len(instring) > furthestErrLoc:
                furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                furthestErrLoc = len(instring)
        else:
            if endLoc > bestMatchLoc:
                bestMatchLoc = endLoc
                bestMatchExpr = alt

    if bestMatchLoc >= 0:
        return bestMatchExpr._parse( instring, loc, doActions )
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestErr
2375
2377 if isinstance( other, basestring ):
2378 other = Literal( other )
2379 return self.append( other )
2380
2382 if hasattr(self,"name"):
2383 return self.name
2384
2385 if self.strRepr is None:
2386 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2387
2388 return self.strRepr
2389
2391 subRecCheckList = parseElementList[:] + [ self ]
2392 for e in self.exprs:
2393 e.checkRecursion( subRecCheckList )
2394
2395
2397 """Requires that at least one ParseExpression is found.
2398 If two expressions match, the first one listed is the one that will match.
2399 May be constructed using the '|' operator.
2400 """
def __init__( self, exprs, savelist = False ):
    """Build a MatchFirst from the given alternatives.

    With a false (e.g. empty) exprs argument mayReturnEmpty is True;
    otherwise it is True only if at least one alternative may return empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return
    for alt in self.exprs:
        if alt.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
    else:
        self.mayReturnEmpty = False
2411
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses successfully.

    If every alternative fails, the exception that occurred at the greatest
    location is raised, or a ParseException reporting that there are no
    alternatives if no exception was recorded.
    """
    furthestErrLoc = -1
    furthestErr = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException:
            err = sys.exc_info()[1]
            if err.loc > furthestErrLoc:
                furthestErr = err
                furthestErrLoc = err.loc
        except IndexError:
            if len(instring) > furthestErrLoc:
                furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                furthestErrLoc = len(instring)

    # reaching this point means that no alternative matched
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestErr
2434
2436 if isinstance( other, basestring ):
2437 other = Literal( other )
2438 return self.append( other )
2439
2441 if hasattr(self,"name"):
2442 return self.name
2443
2444 if self.strRepr is None:
2445 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2446
2447 return self.strRepr
2448
2450 subRecCheckList = parseElementList[:] + [ self ]
2451 for e in self.exprs:
2452 e.checkRecursion( subRecCheckList )
2453
2454
2455 -class Each(ParseExpression):
2456 """Requires all given ParseExpressions to be found, but in any order.
2457 Expressions may be separated by whitespace.
2458 May be constructed using the '&' operator.
2459 """
def __init__( self, exprs, savelist = True ):
    """Build an Each from the given expressions.

    mayReturnEmpty is True only if every contained expression may return
    empty.  initExprGroups is set so that the first call to parseImpl
    sorts the expressions into its working groups.
    """
    super(Each,self).__init__(exprs, savelist)
    for subExpr in self.exprs:
        if not subExpr.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    else:
        self.mayReturnEmpty = True
    self.skipWhitespace = True
    self.initExprGroups = True
2469
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they occur.

    On the first call the contained expressions are sorted into groups
    (Optional, ZeroOrMore, OneOrMore and everything else).  The input is
    then probed repeatedly with tryParse to find the order in which the
    expressions match; afterwards the expressions are parsed for real in
    that order and their results are merged.  Raises ParseException if a
    required expression was never matched.
    """
    if self.initExprGroups:
        self.optionals = [ x.expr for x in self.exprs if isinstance(x,Optional) ]
        self.multioptionals = [ x.expr for x in self.exprs if isinstance(x,ZeroOrMore) ]
        self.multirequired = [ x.expr for x in self.exprs if isinstance(x,OneOrMore) ]
        self.required = [ x for x in self.exprs if not isinstance(x,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    probeLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        numFailed = 0
        for cand in candidates:
            try:
                probeLoc = cand.tryParse( instring, probeLoc )
            except ParseException:
                numFailed += 1
            else:
                matchOrder.append( cand )
                if cand in pendingReqd:
                    pendingReqd.remove( cand )
                elif cand in pendingOpt:
                    pendingOpt.remove( cand )
        if numFailed == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join( [ _ustr(x) for x in pendingReqd ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # unmatched Optionals are parsed as well, so that they get the chance
    # to contribute their results
    matchOrder.extend( [ x for x in self.exprs if isinstance(x,Optional) and x.expr in pendingOpt ] )

    resultlist = []
    for matched in matchOrder:
        loc, results = matched._parse( instring, loc, doActions )
        resultlist.append( results )

    finalResults = ParseResults([])
    for res in resultlist:
        # names already present in finalResults get the old and the new
        # values combined instead of being overwritten
        combined = {}
        for name in res.keys():
            if name in finalResults.keys():
                merged = ParseResults(finalResults[name])
                merged += ParseResults(res[name])
                combined[name] = merged
        finalResults += ParseResults(res)
        for name, value in combined.items():
            finalResults[name] = value
    return loc, finalResults
2525
2527 if hasattr(self,"name"):
2528 return self.name
2529
2530 if self.strRepr is None:
2531 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2532
2533 return self.strRepr
2534
2536 subRecCheckList = parseElementList[:] + [ self ]
2537 for e in self.exprs:
2538 e.checkRecursion( subRecCheckList )
2539
2540
2542 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression.

    A string is converted to a Literal.  When an expression is given (expr
    is not None), its parsing attributes (mayIndexError, mayReturnEmpty,
    whitespace handling, saveAsList, callPreparse and ignore expressions)
    are copied onto this element.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2557
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression (with callPreParse=False).

    Raises ParseException if no expression has been set.
    """
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2563
2565 self.skipWhitespace = False
2566 self.expr = self.expr.copy()
2567 if self.expr is not None:
2568 self.expr.leaveWhitespace()
2569 return self
2570
2572 if isinstance( other, Suppress ):
2573 if other not in self.ignoreExprs:
2574 super( ParseElementEnhance, self).ignore( other )
2575 if self.expr is not None:
2576 self.expr.ignore( self.ignoreExprs[-1] )
2577 else:
2578 super( ParseElementEnhance, self).ignore( other )
2579 if self.expr is not None:
2580 self.expr.ignore( self.ignoreExprs[-1] )
2581 return self
2582
2588
2590 if self in parseElementList:
2591 raise RecursiveGrammarException( parseElementList+[self] )
2592 subRecCheckList = parseElementList[:] + [ self ]
2593 if self.expr is not None:
2594 self.expr.checkRecursion( subRecCheckList )
2595
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check this one for recursion.

    validateTrace is not modified; the wrapped expression (if any) receives
    a new list consisting of validateTrace followed by self.
    """
    extendedTrace = validateTrace[:] + [ self ]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate( extendedTrace )
    self.checkRecursion( [] )
2601
2603 try:
2604 return super(ParseElementEnhance,self).__str__()
2605 except:
2606 pass
2607
2608 if self.strRepr is None and self.expr is not None:
2609 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2610 return self.strRepr
2611
2612
2614 """Lookahead matching of the given parse expression. FollowedBy
2615 does *not* advance the parsing position within the input string, it only
2616 verifies that the specified parse expression matches at the current
2617 position. FollowedBy always returns a null token list."""
2621
2622 - def parseImpl( self, instring, loc, doActions=True ):
2625
2626
2627 -class NotAny(ParseElementEnhance):
2628 """Lookahead to disallow matching with the given parse expression. NotAny
2629 does *not* advance the parsing position within the input string, it only
2630 verifies that the specified parse expression does *not* match at the current
2631 position. Also, NotAny does *not* skip over leading whitespace. NotAny
2632 always returns a null token list. May be constructed using the '~' operator."""
2634 super(NotAny,self).__init__(expr)
2635
2636 self.skipWhitespace = False
2637 self.mayReturnEmpty = True
2638 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2639
2640
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, without consuming input, only if the wrapped expression fails.

    Returns (loc, []) when the wrapped expression does not match at loc.
    When it does match, self.myException is updated with the current
    location and input string and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        return loc, []
    # the unwanted expression matched
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2653
2655 if hasattr(self,"name"):
2656 return self.name
2657
2658 if self.strRepr is None:
2659 self.strRepr = "~{" + _ustr(self.expr) + "}"
2660
2661 return self.strRepr
2662
2663
2665 """Optional repetition of zero or more of the given expression."""
2669
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression as many times as possible, possibly never.

    The first attempt is made with callPreParse=False.  Before each later
    attempt the ignorable expressions are skipped, if any are defined.
    Matching stops at the first ParseException or IndexError; the location
    and the tokens collected up to then are returned.
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        mustSkipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if mustSkipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # end of the repetition
        pass

    return loc, tokens
2687
2689 if hasattr(self,"name"):
2690 return self.name
2691
2692 if self.strRepr is None:
2693 self.strRepr = "[" + _ustr(self.expr) + "]..."
2694
2695 return self.strRepr
2696
2701
2702
2704 """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression one or more times.

    The first match is mandatory: an exception raised by it propagates to
    the caller.  Further matches are attempted (after skipping the
    ignorable expressions, if any are defined) until one of them raises
    ParseException or IndexError.
    """
    # must match at least once, so this call is not protected
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        mustSkipIgnorables = len(self.ignoreExprs) > 0
        while True:
            if not mustSkipIgnorables:
                nextLoc = loc
            else:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # end of the repetition
        pass

    return loc, tokens
2722
2724 if hasattr(self,"name"):
2725 return self.name
2726
2727 if self.strRepr is None:
2728 self.strRepr = "{" + _ustr(self.expr) + "}..."
2729
2730 return self.strRepr
2731
2736
2743
2744 _optionalNotMatched = _NullToken()
2746 """Optional matching of the given expression.
2747 A default return string can also be specified, if the optional expression
2748 is not found.
2749 """
2751 super(Optional,self).__init__( exprs, savelist=False )
2752 self.defaultValue = default
2753 self.mayReturnEmpty = True
2754
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression if possible, else fall back to the default.

    When the wrapped expression fails with ParseException or IndexError,
    loc is left unchanged and the tokens are: an empty list if no default
    value was given, otherwise the default value (stored under the wrapped
    expression's results name as well, if it has one).
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
2768
2770 if hasattr(self,"name"):
2771 return self.name
2772
2773 if self.strRepr is None:
2774 self.strRepr = "[" + _ustr(self.expr) + "]"
2775
2776 return self.strRepr
2777
2778
2779 -class SkipTo(ParseElementEnhance):
2780 """Token for skipping over all undefined text until the matched expression is found.
2781 If include is set to true, the matched expression is also consumed. The ignore
2782 argument is used to define grammars (typically quoted strings and comments) that
2783 might contain false matches.
2784 """
def __init__( self, other, include=False, ignore=None ):
    """Build a SkipTo for the target expression other.

    Parameters:
     - other - the expression to skip to
     - include - (default=False) - stored as includeMatch
     - ignore - (default=None) - if given, the target expression is copied
       and the copy is told to ignore this expression
    """
    super( SkipTo, self ).__init__( other )
    if ignore is not None:
        ignoringCopy = self.expr.copy()
        self.expr = ignoringCopy
        ignoringCopy.ignore(ignore)
    self.includeMatch = include
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.errmsg = "No match found for "+_ustr(self.expr)
2795
2796
def parseImpl( self, instring, loc, doActions=True ):
    """Advance through instring until the target expression matches.

    Returns the skipped text as the single token.  If includeMatch is set,
    the target is parsed as well, the returned location lies after it, and
    its tokens (if any) are appended to the skipped text.  If the target
    never matches, self.myException is updated and raised.
    """
    startLoc = loc
    endOfInput = len(instring)
    target = self.expr
    while loc <= endOfInput:
        try:
            loc = target._skipIgnorables( instring, loc )
            # trial match without parse actions
            target._parse( instring, loc, doActions=False, callPreParse=False )
            skipped = instring[startLoc:loc]
            if not self.includeMatch:
                return loc, [ skipped ]
            loc, matched = target._parse(instring,loc,doActions,callPreParse=False)
            if not matched:
                return loc, [ skipped ]
            combined = ParseResults( skipped )
            combined += matched
            return loc, [ combined ]
        except (ParseException,IndexError):
            loc += 1
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2822
2823 -class Forward(ParseElementEnhance):
2824 """Forward declaration of an expression to be defined later -
2825 used for recursive grammars, such as algebraic infix notation.
2826 When the expression is known, it is assigned to the Forward variable using the '<<' operator.
2827
2828 Note: take care when assigning to Forward not to overlook precedence of operators.
2829 Specifically, '|' has a lower precedence than '<<', so that::
2830 fwdExpr << a | b | c
2831 will actually be evaluated as::
2832 (fwdExpr << a) | b | c
2833 thereby leaving b and c out as parseable alternatives. It is recommended that you
2834 explicitly group the values inserted into the Forward::
2835 fwdExpr << (a | b | c)
2836 """
2839
2841 if isinstance( other, basestring ):
2842 other = Literal(other)
2843 self.expr = other
2844 self.mayReturnEmpty = other.mayReturnEmpty
2845 self.strRepr = None
2846 self.mayIndexError = self.expr.mayIndexError
2847 self.mayReturnEmpty = self.expr.mayReturnEmpty
2848 self.setWhitespaceChars( self.expr.whiteChars )
2849 self.skipWhitespace = self.expr.skipWhitespace
2850 self.saveAsList = self.expr.saveAsList
2851 self.ignoreExprs.extend(self.expr.ignoreExprs)
2852 return None
2853
2855 self.skipWhitespace = False
2856 return self
2857
2859 if not self.streamlined:
2860 self.streamlined = True
2861 if self.expr is not None:
2862 self.expr.streamline()
2863 return self
2864
def validate( self, validateTrace=[] ):
    """Validate the assigned expression, unless self is already being validated.

    If self already appears in validateTrace the assigned expression is
    skipped.  checkRecursion is run in either case.
    """
    if self not in validateTrace and self.expr is not None:
        self.expr.validate( validateTrace[:] + [ self ] )
    self.checkRecursion([])
2871
2873 if hasattr(self,"name"):
2874 return self.name
2875
2876 self.__class__ = _ForwardNoRecurse
2877 try:
2878 if self.expr is not None:
2879 retString = _ustr(self.expr)
2880 else:
2881 retString = "None"
2882 finally:
2883 self.__class__ = Forward
2884 return "Forward: "+retString
2885
2887 if self.expr is not None:
2888 return super(Forward,self).copy()
2889 else:
2890 ret = Forward()
2891 ret << self
2892 return ret
2893
2897
2899 """Abstract subclass of ParseExpression, for converting parsed results."""
2900 - def __init__( self, expr, savelist=False ):
2903
2904 -class Upcase(TokenConverter):
2905 """Converter to upper case all matching tokens."""
2907 super(Upcase,self).__init__(*args)
2908 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
2909 DeprecationWarning,stacklevel=2)
2910
def postParse( self, instring, loc, tokenlist ):
    """Return a list with every token of tokenlist converted to upper case."""
    # string.upper(s) merely calls s.upper(); calling the method directly
    # avoids the legacy string-module function
    return [ tok.upper() for tok in tokenlist ]
2913
2914
2916 """Converter to concatenate all matching tokens to a single string.
2917 By default, the matching patterns must also be contiguous in the input string;
2918 this can be disabled by specifying 'adjacent=False' in the constructor.
2919 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Build a Combine around expr.

    Parameters:
     - expr - the expression whose tokens are to be concatenated
     - joinString - (default="") - stored for use when joining the tokens
     - adjacent - (default=True) - if true, leaveWhitespace() is applied
    """
    super(Combine,self).__init__( expr )
    self.adjacent = adjacent
    self.joinString = joinString
    if adjacent:
        self.leaveWhitespace()
    # leaveWhitespace() switches whitespace skipping off; turn it back on
    # for the Combine itself
    self.skipWhitespace = True
2928
2935
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens by a single concatenated string token.

    The result starts as a copy of tokenlist with its list items removed.
    It is wrapped in a list when this expression has a results name and
    the result carries named entries.
    """
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)

    if self.resultsName and len(combined.keys())>0:
        return [ combined ]
    return combined
2945
2946 -class Group(TokenConverter):
2947 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
2949 super(Group,self).__init__( expr )
2950 self.saveAsList = True
2951
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens wrapped in a one-element list."""
    return [ tokenlist ]
2954
2955 -class Dict(TokenConverter):
2956 """Converter to return a repetitive expression as a list, but also as a dictionary.
2957 Each element can also be referenced using the first token in the expression as its key.
2958 Useful for tabular report scraping when the first column can be used as an item key.
2959 """
2961 super(Dict,self).__init__( exprs )
2962 self.saveAsList = True
2963
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to tokenlist for every non-empty token group.

    The key is the first item of the group (converted to a stripped string
    if it is an int).  The value is "" for a one-item group, the second
    item for a two-item group whose second item is not a ParseResults, and
    otherwise the rest of the group (unwrapped if it is a single item
    without named entries).  Returns tokenlist, wrapped in a list if this
    expression has a results name.
    """
    for offset, tok in enumerate(tokenlist):
        tokLen = len(tok)
        if tokLen == 0:
            continue
        key = tok[0]
        if isinstance(key,int):
            key = _ustr(tok[0]).strip()
        if tokLen == 1:
            value = ""
        elif tokLen == 2 and not isinstance(tok[1],ParseResults):
            value = tok[1]
        else:
            rest = tok.copy()
            del rest[0]
            if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.keys()):
                value = rest
            else:
                value = rest[0]
        tokenlist[key] = _ParseResultsWithOffset(value,offset)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
2987
2988
2990 """Converter for ignoring the results of a parsed expression."""
2991 - def postParse( self, instring, loc, tokenlist ):
2993
2996
2997
2999 """Wrapper for parse actions, to ensure they are only called once."""
3001 self.callable = ParserElement._normalizeParseActionArgs(methodCall)
3002 self.called = False
3004 if not self.called:
3005 results = self.callable(s,l,t)
3006 self.called = True
3007 return results
3008 raise ParseException(s,l,"")
3011
3013 """Decorator for debugging parse actions."""
3014 f = ParserElement._normalizeParseActionArgs(f)
3015 def z(*paArgs):
3016 thisFunc = f.func_name
3017 s,l,t = paArgs[-3:]
3018 if len(paArgs)>3:
3019 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3020 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3021 try:
3022 ret = f(*paArgs)
3023 except Exception, exc:
3024 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3025 raise
3026 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3027 return ret
3028 try:
3029 z.__name__ = f.__name__
3030 except AttributeError:
3031 pass
3032 return z
3033
3034
3035
3036
3038 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3039 By default, the list elements and delimiters can have intervening whitespace, and
3040 comments, but this can be overridden by passing 'combine=True' in the constructor.
3041 If combine is set to True, the matching tokens are returned as a single token
3042 string, with the delimiters included; otherwise, the matching tokens are returned
3043 as a list of tokens, with the delimiters suppressed.
3044 """
3045 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3046 if combine:
3047 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3048 else:
3049 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3050
3052 """Helper to define a counted list of expressions.
3053 This helper defines a pattern of the form::
3054 integer expr expr expr...
3055 where the leading integer tells how many expr expressions follow.
3056 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3057 """
3058 arrayExpr = Forward()
3059 def countFieldParseAction(s,l,t):
3060 n = int(t[0])
3061 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3062 return []
3063 return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
3064
3066 if type(L) is not list: return [L]
3067 if L == []: return L
3068 return _flatten(L[0]) + _flatten(L[1:])
3069
3071 """Helper to define an expression that is indirectly defined from
3072 the tokens matched in a previous expression, that is, it looks
3073 for a 'repeat' of a previous expression. For example::
3074 first = Word(nums)
3075 second = matchPreviousLiteral(first)
3076 matchExpr = first + ":" + second
3077 will match "1:1", but not "1:2". Because this matches a
3078 previous literal, will also match the leading "1:1" in "1:10".
3079 If this is not desired, use matchPreviousExpr.
3080 Do *not* use with packrat parsing enabled.
3081 """
3082 rep = Forward()
3083 def copyTokenToRepeater(s,l,t):
3084 if t:
3085 if len(t) == 1:
3086 rep << t[0]
3087 else:
3088
3089 tflat = _flatten(t.asList())
3090 rep << And( [ Literal(tt) for tt in tflat ] )
3091 else:
3092 rep << Empty()
3093 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3094 return rep
3095
3097 """Helper to define an expression that is indirectly defined from
3098 the tokens matched in a previous expression, that is, it looks
3099 for a 'repeat' of a previous expression. For example::
3100 first = Word(nums)
3101 second = matchPreviousExpr(first)
3102 matchExpr = first + ":" + second
3103 will match "1:1", but not "1:2". Because this matches by
3104 expressions, will *not* match the leading "1:1" in "1:10";
3105 the expressions are evaluated first, and then compared, so
3106 "1" is compared with "10".
3107 Do *not* use with packrat parsing enabled.
3108 """
3109 rep = Forward()
3110 e2 = expr.copy()
3111 rep << e2
3112 def copyTokenToRepeater(s,l,t):
3113 matchTokens = _flatten(t.asList())
3114 def mustMatchTheseTokens(s,l,t):
3115 theseTokens = _flatten(t.asList())
3116 if theseTokens != matchTokens:
3117 raise ParseException("",0,"")
3118 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3119 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3120 return rep
3121
3123
3124 for c in r"\^-]":
3125 s = s.replace(c,"\\"+c)
3126 s = s.replace("\n",r"\n")
3127 s = s.replace("\t",r"\t")
3128 return _ustr(s)
3129
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list or tuple of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    Any other type of argument triggers a SyntaxWarning and yields a
    MatchFirst without alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a private list: the caller's sequence must not be
        # modified, and a tuple supports neither del nor insert
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # there is nothing to build alternatives from
        return MatchFirst( [] )

    # drop duplicates, and move any symbol in front of an earlier symbol
    # that is a prefix of it (and would therefore mask it)
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3188
3190 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3191 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
3192 in the proper order. The key pattern can include delimiting markers or punctuation,
3193 as long as they are suppressed, thereby leaving the significant key text. The value
3194 pattern can include named results, so that the Dict results can include named token
3195 fields.
3196 """
3197 return Dict( ZeroOrMore( Group ( key + value ) ) )
3198
3199
3200 empty = Empty().setName("empty")
3201 lineStart = LineStart().setName("lineStart")
3202 lineEnd = LineEnd().setName("lineEnd")
3203 stringStart = StringStart().setName("stringStart")
3204 stringEnd = StringEnd().setName("stringEnd")
3205
3206 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
3207 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
3208 _escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
3209 _escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
3210 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
3211 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
3212 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3213
3214 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3215
3217 r"""Helper to easily define string ranges for use in Word construction. Borrows
3218 syntax from regexp '[]' string range definitions::
3219 srange("[0-9]") -> "0123456789"
3220 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3221 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3222 The input string must be enclosed in []'s, and the returned string is the expanded
3223 character set joined into a single string.
3224 The values enclosed in the []'s may be::
3225 a single character
3226 an escaped character with a leading backslash (such as \- or \])
3227 an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
3228 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3229 a range of any of the above, separated by a dash ('a-z', etc.)
3230 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3231 """
3232 try:
3233 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
3234 except:
3235 return ""
3236
3238 """Helper method for defining parse actions that require matching at a specific
3239 column in the input text.
3240 """
3241 def verifyCol(strg,locn,toks):
3242 if col(locn,strg) != n:
3243 raise ParseException(strg,locn,"matched token not at column %d" % n)
3244 return verifyCol
3245
3247 """Helper method for common parse actions that simply return a literal value. Especially
3248 useful when used with transformString().
3249 """
3250 def _replFunc(*args):
3251 return [replStr]
3252 return _replFunc
3253
3255 """Helper parse action for removing quotation marks from parsed quoted strings.
3256 To use, add this parse action to quoted string using::
3257 quotedString.setParseAction( removeQuotes )
3258 """
3259 return t[0][1:-1]
3260
3262 """Helper parse action to convert tokens to upper case."""
3263 return [ tt.upper() for tt in map(_ustr,t) ]
3264
3266 """Helper parse action to convert tokens to lower case."""
3267 return [ tt.lower() for tt in map(_ustr,t) ]
3268
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The tokens in t are replaced in place by the slice of s that starts
       at startLoc and ends at the location reported by getTokensEndLoc();
       t itself is returned."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:stopLoc]
    del t[:]
    t += ParseResults(originalText)
    return t
3279
def getTokensEndLoc():
    """Return the end location of the tokens currently being processed.

    Meant to be called from inside a parse action.  The call stack is
    searched, starting two frames above this one, for a frame whose function
    is named "_parseNoCache"; that frame's local variable "loc" is returned.

    Raises ParseFatalException when no such frame is found.
    """
    import inspect
    frames = inspect.stack()
    try:
        # frames[0] is this function and frames[1] its caller, so the search
        # begins with the caller's caller
        for frameRec in frames[2:]:
            if frameRec[3] == "_parseNoCache":
                return frameRec[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # let go of the frame records whichever way the function exits
        del frames
3295
3323
3327
3331
def withAttribute(*args,**attrDict):
    """Build a validating parse action for start tags made with makeXMLTags
    or makeHTMLTags.

    Use it to qualify a start tag with required attribute values, avoiding
    false matches on common tags such as <TD> or <DIV>.

    The required attributes are given either as:
     - keyword arguments, as in (class="Customer",align="right"), or
     - name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    Positional tuples take precedence: when any are given, the keyword
    arguments are not consulted.  Names with a namespace prefix need the
    tuple form.

    Pass withAttribute.ANY_VALUE as a value to require only that the
    attribute is present.

    The returned parse action raises ParseException when a required
    attribute is absent from the tokens or has a different value.

    NOTE(review): the original documentation says attribute names match
    case-insensitively; this function does a plain lookup on the tokens, so
    that presumably depends on how the start tag stores names - confirm.
    """
    if not args:
        source = attrDict.items()
    else:
        source = args[:]
    required = [(name,value) for name,value in source]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# unique sentinel meaning "attribute must exist, any value is acceptable"
withAttribute.ANY_VALUE = object()
3362
# associativity markers for operatorPrecedence; each is a unique sentinel
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
3366
def operatorPrecedence( baseExpr, opList ):
    """Construct a grammar for expressions built from operators arranged in
    a precedence hierarchy.

    Operators may be unary, binary or ternary, and left- or
    right-associative.  A parse action can be attached to each level.

    Parameters:
     - baseExpr - expression for the most basic operand; a parenthesized
       occurrence of the complete expression is accepted wherever baseExpr is
     - opList - sequence of tuples, one per precedence level, each of the
       form (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator; it may also
          be a string, which will be converted to a Literal; when numTerms
          is 3, opExpr is a pair of expressions for the two operators
          separating the 3 terms
        - numTerms is the number of terms for this operator (must be 1, 2,
          or 3)
        - rightLeftAssoc is opAssoc.RIGHT or opAssoc.LEFT
        - parseAction is the parse action for expressions matching this
          level (this tuple member may be omitted)

    Returns the Forward expression for the complete grammar.

    Raises ValueError for an unknown associativity, an unsupported
    numTerms, or a ternary opExpr that is not a pair.
    """
    ret = Forward()
    # innermost level: a bare operand, or the whole expression in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # padding with None lets the parse action member be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # operands after the first are this level again for right
        # associativity, and the next tighter level for left associativity
        if rightLeftAssoc == opAssoc.LEFT:
            leftAssoc = True
            operand = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            leftAssoc = False
            operand = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if leftAssoc:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # the operator is made optional inside the Group; the
                # lookahead uses the wrapped expression itself
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is not None:
                matchExpr = FollowedBy(lastExpr + opExpr + operand) + Group( lastExpr + OneOrMore( opExpr + operand ) )
            else:
                # no operator expression: terms simply follow one another
                matchExpr = FollowedBy(lastExpr + operand) + Group( lastExpr + OneOrMore( operand ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + operand + opExpr2 + operand) + \
                        Group( lastExpr + opExpr1 + operand + opExpr2 + operand )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        # a level matches its own operator form, else falls through to the
        # tighter-binding level below it
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3437
# Quoted-string expressions.  Inside the quotes the patterns accept any
# character other than the quote, newline, carriage return or backslash, a
# doubled quote, a \x hex escape, or a backslash followed by any character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
# either of the two forms above, as one alternation
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string with a leading u, combined into a single token
unicodeString = Combine(_L('u') + quotedString.copy())
3442
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Build an expression for nested lists enclosed in opening and closing
    delimiters ("(" and ")" by default).

    Parameters:
     - opener - opening delimiter (default="("); may also be a pyparsing expression
     - closer - closing delimiter (default=")"); may also be a pyparsing expression
     - content - expression for the items inside the lists (default=None)
     - ignoreExpr - expression whose matches may contain delimiter
       characters that must not count as nesting, such as quoted strings
       or comments (default=quotedString); combine several with an Or or
       MatchFirst, or pass None to ignore nothing

    When content is not given, everything between the delimiters is
    captured as separate whitespace-delimited values; opener and closer
    must then both be strings.

    Raises ValueError when opener equals closer, or when content is omitted
    and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # characters that end a default content item
        stopChars = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        if ignoreExpr is None:
            content = (empty+CharsNotIn(stopChars).setParseAction(lambda t:t[0].strip()))
        else:
            # advance one character at a time so ignoreExpr is re-checked
            # at every position
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(stopChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3482
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a block of statements delimited by
    indentation, like the block statements of Python source code.

    Parameters:
     - blockStatementExpr - expression for the statement repeated inside
       the block; as a side effect it is told to ignore a backslash
       followed by a line end
     - indentStack - list supplied by the caller that tracks the active
       indentation columns (the original documentation advises sharing one
       list between all indented blocks of a grammar)
     - indent - whether the block must be indented beyond the current
       level; pass False for a block of left-most statements (default=True)

    A valid block contains at least one blockStatement.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input is reached
        if l < len(s):
            curCol = col(l,s)
            expected = indentStack[-1]
            if curCol > expected:
                raise ParseFatalException(s,l,"illegal nesting")
            if curCol != expected:
                raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the enclosing level: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        # back at or left of the enclosing level: close the current one
        indentStack.pop()

    # line ends, skipping only tabs and spaces ahead of them
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        body = Optional(NL) + FollowedBy(blockStatementExpr) + INDENT + statements + UNDENT
    else:
        body = Optional(NL) + statements
    smExpr = Group( body )
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
3534
# 8-bit character sets: codes c0-ff without d7 and f7 count as letters ...
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# ... while a1-bf together with d7 and f7 count as punctuation
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# start/end tag expressions for any tag whose name begins with a letter and
# continues with letters, digits, "_" or ":"
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums+"_:"))
# "&name;" for a handful of entity names; the name is stored under "entity"
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";")
# Characters for the entity names accepted by commonHTMLEntity.  &quot; is
# the double quotation mark (it used to be mapped to an apostrophe); &nbsp;
# is rendered as an ordinary space.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))

def replaceHTMLEntity(t):
    """Parse action that turns a matched HTML entity into its character.

    Looks up t.entity in _htmlEntityMap and returns the mapped character,
    or None when the name is not in the map.
    """
    return _htmlEntityMap.get(t.entity)
3542
3543
# --- ready-made comment expressions ---
# /* ... */
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... -->, possibly spanning several lines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# whatever is left on the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()
# // to end of line; a backslash-newline pair is consumed as part of it
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either the /* ... */ form or the // form
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

javaStyleComment = cppStyleComment
# "#" to end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# --- comma separated list ---
# every printable character except the comma
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )
# one list item: words of non-comma characters, keeping interior blanks or
# tabs as long as they are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
# items are quoted strings or plain items; an empty item yields ""
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="")
    ).setName("commaSeparatedList")
3558
3559
if __name__ == "__main__":
    # Self-test: a tiny "select ... from ..." grammar run against a handful
    # of valid and invalid statements.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

        On success the token list, the named results and an XML rendering
        are printed; on a parse error the offending line is printed with a
        caret under the failing column, followed by the error itself.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() fetches the exception without committing to
            # either the "except X, err" or the "except X as err" spelling
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # blank separator line; a bare print() would emit "()" when print
        # is a statement
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )
3602