Package pyparsing :: Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2008  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.0" 
  62  __versionTime__ = "28 May 2008 10:05" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy,sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import xml.sax.saxutils 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 
  92  ] 
  93   
  94   
  95  """ 
  96  Detect if we are running version 3.X and make appropriate changes 
  97  Robert A. Clark 
  98  """ 
  99  if sys.version_info[0] > 2: 
 100      _PY3K = True 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103  else: 
 104      _PY3K = False 
 105      _MAX_INT = sys.maxint 
 106   
 107  if not _PY3K: 
108 - def _ustr(obj):
109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 111 then < returns the unicode object | encodes it with the default encoding | ... >. 112 """ 113 try: 114 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 115 # it won't break any existing code. 116 return str(obj) 117 118 except UnicodeEncodeError: 119 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 120 # state that "The return value must be a string object". However, does a 121 # unicode object (being a subclass of basestring) count as a "string 122 # object"? 123 # If so, then return a unicode object: 124 return unicode(obj)
125 # Else encode it... but how? There are many choices... :) 126 # Replace unprintables with escape codes? 127 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 128 # Replace unprintables with question marks? 129 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 130 # ... 131 else: 132 _ustr = str 133
def _str2dict(strg):
    """Return a dict with one key per distinct character of strg, each mapped to 0."""
    return dict.fromkeys(strg, 0)
class _Constants(object):
    """Empty class with no behaviour of its own.

    NOTE(review): presumably instantiated as a bare attribute holder; the
    usage is not visible in this part of the file.
    """
# Character-set strings built from the string module.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    # Python 2 spelling of the same constants
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable character except whitespace
printables = "".join( [ ch for ch in string.printable if ch not in string.whitespace ] )
class ParseBaseException(Exception):
    """Common base class for all exceptions raised at parse time."""
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance note: instances are created in very large numbers, so the
    # constructor does nothing beyond storing its arguments.
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the failure details.

        When msg is omitted, the single string given as pstr is taken to be
        the message and the parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute position attributes on demand:
            - lineno - line number of the exception text
            - col / column - column number of the exception text
            - line - the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the offending input line, stripped, with markerString
           spliced in at the column where the exception occurred.
           An empty markerString leaves the line unmarked.
        """
        text = self.line
        cut = self.column - 1
        if markerString:
            text = "".join( [text[:cut], markerString, text[cut:]] )
        return text.strip()
196
class ParseException(ParseBaseException):
    """Raised when a parse expression does not match the input.

       Attributes available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
205
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when inconsistent parse content
       is found; it stops all parsing immediately."""
210
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
       signals that parsing must stop at once because a syntax error was
       found that cannot be backtracked over."""

    def __init__(self, pe):
        """Copy the parsed string, location, message and element from pe."""
        base = super(ParseSyntaxException, self)
        base.__init__( pe.pstr, pe.loc, pe.msg, pe.parserElement )
218 219 #~ class ReparseException(ParseBaseException): 220 #~ """Experimental class - parse actions can raise this exception to cause 221 #~ pyparsing to reparse the input string: 222 #~ - with a modified input string, and/or 223 #~ - with a modified start location 224 #~ Set the values of the ReparseException in the constructor, and raise the 225 #~ exception in a parse action to cause pyparsing to use the new string/location. 226 #~ Setting the values as None causes no change to be made. 227 #~ """ 228 #~ def __init_( self, newstring, restartLoc ): 229 #~ self.newParseText = newstring 230 #~ self.reparseLoc = restartLoc 231
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""

    def __init__( self, parseElementList ):
        """Remember the chain of parse elements that led to the recursion."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would
        # be unpacked as separate format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
239
class _ParseResultsWithOffset(object):
    """Two-item record; index 0 and index 1 return the two constructor
       arguments in order. ParseResults stores these as (value, position)."""

    def __init__(self,p1,p2):
        self.tup = p1, p2

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        pair = self.tup
        return repr(pair)
247
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the instance is brand new.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Build results from toklist, optionally registering them under name.

        A list is copied; any other value becomes a one-item token list.
        modal=False marks name as cumulative, so lookups by that name return
        every match instead of only the last one.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # this line is related to debugging the asXML bug
        #~ asList = False

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Int/slice keys index the token list; any other key is a results name."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: only the most recent match is reported
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Insert insStr into the token list at index and shift the recorded
           positions of named results that lie after it."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        # NOTE(review): a named result recorded exactly at `index` is not
        # shifted by this comparison - confirm whether `>=` is intended.
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Unknown results names yield "" rather than raising AttributeError.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # negative positions map to the start of the appended
                # tokens, even when that start is index 0
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested ParseResults become nested lists, other tokens are kept as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            # list(...) because dict views cannot be indexed on Python 3
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    #~ out.append('\n')
                    out.append( v.dump(indent,depth+1) )
                    #~ out.append('\n')
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        #~ out.append('\n')
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
597 598
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.

       A loc that sits exactly on a newline character reports column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the previous newline (rfind gives -1 on the first line)
    previousNewline = strg.rfind("\n", 0, loc)
    return loc - previousNewline
610
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    newlinesBefore = strg.count("\n", 0, loc)
    return 1 + newlinesBefore
622
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1, so index 0 is a legitimate hit
    # (loc 0 of a string that starts with a newline)
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
632
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action: print the expression about to be tried, its
       location, and that location's (line,column)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
635
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action: print the matched expression and its tokens as a list."""
    tokenText = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + tokenText
    print (message)
638
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action: print the exception raised while matching."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
641
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None
645
646 -class ParserElement(object):
647 """Abstract base level parser element class.""" 648 DEFAULT_WHITE_CHARS = " \n\t\r" 649
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide default whitespace characters.

       The value is stored in ParserElement.DEFAULT_WHITE_CHARS, which
       __init__ and copy() read when setting an element's whiteChars.
       """
    ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
def __init__( self, savelist=False ):
    """Set every parser-element attribute to its default value.

       savelist is stored as saveAsList, which _parseNoCache passes to
       ParseResults as its asList argument.
       """
    # user-supplied hooks
    self.parseAction = list()
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    # results naming
    self.resultsName = None
    self.saveAsList = savelist
    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = list()
    self.debug = False
    self.streamlined = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # used to mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = ( None, None, None )
    self.re = None
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
678
def copy( self ):
    """Return a shallow copy of this ParserElement with its own parseAction
       and ignoreExprs lists.  Useful for attaching different parse actions
       to the same parsing pattern.

       If copyDefaultWhiteChars is set, the copy picks up the current
       ParserElement.DEFAULT_WHITE_CHARS.
       """
    duplicate = copy.copy( self )
    # give the copy independent lists so later changes don't leak back
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
688
def setName( self, name ):
    """Give this expression a name for use in debugging.

       The error message becomes "Expected <name>"; if the element already
       has an `exception` attribute, that exception's msg is updated too.
       Returns self.
       """
    self.name = name
    self.errmsg = "Expected " + self.name
    hasException = hasattr(self,"exception")
    if hasException:
        self.exception.msg = self.errmsg
    return self
696
def setResultsName( self, name, listAllMatches=False ):
    """Define the name under which matching tokens can be referenced as a
       nested attribute of the returned parse results.

       NOTE: the name is set on a *copy* of this ParserElement, and that copy
       is returned; this lets a client define a basic element, such as an
       integer, and reference it in several places under different names.
       listAllMatches=True makes the name cumulative rather than modal.
       """
    named = self.copy()
    named.resultsName = name
    named.modalResults = not listAllMatches
    return named
708
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.  Returns self.
    """
    if breakFlag:
        _parseMethod = self._parse
        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb
            pdb.set_trace()
            # hand the (loc, tokens) result back to the caller; without the
            # return every parse through a breakpoint would yield None
            return _parseMethod( instring, loc, doActions, callPreParse )
        # remember the wrapped method so the breakpoint can be removed later
        breaker._originalParseMethod = _parseMethod
        self._parse = breaker
    else:
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
    return self
726
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a function, a method, a class (its __init__ is inspected) or
       an object with a __call__ method.  Callables that accept *args, or
       exactly three arguments, are returned unchanged; otherwise a wrapper
       taking (s,l,t) is returned that forwards only the trailing arguments
       f declares.  AttributeError propagates if no code object can be found.
       """
    STAR_ARGS = 4   # co_flags bit that is set when a function accepts *args

    def codeOf(func):
        # functions and methods expose __code__ (newer interpreters) or
        # func_code (older Python 2); anything else raises AttributeError
        try:
            return func.__code__
        except AttributeError:
            return func.func_code

    def implicitArgs(func):
        # Python 2 methods carry im_self, Python 3 bound methods __self__;
        # either way the first declared parameter is not supplied by us
        if hasattr(func,"im_self") or hasattr(func,"__self__"):
            return 1
        return 0

    if isinstance(f,type):
        # a class is called through __init__, which always receives the
        # new instance as its first argument
        probe = f.__init__
        skip = 1
    else:
        probe = f
        skip = None
    try:
        codeObj = codeOf(probe)
    except AttributeError:
        # not a function, must be a callable object, get info from its
        # __call__ method
        probe = f.__call__
        codeObj = codeOf(probe)
        skip = None
    if skip is None:
        skip = implicitArgs(probe)

    if codeObj.co_flags & STAR_ARGS:
        return f
    numargs = codeObj.co_argcount - skip

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    if numargs == 2:
        def tmp(s,l,t):
            return f(l,t)
    elif numargs == 1:
        def tmp(s,l,t):
            return f(t)
    else:
        # numargs == 0; callables declaring more than three parameters also
        # end up here, exactly as they did before
        def tmp(s,l,t):
            return f()
    try:
        tmp.__name__ = f.__name__
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    try:
        tmp.__doc__ = f.__doc__
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError,TypeError):
        # no need for special handling if attribute doesnt exist
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Any previously set parse actions are replaced.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       The optional keyword callDuringTry is stored as self.callDuringTry.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [ normalize(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
841
def addParseAction( self, *fns, **kwargs ):
    """Append parse actions to this expression's existing list.  See L{I{setParseAction}<setParseAction>}.
       callDuringTry stays set once it has been enabled."""
    normalize = self._normalizeParseActionArgs
    self.parseAction += [ normalize(fn) for fn in fns ]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
847
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       fn(s,loc,expr,err) where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw ParseFatalException
       if it is desired to stop parsing immediately.

       Returns self."""
    self.failAction = fn
    return self
860
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every match of the expressions in self.ignoreExprs.

       The expressions are retried in rounds until a complete round matches
       nothing; the resulting location is returned.
       """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches as possible
                while True:
                    loc,dummy = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc
873
def preParse( self, instring, loc ):
    """Return the location at which matching should start: loc advanced
       past ignorable expressions (if any are defined) and then past leading
       whitespace characters (if skipWhitespace is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if self.skipWhitespace:
        white = self.whiteChars
        end = len(instring)
        while loc < end and instring[loc] in white:
            loc += 1

    return loc
885
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and produce no tokens.
       Returns the (location, token list) pair."""
    noTokens = []
    return loc, noTokens
888
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: hand the token list back unchanged."""
    unchanged = tokenlist
    return unchanged
891 892 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc and return (new location, ParseResults).

       Steps: optional preParse, parseImpl, postParse, wrap the tokens in a
       ParseResults, then run the parse actions (when doActions or
       callDuringTry is set).  An IndexError from parseImpl is turned into a
       ParseException located at the end of the string.  Debug actions and
       the fail action are invoked only when debug or failAction is set.
       """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() instead of "except X, err": that spelling is a
            # syntax error on Python 3
            err = sys.exc_info()[1]
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # elements that cannot run off the end skip the try/except overhead
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action that returns a value replaces the tokens
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                err = sys.exc_info()[1]
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
964
def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions disabled.

    Returns the location just past the match.  A ParseFatalException
    raised by the underlying parse is replaced by a ParseException
    positioned at loc and carrying this element's errmsg.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self )
970 971 # this method gets repeatedly called during backtracking with the same arguments - 972 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing ("packrat") front end for _parseNoCache.

    Results are stored in ParserElement._exprArgCache under the key
    (self, instring, loc, callPreParse, doActions).  Both successful
    (loc, tokens) results and ParseBaseException failures are cached; a
    cached exception is re-raised on a later hit.

    Returns the (loc, tokens) tuple produced by _parseNoCache.
    """
    lookup = (self, instring, loc, callPreParse, doActions)
    cache = ParserElement._exprArgCache
    if lookup in cache:
        value = cache[lookup]
        if isinstance(value, Exception):
            raise value
        # Hand out a copy of the tokens: returning the cached object itself
        # would let a caller that modifies its result corrupt every later
        # hit for the same key.
        return (value[0], value[1].copy())

    try:
        value = self._parseNoCache( instring, loc, doActions, callPreParse )
    except ParseBaseException:
        # sys.exc_info() avoids version-specific "except X, name" syntax
        import sys
        cache[lookup] = sys.exc_info()[1]
        raise
    # keep a private copy in the cache; the caller gets the original
    cache[lookup] = (value[0], value[1].copy())
    return value
# Parse entry point used by all callers; starts out as the uncached
# implementation and is rebound to _parseCache by enablePackrat().
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared argument cache, ParserElement._exprArgCache."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

# set to True by enablePackrat() the first time it is called
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

       When the same expression is tried again at the same string location
       (which happens often in many complex grammars), the cached outcome
       is returned immediately instead of re-executing the parsing and
       validating code.  Both valid results and parsing exceptions are
       memoized.

       This speedup may break existing programs that use parse actions
       with side-effects, so packrat parsing is disabled when pyparsing is
       first imported.  To activate it, call ParserElement.enablePackrat().
       If your program uses psyco to "compile as you go", you must call
       enablePackrat before calling psyco.full(); if you do not, Python
       will crash.  For best results, call enablePackrat() immediately
       after importing pyparsing.

       Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).  Trailing text that this expression
       would skip anyway (its whitespace and ignore expressions) does not
       count as unparsed input.

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the loc argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full (s,loc,toks) signature, and
          reference the input string using the parse action's s argument
        - explicitly expand the tabs in your input string before calling
          parseString

       Returns the parsed tokens; parse failures propagate as exceptions.
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    loc, tokens = self._parse( instring, 0 )
    if parseAll:
        # A fresh StringEnd() knows nothing about this element's ignore
        # expressions or whitespace settings, so skip them here first;
        # otherwise a trailing comment makes parseAll fail.
        loc = self.preParse( instring, loc )
        StringEnd()._parse( instring, loc )
    return tokens
1053
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       maxMatches argument, to clip scanning after 'n' matches are found.

       This is a generator yielding (tokens, start, end) tuples.  A match
       that consumes no input is not reported; scanning simply moves on to
       the next character.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called for every position
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    while loc <= instrlen and matches < maxMatches:
        try:
            preloc = preparseFn( instring, loc )
            nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
        except ParseException:
            loc = preloc+1
        else:
            if nextLoc > loc:
                matches += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
            else:
                # Zero-width match: taking loc = nextLoc would leave loc
                # unchanged and yield the same empty match forever.
                loc = preloc+1
1085
def transformString( self, instring ):
    """Extension to scanString that rewrites the matched text.

       Define a grammar and attach a parse action to it that modifies the
       returned token list.  transformString() then scans the target string
       for matches and replaces each matched region with the tokens returned
       for it; text between matches is copied through unchanged.  Returns
       the resulting transformed string.

       Note: this sets keepTabs on the expression."""
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    pieces = []
    cursor = 0
    for toks, start, end in self.scanString( instring ):
        pieces.append( instring[cursor:start] )
        if toks:
            if isinstance( toks, ParseResults ):
                pieces.extend( toks.asList() )
            elif isinstance( toks, list ):
                pieces.extend( toks )
            else:
                pieces.append( toks )
        cursor = end
    pieces.append( instring[cursor:] )
    return "".join( [ _ustr(piece) for piece in pieces ] )
1110
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString that collects only the tokens of each
       match, discarding the start and end locations.  The optional
       maxMatches argument clips searching after 'n' matches are found.
       Returns a ParseResults built from the list of per-match tokens.
    """
    found = []
    for toks, start, end in self.scanString( instring, maxMatches ):
        found.append( toks )
    return ParseResults( found )
1117
def __add__(self, other ):
    """Implementation of + operator - returns And( [ self, other ] ).
       A string operand is wrapped in a Literal first; for any other
       non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1127
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.
       A string operand is wrapped in a Literal and added to self; for any
       other non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1137
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop:
       And( [ self, And._ErrorStop(), other ] ).
       A string operand is wrapped in a Literal first; for any other
       non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1147
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.
       A string operand is wrapped in a Literal and combined as other - self;
       for any other non-ParserElement operand a SyntaxWarning is issued and
       None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1157
def __mul__(self,other):
    """Implementation of * operator - repeats this expression.

       other may be:
        - an int n: exactly n repetitions
        - a tuple (min, max) of ints: at least min and at most max repetitions
        - a tuple (min, None) or (min,): min or more repetitions
        - a tuple (None, max): same as (0, max)
        - an empty tuple or (None, None): zero or more repetitions

       Raises TypeError for any other operand, and ValueError for a negative
       count, for max < min, or when the total number of repetitions is 0.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # normalize short tuples to the two-element form
        if len(other)==0:
            other = (None,None)
        elif len(other)==1:
            other = (other[0],None)
        if len(other)==2:
            if other[0] is None:
                other = (0, other[1])
            if isinstance(other[0],int) and other[1] is None:
                # open-ended upper bound
                if other[0] == 0:
                    return ZeroOrMore(self)
                if other[0] == 1:
                    return OneOrMore(self)
                else:
                    return self*other[0] + ZeroOrMore(self)
            elif isinstance(other[0],int) and isinstance(other[1],int):
                minElements, optElements = other
                # from here on optElements counts only the optional extras
                optElements -= minElements
            else:
                # apply the format; passing the values as extra exception
                # arguments left the placeholders unfilled in the message
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                                % (type(other[0]).__name__, type(other[1]).__name__))
        else:
            raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each extra repetition is only tried if the
            # previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when left operand is not a ParserElement;
       delegates to __mul__ with the same operand."""
    return self.__mul__(other)
1215
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst( [ self, other ] ).
       A string operand is wrapped in a Literal first; for any other
       non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1225
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.
       A string operand is wrapped in a Literal and combined as other | self;
       for any other non-ParserElement operand a SyntaxWarning is issued and
       None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1235
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or( [ self, other ] ).
       A string operand is wrapped in a Literal first; for any other
       non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1245
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.
       A string operand is wrapped in a Literal and combined as other ^ self;
       for any other non-ParserElement operand a SyntaxWarning is issued and
       None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1255
def __and__(self, other ):
    """Implementation of & operator - returns Each( [ self, other ] ).
       A string operand is wrapped in a Literal first; for any other
       non-ParserElement operand a SyntaxWarning is issued and None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1265
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.
       A string operand is wrapped in a Literal and combined as other & self;
       for any other non-ParserElement operand a SyntaxWarning is issued and
       None returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1275
def __invert__( self ):
    """Implementation of ~ operator - returns NotAny wrapping this expression"""
    return NotAny( self )
1279
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches=default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
       Returns whatever setResultsName(name) returns.
       """
    return self.setResultsName(name)
1287
def suppress( self ):
    """Suppresses the output of this ParserElement; useful to keep punctuation from
       cluttering up returned output.  Returns a new Suppress wrapping this
       expression.
    """
    return Suppress( self )
1293
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       ParserElement's defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       Returns self, so calls can be chained.
    """
    self.skipWhitespace = False
    return self
1301
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars for this element.  Also turns
       whitespace skipping on and clears copyDefaultWhiteChars.
       Returns self, so calls can be chained.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    return self
1309
def parseWithTabs( self ):
    """Overrides the default behavior of expanding <TAB>s to spaces before parsing
       the input string: after this call, tabs in the input are kept as-is.
       Must be called before parseString when the input grammar contains elements that
       match <TAB> characters.  Returns self, so calls can be chained."""
    self.keepTabs = True
    return self
1316
def ignore( self, other ):
    """Register an expression to be ignored (e.g., comments) while doing
       pattern matching; may be called repeatedly, to define multiple
       comment or other ignorable patterns.

       An expression that is not already a Suppress is wrapped in one and
       appended; a Suppress is appended only if it is not yet in
       ignoreExprs.  Returns self, so calls can be chained.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
1328
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks for the start, success and exception events.
       A false value (such as None) for any of them selects the
       corresponding module default action.  Returns self."""
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = ( startAction, successAction, exceptionAction )
    self.debug = True
    return self
1336
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable (installing the default debug actions),
       False to disable.  Returns self."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1345
def __str__( self ):
    """Return this element's name attribute."""
    return self.name
1348
def __repr__( self ):
    """Return the same text as the string conversion of this element, via _ustr."""
    return _ustr(self)
1351
def streamline( self ):
    """Mark this element as streamlined and reset strRepr to None.
       Returns self."""
    self.streamlined = True
    self.strRepr = None
    return self
1356
def checkRecursion( self, parseElementList ):
    """Recursion-check hook called by validate(); this implementation does nothing."""
    pass
1359
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.
       This implementation only calls checkRecursion with an empty list;
       validateTrace is not used (and therefore never modified) here."""
    self.checkRecursion( [] )
1363
def parseFile( self, file_or_filename ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       An argument without a usable read() method is treated as a filename
       and opened in binary mode.  Returns the result of parseString() on
       the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        # close the file even if read() fails, so the handle is not leaked
        try:
            file_contents = f.read()
        finally:
            f.close()
    return self.parseString(file_contents)
1376
def getException(self):
    """Build a new ParseException with an empty string, location 0, and this
       element's errmsg; used by __getattr__ to create myException on demand."""
    return ParseException("",0,self.errmsg,self)
1379
def __getattr__(self,aname):
    """Create the myException attribute on first access.

       The exception built by getException() is stored on the instance, so
       later reads of myException find it as a normal attribute.  Every
       other missing attribute raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    created = self.getException()
    self.myException = created
    return created
1386
def __eq__(self,other):
    """Compare this expression with another object.

       A string operand is treated as input text: the result is True when
       this expression followed by StringEnd() parses it, and False when
       that parse raises a ParseBaseException.  Any other operand is equal
       only if it is this very object.
    """
    if isinstance(other, basestring):
        try:
            (self + StringEnd()).parseString(_ustr(other))
            return True
        except ParseBaseException:
            return False
    # The previous code compared a freshly created super() proxy with
    # other, which is false even for "expr == expr".  Identity is the
    # default object equality and agrees with the id()-based __hash__.
    return self is other
1396
def __hash__(self):
    """Hash by object identity (the hash of id(self))."""
    return hash(id(self))
1399
def __req__(self,other):
    """Return self == other.
       NOTE(review): __req__ is not a special method name in Python's data
       model, so this only runs when called explicitly - confirm whether any
       caller relies on it."""
    return self == other
1402 1403
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg as
           "Expected <name>".  Returns what the base class setName returned."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # matching nothing is this token's normal result
        self.mayReturnEmpty = True
        self.mayIndexError = False
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's myException, updated to point
           at loc in instring."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty token in place
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc.  Returns (end location, matched string)
           or raises this element's myException set to loc."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith( self.match, loc ):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$" (see setDefaultKeywordChars);
       caseless allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # Resolve the default at call time; a default bound in the
            # signature is frozen when the class is defined, so
            # setDefaultKeywordChars() would never take effect.
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that neither the character
           before nor the character after the match is in identChars.
           Returns (end location, keyword) or raises this element's
           myException set to loc."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy that keeps this keyword's identChars."""
        c = super(Keyword,self).copy()
        # Resetting to DEFAULT_KEYWORD_CHARS here silently dropped custom
        # (and upper-cased caseless) identifier characters from every copy.
        c.identChars = self.identChars
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at loc with the stored match.
           Returns (end location, defining literal) or raises this element's
           myException set to loc."""
        candidate = instring[ loc:loc+self.matchLen ]
        if candidate.upper() == self.match:
            return loc+self.matchLen, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class CaselessKeyword(Keyword):
    """Keyword that is matched without regard to the case of letters.
       The matched result is the keyword string as given to the constructor.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # resolve the default at call time so that
            # Keyword.setDefaultKeywordChars() is honored
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.  Neither the character
           before nor the character after the match may be in identChars.
           Returns (end location, keyword) or raises this element's
           myException set to loc."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             # same leading-character test as Keyword's caseless branch;
             # without it "if" would match inside "elif"
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
       If asKeyword is true, the word must not be adjacent to further
       body characters.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in either
        # character set, the word can be matched by a compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # The initial character is emitted as an escaped literal,
                # so this form is only valid for a single initial
                # character.  (Testing the length of bodyCharsOrig here
                # made e.g. Word("abc","x") require the literal "abc".)
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.  Returns (end location, matched text) or
           raises this element's myException."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that keeps going is a mismatch
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if that succeeds; otherwise a
           "W:(...)" description built from the character sets and stored
           in strRepr."""
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn with the offending pattern, then let the error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at loc.  Returns (end location,
           ParseResults holding the matched text); named groups of the
           pattern are also stored in the results under their names.
           Raises this element's myException when the pattern does not match."""
        matched = self.re.match(instring,loc)
        if not matched:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        named = matched.groupdict()
        ret = ParseResults(matched.group())
        if named:
            for key in named:
                ret[key] = named[key]
        return matched.end(), ret

    def __str__( self ):
        """Return the base-class string if that succeeds; otherwise a
           "Re:(...)" description stored in strRepr."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1759 -class QuotedString(Token):
1760 """Token for matching strings that are delimited by quoting characters. 1761 """
1762 - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
1763 """ 1764 Defined with the following parameters: 1765 - quoteChar - string of one or more characters defining the quote delimiting string 1766 - escChar - character to escape quotes, typically backslash (default=None) 1767 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 1768 - multiline - boolean indicating whether quotes can span multiple lines (default=False) 1769 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 1770 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 1771 """ 1772 super(QuotedString,self).__init__() 1773 1774 # remove white space from quote chars - wont work anyway 1775 quoteChar = quoteChar.strip() 1776 if len(quoteChar) == 0: 1777 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1778 raise SyntaxError() 1779 1780 if endQuoteChar is None: 1781 endQuoteChar = quoteChar 1782 else: 1783 endQuoteChar = endQuoteChar.strip() 1784 if len(endQuoteChar) == 0: 1785 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1786 raise SyntaxError() 1787 1788 self.quoteChar = quoteChar 1789 self.quoteCharLen = len(quoteChar) 1790 self.firstQuoteChar = quoteChar[0] 1791 self.endQuoteChar = endQuoteChar 1792 self.endQuoteCharLen = len(endQuoteChar) 1793 self.escChar = escChar 1794 self.escQuote = escQuote 1795 self.unquoteResults = unquoteResults 1796 1797 if multiline: 1798 self.flags = re.MULTILINE | re.DOTALL 1799 self.pattern = r'%s(?:[^%s%s]' % \ 1800 ( re.escape(self.quoteChar), 1801 _escapeRegexRangeChars(self.endQuoteChar[0]), 1802 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 1803 else: 1804 self.flags = 0 1805 self.pattern = r'%s(?:[^%s\n\r%s]' % \ 1806 ( re.escape(self.quoteChar), 1807 _escapeRegexRangeChars(self.endQuoteChar[0]), 1808 (escChar is not None and 
_escapeRegexRangeChars(escChar) or '') ) 1809 if len(self.endQuoteChar) > 1: 1810 self.pattern += ( 1811 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 1812 _escapeRegexRangeChars(self.endQuoteChar[i])) 1813 for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 1814 ) 1815 if escQuote: 1816 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 1817 if escChar: 1818 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 1819 self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 1820 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 1821 1822 try: 1823 self.re = re.compile(self.pattern, self.flags) 1824 self.reString = self.pattern 1825 except sre_constants.error: 1826 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 1827 SyntaxWarning, stacklevel=2) 1828 raise 1829 1830 self.name = _ustr(self) 1831 self.errmsg = "Expected " + self.name 1832 #self.myException.msg = self.errmsg 1833 self.mayIndexError = False 1834 self.mayReturnEmpty = True
1835
def parseImpl( self, instring, loc, doActions=True ):
    """Match one quoted string starting exactly at loc.

    Returns a (newLoc, text) tuple, where newLoc is the end of the regex
    match.  When unquoteResults is set, the delimiters are sliced off and
    the escChar / escQuote sequences are collapsed; otherwise the raw
    matched text is returned.  On failure self.myException is updated
    with loc and instring and then raised.
    """
    # cheap first-character test before running the compiled pattern
    found = None
    if instring[loc] == self.firstQuoteChar:
        found = self.re.match( instring, loc )
    if not found:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    endLoc = found.end()
    text = found.group()
    if not self.unquoteResults:
        return endLoc, text

    # drop the opening and closing delimiters
    text = text[ self.quoteCharLen : -self.endQuoteCharLen ]

    if isinstance( text, basestring ):
        if self.escChar:
            # keep only the character captured after the escape character
            text = re.sub( self.escCharReplacePattern, r"\g<1>", text )
        if self.escQuote:
            # the escaped-quote sequence stands for the closing delimiter
            text = text.replace( self.escQuote, self.endQuoteChar )

    return endLoc, text
1862
def __str__( self ):
    """Return the display string for this expression.

    The superclass __str__ is tried first; if it raises, a description
    built from the opening and closing delimiters is cached in strRepr
    and returned.
    """
    try:
        return super(QuotedString,self).__str__()
    except:
        # any failure in the superclass falls through to the description
        pass

    if self.strRepr is not None:
        return self.strRepr

    self.strRepr = "quoted string, starting with %s ending with %s" % (
        self.quoteChar, self.endQuoteChar )
    return self.strRepr
1873 1874
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Constructed from a string holding every disallowed character, plus
    optional min, max and exact lengths.  min defaults to 1 and may not be
    smaller than 1; max and exact default to 0, which means no maximum and
    no exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # work out the length bounds; a positive exact overrides both
        shortest = min
        longest = _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume allowed characters from loc; return (newLoc, matchedText).

        Raises self.myException when the first character is disallowed or
        when fewer than minLen characters could be consumed.
        """
        forbidden = self.notChars
        if instring[loc] in forbidden:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        # never scan beyond maxLen characters or the end of the input
        limit = min( start+self.maxLen, len(instring) )
        loc = start + 1
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the superclass string, or a cached "!W:(...)" summary."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                # only the first four disallowed characters are displayed
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
1945
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace
       is ignored by pyparsing grammars.  This class is included when some
       whitespace structures are significant.  Define with a string containing
       the whitespace characters to be matched; default is " \\t\\r\\n".  Also
       takes optional min, max, and exact arguments, as defined for the Word
       class."""
    # display labels used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws

        # characters being matched must no longer be skipped as whitespace
        skippable = []
        for c in self.whiteChars:
            if c not in self.matchWhite:
                skippable.append( c )
        self.setWhitespaceChars( "".join(skippable) )

        labels = []
        for c in self.matchWhite:
            labels.append( White.whiteStrs[c] )
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # length bounds; a positive exact overrides both min and max
        shortest = min
        longest = _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume matching whitespace from loc; return (newLoc, matchedText).

        Raises self.myException when the character at loc is not in
        matchWhite or when fewer than minLen characters were consumed.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        # stop at maxLen characters or the end of the input
        limit = min( start + self.maxLen, len(instring) )
        loc = start + 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
2002 2003
class _PositionToken(Token):
    """Token base class that names itself after its concrete class and
       marks itself as possibly returning empty and never raising
       IndexError."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2010
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column is reached."""
        if col( loc, instring ) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        # advance over whitespace only, stopping as soon as the column matches
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (newLoc, text) covering the gap up to the target column.

        Raises ParseException when loc is already past that column.
        """
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]
2033
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skipped, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc+1 when the inherited preParse stops on a newline, else loc."""
        skipped = super(LineStart,self).preParse( instring, loc )
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at a line start, else raise self.myException.

        A line start is position 0, the position preParse reaches from 0,
        or any position whose preceding character is a newline.
        """
        if loc == 0:
            return loc, []
        if loc == self.preParse( instring, 0 ):
            return loc, []
        if instring[loc-1] == "\n":
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2058
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # only blanks and tabs are skipped, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc, or the exact end of the input.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) at the end of
        the string; any other position raises self.myException.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2084
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at position 0, or at the position that
        preParse reaches from 0 (only whitespace and ignorables precede);
        otherwise raise self.myException."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2102
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Raise self.myException while input remains; otherwise succeed.

        Exactly at the end the returned location is loc+1; beyond the end
        loc is returned unchanged.  The token list is empty in both cases.
        """
        instrlen = len(instring)
        if loc < instrlen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        if loc == instrlen:
            return loc+1, []
        # only loc > instrlen remains
        return loc, []
2126
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at a word start, else raise self.myException."""
        # position 0 always counts as a word start
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc-1] in chars or instring[loc] not in chars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2148
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with (loc, []) at a word end, else raise self.myException.

        An empty input, or a position at/after the end of the input, always
        succeeds.  Elsewhere the character at loc must not be a word
        character and the character before loc must be one.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # At loc == 0 there is no preceding character; without the
            # explicit test, instring[loc-1] would be instring[-1] and the
            # LAST character of the input would be examined instead.
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2173 2174
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # a list is stored as given, a string becomes a one-element list
        # holding a Literal, anything else is wrapped in a one-element list
        if isinstance( exprs, list ):
            elements = exprs
        elif isinstance( exprs, basestring ):
            elements = [ Literal( exprs ) ]
        else:
            elements = [ exprs ]
        self.exprs = elements
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions and return self."""
        self.exprs.append( other )
        # the cached string form no longer describes the contents
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions (which are first replaced by copies)."""
        self.skipWhitespace = False
        copies = []
        for e in self.exprs:
            copies.append( e.copy() )
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on every contained expression.

        A Suppress that is already in ignoreExprs is not registered again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            # pass on the entry the base class has just appended
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten nested
        expressions of this same class; returns self."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable( candidate ):
            # same class, and no parse action, results name or debug flag
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # (likewise for Or's and MatchFirst's); only two-element expressions are examined
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            tail = self.exprs[-1]
            if mergeable( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check for recursion."""
        # validateTrace is only copied here, never modified
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
2267
2268 -class And(ParseExpression):
2269 """Requires all given ParseExpressions to be found in the given order. 2270 Expressions may be separated by whitespace. 2271 May be constructed using the '+' operator. 2272 """ 2273
2274 - class _ErrorStop(Empty):
2275 - def __new__(cls,*args,**kwargs):
2276 return And._ErrorStop.instance
2277 _ErrorStop.instance = Empty() 2278 _ErrorStop.instance.leaveWhitespace() 2279
2280 - def __init__( self, exprs, savelist = True ):
2281 super(And,self).__init__(exprs, savelist) 2282 self.mayReturnEmpty = True 2283 for e in self.exprs: 2284 if not e.mayReturnEmpty: 2285 self.mayReturnEmpty = False 2286 break 2287 self.setWhitespaceChars( exprs[0].whiteChars ) 2288 self.skipWhitespace = exprs[0].skipWhitespace 2289 self.callPreparse = True
2290
2291 - def parseImpl( self, instring, loc, doActions=True ):
2292 # pass False as last arg to _parse for first element, since we already 2293 # pre-parsed the string as part of our And pre-parsing 2294 loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 2295 errorStop = False 2296 for e in self.exprs[1:]: 2297 if e is And._ErrorStop.instance: 2298 errorStop = True 2299 continue 2300 if errorStop: 2301 try: 2302 loc, exprtokens = e._parse( instring, loc, doActions ) 2303 except ParseBaseException, pe: 2304 raise ParseSyntaxException(pe) 2305 except IndexError, ie: 2306 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 2307 else: 2308 loc, exprtokens = e._parse( instring, loc, doActions ) 2309 if exprtokens or exprtokens.keys(): 2310 resultlist += exprtokens 2311 return loc, resultlist
2312
2313 - def __iadd__(self, other ):
2314 if isinstance( other, basestring ): 2315 other = Literal( other ) 2316 return self.append( other ) #And( [ self, other ] )
2317
2318 - def checkRecursion( self, parseElementList ):
2319 subRecCheckList = parseElementList[:] + [ self ] 2320 for e in self.exprs: 2321 e.checkRecursion( subRecCheckList ) 2322 if not e.mayReturnEmpty: 2323 break
2324
2325 - def __str__( self ):
2326 if hasattr(self,"name"): 2327 return self.name 2328 2329 if self.strRepr is None: 2330 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2331 2332 return self.strRepr
2333 2334
2335 -class Or(ParseExpression):
2336 """Requires that at least one ParseExpression is found. 2337 If two expressions match, the expression that matches the longest string will be used. 2338 May be constructed using the '^' operator. 2339 """
2340 - def __init__( self, exprs, savelist = False ):
2341 super(Or,self).__init__(exprs, savelist) 2342 self.mayReturnEmpty = False 2343 for e in self.exprs: 2344 if e.mayReturnEmpty: 2345 self.mayReturnEmpty = True 2346 break
2347
2348 - def parseImpl( self, instring, loc, doActions=True ):
2349 maxExcLoc = -1 2350 maxMatchLoc = -1 2351 maxException = None 2352 for e in self.exprs: 2353 try: 2354 loc2 = e.tryParse( instring, loc ) 2355 except ParseException, err: 2356 if err.loc > maxExcLoc: 2357 maxException = err 2358 maxExcLoc = err.loc 2359 except IndexError: 2360 if len(instring) > maxExcLoc: 2361 maxException = ParseException(instring,len(instring),e.errmsg,self) 2362 maxExcLoc = len(instring) 2363 else: 2364 if loc2 > maxMatchLoc: 2365 maxMatchLoc = loc2 2366 maxMatchExp = e 2367 2368 if maxMatchLoc < 0: 2369 if maxException is not None: 2370 raise maxException 2371 else: 2372 raise ParseException(instring, loc, "no defined alternatives to match", self) 2373 2374 return maxMatchExp._parse( instring, loc, doActions )
2375
2376 - def __ixor__(self, other ):
2377 if isinstance( other, basestring ): 2378 other = Literal( other ) 2379 return self.append( other ) #Or( [ self, other ] )
2380
2381 - def __str__( self ):
2382 if hasattr(self,"name"): 2383 return self.name 2384 2385 if self.strRepr is None: 2386 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2387 2388 return self.strRepr
2389
2390 - def checkRecursion( self, parseElementList ):
2391 subRecCheckList = parseElementList[:] + [ self ] 2392 for e in self.exprs: 2393 e.checkRecursion( subRecCheckList )
2394 2395
2396 -class MatchFirst(ParseExpression):
2397 """Requires that at least one ParseExpression is found. 2398 If two expressions match, the first one listed is the one that will match. 2399 May be constructed using the '|' operator. 2400 """
2401 - def __init__( self, exprs, savelist = False ):
2402 super(MatchFirst,self).__init__(exprs, savelist) 2403 if exprs: 2404 self.mayReturnEmpty = False 2405 for e in self.exprs: 2406 if e.mayReturnEmpty: 2407 self.mayReturnEmpty = True 2408 break 2409 else: 2410 self.mayReturnEmpty = True
2411
2412 - def parseImpl( self, instring, loc, doActions=True ):
2413 maxExcLoc = -1 2414 maxException = None 2415 for e in self.exprs: 2416 try: 2417 ret = e._parse( instring, loc, doActions ) 2418 return ret 2419 except ParseException, err: 2420 if err.loc > maxExcLoc: 2421 maxException = err 2422 maxExcLoc = err.loc 2423 except IndexError: 2424 if len(instring) > maxExcLoc: 2425 maxException = ParseException(instring,len(instring),e.errmsg,self) 2426 maxExcLoc = len(instring) 2427 2428 # only got here if no expression matched, raise exception for match that made it the furthest 2429 else: 2430 if maxException is not None: 2431 raise maxException 2432 else: 2433 raise ParseException(instring, loc, "no defined alternatives to match", self)
2434
2435 - def __ior__(self, other ):
2436 if isinstance( other, basestring ): 2437 other = Literal( other ) 2438 return self.append( other ) #MatchFirst( [ self, other ] )
2439
2440 - def __str__( self ):
2441 if hasattr(self,"name"): 2442 return self.name 2443 2444 if self.strRepr is None: 2445 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2446 2447 return self.strRepr
2448
2449 - def checkRecursion( self, parseElementList ):
2450 subRecCheckList = parseElementList[:] + [ self ] 2451 for e in self.exprs: 2452 e.checkRecursion( subRecCheckList )
2453 2454
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # may return empty only if every contained expression may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they occur.

        A trial pass with tryParse establishes the order; the expressions
        are then parsed for real in that order and their results merged.
        Raises ParseException when a required expression never matched.
        """
        if self.initExprGroups:
            # classify the contained expressions once, on first use
            optionals = []
            multioptionals = []
            multirequired = []
            required = []
            for e in self.exprs:
                if isinstance(e,Optional):
                    optionals.append( e.expr )
                if isinstance(e,ZeroOrMore):
                    multioptionals.append( e.expr )
                if isinstance(e,OneOrMore):
                    multirequired.append( e.expr )
                if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)):
                    required.append( e )
            self.optionals = optionals
            self.multioptionals = multioptionals
            self.multirequired = multirequired
            self.required = required + multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt ]

        # the real parse, in the order found above
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # names already present are combined and restored after the merge
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    combined = ParseResults(finalResults[k])
                    combined += ParseResults(r[k])
                    dups[k] = combined
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            pieces = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( pieces ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2539 2540
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string is turned into a Literal; None is allowed).

        When expr is not None its parsing flags, whitespace settings and
        ignore expressions are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression.

        Returns self.
        """
        self.skipWhitespace = False
        # test for None before touching the wrapped expression
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress that is already in ignoreExprs is not registered again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        # validateTrace is only copied here, never modified
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2611 2612
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse either raises or succeeds; its result is discarded
        self.expr.tryParse( instring, loc )
        return loc, []
2625 2626
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) only when the wrapped expression fails."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent, which is a match for NotAny
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2662 2663
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression as many times as possible.

        Returns (loc, tokens); tokens is an empty list when there was no
        match at all.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipping = ( len(self.ignoreExprs) > 0 )
            # the loop ends when a repetition raises
            while True:
                nextLoc = loc
                if skipping:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited method, but the returned element keeps its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2701 2702
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once, then as many more times as possible.

        A failure of the first match propagates to the caller; failures of
        later repetitions simply end the repetition.
        """
        # must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipping = ( len(self.ignoreExprs) > 0 )
            # the loop ends when a repetition raises
            while True:
                nextLoc = loc
                if skipping:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited method, but the returned element keeps its results as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2736
class _NullToken(object):
    """Placeholder object that is always false and prints as an empty string."""
    def __nonzero__(self):
        return False
    # the same truth test under the other special-method name
    __bool__ = __nonzero__
    def __str__(self):
        return ""

# sentinel used as the default for Optional's default argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression, or fall back to the default value.

        On a failed match loc is returned unchanged, together with an
        empty list or, when a default was given, the default value (also
        stored under the wrapped expression's results name, if it has one).
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2777 2778
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignore expression to a copy of the target
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

        Returns the skipped text in a one-element list; with includeMatch
        the target is parsed as well and its tokens, if any, are added to
        the skipped text.  Raises self.myException when the end of the
        input is passed without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        while loc <= instrlen:
            try:
                loc = target._skipIgnorables( instring, loc )
                # trial match with parse actions switched off
                target._parse( instring, loc, doActions=False, callPreParse=False )
                if not self.includeMatch:
                    return loc, [ instring[startLoc:loc] ]

                skipText = instring[startLoc:loc]
                loc,mat = target._parse(instring,loc,doActions,callPreParse=False)
                if not mat:
                    return loc, [ skipText ]
                skipRes = ParseResults( skipText )
                skipRes += mat
                return loc, [ skipRes ]
            except (ParseException,IndexError):
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2822
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other as the wrapped expression (strings become Literals).

        Copies the parsing-related settings of the new expression onto this
        Forward and discards the cached string form.  Returns None.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this Forward only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; returns self."""
        if not self.streamlined:
            # mark first so that a recursive reference back to self stops here
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check.

        The default list is never mutated (a fresh list is built below), so
        sharing it between calls is harmless.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the name if set, else "Forward: " plus the wrapped expression.

        While the wrapped expression is being rendered, the instance is
        temporarily re-classed as _ForwardNoRecurse so that a recursive
        reference back to this object prints as "..." instead of recursing
        forever.
        """
        if hasattr(self,"name"):
            return self.name

        # remember the real class: it may be a subclass of Forward, and must be
        # restored exactly rather than being reset to plain Forward
        savedClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = savedClass
        return "Forward: "+retString

    def copy(self):
        """Return a copy; an undefined Forward is copied as a new Forward that wraps this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
2893
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is a fixed ellipsis.

    Forward.__str__ switches an instance to this class while rendering its
    contents, so that a self-reference yields "..." rather than recursing.
    """
    def __str__( self ):
        return "..."
2897
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results.

    The savelist argument is accepted but not passed on to the base class;
    saveAsList is always reset to False here (subclasses may set it again).
    """
    def __init__( self, expr, savelist=False ):
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
2903
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated)."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code constructing the Upcase
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a plain list holding the upper-cased form of every token."""
        return [ string.upper(tok) for tok in tokenlist ]
2913 2914
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.

    Parameters:
     - expr - the expression whose tokens are merged
     - joinString - (default="") handed to the token list's _asStringList
       helper when the token strings are collected
     - adjacent - (default=True) require the matched pieces to be contiguous
       in the input; pass adjacent=False to allow intervening whitespace
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.joinString = joinString
        self.adjacent = adjacent
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression; returns self.

        In adjacent mode the ParserElement implementation is called directly,
        otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single concatenated string token."""
        merged = "".join(tokenlist._asStringList(self.joinString))
        # start from a copy (keeps any named results), then swap the list contents
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ merged ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
2945
class Group(TokenConverter):
    """Converter that returns the matched tokens wrapped as a single nested list.

    Useful for keeping the tokens of ZeroOrMore and OneOrMore expressions together.
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the token list in an enclosing one-element list."""
        return [ tokenlist ]
2954
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.

    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a named entry to tokenlist for every non-empty element.

        The key is the element's first token (integer keys are converted to
        stripped strings).  The value is "" for a one-token element, the
        second token for a simple two-token element, and otherwise the
        remaining tokens (unwrapped when exactly one unnamed token remains).
        """
        for idx, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy() #ParseResults(i)
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, idx)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
2987 2988
class Suppress(TokenConverter):
    """Converter that discards the tokens of the expression it wraps."""
    def postParse( self, instring, loc, tokenlist ):
        """Drop everything that was matched: always returns an empty list."""
        return []

    def suppress( self ):
        """Already suppressed - nothing to wrap, so return self."""
        return self
2996 2997
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    After the first successful call, further calls raise ParseException until
    reset() is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only mark as used once the wrapped action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to run one more time."""
        self.called = False
3011
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The wrapped action writes an "entering" line to stderr before it runs and
    a "leaving" line afterwards, showing either the returned value or the
    exception (which is then re-raised unchanged).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        s,l,t = paArgs[-3:]
        thisFunc = f.func_name
        if len(paArgs)>3:
            # more than (s,l,t): the leading argument is the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, the list elements and delimiters can have intervening whitespace, and
    comments, and the delimiters are suppressed so that only the list elements are
    returned.  With combine=True the whole match is wrapped in a Combine and
    returned as a single token string with the delimiters included.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3050
def countedArray( expr ):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as one grouped list of expr tokens - the
    leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # the count has just been parsed: define what must follow it
        n = int(t[0])
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    countExpr = Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countExpr + arrayExpr )

def _flatten(L):
    """Return the leaves of an arbitrarily nested list as one flat list.

    A value that is not exactly of type list is returned wrapped in a
    one-element list.  Empty nested lists contribute nothing.

    Implemented with an explicit stack of iterators rather than recursion, so
    that long or deeply nested token lists cannot exhaust the interpreter's
    recursion limit, and so that no intermediate list slices are created.
    """
    if type(L) is not list:
        return [L]
    flat = []
    pending = [iter(L)]
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend; the outer iterator resumes once this one is done
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost iterator is exhausted
            pending.pop()
    return flat
3069
def matchPreviousLiteral(expr):
    """Helper to define an expression that repeats the literal text matched
       by a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is matched as a
       literal, this will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
            rep << And( literals )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

def matchPreviousExpr(expr):
    """Helper to define an expression that must produce the same tokens as a
       previous expression did.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is parsed with a
       copy of the expression and the resulting tokens are then compared,
       this will *not* match the leading "1:1" in "1:10" ("1" is compared
       with "10").
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater insist on it
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

def _escapeRegexRangeChars(s):
    """Escape s for use inside a regular expression character class.

    Backslash, '^', '-' and ']' get a leading backslash; newline and tab are
    spelled out as the two-character sequences \\n and \\t.
    """
    # the backslash must be handled first, or the escapes added below would be doubled
    substitutions = (
        ("\\", "\\\\"),
        ("^",  "\\^"),
        ("-",  "\\-"),
        ("]",  "\\]"),
        ("\n", r"\n"),
        ("\t", r"\t"),
        )
    for plain, escaped in substitutions:
        s = s.replace(plain, escaped)
    return _ustr(s)
3129
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list (or tuple) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type for strs produces a SyntaxWarning and is treated as an
       empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a mutable copy - a tuple cannot be reordered in place
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        symbols = []

    # drop duplicates, and move any symbol in front of a shorter symbol that
    # would otherwise mask it (i.e. that is a prefix of it)
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3188
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Builds Dict( ZeroOrMore( Group( key + value ) ) ), so the
       Dict, ZeroOrMore, and Group tokens are nested in the proper order.  The key pattern
       can include delimiting markers or punctuation, as long as they are suppressed,
       thereby leaving the significant key text.  The value pattern can include named
       results, so that the Dict results can include named token fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )

# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# grammar used by srange() to pick apart a regex-style "[...]" character set

# a two-character token starting with _bslash and followed by one of the listed
# punctuation characters; only the second character is kept
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# the printables, minus backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# _bslash + "0x" + hex digits -> the character with that code (base 16)
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# _bslash + "0" + octal digits -> the character with that code (base 8)
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range: grouped pair of endpoints, the dash itself is dropped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a parsed range (a ParseResults pair) into every character from p[0]
# through p[1] inclusive; anything else is passed through unchanged
# (note: because of the and/or form, an empty expansion also falls through to p)
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.  If the input cannot be processed,
       an empty string is returned.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
    """
    try:
        pieces = []
        for part in _reBracketExpr.parseString(s).body:
            pieces.append(_expanded(part))
        return "".join(pieces)
    except:
        # best effort by design: any failure yields the empty string
        return ""
3236
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises ParseException
       when the match location is not at column n.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.

    The returned callable ignores its arguments and answers with a new
    one-element list holding replStr.  Especially useful when used with
    transformString().
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc

def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.

    Returns the first token without its first and last character.
    To use, add this parse action to quoted string using::
        quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3260
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

    Every token is first passed through _ustr; a plain list is returned.
    """
    converted = []
    for text in map(_ustr,t):
        converted.append(text.upper())
    return converted
3264
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

    Every token is first passed through _ustr; a plain list is returned.
    """
    converted = []
    for text in map(_ustr,t):
        converted.append(text.lower())
    return converted
3268
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

    The token list t is emptied in place and refilled with the slice of the
    input string from startLoc up to the end location reported by
    getTokensEndLoc(); the same (modified) t is returned.
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3279
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

    Walks the call stack, starting two frames above this function, looking for
    a frame running a function named "_parseNoCache", and returns that frame's
    local variable "loc".  Raises ParseFatalException if no such frame exists.
    """
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        idx = 2
        while idx < len(frames):
            record = frames[idx]
            if record[3] == "_parseNoCache":
                return record[0].f_locals["loc"]
            idx += 1
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references promptly
        del frames
3295
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    tagStr may be a string (turned into a Keyword, caseless unless xml is true)
    or an expression that has a name.  Returns the pair (openTag, closeTag);
    their results names are "start"/"end" followed by the title-cased tag name
    with any ':' separators removed.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: names are lower-cased, values are optional and may be unquoted
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) +
                         Optional( Suppress("=") + tagAttrValue ) )
    # trailing "/" of an empty-element tag, reported as a boolean named "empty"
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(tagAttr)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+suffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+suffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3323
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.

    Returns the pair (openTag, closeTag) built by _makeTags in non-XML mode.
    """
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
3327
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.

    Returns the pair (openTag, closeTag) built by _makeTags in XML mode.
    """
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
3331
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.
       Positional name-value tuples take priority: if any are given, the keyword
       arguments are not consulted.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException if a listed attribute is
       missing from the tokens or has a different value.
    """
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    attrs = [(name,value) for name,value in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough for this attribute
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
withAttribute.ANY_VALUE = object()

# associativity markers for operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar; a parenthesized sub-expression is accepted wherever
          baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a malformed ternary operator definition, an
       unsupported number of terms, or an unrecognized associativity.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)

        # operands after an operator come from the previous level for
        # left-associative operators, and from this same level (recursively)
        # for right-associative ones
        if rightLeftAssoc == opAssoc.LEFT:
            leftAssoc = True
            operand = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            leftAssoc = False
            operand = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if leftAssoc:
                lookahead = lastExpr + opExpr
                grouped = lastExpr + OneOrMore( opExpr )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                grouped = opExpr + thisExpr
        elif arity == 2:
            if opExpr is not None:
                lookahead = lastExpr + opExpr + operand
                grouped = lastExpr + OneOrMore( opExpr + operand )
            else:
                # no operator expression: operands simply follow one another
                lookahead = lastExpr + operand
                grouped = lastExpr + OneOrMore( operand )
        elif arity == 3:
            lookahead = lastExpr + opExpr1 + operand + opExpr2 + operand
            grouped = lastExpr + opExpr1 + operand + opExpr2 + operand
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        matchExpr = FollowedBy(lookahead) + Group( grouped )
        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret

# Quoted-string expressions.  Inside the quotes each pattern accepts: any
# character other than the quote itself, newline, carriage return or backslash;
# a doubled quote character; a backslash-x hex escape; or a backslash followed
# by any single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single regular expression
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values; in that case opener and closer must both
       be strings.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener and closer are equal, or if no content
       expression is given and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # default content: runs of characters that are neither delimiters nor whitespace
        excluded = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        if ignoreExpr is None:
            content = (empty+CharsNotIn(excluded).setParseAction(lambda t:t[0].strip()))
        else:
            # take one character at a time, stopping where ignoreExpr could start
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(excluded,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    if ignoreExpr is None:
        items = ret | content
    else:
        items = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return ret
3482
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note: blockStatementExpr is modified - a backslash followed by a line
       end is registered on it as ignorable.
    """
    def checkPeerIndent(s,l,t):
        # a statement at the same level must start in the current indent column
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper, and becomes the new level
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must fall back to an enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# replacement character for each entity name, position by position;
# &quot; stands for the double quote character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with the demo grammar and print the results, or the error location."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            # caret under the column where parsing failed
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # a minimal "select <columns> from <tables>" grammar for the demo
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    demoStatements = (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
        )
    for statement in demoStatements:
        test( statement )
