1 package org.codehaus.groovy.syntax.lexer;
2
3 import org.codehaus.groovy.syntax.ReadException;
4 import org.codehaus.groovy.syntax.Types;
5 import org.codehaus.groovy.syntax.Token;
6 import org.codehaus.groovy.GroovyBugError;
7
8
9 /***
10 * A base class for all other lexers.
11 *
12 * @author Bob Mcwhirter
13 * @author James Strachan
14 * @author John Wilson
15 * @author Chris Poirier
16 */
17
18 public class LexerBase implements Lexer
19 {
20
21 protected int startLine;
22 protected int startColumn;
23
24 protected Lexer delegate = null;
25 protected Lexer source = null;
26
27
28
29 /***
30 * Initializes the <code>LexerBase</code>.
31 */
32
33 public LexerBase( )
34 {
35 }
36
37
38
39 /***
40 * Gets the lexer that is actually doing the <code>nextToken()</code>
41 * work, if it isn't us.
42 */
43
44 public Lexer getDelegate()
45 {
46 return delegate;
47 }
48
49
50
51 /***
52 * Gets the lexer from which this lexer is obtaining characters.
53 */
54
55 public Lexer getSource()
56 {
57 return source;
58 }
59
60
61
62 /***
63 * Finds and returns (consuming) the next token from the underlying stream.
64 * Returns null when out of tokens. This implementation correctly handles
65 * delegation, and subclasses implement undelegatedNextToken(), which will
66 * be called by this routine when appropriate.
67 */
68
69 public Token nextToken() throws ReadException, LexerException
70 {
71
72
73
74 if( delegate != null )
75 {
76 Token next = delegate.nextToken();
77
78 if( next == null )
79 {
80 undelegate();
81 }
82 else
83 {
84 return next;
85 }
86
87 }
88
89 mark();
90 return undelegatedNextToken();
91 }
92
93
94
95 /***
96 * Does undelegated nextToken() operations. You supply your
97 * lexer-specific nextToken() code by overriding this method.
98 */
99
100 protected Token undelegatedNextToken() throws ReadException, LexerException
101 {
102 return null;
103 }
104
105
106
107
108
109
110
111
112 /***
113 * Process an end-of-line marker and returns a NEWLINE token.
114 * Returns null if not at an end-of-line.
115 */
116
117 protected Token tokenizeEOL() throws LexerException, ReadException
118 {
119 Token token = null;
120
121 char c = la();
122 switch( c )
123 {
124 case '\r':
125 case '\n':
126 token = symbol( Types.NEWLINE );
127
128 consume();
129 if (c == '\r' && la() == '\n')
130 {
131 consume();
132 }
133 }
134
135 return token;
136 }
137
138
139
140 /***
141 * Reads an end-of-line marker and writes the text into the
142 * specified buffer, if supplied.
143 */
144
145 protected boolean readEOL( StringBuffer destination ) throws LexerException, ReadException
146 {
147 boolean read = false;
148
149 char c = la();
150 switch( c )
151 {
152 case '\r':
153 case '\n':
154 if( destination == null )
155 {
156 consume();
157 if (c == '\r' && la() == '\n')
158 {
159 consume();
160 }
161 }
162 else
163 {
164 destination.append( consume() );
165 if (c == '\r' && la() == '\n')
166 {
167 destination.append( consume() );
168 }
169 }
170
171 read = true;
172 }
173
174 return read;
175 }
176
177
178
179 /***
180 * Synonym for <code>readEOL(null)</code>.
181 */
182
183 protected void readEOL() throws LexerException, ReadException
184 {
185 readEOL( null );
186 }
187
188
189
190
191
192
193
194
195
196 /***
197 * Resets a lexer for reuse.
198 */
199
200 public void reset()
201 {
202 delegate = null;
203 source = null;
204 }
205
206
207
208 /***
209 * Delegates our duties to another Lexer.
210 */
211
212 public void delegate( Lexer to )
213 {
214 this.delegate = to;
215 to.setSource( this );
216 }
217
218
219
220 /***
221 * Retakes responsibility for our duties.
222 */
223
224 public void undelegate()
225 {
226 if( delegate != null )
227 {
228 delegate.unsetSource( );
229 delegate = null;
230 }
231 }
232
233
234
235 /***
236 * Sets the source lexer.
237 */
238
239 public void setSource( Lexer source )
240 {
241 if( source == null )
242 {
243 throw new GroovyBugError( "use unsetSource() to remove a source from a lexer" );
244 }
245 this.source = source;
246 }
247
248
249
250 /***
251 * Unsets the source lexer.
252 */
253
254 public void unsetSource()
255 {
256 this.source = null;
257 }
258
259
260
261 /***
262 * Returns true if we are delegated to another lexer.
263 */
264
265 public boolean isDelegated()
266 {
267 return delegate != null;
268 }
269
270
271
272 /***
273 * Returns true if we are obtaining our characters
274 * from another lexer.
275 */
276
277 public boolean isExternallySourced()
278 {
279 return source != null;
280 }
281
282
283
284
285
286
287
288
289 /***
290 * Creates and throws a new <code>UnexpectedCharacterException</code>.
291 */
292
293 protected void unexpected( char c, int offset, String message ) throws UnexpectedCharacterException
294 {
295 throw new UnexpectedCharacterException( getStartLine(), getStartColumn() + offset, c, message );
296 }
297
298
299
300 /***
301 * Creates and throws a new <code>UnexpectedCharacterException</code>.
302 */
303
304 protected void unexpected( char c, char[] expected, int offset ) throws UnexpectedCharacterException
305 {
306 throw new UnexpectedCharacterException( getStartLine(), getStartColumn() + offset, c, expected );
307 }
308
309
310
311 /***
312 * Synonym for <code>unexpected( c, null, offset )</code>.
313 */
314
315 protected void unexpected( char c, int offset ) throws UnexpectedCharacterException
316 {
317 unexpected( c, null, offset );
318 }
319
320
321
322
323
324
325
326
327 /***
328 * Creates a new symbol token, and allows you to alter the starting
329 * column.
330 */
331
332 protected Token symbol( int type, int columnOffset )
333 {
334 return Token.newSymbol( type, getStartLine(), getStartColumn() - columnOffset );
335 }
336
337
338
339 /***
340 * Creates a new symbol token.
341 */
342
343 protected Token symbol( int type )
344 {
345 return Token.newSymbol( type, getStartLine(), getStartColumn() );
346 }
347
348
349
350
351
352
353
354
355 /***
356 * Returns the current line number.
357 */
358
359 public int getLine()
360 {
361 if( source != null )
362 {
363 return source.getLine();
364 }
365
366 return -1;
367 }
368
369
370
371 /***
372 * Returns the current column within that line.
373 */
374
375 public int getColumn()
376 {
377 if( source != null )
378 {
379 return source.getColumn();
380 }
381
382 return -1;
383 }
384
385
386
387 /***
388 * Saves information about the current position, for tracking token extents.
389 */
390
391 protected void mark()
392 {
393 startLine = getLine();
394 startColumn = getColumn();
395 }
396
397
398
399 /***
400 * Returns the starting line of the current token.
401 */
402
403 protected int getStartLine()
404 {
405 return this.startLine;
406 }
407
408
409
410 /***
411 * Returns the starting column of the current token.
412 */
413
414 protected int getStartColumn()
415 {
416 return this.startColumn;
417 }
418
419
420
421 /***
422 * Returns the next character, without consuming it.
423 */
424
425 public char la() throws LexerException, ReadException
426 {
427 return la(1);
428 }
429
430
431
432 /***
433 * Returns the next <code>k</code>th character, without consuming any.
434 */
435
436 public char la(int k) throws LexerException, ReadException
437 {
438 if( source != null )
439 {
440 return source.la(k);
441 }
442 else
443 {
444 return CharStream.EOS;
445 }
446 }
447
448
449
450 /***
451 * Eats a character from the input stream.
452 */
453
454 public char consume() throws LexerException, ReadException
455 {
456 if( source != null )
457 {
458 return source.consume();
459 }
460 else
461 {
462 return CharStream.EOS;
463 }
464 }
465
466
467 }