View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   import org.codehaus.groovy.syntax.ReadException;
4   
5   /***
6    *  A Lexer for processing here docs.  It reads a line at a time from
7    *  the underlying stream (leaving the EOL for the next read), then
8    *  offers that data for users.
9    *
10   *  @author Chris Poirier
11   */
12  
13  public class HereDocLexer extends TextLexerBase
14  {
15  
16      protected String  marker   = null;   // The marker to watch for
17      protected boolean onmargin = true;   // If false, the marker can be indented
18      protected String  data     = "";     // The current data
19      protected int     consumed = -1;     // The last index consumed
20      protected boolean last     = false;  // Set after the last line is read
21  
22  
23     /***
24      *  Initializes the lexer to read up to (and including) the marker
25      *  on a line by itself.
26      */
27  
28      public HereDocLexer( String marker )
29      {
30          if( marker.startsWith("-") )
31          {
32              this.marker = marker.substring( 1, marker.length() );
33              this.onmargin = false;
34          }
35          else
36          {
37              this.marker = marker;
38              this.onmargin = true;
39          }
40      }
41  
42  
43  
44     /***
45      *  Sets the source lexer and sets the lexer running.
46      */
47  
48      public void setSource( Lexer source )
49      {
50          super.setSource( source );
51  
52          data     = "";
53          consumed = -1;
54          last     = false;
55  
56          restart();
57          delimit( true );
58      }
59  
60  
61  
62     /***
63      *  Unsets the source lexer.
64      */
65  
66      public void unsetSource()
67      {
68          finish();
69          super.unsetSource();
70      }
71  
72  
73  
74     /***
75      *  Sets delimiting on.  The first thing we to is check for and eat our
76      *  delimiter.
77      */
78  
79      public void delimit( boolean delimit )
80      {
81          super.delimit( delimit );
82  
83          if( delimit )
84          {
85              try
86              {
87                  if( !finished && la(1) == CharStream.EOS )
88                  {
89                      finish();
90                  }
91              }
92              catch( Exception e )
93              {
94                  finished = true;
95              }
96          }
97      }
98  
99  
100 
101 
102    /***
103     *  Returns the next <code>k</code>th character, without consuming any.
104     */
105 
106     public char la(int k) throws LexerException, ReadException
107     {
108 
109         if( !finished && source != null )
110         {
111             if( consumed + k >= data.length() )
112             {
113                 refill();
114             }
115 
116             if( consumed + k < data.length() )
117             {
118                 return data.charAt( consumed + k );
119             }
120         }
121 
122         return CharStream.EOS;
123     }
124 
125 
126 
127 
128    /***
129     *  Eats a character from the input stream.  Searches for the delimiter if
130     *  filtered.  Note that turning delimiting on also checks if we are at the
131     *  delimiter, so if we aren't finished, there is something to consume.
132     */
133 
134     public char consume() throws LexerException, ReadException
135     {
136         if( !finished && source != null )
137         {
138             char c = data.charAt( ++consumed );
139             if( delimited && la(1) == CharStream.EOS )
140             {
141                 finish();
142             }
143 
144             return c;
145         }
146 
147         return CharStream.EOS;
148     }
149 
150 
151 
152    /***
153     *  Reads the next line from the underlying stream.  If delimited, checks for
154     *  the marker.  We don't update finished here, though, as that would prevent
155     *  any buffered data from being read.
156     */
157 
158     protected void refill() throws LexerException, ReadException
159     {
160         if( !finished && source != null && !last )
161         {
162             StringBuffer read = new StringBuffer();
163 
164             //
165             // Read any residual data into the buffer.
166 
167             for( int i = consumed + 1; i < data.length(); i++ )
168             {
169                 read.append( data.charAt(i) );
170             }
171 
172 
173             //
174             // Read line ends until we have some non-blank lines to read.
175             // Note that we have to be careful with the line ends, as the
176             // end of one line belongs to the next, when it comes to discards
177             // due to marker identification!
178 
179             char c;
180             StringBuffer raw = new StringBuffer();
181             while( (c = source.la()) == '\n' || c == '\r' )
182             {
183                 if( raw.length() > 0 )
184                 {
185                     read.append( raw );
186                     raw.setLength( 0 );
187                 }
188 
189                 if( !((LexerBase)source).readEOL(raw) ) // bad cast, but for now...
190                 {
191                     throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
192                 }
193             }
194 
195 
196             //
197             // Read the next line, checking for the end marker, if delimited.
198             // We leave the EOL for the next read...
199 
200             boolean use = true;
201 
202             if( !isDelimited() )
203             {
204                 while( (c = source.la()) != '\n' && c != '\r' && c != CharStream.EOS )
205                 {
206                     raw.append( source.consume() );
207                 }
208             }
209 
210             else
211             {
212                 //
213                 // If the marker started with the "-" modifier, whitespace is
214                 // allowed before the marker.  The marker can be followed on
215                 // the same line by code, so if it matches the beginning
216                 // pattern, we stop after reading the last character.
217 
218                 if( !onmargin )
219                 {
220                     while( (c = source.la()) == ' ' || c == '\t' )
221                     {
222                         raw.append( source.consume() );
223                     }
224                 }
225 
226                 int testing = 0, length = marker.length();
227                 boolean found = false, lost = false;
228                 while( (c = source.la()) != '\n' && c != '\r' && c != CharStream.EOS && !found )
229                 {
230                     if( !lost && c == marker.charAt(testing) )
231                     {
232                         testing++;
233                         if( testing == length )
234                         {
235                             found = true;
236                         }
237                     }
238                     else
239                     {
240                         lost = true;
241                     }
242 
243                     raw.append( source.consume() );
244                 }
245 
246                 if( found )
247                 {
248                     use  = false;
249                 }
250             }
251 
252 
253             //
254             // It's either our delimiter or a line of data.
255 
256             if( use )
257             {
258                 read.append( raw );
259             }
260             else
261             {
262                 last = true;
263             }
264 
265 
266             data = read.toString();
267             consumed = -1;
268         }
269     }
270 
271 }