View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.UnsupportedEncodingException;
18  
19  
20  
21  /* ------------------------------------------------------------ */
22  /** URI Holder.
23   * This class assists with the decoding and encoding of HTTP URIs.
24   * It differs from the java.net.URL class as it does not provide
25   * communications ability, but it does assist with query string
26   * formatting.
27   * <P>UTF-8 encoding is used by default for % encoded characters. This
28   * may be overridden with the org.mortbay.util.URI.charset system property.
29   * @see UrlEncoded
30   * @author Greg Wilkins (gregw)
31   */
32  public class URIUtil
33      implements Cloneable
34  {
35      public static final String SLASH="/";
36      public static final String HTTP="http";
37      public static final String HTTP_COLON="http:";
38      public static final String HTTPS="https";
39      public static final String HTTPS_COLON="https:";
40  
41      // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
42      public static final String __CHARSET=System.getProperty("org.mortbay.util.URI.charset",StringUtil.__UTF8);
43      
44      private URIUtil()
45      {}
46      
47      /* ------------------------------------------------------------ */
48      /** Encode a URI path.
49       * This is the same encoding offered by URLEncoder, except that
50       * the '/' character is not encoded.
51       * @param path The path the encode
52       * @return The encoded path
53       */
54      public static String encodePath(String path)
55      {
56          if (path==null || path.length()==0)
57              return path;
58          
59          StringBuffer buf = encodePath(null,path);
60          return buf==null?path:buf.toString();
61      }
62          
63      /* ------------------------------------------------------------ */
64      /** Encode a URI path.
65       * @param path The path the encode
66       * @param buf StringBuffer to encode path into (or null)
67       * @return The StringBuffer or null if no substitutions required.
68       */
69      public static StringBuffer encodePath(StringBuffer buf, String path)
70      {
71          if (buf==null)
72          {
73          loop:
74              for (int i=0;i<path.length();i++)
75              {
76                  char c=path.charAt(i);
77                  switch(c)
78                  {
79                    case '%':
80                    case '?':
81                    case ';':
82                    case '#':
83                    case ' ':
84                        buf=new StringBuffer(path.length()<<1);
85                        break loop;
86                  }
87              }
88              if (buf==null)
89                  return null;
90          }
91          
92          synchronized(buf)
93          {
94              for (int i=0;i<path.length();i++)
95              {
96                  char c=path.charAt(i);       
97                  switch(c)
98                  {
99                    case '%':
100                       buf.append("%25");
101                       continue;
102                   case '?':
103                       buf.append("%3F");
104                       continue;
105                   case ';':
106                       buf.append("%3B");
107                       continue;
108                   case '#':
109                       buf.append("%23");
110                       continue;
111                   case ' ':
112                       buf.append("%20");
113                       continue;
114                   default:
115                       buf.append(c);
116                       continue;
117                 }
118             }
119         }
120 
121         return buf;
122     }
123     
124     /* ------------------------------------------------------------ */
125     /** Encode a URI path.
126      * @param path The path the encode
127      * @param buf StringBuffer to encode path into (or null)
128      * @param encode String of characters to encode. % is always encoded.
129      * @return The StringBuffer or null if no substitutions required.
130      */
131     public static StringBuffer encodeString(StringBuffer buf,
132                                             String path,
133                                             String encode)
134     {
135         if (buf==null)
136         {
137         loop:
138             for (int i=0;i<path.length();i++)
139             {
140                 char c=path.charAt(i);
141                 if (c=='%' || encode.indexOf(c)>=0)
142                 {    
143                     buf=new StringBuffer(path.length()<<1);
144                     break loop;
145                 }
146             }
147             if (buf==null)
148                 return null;
149         }
150         
151         synchronized(buf)
152         {
153             for (int i=0;i<path.length();i++)
154             {
155                 char c=path.charAt(i);
156                 if (c=='%' || encode.indexOf(c)>=0)
157                 {
158                     buf.append('%');
159                     StringUtil.append(buf,(byte)(0xff&c),16);
160                 }
161                 else
162                     buf.append(c);
163             }
164         }
165 
166         return buf;
167     }
168     
169     /* ------------------------------------------------------------ */
170     /* Decode a URI path.
171      * @param path The path the encode
172      * @param buf StringBuffer to encode path into
173      */
174     public static String decodePath(String path)
175     {
176         if (path==null)
177             return null;
178         char[] chars=null;
179         int n=0;
180         byte[] bytes=null;
181         int b=0;
182         
183         int len=path.length();
184         
185         for (int i=0;i<len;i++)
186         {
187             char c = path.charAt(i);
188 
189             if (c=='%' && (i+2)<len)
190             {
191                 if (chars==null)
192                 {
193                     chars=new char[len];
194                     bytes=new byte[len];
195                     path.getChars(0,i,chars,0);
196                 }
197                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
198                 i+=2;
199                 continue;
200             }
201             else if (bytes==null)
202             {
203                 n++;
204                 continue;
205             }
206             
207             if (b>0)
208             {
209                 String s;
210                 try
211                 {
212                     s=new String(bytes,0,b,__CHARSET);
213                 }
214                 catch (UnsupportedEncodingException e)
215                 {       
216                     s=new String(bytes,0,b);
217                 }
218                 s.getChars(0,s.length(),chars,n);
219                 n+=s.length();
220                 b=0;
221             }
222             
223             chars[n++]=c;
224         }
225 
226         if (chars==null)
227             return path;
228 
229         if (b>0)
230         {
231             String s;
232             try
233             {
234                 s=new String(bytes,0,b,__CHARSET);
235             }
236             catch (UnsupportedEncodingException e)
237             {       
238                 s=new String(bytes,0,b);
239             }
240             s.getChars(0,s.length(),chars,n);
241             n+=s.length();
242         }
243         
244         return new String(chars,0,n);
245     }
246     
247     /* ------------------------------------------------------------ */
248     /* Decode a URI path.
249      * @param path The path the encode
250      * @param buf StringBuffer to encode path into
251      */
252     public static String decodePath(byte[] buf, int offset, int length)
253     {
254         byte[] bytes=null;
255         int n=0;
256         
257         for (int i=0;i<length;i++)
258         {
259             byte b = buf[i + offset];
260             
261             if (b=='%' && (i+2)<length)
262             {
263                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
264                 i+=2;
265             }
266             else if (bytes==null)
267             {
268                 n++;
269                 continue;
270             }
271             
272             if (bytes==null)
273             {
274                 bytes=new byte[length];
275                 for (int j=0;j<n;j++)
276                     bytes[j]=buf[j + offset];
277             }
278             
279             bytes[n++]=b;
280         }
281 
282         if (bytes==null)
283             return StringUtil.toString(buf,offset,length,__CHARSET);
284         return StringUtil.toString(bytes,0,n,__CHARSET);
285     }
286 
287     
288     /* ------------------------------------------------------------ */
289     /** Add two URI path segments.
290      * Handles null and empty paths, path and query params (eg ?a=b or
291      * ;JSESSIONID=xxx) and avoids duplicate '/'
292      * @param p1 URI path segment 
293      * @param p2 URI path segment
294      * @return Legally combined path segments.
295      */
296     public static String addPaths(String p1, String p2)
297     {
298         if (p1==null || p1.length()==0)
299         {
300             if (p1!=null && p2==null)
301                 return p1;
302             return p2;
303         }
304         if (p2==null || p2.length()==0)
305             return p1;
306         
307         int split=p1.indexOf(';');
308         if (split<0)
309             split=p1.indexOf('?');
310         if (split==0)
311             return p2+p1;
312         if (split<0)
313             split=p1.length();
314 
315         StringBuffer buf = new StringBuffer(p1.length()+p2.length()+2);
316         buf.append(p1);
317         
318         if (buf.charAt(split-1)=='/')
319         {
320             if (p2.startsWith(URIUtil.SLASH))
321             {
322                 buf.deleteCharAt(split-1);
323                 buf.insert(split-1,p2);
324             }
325             else
326                 buf.insert(split,p2);
327         }
328         else
329         {
330             if (p2.startsWith(URIUtil.SLASH))
331                 buf.insert(split,p2);
332             else
333             {
334                 buf.insert(split,'/');
335                 buf.insert(split+1,p2);
336             }
337         }
338 
339         return buf.toString();
340     }
341     
342     /* ------------------------------------------------------------ */
343     /** Return the parent Path.
344      * Treat a URI like a directory path and return the parent directory.
345      */
346     public static String parentPath(String p)
347     {
348         if (p==null || URIUtil.SLASH.equals(p))
349             return null;
350         int slash=p.lastIndexOf('/',p.length()-2);
351         if (slash>=0)
352             return p.substring(0,slash+1);
353         return null;
354     }
355     
356     /* ------------------------------------------------------------ */
357     /** Strip parameters from a path.
358      * Return path upto any semicolon parameters.
359      */
360     public static String stripPath(String path)
361     {
362         if (path==null)
363             return null;
364         int semi=path.indexOf(';');
365         if (semi<0)
366             return path;
367         return path.substring(0,semi);
368     }
369     
370     /* ------------------------------------------------------------ */
371     /** Convert a path to a cananonical form.
372      * All instances of "." and ".." are factored out.  Null is returned
373      * if the path tries to .. above its root.
374      * @param path 
375      * @return path or null.
376      */
377     public static String canonicalPath(String path)
378     {
379         if (path==null || path.length()==0)
380             return path;
381 
382         int end=path.length();
383         int queryIdx=path.indexOf('?');
384         int start = path.lastIndexOf('/', (queryIdx > 0 ? queryIdx : end));
385 
386     search:
387         while (end>0)
388         {
389             switch(end-start)
390             {
391               case 2: // possible single dot
392                   if (path.charAt(start+1)!='.')
393                       break;
394                   break search;
395               case 3: // possible double dot
396                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
397                       break;
398                   break search;
399             }
400             
401             end=start;
402             start=path.lastIndexOf('/',end-1);
403         }
404 
405         // If we have checked the entire string
406         if (start>=end)
407             return path;
408         
409         StringBuffer buf = new StringBuffer(path);
410         int delStart=-1;
411         int delEnd=-1;
412         int skip=0;
413         
414         while (end>0)
415         {
416             switch(end-start)
417             {       
418               case 2: // possible single dot
419                   if (buf.charAt(start+1)!='.')
420                   {
421                       if (skip>0 && --skip==0)
422                       {   
423                           delStart=start>=0?start:0;
424                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
425                               delStart++;
426                       }
427                       break;
428                   }
429                   
430                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
431                       break;
432                   
433                   if(delEnd<0)
434                       delEnd=end;
435                   delStart=start;
436                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
437                   {
438                       delStart++;
439                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
440                           delEnd++;
441                       break;
442                   }
443                   if (end==buf.length())
444                       delStart++;
445                   
446                   end=start--;
447                   while (start>=0 && buf.charAt(start)!='/')
448                       start--;
449                   continue;
450                   
451               case 3: // possible double dot
452                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
453                   {
454                       if (skip>0 && --skip==0)
455                       {   delStart=start>=0?start:0;
456                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
457                               delStart++;
458                       }
459                       break;
460                   }
461                   
462                   delStart=start;
463                   if (delEnd<0)
464                       delEnd=end;
465 
466                   skip++;
467                   end=start--;
468                   while (start>=0 && buf.charAt(start)!='/')
469                       start--;
470                   continue;
471 
472               default:
473                   if (skip>0 && --skip==0)
474                   {
475                       delStart=start>=0?start:0;
476                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
477                           delStart++;
478                   }
479             }     
480             
481             // Do the delete
482             if (skip<=0 && delStart>=0 && delStart>=0)
483             {  
484                 buf.delete(delStart,delEnd);
485                 delStart=delEnd=-1;
486                 if (skip>0)
487                     delEnd=end;
488             }
489             
490             end=start--;
491             while (start>=0 && buf.charAt(start)!='/')
492                 start--;
493         }      
494 
495         // Too many ..
496         if (skip>0)
497             return null;
498         
499         // Do the delete
500         if (delEnd>=0)
501             buf.delete(delStart,delEnd);
502 
503         return buf.toString();
504     }
505 
506     /* ------------------------------------------------------------ */
507     /** Convert a path to a compact form.
508      * All instances of "//" and "///" etc. are factored out to single "/" 
509      * @param path 
510      * @return path
511      */
512     public static String compactPath(String path)
513     {
514         if (path==null || path.length()==0)
515             return path;
516 
517         int state=0;
518         int end=path.length();
519         int i=0;
520         
521         loop:
522         while (i<end)
523         {
524             char c=path.charAt(i);
525             switch(c)
526             {
527                 case '?':
528                     return path;
529                 case '/':
530                     state++;
531                     if (state==2)
532                         break loop;
533                     break;
534                 default:
535                     state=0;
536             }
537             i++;
538         }
539         
540         if (state<2)
541             return path;
542         
543         StringBuffer buf = new StringBuffer(path.length());
544         char[] chars = path.toCharArray();
545         buf.append(chars,0,i);
546         
547         loop2:
548         while (i<end)
549         {
550             char c=path.charAt(i);
551             switch(c)
552             {
553                 case '?':
554                     buf.append(chars,i,end-i);
555                     break loop2;
556                 case '/':
557                     if (state++==0)
558                         buf.append(c);
559                     break;
560                 default:
561                     state=0;
562                     buf.append(c);
563             }
564             i++;
565         }
566         
567         return buf.toString();
568     }
569 
570     /* ------------------------------------------------------------ */
571     /** 
572      * @param uri URI
573      * @return True if the uri has a scheme
574      */
575     public static boolean hasScheme(String uri)
576     {
577         for (int i=0;i<uri.length();i++)
578         {
579             char c=uri.charAt(i);
580             if (c==':')
581                 return true;
582             if (!(c>='a'&&c<='z' ||
583                   c>='A'&&c<='Z' ||
584                   (i>0 &&(c>='0'&&c<='9' ||
585                           c=='.' ||
586                           c=='+' ||
587                           c=='-'))
588                   ))
589                 break;
590         }
591         return false;
592     }
593     
594 }
595 
596 
597