001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.classloader;
014    
015    import org.jikesrvm.VM;
016    import org.vmmagic.pragma.Interruptible;
017    import org.vmmagic.pragma.Pure;
018    
019    /** <p>A Java class for parsing type descriptors and class names.  The class
020     is <code>abstract</code> to eliminate the temptation to instantiate it,
021     since it contains only static methods.
022    
023     <p>There are five similar kinds of descriptors and names that we have to
024     deal with.  We don't have methods for parsing all of them.
025    
026     <p> In this documentation, I will refer to <i>The Java Native Interface
027     Programmer's Guide and Specification</i> as the <i>JNI Guide</i>.
028    
029     <p> Some of the types I discuss below are described in 12.3 of the JNI
030     Guide.
031    
032     <dl>
033     <dt>Fully-qualified class names and fully-qualified interface names</dt>
034     <dd>These are the dot-separated names, such as "java.lang.String" or
035     "java.util.Map".
 <p>We can validate these with the static method
 {@link #isJavaClassName(String)} in this class.
038     </dd>
039    
 <dt>JNI Class Descriptor (including array classes),<br> These include the
 internal form of fully-qualified class names
 and internal form of fully-qualified interface names</dt>
 <dd>&ldquo;It can be derived from a fully qualified class or interface
 name as defined in The Java Language Specification by substituting the "."
 character with the "/" character.  For example, the JNI class descriptor
 for <code>java.lang.String</code> is "<code>java/lang/String</code>".&rdquo;
 Array classes are formed using the "[" character followed by the field
 descriptor of the element type.  The class descriptor for "int[]" is "[I".
 <p>We do not have an interface for parsing these right now.
 </dd>
052    
053     <dt>Field Descriptors</dt>
054     <dd>Described in 12.3.3 of the JNI Guide.
055     Examples:
056     <ul>
 <li>"Z" for boolean
 <li>"B" for byte
 <li>"D" for double
 <li>"Ljava/lang/String;" for java.lang.String
 <li>"[I" for int[].
062     </ul>
063     </dd>
064    
065     <dt>Method Descriptors</dt>
066     <dd>Described in 12.3.4 of the JNI guide.  To quote:
067    
068     <blockquote>
069    
070     Method Descriptors are formed by placing the field descriptors of all
071     argument types in a pair of parentheses, and following that by the
072     field descriptor of the return type.  There are no spaces or other
073     separator characters between the argument types.  "<code>V</code>" is
074     used to denote the <code>void</code> method return type.  Constructors
 use "<code>V</code>" as their return type and use "<code>&lt;init&gt;</code>"
076     as their name.
077     </blockquote>
078    
 Example: The method with signature "<code>byte f(int i, String s)</code>"
 has the Method Descriptor "<code>(ILjava/lang/String;)B</code>"
 </dd>

 <dt>TypeReference names</dt>
 <dd>Inside Jikes RVM, we use the TypeReference class to represent the
 reference in some class file to some type (class, interface, primitive, or
 array).  We also use them to represent Void (TypeReference.Void).
 TypeReference names are just field descriptors plus "V".</dd>
 </dl>
088    
089     */
090    
091    public abstract class TypeDescriptorParsing implements ClassLoaderConstants {
092      /** Is the string <code>s</code> a legal name for a Java class or interface?
093       * This will take either fully-qualified names or names that are not fully
094       * qualified.
095       * <p>
096       * @param s The string to check for whether it's a valid name for a Java
097       *          class.  This is a string of the form, for example:
098       * "<code>java.lang.String</code>"
099       * @return <code>true</code> if <code>s</code> is valid, <code>false</code>
100       * otherwise.
101       *
102       * <p>
103    
104       * <small><b>Implementation Question for wiser heads than mine:</b>
105       * Would it be more efficient for me to convert this to a <code>char</code>
106       * array?
107       * That's the way the example in <i>The Java Class Libraries</i> for
108       * <code>Character.isJavaIdentifier<i>*</i>()</code> is written.  Or is the
109       * <code>String.charAt()</code> method inexpensive?</small> */
110      @Interruptible
111      @Pure
112      public static boolean isJavaClassName(String s) {
113        boolean identStart = true;  // pretend we just saw a .
114        for (int i = 0; i < s.length(); ++i) {
115          char c = s.charAt(i);
116          if (identStart) {
117            if (!isVMIdentifierStart(c)) {
118              return false;         // failure to match identifier start.
119            }
120            identStart = false;     // on to the next one.
121            continue;
122          }
123          if (c == '.' || c == '/') {
124            identStart = true;
125            continue;
126          }
127          /* We have a character that is not the first one of a VM identifier */
128          if (!isVMIdentifierPart(c)) {
129            return false;
130          }
131          /* And on we go around the loop */
132        }
133        // Must not finish by needing the start of another identifier.
134        return !identStart;
135      }
136    
137      /**
138       * Java 1.5 relaxes the historical convention that class file identifiers
139       * (i.e. class, field, and method names) must be drawn from the characters
140       * specified by JLS identifiers (i.e. implemented by
141       * java.lang.Character.isJavaIdentifierStart()).<p>
142       *
143       * Given that, parsing rules for internal and external VM identifier
144       * dictates that identifiers may not contain the following
145       * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
146       * or <code>'/'</code> }. Method identifiers, excluding <code>&lt;init&gt;</code>
147       * and <code>&lt;clinit&gt;</code>, are further constrained to not include
148       * the characters <code>'&lt;'</code> or <code>'&gt;'</code>.<p>
149       *
150       * To avoid word boundary ambiguity, identifiers are presumed to not
151       * begin with a space character. Although not stated explicitly, this
152       * remains convention.<p>
153       *
154       * This method evaluates whether <code>c</code> is compatible as the starting
155       * character for a VM identifier.
156       *
157       * @param c      character to evaluate for VM identifier compatibility
158       * @return boolean  true iff <code>c</code> represents a valid VM identifier starting character
159       */
160      @Pure
161      public static boolean isVMIdentifierStart(char c) {
162        return ((!Character.isWhitespace(c)) && isVMIdentifierPart(c));
163      }
164    
165      /**
166       * Java 1.5 relaxes the historical convention that class file identifiers
167       * (i.e. class, field, and method names) must be drawn from the characters
168       * specified by JLS identifiers (i.e. implemented by
169       * java.lang.Character.isJavaIdentifierPart()).<p>
170       *
171       * Given that, parsing rules for internal and external VM identifier
172       * dictates that identifiers may not contain the following
173       * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
174       * or <code>'/'</code> }. Method identifiers, excluding <code>&lt;init&gt;</code>
175       * and <code>&lt;clinit&gt;</code>, are further constrained to not include
176       * the characters <code>'&lt;'</code> or <code>'&gt;'</code>.<p>
177       *
178       * This method evaluates whether <code>c</code> is compatible as a non-starting
179       * character for a VM identifier.
180       *
181       * @param c      character to evaluate for VM identifier compatibility
182       * @return boolean  true iff <code>c</code> represents a valid VM identifier non-starting character
183       */
184      @Pure
185      public static boolean isVMIdentifierPart(char c) {
186        return ((c != '.') && (c != ';') && (c != '[') && (c != '/'));
187      }
188    
189      /**
190       * Is this the internal form of a Java class name?  (the one with the "/"
191       * instead of the "." separating components?)
192       * Takes a character array (i.e., an exploded string) and the indices of the
193       * first and last characters of the array that are to be checked.
194       */
195      public static boolean isJavaClassNameInternalForm(char[] val, int first, int last) {
196        if (val[first++] != ClassTypeCode) {
197          // the L
198          return false;
199        }
200        if (val[last--] != ';') {
201          // malformed("a class ('L') must end in a ';'");
202          return false;
203        }
204    
205        boolean identStart = true;  // pretend we just saw a separator
206        for (int i = first; i <= last; ++i) {
207          char c = val[i];
208          if (identStart) {
209            if (!isVMIdentifierStart(c)) {
210              return false;         // failure to match identifier start.
211            }
212            identStart = false;     // on to the next one.
213            continue;
214          }
215          if (c == '/') {
216            identStart = true;
217            continue;
218          }
219          /* We have a character that is not the first one of a VM identifier */
220          if (!isVMIdentifierPart(c)) {
221            return false;
222          }
223    
224          /* And on we go around the loop */
225        }
226        // Must not finish by needing the start of another identifier.
227        return !identStart;
228      }
229    
230      @Pure
231      public static boolean isValidTypeDescriptor(String s) {
232        try {
233          validateAsTypeDescriptor(s);
234          return true;
235        } catch (IllegalArgumentException iae) {
236          return false;
237        }
238      }
239    
240      @Pure
241      public static boolean isValidTypeDescriptor(Atom a) {
242        try {
243          validateAsTypeDescriptor(a);
244          return true;
245        } catch (IllegalArgumentException iae) {
246          return false;
247        }
248      }
249    
250      @Interruptible
251      @Pure
252      public static void validateAsTypeDescriptor(Atom a) throws IllegalArgumentException {
253        try {
254          // Atoms are always utf-8.
255          a.toUnicodeString();
256        } catch (java.io.UTFDataFormatException udfe) {
257          IllegalArgumentException iae =
258              new IllegalArgumentException(
259                  "The atom in question does not represent a valid UTF8 string, so it's not a type descriptor.");
260          iae.initCause(udfe);
261          throw iae;
262        }
263      }
264    
265      /** Validate that the String @param s is a valid type descriptor.
266       @throws IllegalArgumentException if it isn't.
267       */
268      @Interruptible
269      @Pure
270      public static void validateAsTypeDescriptor(String s) throws IllegalArgumentException {
271        char[] val = s.toCharArray();
272    
273        int i = 0;
274        if (val.length == 0) {
275          malformed("is the empty string", s);
276        }
277    
278        // array dimensions precede the rest.
279        while (val[i] == '[') {
280          if (++i >= val.length) {
281            malformed("has just '[' chars", s);
282          }
283        }
284        if (VM.VerifyAssertions) {
285          // logically impossible:
286          VM._assert(i < val.length);
287        }
288    
289        if (val[i] == VoidTypeCode && i != 0) {
290          malformed("can't have an array of void", s);
291        }
292    
293        if (isJavaPrimitive(val[i])) {
294          // A primitive should be just 1 char long
295          if (i != val.length - 1) {
296            // if this isn't the last character, scream.
297            malformed("nothing should follow the primitive typecode '" + Character.toString(val[i]) + "'", s);
298          }
299          return;                   // otherwise all is well.
300        }
301    
302        // logically impossible:
303        if (VM.VerifyAssertions) {
304          VM._assert(val[i] != '[' && !isJavaPrimitive(val[i]));
305        }
306        // All that's left is ClassTypeCode
307        if (val[i] != ClassTypeCode) {
308          malformed("unknown character '" + Character.toString(val[i]) + "'", s);
309        }
310        if (!isJavaClassNameInternalForm(val, i, val.length - 1)) {
311          malformed("doesn't end with a valid class name in internal form", s);
312        }
313      }
314    
315      @Pure
316      private static boolean isJavaPrimitive(char c) {
317        byte b = (byte) c;
318        if (c != (char) b) {
319          return false;
320        }
321        return isJavaPrimitive(b);
322      }
323    
324      @Pure
325      private static boolean isJavaPrimitive(byte b) {
326        switch (b) {
327          case VoidTypeCode:
328          case BooleanTypeCode:
329          case ByteTypeCode:
330          case ShortTypeCode:
331          case CharTypeCode:
332          case IntTypeCode:
333          case LongTypeCode:
334          case FloatTypeCode:
335          case DoubleTypeCode:
336            return true;
337          default:
338            return false;
339        }
340      }
341    
342      /** Gripe and throw <code>IllegalArgumentException</code> if we get a
343       * malformed type name. */
344      private static void malformed(String msg, String typeName) throws IllegalArgumentException {
345        throw new IllegalArgumentException("Malformed type name" +
346                                           ((msg == null) ? "" : ": " + msg) +
347                                           ": \"" +
348                                           typeName +
349                                           "\"");
350      }
351    
  // These are test routines you can use to do unit testing on the methods in
  // this class:
  //  // Test isJavaClassName()
355    //   public static void main(String[] args) {
356    //     for (int i = 0; i < args.length; ++i) {
357    //       System.out.println(args[i] + " is "
358    //                       + (TypeDescriptorParsing.isJavaClassName(args[i]) ? "" : "NOT " ) + "a valid Java class name.");
359    //     }
360    //   }
361    
362    //   // Test validateAsTypeDescriptor()
363    //   public static void main(String[] args) {
364    //     for (int i = 0; i < args.length; ++i) {
365    //       System.out.println("Validating " + args[i] + " as a type descriptor.");
366    //       validateAsTypeDescriptor(args[i]);
367    
368    //     }
369    //   }
370    
371    }