001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.classloader;
014
015import static org.jikesrvm.classloader.ClassLoaderConstants.BooleanTypeCode;
016import static org.jikesrvm.classloader.ClassLoaderConstants.ByteTypeCode;
017import static org.jikesrvm.classloader.ClassLoaderConstants.CharTypeCode;
018import static org.jikesrvm.classloader.ClassLoaderConstants.ClassTypeCode;
019import static org.jikesrvm.classloader.ClassLoaderConstants.DoubleTypeCode;
020import static org.jikesrvm.classloader.ClassLoaderConstants.FloatTypeCode;
021import static org.jikesrvm.classloader.ClassLoaderConstants.IntTypeCode;
022import static org.jikesrvm.classloader.ClassLoaderConstants.LongTypeCode;
023import static org.jikesrvm.classloader.ClassLoaderConstants.ShortTypeCode;
024import static org.jikesrvm.classloader.ClassLoaderConstants.VoidTypeCode;
025
026import org.jikesrvm.VM;
027import org.vmmagic.pragma.Interruptible;
028import org.vmmagic.pragma.Pure;
029
030/** <p>A Java class for parsing type descriptors and class names.  The class
031 is <code>abstract</code> to eliminate the temptation to instantiate it,
032 since it contains only static methods.
033
034 <p>There are five similar kinds of descriptors and names that we have to
035 deal with.  We don't have methods for parsing all of them.
036
037 <p> In this documentation, I will refer to <i>The Java Native Interface
038 Programmer's Guide and Specification</i> as the <i>JNI Guide</i>.
039
040 <p> Some of the types I discuss below are described in 12.3 of the JNI
041 Guide.
042
043 <dl>
044 <dt>Fully-qualified class names and fully-qualified interface names</dt>
045 <dd>These are the dot-separated names, such as "java.lang.String" or
046 "java.util.Map".
 <p>We can validate these with the static method
 {@link #isJavaClassName(String)} in this class.
049 </dd>
050
 <dt>JNI Class Descriptors (including array classes).<br> These include the
 internal form of fully-qualified class names
 and the internal form of fully-qualified interface names</dt>
 <dd>&ldquo;It can be derived from a fully qualified class or interface
 name as defined in The Java Language Specification by substituting the "."
 character with the "/" character.  For example, the JNI class descriptor
 for <code>java.lang.String</code> is "<code>java/lang/String</code>".&rdquo;
 Array classes are formed using the "[" character followed by the field
 descriptor of the element type.  The class descriptor for "int[]" is "[I".
 <p>We do not have an interface for parsing these right now.
 </dd>
063
064 <dt>Field Descriptors</dt>
065 <dd>Described in 12.3.3 of the JNI Guide.
066 Examples:
067 <ul>
068 <li>"Z" for boolean<br>
069 <li> "B" for byte
070 <li>"D" for double
071 <li>"Ljava/lang/String;" for java.lang.String
072 <li> "[I" for int[].
073 </ul>
074 </dd>
075
076 <dt>Method Descriptors</dt>
077 <dd>Described in 12.3.4 of the JNI guide.  To quote:
078
079 <blockquote>
080
081 Method Descriptors are formed by placing the field descriptors of all
082 argument types in a pair of parentheses, and following that by the
083 field descriptor of the return type.  There are no spaces or other
084 separator characters between the argument types.  "<code>V</code>" is
085 used to denote the <code>void</code> method return type.  Constructors
086 use "<code>V</code>" as their return type and use "<code>&lt;init&gt;</code>"
087 as their name.
088 </blockquote>
089
 Example: The method with signature "<code>byte f(int i, String s)</code>"
 has the Method Descriptor "<code>(ILjava/lang/String;)B</code>".
 </dd>
092
093 <dt>TypeReference names</dt>
094 <dd>Inside Jikes RVM, we use the TypeReference class to represent the
095 reference in some class file to some type (class, interface, primitive, or
096 array).  We also use them to represent Void ({@code TypeReference.Void}).
097 TypeReference names are just field descriptors plus "V".</dd>
098 </dl>
099
100 */
101
102public abstract class TypeDescriptorParsing {
103  /** Is the string <code>s</code> a legal name for a Java class or interface?
104   * This will take either fully-qualified names or names that are not fully
105   * qualified.
106   * <p>
107   * @param s The string to check for whether it's a valid name for a Java
108   *          class.  This is a string of the form, for example:
109   * "<code>java.lang.String</code>"
110   * @return <code>true</code> if <code>s</code> is valid, <code>false</code>
111   * otherwise.
112   *
113   * <p>
114
115   * <small><b>Implementation Question for wiser heads than mine:</b>
116   * Would it be more efficient for me to convert this to a <code>char</code>
117   * array?
118   * That's the way the example in <i>The Java Class Libraries</i> for
119   * <code>Character.isJavaIdentifier<i>*</i>()</code> is written.  Or is the
120   * <code>String.charAt()</code> method inexpensive?</small> */
121  @Interruptible
122  @Pure
123  public static boolean isJavaClassName(String s) {
124    boolean identStart = true;  // pretend we just saw a .
125    for (int i = 0; i < s.length(); ++i) {
126      char c = s.charAt(i);
127      if (identStart) {
128        if (!isVMIdentifierStart(c)) {
129          return false;         // failure to match identifier start.
130        }
131        identStart = false;     // on to the next one.
132        continue;
133      }
134      if (c == '.' || c == '/') {
135        identStart = true;
136        continue;
137      }
138      /* We have a character that is not the first one of a VM identifier */
139      if (!isVMIdentifierPart(c)) {
140        return false;
141      }
142      /* And on we go around the loop */
143    }
144    // Must not finish by needing the start of another identifier.
145    return !identStart;
146  }
147
148  /**
149   * Java 1.5 relaxes the historical convention that class file identifiers
150   * (i.e. class, field, and method names) must be drawn from the characters
151   * specified by JLS identifiers (i.e. implemented by
152   * java.lang.Character.isJavaIdentifierStart()).<p>
153   *
154   * Given that, parsing rules for internal and external VM identifier
155   * dictates that identifiers may not contain the following
156   * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
157   * or <code>'/'</code> }. Method identifiers, excluding <code>&lt;init&gt;</code>
158   * and <code>&lt;clinit&gt;</code>, are further constrained to not include
159   * the characters <code>'&lt;'</code> or <code>'&gt;'</code>.<p>
160   *
161   * To avoid word boundary ambiguity, identifiers are presumed to not
162   * begin with a space character. Although not stated explicitly, this
163   * remains convention.<p>
164   *
165   * This method evaluates whether <code>c</code> is compatible as the starting
166   * character for a VM identifier.
167   *
168   * @param c      character to evaluate for VM identifier compatibility
169   * @return boolean  {@code true} iff <code>c</code> represents a valid VM identifier starting character
170   */
171  @Pure
172  public static boolean isVMIdentifierStart(char c) {
173    return ((!Character.isWhitespace(c)) && isVMIdentifierPart(c));
174  }
175
176  /**
177   * Java 1.5 relaxes the historical convention that class file identifiers
178   * (i.e. class, field, and method names) must be drawn from the characters
179   * specified by JLS identifiers (i.e. implemented by
180   * java.lang.Character.isJavaIdentifierPart()).<p>
181   *
182   * Given that, parsing rules for internal and external VM identifier
183   * dictates that identifiers may not contain the following
184   * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
185   * or <code>'/'</code> }. Method identifiers, excluding <code>&lt;init&gt;</code>
186   * and <code>&lt;clinit&gt;</code>, are further constrained to not include
187   * the characters <code>'&lt;'</code> or <code>'&gt;'</code>.<p>
188   *
189   * This method evaluates whether <code>c</code> is compatible as a non-starting
190   * character for a VM identifier.
191   *
192   * @param c      character to evaluate for VM identifier compatibility
193   * @return boolean  {@code true} iff <code>c</code> represents a valid VM identifier non-starting character
194   */
195  @Pure
196  public static boolean isVMIdentifierPart(char c) {
197    return ((c != '.') && (c != ';') && (c != '[') && (c != '/'));
198  }
199
200  /**
201   * Is this the internal form of a Java class name?  (the one with the "/"
202   * instead of the "." separating components?)
203   *
204   * @param val a string as a char array
205   * @param first the start index of the string to be checked
206   * @param last the last index of the string to be checked
207   * @return {@code true} if the given char array represents an internal java class name
208   */
209  public static boolean isJavaClassNameInternalForm(char[] val, int first, int last) {
210    if (val[first++] != ClassTypeCode) {
211      // the L
212      return false;
213    }
214    if (val[last--] != ';') {
215      // malformed("a class ('L') must end in a ';'");
216      return false;
217    }
218
219    boolean identStart = true;  // pretend we just saw a separator
220    for (int i = first; i <= last; ++i) {
221      char c = val[i];
222      if (identStart) {
223        if (!isVMIdentifierStart(c)) {
224          return false;         // failure to match identifier start.
225        }
226        identStart = false;     // on to the next one.
227        continue;
228      }
229      if (c == '/') {
230        identStart = true;
231        continue;
232      }
233      /* We have a character that is not the first one of a VM identifier */
234      if (!isVMIdentifierPart(c)) {
235        return false;
236      }
237
238      /* And on we go around the loop */
239    }
240    // Must not finish by needing the start of another identifier.
241    return !identStart;
242  }
243
244  @Pure
245  public static boolean isValidTypeDescriptor(String s) {
246    try {
247      validateAsTypeDescriptor(s);
248      return true;
249    } catch (IllegalArgumentException iae) {
250      return false;
251    }
252  }
253
254  @Pure
255  public static boolean isValidTypeDescriptor(Atom a) {
256    try {
257      validateAsTypeDescriptor(a);
258      return true;
259    } catch (IllegalArgumentException iae) {
260      return false;
261    }
262  }
263
264  @Interruptible
265  @Pure
266  public static void validateAsTypeDescriptor(Atom a) throws IllegalArgumentException {
267    try {
268      // Atoms are always utf-8.
269      a.toUnicodeString();
270    } catch (java.io.UTFDataFormatException udfe) {
271      IllegalArgumentException iae =
272          new IllegalArgumentException(
273              "The atom in question does not represent a valid UTF8 string, so it's not a type descriptor.");
274      iae.initCause(udfe);
275      throw iae;
276    }
277  }
278
279  /**
280   * Validates that the given String is a valid type descriptor.
281   * @param s string to check
282   * @throws IllegalArgumentException if the string is not a valid type descriptor
283   */
284  @Interruptible
285  @Pure
286  public static void validateAsTypeDescriptor(String s) throws IllegalArgumentException {
287    char[] val = s.toCharArray();
288
289    int i = 0;
290    if (val.length == 0) {
291      malformed("is the empty string", s);
292    }
293
294    // array dimensions precede the rest.
295    while (val[i] == '[') {
296      if (++i >= val.length) {
297        malformed("has just '[' chars", s);
298      }
299    }
300    if (VM.VerifyAssertions) {
301      // logically impossible:
302      VM._assert(i < val.length);
303    }
304
305    if (val[i] == VoidTypeCode && i != 0) {
306      malformed("can't have an array of void", s);
307    }
308
309    if (isJavaPrimitive(val[i])) {
310      // A primitive should be just 1 char long
311      if (i != val.length - 1) {
312        // if this isn't the last character, scream.
313        malformed("nothing should follow the primitive typecode '" + Character.toString(val[i]) + "'", s);
314      }
315      return;                   // otherwise all is well.
316    }
317
318    // logically impossible:
319    if (VM.VerifyAssertions) {
320      VM._assert(val[i] != '[' && !isJavaPrimitive(val[i]));
321    }
322    // All that's left is ClassTypeCode
323    if (val[i] != ClassTypeCode) {
324      malformed("unknown character '" + Character.toString(val[i]) + "'", s);
325    }
326    if (!isJavaClassNameInternalForm(val, i, val.length - 1)) {
327      malformed("doesn't end with a valid class name in internal form", s);
328    }
329  }
330
331  @Pure
332  private static boolean isJavaPrimitive(char c) {
333    byte b = (byte) c;
334    if (c != (char) b) {
335      return false;
336    }
337    return isJavaPrimitive(b);
338  }
339
340  @Pure
341  private static boolean isJavaPrimitive(byte b) {
342    switch (b) {
343      case VoidTypeCode:
344      case BooleanTypeCode:
345      case ByteTypeCode:
346      case ShortTypeCode:
347      case CharTypeCode:
348      case IntTypeCode:
349      case LongTypeCode:
350      case FloatTypeCode:
351      case DoubleTypeCode:
352        return true;
353      default:
354        return false;
355    }
356  }
357
358  private static void malformed(String msg, String typeName) throws IllegalArgumentException {
359    throw new IllegalArgumentException("Malformed type name" +
360                                       ((msg == null) ? "" : ": " + msg) +
361                                       ": \"" +
362                                       typeName +
363                                       "\"");
364  }
365
366  // These are test routines you can use to do unit testing on the methods in
367  // this class::
368  //  // Test isJavaClassName()
369//   public static void main(String[] args) {
370//     for (int i = 0; i < args.length; ++i) {
371//       System.out.println(args[i] + " is "
372//                       + (TypeDescriptorParsing.isJavaClassName(args[i]) ? "" : "NOT " ) + "a valid Java class name.");
373//     }
374//   }
375
376//   // Test validateAsTypeDescriptor()
377//   public static void main(String[] args) {
378//     for (int i = 0; i < args.length; ++i) {
379//       System.out.println("Validating " + args[i] + " as a type descriptor.");
380//       validateAsTypeDescriptor(args[i]);
381
382//     }
383//   }
384
385}