001 /*
002 * This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 * This file is licensed to You under the Eclipse Public License (EPL);
005 * You may not use this file except in compliance with the License. You
006 * may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 * See the COPYRIGHT.txt file distributed with this work for information
011 * regarding copyright ownership.
012 */
013 package org.jikesrvm.classloader;
014
015 import org.jikesrvm.VM;
016 import org.vmmagic.pragma.Interruptible;
017 import org.vmmagic.pragma.Pure;
018
019 /** <p>A Java class for parsing type descriptors and class names. The class
020 is <code>abstract</code> to eliminate the temptation to instantiate it,
021 since it contains only static methods.
022
023 <p>There are five similar kinds of descriptors and names that we have to
024 deal with. We don't have methods for parsing all of them.
025
026 <p> In this documentation, I will refer to <i>The Java Native Interface
027 Programmer's Guide and Specification</i> as the <i>JNI Guide</i>.
028
029 <p> Some of the types I discuss below are described in 12.3 of the JNI
030 Guide.
031
032 <dl>
033 <dt>Fully-qualified class names and fully-qualified interface names</dt>
034 <dd>These are the dot-separated names, such as "java.lang.String" or
035 "java.util.Map".
036 <p>We can validate these with the static method
037 {@link #isJavaClassName(String)} in this class.
038 </dd>
039
040 <dt>JNI Class Descriptor (including array classes),<br> These include the
041 internal Form of fully-qualified class names
042 and internal form of fully-qualified interface names</dt>
043 <dd>These are described in the JNI Guide:
044 “It can be derived from a fully qualified class or interface
045 name as defined in The Java Language Specification by substituting the "."
046 character with the "/" character. For example, the JNI class descriptor
047 for <code>java.lang.String</code> is "<code>java/lang/String</code>".”
048 Array classes are formed using the "[" character followed by the field
049 descriptor of the element type. The class descriptor for "int[]" is "[I".
050 <P>We do not have an interface for parsing these right now.
051 </dd>
052
053 <dt>Field Descriptors</dt>
054 <dd>Described in 12.3.3 of the JNI Guide.
055 Examples:
056 <ul>
057 <li>"Z" for boolean<br>
058 <li> "B" for byte
059 <li>"D" for double
060 <li>"Ljava/lang/String;" for java.lang.String
061 <li> "[I" for int[].
062 </ul>
063 </dd>
064
065 <dt>Method Descriptors</dt>
066 <dd>Described in 12.3.4 of the JNI guide. To quote:
067
068 <blockquote>
069
070 Method Descriptors are formed by placing the field descriptors of all
071 argument types in a pair of parentheses, and following that by the
072 field descriptor of the return type. There are no spaces or other
073 separator characters between the argument types. "<code>V</code>" is
074 used to denote the <code>void</code> method return type. Constructors
075 use "<code>V</code>" as their return type and use "<code>&lt;init&gt;</code>"
076 as their name.
077 </blockquote>
078
079 Example: The method with signature "<code>byte f(int i, String s)</code>"
080 has the Method Descriptor "<code>(ILjava/lang/String;)B</code>".</dd>
081
082 <dt>TypeReference names</dt>
083 <dd>Inside Jikes RVM, we use the TypeReference class to represent the
084 reference in some class file to some type (class, interface, primitive, or
085 array). We also use them to represent Void (TypeReference.Void).
086 TypeReference names are just field descriptors plus "V".</dd>
087 </dl>
088
089 */
090
091 public abstract class TypeDescriptorParsing implements ClassLoaderConstants {
092 /** Is the string <code>s</code> a legal name for a Java class or interface?
093 * This will take either fully-qualified names or names that are not fully
094 * qualified.
095 * <p>
096 * @param s The string to check for whether it's a valid name for a Java
097 * class. This is a string of the form, for example:
098 * "<code>java.lang.String</code>"
099 * @return <code>true</code> if <code>s</code> is valid, <code>false</code>
100 * otherwise.
101 *
102 * <p>
103
104 * <small><b>Implementation Question for wiser heads than mine:</b>
105 * Would it be more efficient for me to convert this to a <code>char</code>
106 * array?
107 * That's the way the example in <i>The Java Class Libraries</i> for
108 * <code>Character.isJavaIdentifier<i>*</i>()</code> is written. Or is the
109 * <code>String.charAt()</code> method inexpensive?</small> */
110 @Interruptible
111 @Pure
112 public static boolean isJavaClassName(String s) {
113 boolean identStart = true; // pretend we just saw a .
114 for (int i = 0; i < s.length(); ++i) {
115 char c = s.charAt(i);
116 if (identStart) {
117 if (!isVMIdentifierStart(c)) {
118 return false; // failure to match identifier start.
119 }
120 identStart = false; // on to the next one.
121 continue;
122 }
123 if (c == '.' || c == '/') {
124 identStart = true;
125 continue;
126 }
127 /* We have a character that is not the first one of a VM identifier */
128 if (!isVMIdentifierPart(c)) {
129 return false;
130 }
131 /* And on we go around the loop */
132 }
133 // Must not finish by needing the start of another identifier.
134 return !identStart;
135 }
136
137 /**
138 * Java 1.5 relaxes the historical convention that class file identifiers
139 * (i.e. class, field, and method names) must be drawn from the characters
140 * specified by JLS identifiers (i.e. implemented by
141 * java.lang.Character.isJavaIdentifierStart()).<p>
142 *
143 * Given that, parsing rules for internal and external VM identifier
144 * dictates that identifiers may not contain the following
145 * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
146 * or <code>'/'</code> }. Method identifiers, excluding <code><init></code>
147 * and <code><clinit></code>, are further constrained to not include
148 * the characters <code>'<'</code> or <code>'>'</code>.<p>
149 *
150 * To avoid word boundary ambiguity, identifiers are presumed to not
151 * begin with a space character. Although not stated explicitly, this
152 * remains convention.<p>
153 *
154 * This method evaluates whether <code>c</code> is compatible as the starting
155 * character for a VM identifier.
156 *
157 * @param c character to evaluate for VM identifier compatibility
158 * @return boolean true iff <code>c</code> represents a valid VM identifier starting character
159 */
160 @Pure
161 public static boolean isVMIdentifierStart(char c) {
162 return ((!Character.isWhitespace(c)) && isVMIdentifierPart(c));
163 }
164
165 /**
166 * Java 1.5 relaxes the historical convention that class file identifiers
167 * (i.e. class, field, and method names) must be drawn from the characters
168 * specified by JLS identifiers (i.e. implemented by
169 * java.lang.Character.isJavaIdentifierPart()).<p>
170 *
171 * Given that, parsing rules for internal and external VM identifier
172 * dictates that identifiers may not contain the following
173 * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>,
174 * or <code>'/'</code> }. Method identifiers, excluding <code><init></code>
175 * and <code><clinit></code>, are further constrained to not include
176 * the characters <code>'<'</code> or <code>'>'</code>.<p>
177 *
178 * This method evaluates whether <code>c</code> is compatible as a non-starting
179 * character for a VM identifier.
180 *
181 * @param c character to evaluate for VM identifier compatibility
182 * @return boolean true iff <code>c</code> represents a valid VM identifier non-starting character
183 */
184 @Pure
185 public static boolean isVMIdentifierPart(char c) {
186 return ((c != '.') && (c != ';') && (c != '[') && (c != '/'));
187 }
188
189 /**
190 * Is this the internal form of a Java class name? (the one with the "/"
191 * instead of the "." separating components?)
192 * Takes a character array (i.e., an exploded string) and the indices of the
193 * first and last characters of the array that are to be checked.
194 */
195 public static boolean isJavaClassNameInternalForm(char[] val, int first, int last) {
196 if (val[first++] != ClassTypeCode) {
197 // the L
198 return false;
199 }
200 if (val[last--] != ';') {
201 // malformed("a class ('L') must end in a ';'");
202 return false;
203 }
204
205 boolean identStart = true; // pretend we just saw a separator
206 for (int i = first; i <= last; ++i) {
207 char c = val[i];
208 if (identStart) {
209 if (!isVMIdentifierStart(c)) {
210 return false; // failure to match identifier start.
211 }
212 identStart = false; // on to the next one.
213 continue;
214 }
215 if (c == '/') {
216 identStart = true;
217 continue;
218 }
219 /* We have a character that is not the first one of a VM identifier */
220 if (!isVMIdentifierPart(c)) {
221 return false;
222 }
223
224 /* And on we go around the loop */
225 }
226 // Must not finish by needing the start of another identifier.
227 return !identStart;
228 }
229
230 @Pure
231 public static boolean isValidTypeDescriptor(String s) {
232 try {
233 validateAsTypeDescriptor(s);
234 return true;
235 } catch (IllegalArgumentException iae) {
236 return false;
237 }
238 }
239
240 @Pure
241 public static boolean isValidTypeDescriptor(Atom a) {
242 try {
243 validateAsTypeDescriptor(a);
244 return true;
245 } catch (IllegalArgumentException iae) {
246 return false;
247 }
248 }
249
250 @Interruptible
251 @Pure
252 public static void validateAsTypeDescriptor(Atom a) throws IllegalArgumentException {
253 try {
254 // Atoms are always utf-8.
255 a.toUnicodeString();
256 } catch (java.io.UTFDataFormatException udfe) {
257 IllegalArgumentException iae =
258 new IllegalArgumentException(
259 "The atom in question does not represent a valid UTF8 string, so it's not a type descriptor.");
260 iae.initCause(udfe);
261 throw iae;
262 }
263 }
264
265 /** Validate that the String @param s is a valid type descriptor.
266 @throws IllegalArgumentException if it isn't.
267 */
268 @Interruptible
269 @Pure
270 public static void validateAsTypeDescriptor(String s) throws IllegalArgumentException {
271 char[] val = s.toCharArray();
272
273 int i = 0;
274 if (val.length == 0) {
275 malformed("is the empty string", s);
276 }
277
278 // array dimensions precede the rest.
279 while (val[i] == '[') {
280 if (++i >= val.length) {
281 malformed("has just '[' chars", s);
282 }
283 }
284 if (VM.VerifyAssertions) {
285 // logically impossible:
286 VM._assert(i < val.length);
287 }
288
289 if (val[i] == VoidTypeCode && i != 0) {
290 malformed("can't have an array of void", s);
291 }
292
293 if (isJavaPrimitive(val[i])) {
294 // A primitive should be just 1 char long
295 if (i != val.length - 1) {
296 // if this isn't the last character, scream.
297 malformed("nothing should follow the primitive typecode '" + Character.toString(val[i]) + "'", s);
298 }
299 return; // otherwise all is well.
300 }
301
302 // logically impossible:
303 if (VM.VerifyAssertions) {
304 VM._assert(val[i] != '[' && !isJavaPrimitive(val[i]));
305 }
306 // All that's left is ClassTypeCode
307 if (val[i] != ClassTypeCode) {
308 malformed("unknown character '" + Character.toString(val[i]) + "'", s);
309 }
310 if (!isJavaClassNameInternalForm(val, i, val.length - 1)) {
311 malformed("doesn't end with a valid class name in internal form", s);
312 }
313 }
314
315 @Pure
316 private static boolean isJavaPrimitive(char c) {
317 byte b = (byte) c;
318 if (c != (char) b) {
319 return false;
320 }
321 return isJavaPrimitive(b);
322 }
323
324 @Pure
325 private static boolean isJavaPrimitive(byte b) {
326 switch (b) {
327 case VoidTypeCode:
328 case BooleanTypeCode:
329 case ByteTypeCode:
330 case ShortTypeCode:
331 case CharTypeCode:
332 case IntTypeCode:
333 case LongTypeCode:
334 case FloatTypeCode:
335 case DoubleTypeCode:
336 return true;
337 default:
338 return false;
339 }
340 }
341
342 /** Gripe and throw <code>IllegalArgumentException</code> if we get a
343 * malformed type name. */
344 private static void malformed(String msg, String typeName) throws IllegalArgumentException {
345 throw new IllegalArgumentException("Malformed type name" +
346 ((msg == null) ? "" : ": " + msg) +
347 ": \"" +
348 typeName +
349 "\"");
350 }
351
352 // These are test routines you can use to do unit testing on the methods in
353 // this class::
354 // // Test isJavaClassName()
355 // public static void main(String[] args) {
356 // for (int i = 0; i < args.length; ++i) {
357 // System.out.println(args[i] + " is "
358 // + (TypeDescriptorParsing.isJavaClassName(args[i]) ? "" : "NOT " ) + "a valid Java class name.");
359 // }
360 // }
361
362 // // Test validateAsTypeDescriptor()
363 // public static void main(String[] args) {
364 // for (int i = 0; i < args.length; ++i) {
365 // System.out.println("Validating " + args[i] + " as a type descriptor.");
366 // validateAsTypeDescriptor(args[i]);
367
368 // }
369 // }
370
371 }