001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.classloader;
014
015import static org.jikesrvm.classloader.ClassLoaderConstants.ArrayTypeCode;
016import static org.jikesrvm.classloader.ClassLoaderConstants.BooleanTypeCode;
017import static org.jikesrvm.classloader.ClassLoaderConstants.ByteTypeCode;
018import static org.jikesrvm.classloader.ClassLoaderConstants.CharTypeCode;
019import static org.jikesrvm.classloader.ClassLoaderConstants.ClassTypeCode;
020import static org.jikesrvm.classloader.ClassLoaderConstants.DoubleTypeCode;
021import static org.jikesrvm.classloader.ClassLoaderConstants.FloatTypeCode;
022import static org.jikesrvm.classloader.ClassLoaderConstants.IntTypeCode;
023import static org.jikesrvm.classloader.ClassLoaderConstants.LongTypeCode;
024import static org.jikesrvm.classloader.ClassLoaderConstants.ShortTypeCode;
025import static org.jikesrvm.classloader.ClassLoaderConstants.VoidTypeCode;
026
027import java.io.UTFDataFormatException;
028import java.lang.ref.WeakReference;
029import java.util.WeakHashMap;
030
031import org.jikesrvm.VM;
032import org.jikesrvm.runtime.Statics;
033import org.jikesrvm.util.ImmutableEntryHashMapRVM;
034import org.jikesrvm.util.StringUtilities;
035import org.vmmagic.pragma.Pure;
036import org.vmmagic.pragma.Uninterruptible;
037import org.vmmagic.unboxed.Offset;
038
039/**
 * A utf8-encoded byte string.
 * <p>
 * Atoms are interned (canonicalized)
 * so they may be compared for equality using the "==" operator.
044 * <p>
045 * Atoms are used to represent names, descriptors, and string literals
046 * appearing in a class's constant pool.
047 * <p>
048 * There is almost always a zero-length Atom, since any class which
049 * contains statements like:
050 * <pre>
051 *          return "";
052 * </pre>
053 * will have one in its constant pool.
054 */
055public final class Atom {
056
057  /**
058   * Used to canonicalize Atoms: possibly non-canonical Atom =&gt; Atom
059   */
060  private static final ImmutableEntryHashMapRVM<Atom, Atom> dictionary =
061    new ImmutableEntryHashMapRVM<Atom, Atom>(12000);
062
063  /**
064   * 2^LOG_ROW_SIZE is the number of elements per row
065   */
066  private static final int LOG_ROW_SIZE = 10;
067  /**
068   * Mask to ascertain row from id number
069   */
070  private static final int ROW_MASK = (1 << LOG_ROW_SIZE) - 1;
071  /**
072   * Dictionary of all Atom instances.
073   */
074  private static Atom[][] atoms = new Atom[36][1 << LOG_ROW_SIZE];
075
076  /**
077   * Used to assign ids. Don't use id 0 to allow clients to use id 0 as a 'null'.
078   */
079  private static int nextId = 1;
080
081  /**
082   * A reference to either a unicode String encoding the atom, an offset in the
083   * JTOC holding a unicode string encoding the atom or null.
084   */
085  private Object unicodeStringOrJTOCoffset;
086
087  /**
088   * The utf8 value this atom represents
089   */
090  private final byte[] val;
091
092  /**
093   * The id of this atom
094   */
095  private final int id;
096
097  /**
098   *@return the id of this atom.
099   */
100  int getId() {
101    return id;
102  }
103
104  /**
105   * Find or create an atom.
106   * @param str atom value, as string literal whose characters are unicode
107   * @return atom
108   */
109  @Pure
110  public static Atom findOrCreateUnicodeAtom(String str) {
111    return findOrCreate(null, true, str);
112  }
113
114  /**
115   * Find an atom.
116   * @param str atom value, as string literal whose characters are unicode
117   * @return atom or null if it doesn't already exist
118   */
119  public static Atom findUnicodeAtom(String str) {
120    return findOrCreate(null, false, str);
121  }
122
123  /**
124   * Find or create an atom.
125   * @param str atom value, as string literal whose characters are from
126   *            ascii subset of unicode (not including null)
127   * @return atom
128   */
129  @Pure
130  public static Atom findOrCreateAsciiAtom(String str) {
131    return findOrCreate(null, true, str);
132  }
133
134  /**
135   * Find an atom.
136   * @param str atom value, as string literal whose characters are from
137   *            ascii subset of unicode (not including null)
138   * @return atom or null if it doesn't already exist
139   */
140  public static Atom findAsciiAtom(String str) {
141    return findOrCreate(null, false, str);
142  }
143
144  /**
145   * Find or create an atom.
146   * @param utf8 atom value, as utf8 encoded bytes
147   * @return atom
148   */
149  @Pure
150  public static Atom findOrCreateUtf8Atom(byte[] utf8) {
151    return findOrCreate(utf8, true, null);
152  }
153
154  /**
155   * Find an atom.
156   * @param utf8 atom value, as utf8 encoded bytes
157   * @return atom or null it it doesn't already exist
158   */
159  public static Atom findUtf8Atom(byte[] utf8) {
160    return findOrCreate(utf8, false, null);
161  }
162
163  /**
164   * Find an atom from the subsequence of another
165   * @param utf8 byte backing of atom
166   * @param off offset of new atom
167   * @param len length of new atom
168   * @param str possible string encoding of atom or null
169   * @return atom
170   */
171  private static Atom findOrCreate(byte[] utf8, int off, int len, String str) {
172    if (str != null) {
173      // string substring is cheap, so try to find using this if possible
174      Atom val = new Atom(null, -1, str.substring(off, off + len));
175      val = dictionary.get(val);
176      if (val != null) return val;
177    }
178    byte[] val = new byte[len];
179    for (int i = 0; i < len; ++i) {
180      val[i] = utf8[off++];
181    }
182    return findOrCreate(val, true, null);
183  }
184
185  /**
186   * This is the findOrCreate() method through which all Atoms are
187   * ultimately created.   The constructor for Atom is a private method, so
188   * someone has to call one of the public findOrCreate() methods to get a new
189   * one.  And they all feed through here.
190   * <p>
191   * Note: either bytes or str will be null but not both at the same time.
192   *
193   * @param bytes content of atom as utf8 bytes
194   * @param create whether an atom should be created if none can be found
195   * @param str string encoding of atom
196   * @return {@code null} if no atom was found and create is false, an atom
197   *  otherwise
198   */
199  private static Atom findOrCreate(byte[] bytes, boolean create, String str) {
200    Atom val = new Atom(bytes, -1, str);
201    val = dictionary.get(val);
202    if (val != null || !create) return val;
203
204    synchronized (Atom.class) {
205      // Check if a matching Atom was created while
206      // the current thread tried to acquire the lock
207      val = new Atom(bytes, -1, str);
208      val = dictionary.get(val);
209      if (val != null) return val;
210
211      val = new Atom(bytes, nextId++, str);
212      int column = val.id >> LOG_ROW_SIZE;
213      if (column == atoms.length) {
214        Atom[][] tmp = new Atom[column + 1][];
215        for (int i = 0; i < column; i++) {
216          tmp[i] = atoms[i];
217        }
218        atoms = tmp;
219        atoms[column] = new Atom[1 << LOG_ROW_SIZE];
220      }
221      atoms[column][val.id & ROW_MASK] = val;
222      dictionary.put(val, val);
223    }
224    return val;
225  }
226
227  /**
228   * @param id the id of an Atom
229   * @return the Atom whose id was given
230   */
231  @Pure
232  @Uninterruptible
233  public static Atom getAtom(int id) {
234    return atoms[id >> LOG_ROW_SIZE][id & ROW_MASK];
235  }
236
237  //-------------//
238  // conversions //
239  //-------------//
240
241  /**
242   * Return printable representation of "this" atom.
243   * Does not correctly handle UTF8 translation.
244   */
245  @Override
246  @Pure
247  public String toString() {
248    return StringUtilities.asciiBytesToString(val);
249  }
250
251  /**
252   * Get at a string-like representation without doing any heap allocation.
253   * Hideous but necessary.  We will use it in the PrintContainer class.
254   *
255   * @return a representation of the atom as bytes
256   */
257  @Uninterruptible
258  public byte[] toByteArray() {
259    return val;
260  }
261
262  /**
263   * @return atom as a string literal
264   * @throws java.io.UTFDataFormatException when conversion of the atom
265   *  to an UTF8 string fails
266   */
267  @Pure
268  public synchronized String toUnicodeString() throws java.io.UTFDataFormatException {
269    if (unicodeStringOrJTOCoffset == null) {
270      String s = UTF8Convert.fromUTF8(val);
271      if (VM.runningVM) {
272        s = InternedStrings.internUnfoundString(s);
273        unicodeStringOrJTOCoffset = s;
274      } else if (!VM.writingImage) {
275        s = s.intern();
276        int offset = Statics.findOrCreateObjectLiteral(s);
277        unicodeStringOrJTOCoffset = offset;
278      }
279      return s;
280    } else if (unicodeStringOrJTOCoffset instanceof String) {
281      return (String)unicodeStringOrJTOCoffset;
282    } else {
283      if (VM.runningVM) {
284        return (String)Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
285      } else {
286        return UTF8Convert.fromUTF8(val).intern();
287      }
288    }
289  }
290
291  /**
292   * @return atom as string literal or {@code null} if atom hasn't been converted
293   */
294  private synchronized String toUnicodeStringInternal() {
295    if (unicodeStringOrJTOCoffset == null) {
296      return null;
297    } else if (unicodeStringOrJTOCoffset instanceof String) {
298      return (String)unicodeStringOrJTOCoffset;
299    } else {
300      if (VM.runningVM) {
301        Object result = Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
302        return (String)result;
303      } else {
304        try {
305          return UTF8Convert.fromUTF8(val).intern();
306        } catch (UTFDataFormatException e) {
307          throw new Error("Error in UTF data encoding: ", e);
308        }
309      }
310    }
311  }
312
313  /**
314   * Offset of an atom's string in the JTOC, for string literals
315   * @return Offset of string literal in JTOC
316   * @throws java.io.UTFDataFormatException when conversion of the atom
317   *  to an UTF8 string fails
318   */
319  public synchronized int getStringLiteralOffset() throws java.io.UTFDataFormatException {
320    if (unicodeStringOrJTOCoffset == null) {
321      String s = UTF8Convert.fromUTF8(val);
322      if (VM.runningVM) {
323        s = InternedStrings.internUnfoundString(s);
324      } else {
325        s = s.intern();
326      }
327      int offset = Statics.findOrCreateObjectLiteral(s);
328      unicodeStringOrJTOCoffset = offset;
329      return offset;
330    } else if (unicodeStringOrJTOCoffset instanceof String) {
331      int offset = Statics.findOrCreateObjectLiteral(unicodeStringOrJTOCoffset);
332      unicodeStringOrJTOCoffset = offset;
333      return offset;
334    } else {
335      return (Integer)unicodeStringOrJTOCoffset;
336    }
337  }
338
339  /**
340   * Return array descriptor corresponding to "this" array-element descriptor.
341   * this: array-element descriptor - something like "I" or "Ljava/lang/Object;"
342   * @return array descriptor - something like "[I" or "[Ljava/lang/Object;"
343   */
344  @Pure
345  Atom arrayDescriptorFromElementDescriptor() {
346    if (VM.VerifyAssertions) {
347      VM._assert(val.length > 0);
348    }
349    byte[] sig = new byte[1 + val.length];
350    sig[0] = (byte) '[';
351    for (int i = 0, n = val.length; i < n; ++i) {
352      sig[i + 1] = val[i];
353    }
354    return findOrCreate(sig, true, null);
355  }
356
357  /**
358   * Return class descriptor corresponding to "this" class name.
359   * this: class name       - something like "java.lang.Object"
360   * @return class descriptor - something like "Ljava/lang/Object;"
361   */
362  @Pure
363  public Atom descriptorFromClassName() {
364    if (VM.VerifyAssertions) {
365      VM._assert(val.length > 0);
366    }
367    if (val[0] == '[') return this;
368    byte[] sig = new byte[1 + val.length + 1];
369    sig[0] = (byte) 'L';
370    for (int i = 0, n = val.length; i < n; ++i) {
371      byte b = val[i];
372      if (b == '.') b = '/';
373      sig[i + 1] = b;
374    }
375    sig[sig.length - 1] = (byte) ';';
376    return findOrCreate(sig, true, null);
377  }
378
379  /**
380   * Return class name corresponding to "this" class descriptor.
381   * this: class descriptor - something like "Ljava/lang/String;"
382   * @return class name - something like "java.lang.String"
383   */
384  @Pure
385  public String classNameFromDescriptor() {
386    if (VM.VerifyAssertions) {
387      VM._assert(val.length > 0);
388      VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
389    }
390    if (unicodeStringOrJTOCoffset == null) {
391      return StringUtilities.asciiBytesToString(val, 1, val.length - 2).replace('/', '.');
392    } else {
393      return toUnicodeStringInternal().substring(1, val.length - 1).replace('/','.');
394    }
395  }
396
397  /**
398   * Return name of class file corresponding to "this" class descriptor.
399   * this: class descriptor - something like "Ljava/lang/String;"
400   * @return class file name  - something like "java/lang/String.class"
401   */
402  @Pure
403  public String classFileNameFromDescriptor() {
404    if (VM.VerifyAssertions) {
405      VM._assert(val.length > 0);
406      VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
407    }
408    if (unicodeStringOrJTOCoffset == null) {
409      return StringUtilities.asciiBytesToString(val, 1, val.length - 2) + ".class";
410    } else {
411      return toUnicodeStringInternal().substring(1, val.length - 1) + ".class";
412    }
413  }
414
415  //----------------//
416  // classification //
417  //----------------//
418
419  /**
420   * Note: Sun has reserved all member names starting with '&lt;' for future use.
421   *       At present, only {@code <init>} and {@code <clinit>} are used.
422   * @return whether "this" atom is a reserved member name
423   */
424  @Uninterruptible
425  @Pure
426  public boolean isReservedMemberName() {
427    if (VM.VerifyAssertions) VM._assert(val.length > 0);
428    return val[0] == '<';
429  }
430
431  /**
432   * @return {@code true} if "this" atom is a class descriptor
433   */
434  @Uninterruptible
435  @Pure
436  public boolean isClassDescriptor() {
437    if (VM.VerifyAssertions) VM._assert(val.length > 0);
438    return val[0] == 'L';
439  }
440
441  /**
442   * @return {@code true} if "this" atom is an array descriptor
443   */
444  @Uninterruptible
445  @Pure
446  public boolean isArrayDescriptor() {
447    if (VM.VerifyAssertions) VM._assert(val.length > 0);
448    return val[0] == '[';
449  }
450
451  /**
452   * @return {@code true} if "this" atom is a method descriptor
453   */
454  @Uninterruptible
455  @Pure
456  public boolean isMethodDescriptor() {
457    if (VM.VerifyAssertions) VM._assert(val.length > 0);
458    return val[0] == '(';
459  }
460
461  //--------------------//
462  // descriptor parsing //
463  //--------------------//
464
465  /**
466   * Parse "this" method descriptor to obtain description of method's
467   * return type.
468   * this: method descriptor - something like "(III)V"
469   * @param cl the classloader
470   * @return type description
471   * @see TypeReference#findOrCreate(ClassLoader, Atom)
472   */
473  @Pure
474  public TypeReference parseForReturnType(ClassLoader cl) {
475    if (VM.VerifyAssertions) {
476      VM._assert(val.length > 0);
477      VM._assert(val[0] == '(', "Method descriptors start with `(`");
478    }
479    int i = 0;
480    while (val[i++] != ')') {
481      if (VM.VerifyAssertions) {
482        VM._assert(i < val.length, "Method descriptor missing closing ')'");
483      }
484    }
485    if (VM.VerifyAssertions) {
486      VM._assert(i < val.length, "Method descriptor missing type after closing ')'");
487    }
488    switch (val[i]) {
489      case VoidTypeCode:
490        return TypeReference.Void;
491      case BooleanTypeCode:
492        return TypeReference.Boolean;
493      case ByteTypeCode:
494        return TypeReference.Byte;
495      case ShortTypeCode:
496        return TypeReference.Short;
497      case IntTypeCode:
498        return TypeReference.Int;
499      case LongTypeCode:
500        return TypeReference.Long;
501      case FloatTypeCode:
502        return TypeReference.Float;
503      case DoubleTypeCode:
504        return TypeReference.Double;
505      case CharTypeCode:
506        return TypeReference.Char;
507      case ClassTypeCode:   // fall through
508      case ArrayTypeCode:
509        return TypeReference.findOrCreate(cl, findOrCreate(val, i, val.length - i, toUnicodeStringInternal()));
510      default:
511        if (VM.VerifyAssertions) {
512          String msg = "Need a valid method descriptor; got \"" + this +
513              "\"; can't parse the character '" + ((char)val[i]) + "'";
514          VM._assert(VM.NOT_REACHED, msg);
515        }
516        return null;            // NOTREACHED
517    }
518  }
519
520
521  /**
522   * Parse "this" method descriptor to obtain descriptions of method's
523   * parameters.
524   * this: method descriptor     - something like "(III)V"
525   * @param cl the classloader
526   * @return parameter descriptions
527   * @see TypeReference#findOrCreate(ClassLoader, Atom)
528   */
529  @Pure
530  public TypeReference[] parseForParameterTypes(ClassLoader cl) {
531    if (VM.VerifyAssertions) {
532      VM._assert(val.length > 0);
533      VM._assert(val[0] == '(', "Method descriptors start with `(`");
534    }
535    TypeReferenceVector sigs = new TypeReferenceVector();
536    int i = 1;
537    while (true) {
538      if (VM.VerifyAssertions) {
539        VM._assert(i < val.length, "Method descriptor missing closing `)`");
540      }
541
542      switch (val[i++]) {
543        case VoidTypeCode:
544          sigs.addElement(TypeReference.Void);
545          continue;
546        case BooleanTypeCode:
547          sigs.addElement(TypeReference.Boolean);
548          continue;
549        case ByteTypeCode:
550          sigs.addElement(TypeReference.Byte);
551          continue;
552        case ShortTypeCode:
553          sigs.addElement(TypeReference.Short);
554          continue;
555        case IntTypeCode:
556          sigs.addElement(TypeReference.Int);
557          continue;
558        case LongTypeCode:
559          sigs.addElement(TypeReference.Long);
560          continue;
561        case FloatTypeCode:
562          sigs.addElement(TypeReference.Float);
563          continue;
564        case DoubleTypeCode:
565          sigs.addElement(TypeReference.Double);
566          continue;
567        case CharTypeCode:
568          sigs.addElement(TypeReference.Char);
569          continue;
570        case ClassTypeCode: {
571          int off = i - 1;
572          while (val[i++] != ';') {
573            if (VM.VerifyAssertions) {
574              VM._assert(i < val.length, "class descriptor missing a final ';'");
575            }
576          }
577          sigs.addElement(TypeReference
578              .findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
579          continue;
580        }
581        case ArrayTypeCode: {
582          int off = i - 1;
583          while (val[i] == ArrayTypeCode) {
584            if (VM.VerifyAssertions) {
585              VM._assert(i < val.length, "malformed array descriptor");
586            }
587            ++i;
588          }
589          if (val[i++] == ClassTypeCode) while (val[i++] != ';') ;
590          sigs.addElement(TypeReference.findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
591          continue;
592        }
593        case(byte) ')': // end of parameter list
594          return sigs.finish();
595
596        default:
597          if (VM.VerifyAssertions) {
598            String msg = "The class descriptor \"" + this + "\" contains the illegal" +
599                " character '" + ((char)val[i]) + "'";
600            VM._assert(VM.NOT_REACHED, msg);
601          }
602      }
603    }
604  }
605
606  /**
607   * Parse "this" method descriptor to obtain descriptions of method's
608   * parameters as classes.
609   * this: method descriptor     - something like "(III)V"
610   * @param cl the classloader
611   * @return parameter classes
612   */
613  @Pure
614  public Class<?>[] parseForParameterClasses(ClassLoader cl) {
615    TypeReference[] typeRefs = this.parseForParameterTypes(cl);
616    Class<?>[] classes = new Class<?>[typeRefs.length];
617    for (int i = 0; i < typeRefs.length; i++) {
618      TypeReference t = typeRefs[i];
619      classes[i] = t.resolve().getClassForType();
620    }
621    return classes;
622  }
623
624  /**
625   * @return the underlying set of bytes for the Atom.  This can be used
626   * to perform comparisons without requiring the allocation of a string.
627   */
628  @Uninterruptible
629  public byte[] getBytes() {
630    return val;
631  }
632
633  /**
634   * Parse "this" field, parameter, or return descriptor to obtain its
635   * type code.
636   * this: descriptor - something like "Ljava/lang/String;" or "[I" or "I"
637   * @return type code  - something like ObjectTypeCode, ArrayTypeCode, or
638   * IntTypeCode
639   *
640   * The type code will be one of the following constants:
641   *
642   * <pre>
643   *               constant         value
644   *           ----------------     -----
645   *            ClassTypeCode        'L'
646   *            ArrayTypeCode        '['
647   *            VoidTypeCode         'V'
648   *            BooleanTypeCode      'Z'
649   *            ByteTypeCode         'B'
650   *            ShortTypeCode        'S'
651   *            IntTypeCode          'I'
652   *            LongTypeCode         'J'
653   *            FloatTypeCode        'F'
654   *            DoubleTypeCode       'D'
655   *            CharTypeCode         'C'
656   * </pre>
657   */
658  @Pure
659  public byte parseForTypeCode() throws IllegalArgumentException {
660    if (VM.VerifyAssertions) {
661      VM._assert(val.length > 0);
662    }
663    return val[0];
664  }
665
666  /**
667   * Parse "this" array descriptor to obtain number of dimensions in
668   * corresponding array type.
669   * this: descriptor     - something like "[Ljava/lang/String;" or "[[I"
670   * @return dimensionality - something like "1" or "2"
671   */
672  @Pure
673  public int parseForArrayDimensionality() {
674    if (VM.VerifyAssertions) {
675      VM._assert(val.length > 1, "An array descriptor has at least two characters");
676      VM._assert(val[0] == '[', "An array descriptor must start with '['");
677    }
678    for (int i = 0; ; ++i) {
679      if (VM.VerifyAssertions) {
680        VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
681      }
682      if (val[i] != '[') {
683        return i;
684      }
685    }
686  }
687
688  /**
689   * Parse "this" array descriptor to obtain type code for its element type.
690   * this: descriptor - something like "[Ljava/lang/String;" or "[I"
691   * @return type code  - something like VM.ObjectTypeCode or VM.IntTypeCode
692   * The type code will be one of the constants appearing in the table above.
693   *
694   * Implementation note: This is supposed to be uninterruptible, since another
695   * allegedly uninterruptible method (RVMArray.getLogElementSize()) calls it.
696   */
697  @Uninterruptible
698  @Pure
699  public byte parseForArrayElementTypeCode() {
700    if (VM.VerifyAssertions) {
701      VM._assert(val.length > 1, "An array descriptor has at least two characters");
702      VM._assert(val[0] == '[', "An array descriptor must start with '['");
703    }
704    return val[1];
705  }
706
707  /**
708   * @return the innermost element type reference for an array
709   */
710  @Pure
711  public Atom parseForInnermostArrayElementDescriptor() {
712    if (VM.VerifyAssertions) {
713      VM._assert(val.length > 1, "An array descriptor has at least two characters");
714      VM._assert(val[0] == '[', "An array descriptor must start with '['");
715    }
716    int i = 0;
717    while (val[i] == '[') {
718      if (VM.VerifyAssertions) {
719        VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
720      }
721      i++;
722    }
723    return findOrCreate(val, i, val.length - i, toUnicodeStringInternal());
724  }
725
726  /**
727   * Parse "this" array descriptor to obtain descriptor for array's element
728   * type.
729   * this: array descriptor         - something like "[I"
730   * @return array element descriptor - something like "I"
731   */
732  @Pure
733  public Atom parseForArrayElementDescriptor() {
734    if (VM.VerifyAssertions) {
735      VM._assert(val.length > 1, "An array descriptor has at least two characters");
736      VM._assert(val[0] == '[', "An array descriptor must start with '['");
737    }
738    return findOrCreate(val, 1, val.length - 1, toUnicodeStringInternal());
739  }
740
741  /**
742   * The set of class prefixes that MUST be loaded by bootstrap classloader.
743   * @see #isBootstrapClassDescriptor()
744   */
745  private static final byte[][] BOOTSTRAP_CLASS_PREFIX_SET =
746      {"Ljava/".getBytes(),
747       "Lorg/jikesrvm/".getBytes(),
748       "Lgnu/java/".getBytes(),
749       "Lgnu/classpath/debug/".getBytes(),
750       "Lgnu/classpath/jdwp/".getBytes(),
751       "Lgnu/classpath/NotImplementedException".getBytes(),
752       "Lgnu/classpath/Pair".getBytes(),
753       "Lgnu/classpath/Pointer".getBytes(),
754       "Lgnu/classpath/Pointer32".getBytes(),
755       "Lgnu/classpath/Pointer64".getBytes(),
756       "Lgnu/classpath/ServiceFactory".getBytes(),
757       "Lgnu/classpath/ServiceProviderLoadingAction".getBytes(),
758       "Lgnu/classpath/SystemProperties".getBytes(),
759       "Lorg/vmmagic/".getBytes(),
760       "Lorg/mmtk/".getBytes()};
761
762  /**
763   * The set of class prefixes that MUST NOT be loaded by bootstrap classloader.
764   * @see #isBootstrapClassDescriptor()
765   */
766  private static final byte[][] NON_BOOTSTRAP_CLASS_PREFIX_SET =
767      {"Lorg/jikesrvm/tools/ant/".getBytes(),
768       "Lorg/jikesrvm/tools/apt/".getBytes(),
769       "Lorg/jikesrvm/tools/template/".getBytes()};
770
771  /**
772   * The set of class prefixes for core RVM classes.
773   * @see #isRVMDescriptor()
774   */
775  private static final byte[][] RVM_CLASS_PREFIXES =
776      {"Lorg/jikesrvm/".getBytes(), "Lorg/vmmagic/".getBytes(), "Lorg/mmtk/".getBytes()};
777
778  /**
779   * @return true if this is a class descriptor of a bootstrap class
780   * (ie a class that must be loaded by the bootstrap class loader)
781   */
782  @Pure
783  public boolean isBootstrapClassDescriptor() {
784    non_bootstrap_outer:
785    for (final byte[] test : NON_BOOTSTRAP_CLASS_PREFIX_SET) {
786      if (test.length > val.length) continue;
787      for (int j = 0; j < test.length; j++) {
788        if (val[j] != test[j]) {
789          continue non_bootstrap_outer;
790        }
791      }
792      return false;
793    }
794    bootstrap_outer:
795    for (final byte[] test : BOOTSTRAP_CLASS_PREFIX_SET) {
796      if (test.length > val.length) continue;
797      for (int j = 0; j < test.length; j++) {
798        if (val[j] != test[j]) {
799          continue bootstrap_outer;
800        }
801      }
802      return true;
803    }
804    return false;
805  }
806
807  /**
808   * @return {@code true} if this is a class descriptor of a RVM core class.
809   * This is  defined as one that it would be unwise to invalidate, since invalidating
810   * it might make it impossible to recompile.
811   */
812  @Pure
813  public boolean isRVMDescriptor() {
814    outer:
815    for (final byte[] test : RVM_CLASS_PREFIXES) {
816      if (test.length > val.length) continue;
817      for (int j = 0; j < test.length; j++) {
818        if (val[j] != test[j]) {
819          continue outer;
820        }
821      }
822      return true;
823    }
824    return false;
825  }
826
827  //-------------//
828  // annotations //
829  //-------------//
830
831  /**
832   * Creates an annotation name from a class name. For example
833   * Lfoo.bar; becomes Lfoo.bar$$; NB in Sun VMs the annotation name
834   * of the first annotation is $Proxy1. Classpath may later rely on
835   * this to implement serialization correctly.
836   *
837   * @return atom for the annotation name
838   */
839  @Pure
840  public Atom annotationInterfaceToAnnotationClass() {
841    byte[] annotationClassName_tmp = new byte[val.length + 2];
842    System.arraycopy(val, 0, annotationClassName_tmp, 0, val.length - 1);
843    annotationClassName_tmp[val.length - 1] = '$';
844    annotationClassName_tmp[val.length] = '$';
845    annotationClassName_tmp[val.length + 1] = ';';
846    return Atom.findOrCreateUtf8Atom(annotationClassName_tmp);
847  }
848
849  /**
850   * Creates a class name from a type name. For example Lfoo.bar$$;
851   * becomes the string foo.bar
852   *
853   * @return created ASCII string
854   */
855  @Pure
856  public String annotationClassToAnnotationInterface() {
857    if (VM.VerifyAssertions) {
858      boolean isClassAnnotation = val[0] == 'L' && val[val.length - 1] == ';';
859      VM._assert(val.length > 0);
860      if (!isClassAnnotation) {
861        String msg = toString();
862        VM._assert(isClassAnnotation, msg);
863      }
864    }
865    return StringUtilities.asciiBytesToString(val, 1, val.length - 4).replace('/', '.');
866  }
867
868  /**
869   * @return whether "this" is an annotation class name of the form Lfoo.bar$$;
870   */
871  @Pure
872  public boolean isAnnotationClass() {
873    return (val.length > 4) && (val[val.length - 3] == '$') && (val[val.length - 2] == '$');
874  }
875
876  //-----------//
877  // debugging //
878  //-----------//
879
880  @Uninterruptible
881  public void sysWrite() {
882    for (int i = 0, n = val.length; i < n; ++i) {
883      VM.sysWrite((char) val[i]);
884    }
885  }
886
887  @Uninterruptible
888  public int length() {
889    return val.length;
890  }
891
892  private Atom(byte[] val, int id, String str) {
893    this.id = id;
894    this.unicodeStringOrJTOCoffset = str;
895    if ((val == null) && (id != -1)) {
896      this.val = UTF8Convert.toUTF8(str);
897    } else {
898      this.val = val;
899    }
900  }
901
902  /*
903   * Hash table utilities
904   */
905  /**
906   * Return the hashCode of an atom, this equals the unicode string encoding of
907   * the atom
908   */
909  @Override
910  public int hashCode() {
911    try {
912      if (unicodeStringOrJTOCoffset != null) {
913        return toUnicodeStringInternal().hashCode();
914      } else {
915        return UTF8Convert.computeStringHashCode(val);
916      }
917    } catch (UTFDataFormatException e) {
918      return 0;
919    }
920  }
921
922  /**
923   * Outside of this class atoms are canonical and should be compared using ==.
924   * This method is used to maintain atoms in internal hash tables and shouldn't
925   * be used externally.
926   */
927  @Override
928  @Pure
929  public boolean equals(Object other) {
930    // quick test as atoms are generally canonical
931    if (this == other) {
932      return true;
933    } else {
934      if (other instanceof Atom) {
935        Atom that = (Atom)other;
936        // if the atoms are well formed then their identifiers are unique
937        if ((that.id != -1) && (this.id != -1)) {
938          return that.id == this.id;
939        }
940        // one atom isn't well formed, can we do a string comparison to work out equality?
941        if ((this.unicodeStringOrJTOCoffset != null) && (that.unicodeStringOrJTOCoffset != null)) {
942          return toUnicodeStringInternal().equals(that.toUnicodeStringInternal());
943        }
944        try {
945          // perform byte by byte comparison
946          byte[] val1;
947          if (that.val != null) {
948            val1 = that.val;
949          } else {
950            val1 = UTF8Convert.toUTF8(that.toUnicodeString());
951          }
952          byte[] val2;
953          if (this.val != null) {
954            val2 = this.val;
955          } else {
956            val2 = UTF8Convert.toUTF8(toUnicodeString());
957          }
958          if (val1.length == val2.length) {
959            for (int i = 0; i < val1.length; i++) {
960              if (val1[i] != val2[i]) return false;
961            }
962            return true;
963          }
964        } catch (UTFDataFormatException e) {
965          throw new Error("Error in UTF data encoding: ",e);
966        }
967      }
968      return false;
969    }
970  }
971
972
973  /**
974   * Inner class responsible for string interning. This class' initializer is
975   * run during booting.
976   */
977  private static class InternedStrings {
978    /**
979     * Look up for interned strings.
980     */
981    private static final WeakHashMap<String,WeakReference<String>> internedStrings =
982      new WeakHashMap<String,WeakReference<String>>();
983
984    /**
985     * Find an interned string but don't create it if not found
986     * @param str string to lookup
987     * @return the interned string or null if it isn't interned
988     */
989    static synchronized String findInternedString(String str) {
990      WeakReference<String> ref;
991      ref = internedStrings.get(str);
992      if (ref != null) {
993        String s = ref.get();
994        if (s != null) {
995          return s;
996        }
997      }
998      return null;
999    }
1000
1001    /**
1002     * Find a string literal from an atom
1003     * @param str string to find
1004     * @return the string literal or null
1005     */
1006    static String findAtomString(String str) {
1007      Atom atom = findUnicodeAtom(str);
1008      if (atom != null) {
1009        try {
1010          return atom.toUnicodeString();
1011        } catch (UTFDataFormatException e) {
1012          throw new Error("Error in UTF data encoding: ", e);
1013        }
1014      }
1015      return null;
1016    }
1017
1018    /**
1019     * Intern a string that is not an atom or already interned string
1020     * @param str string to intern
1021     * @return interned string
1022     */
1023    static synchronized String internUnfoundString(String str) {
1024      // double check string isn't found as we're holding the lock on the class
1025      String s = findInternedString(str);
1026      if (s != null) return s;
1027      // If we get to here, then there is no interned version of the String.
1028      // So we make one.
1029      WeakReference<String> ref = new WeakReference<String>(str);
1030      internedStrings.put(str, ref);
1031      return str;
1032    }
1033  }
1034
1035  /**
1036   * External string intern method called from String.intern. This method should
1037   * return a canonical string encoding for the given string and this string
1038   * should also be canonical with string literals.
1039   * @param str string to intern
1040   * @return interned version of string
1041   */
1042  public static String internString(String str) {
1043    // Has the string already been interned
1044    String s = InternedStrings.findInternedString(str);
1045    if (s != null) return s;
1046
1047    // Check to see if this is a StringLiteral:
1048    s = InternedStrings.findAtomString(str);
1049    if (s != null) return s;
1050
1051    // Intern this string
1052    return InternedStrings.internUnfoundString(str);
1053  }
1054}