001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.classloader;
014    
015    import static org.jikesrvm.classloader.ClassLoaderConstants.ArrayTypeCode;
016    import static org.jikesrvm.classloader.ClassLoaderConstants.BooleanTypeCode;
017    import static org.jikesrvm.classloader.ClassLoaderConstants.ByteTypeCode;
018    import static org.jikesrvm.classloader.ClassLoaderConstants.CharTypeCode;
019    import static org.jikesrvm.classloader.ClassLoaderConstants.ClassTypeCode;
020    import static org.jikesrvm.classloader.ClassLoaderConstants.DoubleTypeCode;
021    import static org.jikesrvm.classloader.ClassLoaderConstants.FloatTypeCode;
022    import static org.jikesrvm.classloader.ClassLoaderConstants.IntTypeCode;
023    import static org.jikesrvm.classloader.ClassLoaderConstants.LongTypeCode;
024    import static org.jikesrvm.classloader.ClassLoaderConstants.ShortTypeCode;
025    import static org.jikesrvm.classloader.ClassLoaderConstants.VoidTypeCode;
026    
027    import java.io.UTFDataFormatException;
028    import java.lang.ref.WeakReference;
029    import java.util.WeakHashMap;
030    
031    import org.jikesrvm.VM;
032    import org.jikesrvm.runtime.Statics;
033    import org.jikesrvm.util.ImmutableEntryHashMapRVM;
034    import org.jikesrvm.util.StringUtilities;
035    import org.vmmagic.pragma.Pure;
036    import org.vmmagic.pragma.Uninterruptible;
037    import org.vmmagic.unboxed.Offset;
038    
/**
 * A UTF-8 encoded byte string.
 *
 * Atoms are interned (canonicalized),
 * so they may be compared for equality using the "==" operator.
 *
 * Atoms are used to represent names, descriptors, and string literals
 * appearing in a class's constant pool.
 *
 * There is almost always a zero-length Atom, since any class which
 * contains statements like:
 *          return "";
 * will have one in its constant pool.
 */
053    public final class Atom {
054    
055      /**
056       * Used to canonicalize Atoms: possibly non-canonical Atom => Atom
057       */
058      private static final ImmutableEntryHashMapRVM<Atom, Atom> dictionary =
059        new ImmutableEntryHashMapRVM<Atom, Atom>(12000);
060    
061      /**
062       * 2^LOG_ROW_SIZE is the number of elements per row
063       */
064      private static final int LOG_ROW_SIZE = 10;
065      /**
066       * Mask to ascertain row from id number
067       */
068      private static final int ROW_MASK = (1 << LOG_ROW_SIZE)-1;
069      /**
070       * Dictionary of all Atom instances.
071       */
072      private static Atom[][] atoms = new Atom[36][1 << LOG_ROW_SIZE];
073    
074      /**
075       * Used to assign ids. Don't use id 0 to allow clients to use id 0 as a 'null'.
076       */
077      private static int nextId = 1;
078    
079      /**
080       * A reference to either a unicode String encoding the atom, an offset in the
081       * JTOC holding a unicode string encoding the atom or null.
082       */
083      private Object unicodeStringOrJTOCoffset;
084    
085      /**
086       * The utf8 value this atom represents
087       */
088      private final byte[] val;
089    
090      /**
091       * The id of this atom
092       */
093      private final int id;
094    
095      /**
096       *@return the id of this atom.
097       */
098      int getId() { return id; }
099    
100      /**
101       * Find or create an atom.
102       * @param str atom value, as string literal whose characters are unicode
103       * @return atom
104       */
105      @Pure
106      public static Atom findOrCreateUnicodeAtom(String str) {
107        return findOrCreate(null, true, str);
108      }
109    
110      /**
111       * Find an atom.
112       * @param str atom value, as string literal whose characters are unicode
113       * @return atom or null if it doesn't already exist
114       */
115      public static Atom findUnicodeAtom(String str) {
116        return findOrCreate(null, false, str);
117      }
118    
119      /**
120       * Find or create an atom.
121       * @param str atom value, as string literal whose characters are from
122       *            ascii subset of unicode (not including null)
123       * @return atom
124       */
125      @Pure
126      public static Atom findOrCreateAsciiAtom(String str) {
127        return findOrCreate(null, true, str);
128      }
129    
130      /**
131       * Find an atom.
132       * @param str atom value, as string literal whose characters are from
133       *            ascii subset of unicode (not including null)
134       * @return atom or null if it doesn't already exist
135       */
136      public static Atom findAsciiAtom(String str) {
137        return findOrCreate(null, false, str);
138      }
139    
140      /**
141       * Find or create an atom.
142       * @param utf8 atom value, as utf8 encoded bytes
143       * @return atom
144       */
145      @Pure
146      public static Atom findOrCreateUtf8Atom(byte[] utf8) {
147        return findOrCreate(utf8, true, null);
148      }
149    
150      /**
151       * Find an atom.
152       * @param utf8 atom value, as utf8 encoded bytes
153       * @return atom or null it it doesn't already exist
154       */
155      public static Atom findUtf8Atom(byte[] utf8) {
156        return findOrCreate(utf8, false, null);
157      }
158    
159      /**
160       * Find an atom from the subsequence of another
161       * @param utf8 byte backing of atom
162       * @param off offset of new atom
163       * @param len length of new atom
164       * @param str possible string encoding of atom or null
165       * @return atom
166       */
167      private static Atom findOrCreate(byte[] utf8, int off, int len, String str) {
168        if (str != null) {
169          // string substring is cheap, so try to find using this if possible
170          Atom val = new Atom(null, -1, str.substring(off, off+len));
171          val = dictionary.get(val);
172          if (val != null) return val;
173        }
174        byte[] val = new byte[len];
175        for (int i = 0; i < len; ++i) {
176          val[i] = utf8[off++];
177        }
178        return findOrCreate(val, true, null);
179      }
180    
181      /**
182       * This is the findOrCreate() method through which all Atoms are
183       * ultimately created.   The constructor for Atom is a private method, so
184       * someone has to call one of the public findOrCreate() methods to get a new
185       * one.  And they all feed through here.
186       */
187      private static Atom findOrCreate(byte[] bytes, boolean create, String str) {
188        Atom val = new Atom(bytes, -1, str);
189        val = dictionary.get(val);
190        if (val != null || !create) return val;
191        synchronized(Atom.class) {
192          val = new Atom(bytes, nextId++, str);
193          int column = val.id >> LOG_ROW_SIZE;
194          if (column == atoms.length) {
195            Atom[][] tmp = new Atom[column+1][];
196            for (int i=0; i < column; i++) {
197              tmp[i] = atoms[i];
198            }
199            atoms = tmp;
200            atoms[column] = new Atom[1 << LOG_ROW_SIZE];
201          }
202          atoms[column][val.id & ROW_MASK] = val;
203          dictionary.put(val, val);
204        }
205        return val;
206      }
207    
208      /**
209       * @param id the id of an Atom
210       * @return the Atom whose id was given
211       */
212      @Pure
213      @Uninterruptible
214      public static Atom getAtom(int id) {
215        return atoms[id >> LOG_ROW_SIZE][id & ROW_MASK];
216      }
217    
218      //-------------//
219      // conversions //
220      //-------------//
221    
222      /**
223       * Return printable representation of "this" atom.
224       * Does not correctly handle UTF8 translation.
225       */
226      @Pure
227      public String toString() {
228        return StringUtilities.asciiBytesToString(val);
229      }
230    
231      /**
232       * Get at a string-like representation without doing any heap allocation.
233       * Hideous but necessary.  We will use it in the PrintContainer class.
234       */
235      @Uninterruptible
236      public byte[] toByteArray() {
237        return val;
238      }
239    
240      /**
241       * Return atom as a string literal
242       */
243      @Pure
244      public synchronized String toUnicodeString() throws java.io.UTFDataFormatException {
245        if (unicodeStringOrJTOCoffset == null) {
246          String s = UTF8Convert.fromUTF8(val);
247          if (VM.runningVM) {
248            s = InternedStrings.internUnfoundString(s);
249            unicodeStringOrJTOCoffset = s;
250          } else if (!VM.writingImage) {
251            s = s.intern();
252            int offset = Statics.findOrCreateObjectLiteral(s);
253            unicodeStringOrJTOCoffset = offset;
254          }
255          return s;
256        } else if (unicodeStringOrJTOCoffset instanceof String) {
257          return (String)unicodeStringOrJTOCoffset;
258        } else {
259          if (VM.runningVM) {
260            return (String)Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
261          } else {
262            return UTF8Convert.fromUTF8(val).intern();
263          }
264        }
265      }
266    
267      /**
268       * Atom as string literal or null if atom hasn't been converted
269       */
270      private synchronized String toUnicodeStringInternal() {
271        if (unicodeStringOrJTOCoffset == null) {
272          return null;
273        } else if (unicodeStringOrJTOCoffset instanceof String) {
274          return (String)unicodeStringOrJTOCoffset;
275        } else {
276          if (VM.runningVM) {
277            Object result = Statics.getSlotContentsAsObject(Offset.fromIntSignExtend((Integer)unicodeStringOrJTOCoffset));
278            return (String)result;
279          } else {
280            try {
281              return UTF8Convert.fromUTF8(val).intern();
282            } catch (UTFDataFormatException e) {
283              throw new Error("Error in UTF data encoding: ", e);
284            }
285          }
286        }
287      }
288    
289      /**
290       * Offset of an atom's string in the JTOC, for string literals
291       * @return Offset of string literal in JTOC
292       * @throws java.io.UTFDataFormatException
293       */
294      public synchronized int getStringLiteralOffset() throws java.io.UTFDataFormatException {
295        if (unicodeStringOrJTOCoffset == null) {
296          String s = UTF8Convert.fromUTF8(val);
297          if (VM.runningVM) {
298            s = InternedStrings.internUnfoundString(s);
299          } else {
300            s = s.intern();
301          }
302          int offset = Statics.findOrCreateObjectLiteral(s);
303          unicodeStringOrJTOCoffset = offset;
304          return offset;
305        } else if (unicodeStringOrJTOCoffset instanceof String) {
306          int offset = Statics.findOrCreateObjectLiteral(unicodeStringOrJTOCoffset);
307          unicodeStringOrJTOCoffset = offset;
308          return offset;
309        } else {
310          return (Integer)unicodeStringOrJTOCoffset;
311        }
312      }
313    
314      /**
315       * Return array descriptor corresponding to "this" array-element descriptor.
316       * this: array-element descriptor - something like "I" or "Ljava/lang/Object;"
317       * @return array descriptor - something like "[I" or "[Ljava/lang/Object;"
318       */
319      @Pure
320      Atom arrayDescriptorFromElementDescriptor() {
321        if (VM.VerifyAssertions) {
322          VM._assert(val.length > 0);
323        }
324        byte[] sig = new byte[1 + val.length];
325        sig[0] = (byte) '[';
326        for (int i = 0, n = val.length; i < n; ++i) {
327          sig[i + 1] = val[i];
328        }
329        return findOrCreate(sig, true, null);
330      }
331    
332      /**
333       * Return class descriptor corresponding to "this" class name.
334       * this: class name       - something like "java.lang.Object"
335       * @return class descriptor - something like "Ljava/lang/Object;"
336       */
337      @Pure
338      public Atom descriptorFromClassName() {
339        if (VM.VerifyAssertions) {
340          VM._assert(val.length > 0);
341        }
342        if (val[0] == '[') return this;
343        byte[] sig = new byte[1 + val.length + 1];
344        sig[0] = (byte) 'L';
345        for (int i = 0, n = val.length; i < n; ++i) {
346          byte b = val[i];
347          if (b == '.') b = '/';
348          sig[i + 1] = b;
349        }
350        sig[sig.length - 1] = (byte) ';';
351        return findOrCreate(sig, true, null);
352      }
353    
354      /**
355       * Return class name corresponding to "this" class descriptor.
356       * this: class descriptor - something like "Ljava/lang/String;"
357       * @return class name - something like "java.lang.String"
358       */
359      @Pure
360      public String classNameFromDescriptor() {
361        if (VM.VerifyAssertions) {
362          VM._assert(val.length > 0);
363          VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
364        }
365        if (unicodeStringOrJTOCoffset == null) {
366          return StringUtilities.asciiBytesToString(val, 1, val.length - 2).replace('/', '.');
367        } else {
368          return toUnicodeStringInternal().substring(1, val.length-1).replace('/','.');
369        }
370      }
371    
372      /**
373       * Return name of class file corresponding to "this" class descriptor.
374       * this: class descriptor - something like "Ljava/lang/String;"
375       * @return class file name  - something like "java/lang/String.class"
376       */
377      @Pure
378      public String classFileNameFromDescriptor() {
379        if (VM.VerifyAssertions) {
380          VM._assert(val.length > 0);
381          VM._assert(val[0] == 'L' && val[val.length - 1] == ';');
382        }
383        if (unicodeStringOrJTOCoffset == null) {
384          return StringUtilities.asciiBytesToString(val, 1, val.length - 2) + ".class";
385        } else {
386          return toUnicodeStringInternal().substring(1, val.length-1) + ".class";
387        }
388      }
389    
390      //----------------//
391      // classification //
392      //----------------//
393    
394      /**
395       * Is "this" atom a reserved member name?
396       * Note: Sun has reserved all member names starting with '<' for future use.
397       *       At present, only <init> and <clinit> are used.
398       */
399      @Uninterruptible
400      @Pure
401      public boolean isReservedMemberName() {
402        if (VM.VerifyAssertions) VM._assert(val.length > 0);
403        return val[0] == '<';
404      }
405    
406      /**
407       * Is "this" atom a class descriptor?
408       */
409      @Uninterruptible
410      @Pure
411      public boolean isClassDescriptor() {
412        if (VM.VerifyAssertions) VM._assert(val.length > 0);
413        return val[0] == 'L';
414      }
415    
416      /**
417       * Is "this" atom an array descriptor?
418       */
419      @Uninterruptible
420      @Pure
421      public boolean isArrayDescriptor() {
422        if (VM.VerifyAssertions) VM._assert(val.length > 0);
423        return val[0] == '[';
424      }
425    
426      /**
427       * Is "this" atom a method descriptor?
428       */
429      @Uninterruptible
430      @Pure
431      public boolean isMethodDescriptor() {
432        if (VM.VerifyAssertions) VM._assert(val.length > 0);
433        return val[0] == '(';
434      }
435    
436      //--------------------//
437      // descriptor parsing //
438      //--------------------//
439    
440      /**
441       * Parse "this" method descriptor to obtain description of method's
442       * return type.
443       * this: method descriptor - something like "(III)V"
444       * @return type description
445       */
446      @Pure
447      public TypeReference parseForReturnType(ClassLoader cl) {
448        if (VM.VerifyAssertions) {
449          VM._assert(val.length > 0);
450          VM._assert(val[0] == '(', "Method descriptors start with `(`");
451        }
452        int i = 0;
453        while (val[i++] != ')') {
454          if (VM.VerifyAssertions) {
455            VM._assert(i < val.length, "Method descriptor missing closing ')'");
456          }
457        }
458        if (VM.VerifyAssertions) {
459          VM._assert(i < val.length, "Method descriptor missing type after closing ')'");
460        }
461        switch (val[i]) {
462          case VoidTypeCode:
463            return TypeReference.Void;
464          case BooleanTypeCode:
465            return TypeReference.Boolean;
466          case ByteTypeCode:
467            return TypeReference.Byte;
468          case ShortTypeCode:
469            return TypeReference.Short;
470          case IntTypeCode:
471            return TypeReference.Int;
472          case LongTypeCode:
473            return TypeReference.Long;
474          case FloatTypeCode:
475            return TypeReference.Float;
476          case DoubleTypeCode:
477            return TypeReference.Double;
478          case CharTypeCode:
479            return TypeReference.Char;
480          case ClassTypeCode:   // fall through
481          case ArrayTypeCode:
482            return TypeReference.findOrCreate(cl, findOrCreate(val, i, val.length - i, toUnicodeStringInternal()));
483          default:
484            if (VM.VerifyAssertions) {
485              VM._assert(false,
486                         "Need a valid method descriptor; got \"" +
487                         this +
488                         "\"; can't parse the character '" +
489                         ((char)val[i]) +
490                         "'");
491            }
492            return null;            // NOTREACHED
493        }
494      }
495    
496      /**
497       * Parse "this" method descriptor to obtain descriptions of method's
498       * parameters.
499       * this: method descriptor     - something like "(III)V"
500       * @return parameter descriptions
501       */
502      @Pure
503      public TypeReference[] parseForParameterTypes(ClassLoader cl) {
504        if (VM.VerifyAssertions) {
505          VM._assert(val.length > 0);
506          VM._assert(val[0] == '(', "Method descriptors start with `(`");
507        }
508        TypeReferenceVector sigs = new TypeReferenceVector();
509        int i = 1;
510        while (true) {
511          if (VM.VerifyAssertions) {
512            VM._assert(i < val.length, "Method descriptor missing closing `)`");
513          }
514    
515          switch (val[i++]) {
516            case VoidTypeCode:
517              sigs.addElement(TypeReference.Void);
518              continue;
519            case BooleanTypeCode:
520              sigs.addElement(TypeReference.Boolean);
521              continue;
522            case ByteTypeCode:
523              sigs.addElement(TypeReference.Byte);
524              continue;
525            case ShortTypeCode:
526              sigs.addElement(TypeReference.Short);
527              continue;
528            case IntTypeCode:
529              sigs.addElement(TypeReference.Int);
530              continue;
531            case LongTypeCode:
532              sigs.addElement(TypeReference.Long);
533              continue;
534            case FloatTypeCode:
535              sigs.addElement(TypeReference.Float);
536              continue;
537            case DoubleTypeCode:
538              sigs.addElement(TypeReference.Double);
539              continue;
540            case CharTypeCode:
541              sigs.addElement(TypeReference.Char);
542              continue;
543            case ClassTypeCode: {
544              int off = i - 1;
545              while (val[i++] != ';') {
546                if (VM.VerifyAssertions) {
547                  VM._assert(i < val.length, "class descriptor missing a final ';'");
548                }
549              }
550              sigs.addElement(TypeReference
551                  .findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
552              continue;
553            }
554            case ArrayTypeCode: {
555              int off = i - 1;
556              while (val[i] == ArrayTypeCode) {
557                if (VM.VerifyAssertions) {
558                  VM._assert(i < val.length, "malformed array descriptor");
559                }
560                ++i;
561              }
562              if (val[i++] == ClassTypeCode) while (val[i++] != ';') ;
563              sigs.addElement(TypeReference.findOrCreate(cl, findOrCreate(val, off, i - off, toUnicodeStringInternal())));
564              continue;
565            }
566            case(byte) ')': // end of parameter list
567              return sigs.finish();
568    
569            default:
570              if (VM.VerifyAssertions) {
571                VM._assert(false,
572                           "The class descriptor \"" +
573                           this +
574                           "\" contains the illegal" +
575                           " character '" +
576                           ((char)val[i]) +
577                           "'");
578              }
579          }
580        }
581      }
582    
583      /**
584       * Parse "this" method descriptor to obtain descriptions of method's
585       * parameters as classes.
586       * this: method descriptor     - something like "(III)V"
587       * @return parameter classes
588       */
589      @Pure
590      public Class<?>[] parseForParameterClasses(ClassLoader cl) {
591        TypeReference[] typeRefs = this.parseForParameterTypes(cl);
592        Class<?>[] classes = new Class<?>[typeRefs.length];
593        for (int i=0; i < typeRefs.length; i++) {
594          TypeReference t = typeRefs[i];
595          classes[i] = t.resolve().getClassForType();
596        }
597        return classes;
598      }
599    
600      /**
601       * Return the underlying set of bytes for the Atom.  This can be used
602       * to perform comparisons without requiring the allocation of a string.
603       */
604      @Uninterruptible
605      public byte[] getBytes() {
606        return val;
607      }
608    
609      /**
610       * Parse "this" field, parameter, or return descriptor to obtain its
611       * type code.
612       * this: descriptor - something like "Ljava/lang/String;" or "[I" or "I"
613       * @return type code  - something like ObjectTypeCode, ArrayTypeCode, or
614       * IntTypeCode
615       *
616       * The type code will be one of the following constants:
617       *
618       * <pre>
619       *               constant         value
620       *           ----------------     -----
621       *            ClassTypeCode        'L'
622       *            ArrayTypeCode        '['
623       *            VoidTypeCode         'V'
624       *            BooleanTypeCode      'Z'
625       *            ByteTypeCode         'B'
626       *            ShortTypeCode        'S'
627       *            IntTypeCode          'I'
628       *            LongTypeCode         'J'
629       *            FloatTypeCode        'F'
630       *            DoubleTypeCode       'D'
631       *            CharTypeCode         'C'
632       * </pre>
633       */
634      @Pure
635      public byte parseForTypeCode() throws IllegalArgumentException {
636        if (VM.VerifyAssertions) {
637          VM._assert(val.length > 0);
638        }
639        return val[0];
640      }
641    
642      /**
643       * Parse "this" array descriptor to obtain number of dimensions in
644       * corresponding array type.
645       * this: descriptor     - something like "[Ljava/lang/String;" or "[[I"
646       * @return dimensionality - something like "1" or "2"
647       */
648      @Pure
649      public int parseForArrayDimensionality() {
650        if (VM.VerifyAssertions) {
651          VM._assert(val.length > 1, "An array descriptor has at least two characters");
652          VM._assert(val[0] == '[', "An array descriptor must start with '['");
653        }
654        for (int i = 0; ; ++i) {
655          if (VM.VerifyAssertions) {
656            VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
657          }
658          if (val[i] != '[') {
659            return i;
660          }
661        }
662      }
663    
664      /**
665       * Parse "this" array descriptor to obtain type code for its element type.
666       * this: descriptor - something like "[Ljava/lang/String;" or "[I"
667       * @return type code  - something like VM.ObjectTypeCode or VM.IntTypeCode
668       * The type code will be one of the constants appearing in the table above.
669       *
670       * Implementation note: This is supposed to be uninterruptible, since another
671       * allegedly uninterruptible method (RVMArray.getLogElementSize()) calls it.
672       */
673      @Uninterruptible
674      @Pure
675      public byte parseForArrayElementTypeCode() {
676        if (VM.VerifyAssertions) {
677          VM._assert(val.length > 1, "An array descriptor has at least two characters");
678          VM._assert(val[0] == '[', "An array descriptor must start with '['");
679        }
680        return val[1];
681      }
682    
683      /**
684       * Return the innermost element type reference for an array
685       */
686      @Pure
687      public Atom parseForInnermostArrayElementDescriptor() {
688        if (VM.VerifyAssertions) {
689          VM._assert(val.length > 1, "An array descriptor has at least two characters");
690          VM._assert(val[0] == '[', "An array descriptor must start with '['");
691        }
692        int i = 0;
693        while (val[i] == '[') {
694          if (VM.VerifyAssertions) {
695            VM._assert(i < val.length, "Malformed array descriptor: it can't just have [ characters");
696          }
697          i++;
698        }
699        return findOrCreate(val, i, val.length - i, toUnicodeStringInternal());
700      }
701    
702      /**
703       * Parse "this" array descriptor to obtain descriptor for array's element
704       * type.
705       * this: array descriptor         - something like "[I"
706       * @return array element descriptor - something like "I"
707       */
708      @Pure
709      public Atom parseForArrayElementDescriptor() {
710        if (VM.VerifyAssertions) {
711          VM._assert(val.length > 1, "An array descriptor has at least two characters");
712          VM._assert(val[0] == '[', "An array descriptor must start with '['");
713        }
714        return findOrCreate(val, 1, val.length - 1, toUnicodeStringInternal());
715      }
716    
717      /**
718       * The set of class prefixes that MUST be loaded by bootstrap classloader.
719       * @see #isBootstrapClassDescriptor()
720       */
721      private static final byte[][] BOOTSTRAP_CLASS_PREFIX_SET =
722          {"Ljava/".getBytes(),
723           "Lorg/jikesrvm/".getBytes(),
724           "Lgnu/java/".getBytes(),
725           "Lgnu/classpath/debug/".getBytes(),
726           "Lgnu/classpath/jdwp/".getBytes(),
727           "Lgnu/classpath/NotImplementedException".getBytes(),
728           "Lgnu/classpath/Pair".getBytes(),
729           "Lgnu/classpath/Pointer".getBytes(),
730           "Lgnu/classpath/Pointer32".getBytes(),
731           "Lgnu/classpath/Pointer64".getBytes(),
732           "Lgnu/classpath/ServiceFactory".getBytes(),
733           "Lgnu/classpath/ServiceProviderLoadingAction".getBytes(),
734           "Lgnu/classpath/SystemProperties".getBytes(),
735           "Lorg/vmmagic/".getBytes(),
736           "Lorg/mmtk/".getBytes()};
737    
738      /**
739       * The set of class prefixes that MUST NOT be loaded by bootstrap classloader.
740       * @see #isBootstrapClassDescriptor()
741       */
742      private static final byte[][] NON_BOOTSTRAP_CLASS_PREFIX_SET =
743          {"Lorg/jikesrvm/tools/ant/".getBytes(),
744           "Lorg/jikesrvm/tools/apt/".getBytes(),
745           "Lorg/jikesrvm/tools/template/".getBytes()};
746    
747      /**
748       * The set of class prefixes for core RVM classes.
749       * @see #isRVMDescriptor()
750       */
751      private static final byte[][] RVM_CLASS_PREFIXES =
752          {"Lorg/jikesrvm/".getBytes(), "Lorg/vmmagic/".getBytes(), "Lorg/mmtk/".getBytes()};
753    
754      /**
755       * @return true if this is a class descriptor of a bootstrap class
756       * (ie a class that must be loaded by the bootstrap class loader)
757       */
758      @Pure
759      public boolean isBootstrapClassDescriptor() {
760        non_bootstrap_outer:
761        for (final byte[] test : NON_BOOTSTRAP_CLASS_PREFIX_SET) {
762          if (test.length > val.length) continue;
763          for (int j = 0; j < test.length; j++) {
764            if (val[j] != test[j]) {
765              continue non_bootstrap_outer;
766            }
767          }
768          return false;
769        }
770        bootstrap_outer:
771        for (final byte[] test : BOOTSTRAP_CLASS_PREFIX_SET) {
772          if (test.length > val.length) continue;
773          for (int j = 0; j < test.length; j++) {
774            if (val[j] != test[j]) {
775              continue bootstrap_outer;
776            }
777          }
778          return true;
779        }
780        return false;
781      }
782    
783      /**
784       * @return true if this is a class descriptor of a RVM core class.  This is
785       * defined as one that it would be unwise to invalidate, since invalidating
786       * it might make it impossible to recompile.
787       */
788      @Pure
789      public boolean isRVMDescriptor() {
790        outer:
791        for (final byte[] test : RVM_CLASS_PREFIXES) {
792          if (test.length > val.length) continue;
793          for (int j = 0; j < test.length; j++) {
794            if (val[j] != test[j]) {
795              continue outer;
796            }
797          }
798          return true;
799        }
800        return false;
801      }
802    
803      //-------------//
804      // annotations //
805      //-------------//
806    
807      /**
808       * Create an annotation name from a class name. For example
809       * Lfoo.bar; becomes Lfoo.bar$$; NB in Sun VMs the annotation name
810       * of the first annotation is $Proxy1. Classpath may later rely on
811       * this to implement serialization correctly.
812       */
813      @Pure
814      public Atom annotationInterfaceToAnnotationClass() {
815        byte[] annotationClassName_tmp = new byte[val.length + 2];
816        System.arraycopy(val, 0, annotationClassName_tmp, 0, val.length - 1);
817        annotationClassName_tmp[val.length - 1] = '$';
818        annotationClassName_tmp[val.length] = '$';
819        annotationClassName_tmp[val.length + 1] = ';';
820        return Atom.findOrCreateUtf8Atom(annotationClassName_tmp);
821      }
822    
823      /**
824       * Create a class name from a type name. For example Lfoo.bar$$;
825       * becomes the string foo.bar
826       */
827      @Pure
828      public String annotationClassToAnnotationInterface() {
829        if (VM.VerifyAssertions) {
830          VM._assert(val.length > 0);
831          VM._assert(val[0] == 'L' && val[val.length - 1] == ';', toString());
832        }
833        return StringUtilities.asciiBytesToString(val, 1, val.length - 4).replace('/', '.');
834      }
835    
836      /**
837       * Is this an annotation class name of the form Lfoo.bar$$;
838       */
839      @Pure
840      public boolean isAnnotationClass() {
841        return (val.length > 4) && (val[val.length - 3] == '$') && (val[val.length - 2] == '$');
842      }
843    
844      //-----------//
845      // debugging //
846      //-----------//
847    
848      @Uninterruptible
849      public void sysWrite() {
850        for (int i = 0, n = val.length; i < n; ++i) {
851          VM.sysWrite((char) val[i]);
852        }
853      }
854    
855      @Uninterruptible
856      public int length() {
857        return val.length;
858      }
859    
860      /**
861       * Create atom from the key that maps to it.
862       */
863      private Atom(byte[] val, int id, String str) {
864        this.id = id;
865        this.unicodeStringOrJTOCoffset = str;
866        if ((val == null) && (id != -1)) {
867          this.val = UTF8Convert.toUTF8(str);
868        } else {
869          this.val = val;
870        }
871      }
872    
873      /*
874       * Hash table utilities
875       */
876      /**
877       * Return the hashCode of an atom, this equals the unicode string encoding of
878       * the atom
879       */
880      public int hashCode() {
881        try {
882          if (unicodeStringOrJTOCoffset != null) {
883            return toUnicodeStringInternal().hashCode();
884          } else {
885            return UTF8Convert.computeStringHashCode(val);
886          }
887        } catch (UTFDataFormatException e) {
888          return 0;
889        }
890      }
891    
892      /**
893       * Outside of this class atoms are canonical and should be compared using ==.
894       * This method is used to maintain atoms in internal hash tables and shouldn't
895       * be used externally.
896       */
897      @Pure
898      public boolean equals(Object other) {
899        // quick test as atoms are generally canonical
900        if (this == other) {
901          return true;
902        } else {
903          if (other instanceof Atom) {
904            Atom that = (Atom)other;
905            // if the atoms are well formed then their identifiers are unique
906            if ((that.id != -1) && (this.id != -1)) {
907              return that.id == this.id;
908            }
909            // one atom isn't well formed, can we do a string comparison to work out equality?
910            if ((this.unicodeStringOrJTOCoffset != null) && (that.unicodeStringOrJTOCoffset != null)) {
911              return toUnicodeStringInternal().equals(that.toUnicodeStringInternal());
912            }
913            try {
914              // perform byte by byte comparison
915              byte[] val1;
916              if (that.val != null) {
917                val1 = that.val;
918              } else {
919                val1 = UTF8Convert.toUTF8(that.toUnicodeString());
920              }
921              byte[] val2;
922              if (this.val != null) {
923                val2 = this.val;
924              } else {
925                val2 = UTF8Convert.toUTF8(toUnicodeString());
926              }
927              if (val1.length == val2.length) {
928                for (int i = 0; i < val1.length; i++) {
929                  if (val1[i] != val2[i]) return false;
930                }
931                return true;
932              }
933            } catch (UTFDataFormatException e) {
934              throw new Error("Error in UTF data encoding: ",e);
935            }
936          }
937          return false;
938        }
939      }
940    
941    
942      /**
943       * Inner class responsible for string interning. This class' initializer is
944       * run during booting.
945       */
946      private static class InternedStrings {
947        /**
948         * Look up for interned strings.
949         */
950        private static final WeakHashMap<String,WeakReference<String>> internedStrings =
951          new WeakHashMap<String,WeakReference<String>>();
952    
953        /**
954         * Find an interned string but don't create it if not found
955         * @param str string to lookup
956         * @return the interned string or null if it isn't interned
957         */
958        static synchronized String findInternedString(String str) {
959          WeakReference<String> ref;
960          ref = internedStrings.get(str);
961          if (ref != null) {
962            String s = ref.get();
963            if (s != null) {
964              return s;
965            }
966          }
967          return null;
968        }
969    
970        /**
971         * Find a string literal from an atom
972         * @param str string to find
973         * @return the string literal or null
974         */
975        static String findAtomString(String str) {
976          Atom atom = findUnicodeAtom(str);
977          if (atom != null) {
978            try {
979              return atom.toUnicodeString();
980            } catch (UTFDataFormatException e) {
981              throw new Error("Error in UTF data encoding: ", e);
982            }
983          }
984          return null;
985        }
986    
987        /**
988         * Intern a string that is not an atom or already interned string
989         * @param str string to intern
990         * @return interned string
991         */
992        static synchronized String internUnfoundString(String str) {
993          // double check string isn't found as we're holding the lock on the class
994          String s = findInternedString(str);
995          if (s != null) return s;
996          // If we get to here, then there is no interned version of the String.
997          // So we make one.
998          WeakReference<String> ref = new WeakReference<String>(str);
999          internedStrings.put(str, ref);
1000          return str;
1001        }
1002      }
1003    
1004      /**
1005       * External string intern method called from String.intern. This method should
1006       * return a canonical string encoding for the given string and this string
1007       * should also be canonical with string literals.
1008       * @param str string to intern
1009       * @return interned version of string
1010       */
1011      public static String internString(String str) {
1012        // Has the string already been interned
1013        String s = InternedStrings.findInternedString(str);
1014        if (s != null) return s;
1015    
1016        // Check to see if this is a StringLiteral:
1017        s = InternedStrings.findAtomString(str);
1018        if (s != null) return s;
1019    
1020        // Intern this string
1021        return InternedStrings.internUnfoundString(str);
1022      }
1023    }