001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.ssa;
014    
015    import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI;
016    import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH;
017    import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
018    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
019    import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE;
020    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
021    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
022    import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
023    import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
024    import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode;
025    import static org.jikesrvm.compilers.opt.ir.Operators.PHI;
026    import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP;
027    
028    import java.lang.reflect.Constructor;
029    import java.util.ArrayList;
030    import java.util.Enumeration;
031    import java.util.HashMap;
032    import java.util.HashSet;
033    import java.util.Iterator;
034    
035    import org.jikesrvm.VM;
036    import org.jikesrvm.classloader.TypeReference;
037    import org.jikesrvm.compilers.opt.DefUse;
038    import org.jikesrvm.compilers.opt.OptOptions;
039    import org.jikesrvm.compilers.opt.OptimizingCompilerException;
040    import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph;
041    import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode;
042    import org.jikesrvm.compilers.opt.controlflow.DominatorTree;
043    import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase;
044    import org.jikesrvm.compilers.opt.controlflow.LSTGraph;
045    import org.jikesrvm.compilers.opt.controlflow.LTDominators;
046    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
047    import org.jikesrvm.compilers.opt.ir.BBend;
048    import org.jikesrvm.compilers.opt.ir.BasicBlock;
049    import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
050    import org.jikesrvm.compilers.opt.ir.Binary;
051    import org.jikesrvm.compilers.opt.ir.BoundsCheck;
052    import org.jikesrvm.compilers.opt.ir.Goto;
053    import org.jikesrvm.compilers.opt.ir.GuardedUnary;
054    import org.jikesrvm.compilers.opt.ir.IR;
055    import org.jikesrvm.compilers.opt.ir.IREnumeration;
056    import org.jikesrvm.compilers.opt.ir.IfCmp;
057    import org.jikesrvm.compilers.opt.ir.Instruction;
058    import org.jikesrvm.compilers.opt.ir.Label;
059    import org.jikesrvm.compilers.opt.ir.Move;
060    import org.jikesrvm.compilers.opt.ir.NullCheck;
061    import org.jikesrvm.compilers.opt.ir.OperandEnumeration;
062    import org.jikesrvm.compilers.opt.ir.Phi;
063    import org.jikesrvm.compilers.opt.ir.Register;
064    import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand;
065    import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
066    import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
067    import org.jikesrvm.compilers.opt.ir.operand.HeapOperand;
068    import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
069    import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand;
070    import org.jikesrvm.compilers.opt.ir.operand.Operand;
071    import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
072    import org.jikesrvm.compilers.opt.util.GraphNode;
073    
074    /**
075     * This optimisation works from the outer most loop inward, optimising
076     * loops that conform to being regular {@link
077     * AnnotatedLSTNode}s. The optimisations performs the following
078     * operations:
079     *
080     * 1) Determine the bound and null checks to be eliminated. These are
081     * the ones that operate on the loop iterator. If no bound checks can
082     * be eliminated, stop optimising this loop.
083     *
084     * 2) Determine the registers defined in the loop.
085     *
086     * 3) Generate phi nodes that define the original register defined by
087     * the loop and use two newly created registers.
088     *
089     * 4) Create a version of the original loop that uses the first of the
090     * newly created registers instead of the original registers.
091     *
092     * 5) Create a second version, this time with the result of the
093     * eliminated checks set to true.
094     *
095     * 6) Work out what the maximum value for all the bounds checks are
096     * and create branches to optimal or suboptimal loops
097     *
098     * 7) Fix up the phi node predecessors
099     *
100     * 8) Remove the unoptimized loop if its redundant
101     *
102     * 9) Replace register definitions in the original loop with phi
103     * instructions
104     *
105     * 10) Compact node numbering so that CFG number of nodes reflects
106     * that some basic blocks may have been deleted
107     *
108     *
109     * Example:
110     * <listing>
111     *   for (int t1=0; t1 &lt; 100; t1++) {
112     *      g1 = null_check   l0
113     *      g2 = bounds_check l0, t1
114     *      g3 = guard_combine g1,g2
115     *      t2 = aload l0, t1, g3
116     *      g4 = null_check   l1
117     *      g5 = bounds_check l1, t1
118     *      g6 = guard_combine g4,g5
119     *           astore t2, l1, t1, g6
120     *   }
121     * </listing>
122     *
123     * becomes:
124     *
125     * <listing>
126     *   goto explicit_test_block
127     * successor_to_loops:
128     *   g1 = phi g1_1, g1_2
129     *   g2 = phi g2_1, g2_2
130     *   g3 = phi g3_1, g3_2
131     *   t2 = phi t2_1, t2_2
132     *   g4 = phi g4_1, g4_2
133     *   g5 = phi g5_1, g5_2
134     *   g6 = phi g6_1, g6_2
135     *   goto after_loop
136     * explicit_test_block:
137     *   if l0 == null (unlikely) goto sub_optimal_loop
138     *   if 100 >= l0.length (unlikely) goto sub_optimal_loop
139     *   if l1 == null (unlikely) goto sub_optimal_loop
140     *   if 100 >= l1.length (unlikely) goto sub_optimal_loop
141     *   goto optimal_loop
142     * sub_optimal_loop:
143     *   for (int t1_1=0; t1_1 &lt; 100; t1_1++) {
144     *      g1_1 = null_check   l0
145     *      g2_1 = bounds_check l0, t1_1
146     *      g3_1 = guard_combine g1_1,g2_1
147     *      t2_1 = aload l0, t1_1, g3_1
148     *      g4_1 = null_check   l1
149     *      g5_1 = bounds_check l1, t1_1
150     *      g6_1 = guard_combine g4_1,g5_1
151     *             astore t2_1, l1, t1_1, g6_1
152     *   }
153     *   goto successor_to_loops
154     * optimal_loop:
155     *   for (int t1_2=0; t1_2 &lt; 100; t1_2++) {
156     *      g1_2 = true_guard
157     *      g2_2 = true_guard
158     *      g3_2 = guard_combine g1_2,g2_2
159     *      t2_2 = aload l0, t1_2, g3_2
160     *      g4_2 = null_check   l1
161     *      g5_2 = bounds_check l1, t1_2
162     *      g6_2 = guard_combine g4_2,g5_2
163     *             astore t2_2, l1, t1_2, g6_2
164     *   }
165     *   goto successor_to_loops
166     * after_loop:
167     * </listing>
168     *
169     * The optimisation works on the Heap SSA form. A more accurate
170     * example of the transformation would be:
171     *
172     * <listing>
173     *   heap1 = ...; // previous heap state
174     *   t1_1 = 0;
175     *   if t1_1 &ge; 100 goto label2
176     *   label1:
177     *      t1_2 = phi t1_1, t1_3
178     *      heap2 = phi heap1, heap3
179     *      g1 = null_check   l0
180     *      g2 = bounds_check l0, t1_2
181     *      g3 = guard_combine g1,g2
182     *      t2 = aload l0, t1_2, g3
183     *      g4 = null_check   l1
184     *      g5 = bounds_check l1, t1_2
185     *      g6 = guard_combine g4,g5
186     *      heap3 = astore t2, l1, t1_2, g6
187     *      t1_3 = t1_2 + 1
188     *      if t1_3 &lt; 100 label1 *   label2:
189     * </listing>
190     *
191     * becomes:
192     *
193     * <listing>
194     *   heap1 = ...; // previous heap state
195     *   t1_1 = 0;
196     *   if t1_1 &ge; 100 goto label2
197     *   goto explicit_test_block
198     * successor_to_loops:
199     *   t1_2 = phi t1_2_1, t1_2_2
200     *   heap2 = phi heap2_1, heap2_2
201     *   g1 = phi g1_1, g1_2
202     *   g2 = phi g2_1, g2_2
203     *   g3 = phi g3_1, g3_2
204     *   t2 = phi t2_1, t2_2
205     *   g4 = phi g4_1, g4_2
206     *   g5 = phi g5_1, g5_2
207     *   g6 = phi g6_1, g6_2
208     *   heap3 = phi heap3_1, heap3_2
209     *   t1_3 = phi t1_3_1, t1_3_2
210     *   goto after_loop
211     * explicit_test_block:
212     *   g1_2 = if l0 == null (unlikely) goto sub_optimal_loop
213     *   g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop
214     *   g4_2 = if l1 == null (unlikely) goto sub_optimal_loop
215     *   g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop
216     *   goto optimal_loop
217     * sub_optimal_loop:
218     *   label1_1:
219     *      t1_2_1 = phi t1_1, t1_3_1
220     *      heap2_1 = phi heap1, heap3_1
221     *      g1_1 = null_check   l0
222     *      g2_1 = bounds_check l0, t1_2_1
223     *      g3_1 = guard_combine g1_1,g2_1
224     *      t2_1 = aload l0, t1_2_1, g3_1
225     *      g4_1 = null_check   l1
226     *      g5_1 = bounds_check l1, t1_2_1
227     *      g6_1 = guard_combine g4_1,g5_1
228     *      heap3_1 = astore t2_1, l1, t1_2_1, g6_1
229     *      t1_3_1 = t1_2_1 + 1
230     *      if t1_3_1 &lt; 100 label1_1
231     *   goto successor_to_loops
232     * optimal_loop:
233     *   label1_2:
234     *      t1_2_2 = phi t1_1, t1_3_2
235     *      heap2_2 = phi heap1, heap3_2
236     *      g3_2 = guard_combine g1_2,g2_2
237     *      t2_2 = aload l0, t1_2_2, g3_2
238     *      g6_2 = guard_combine g4_2,g5_2
239     *      heap3_2 = astore t2_2, l1, t1_2_2, g6_2
240     *      t1_3_2 = t1_2_2 + 1
241     *      if t1_3_2 &lt; 100 label1_2
242     *   goto successor_to_loops
243     * after_loop:
244     * label2:
245     * </listing>
246     */
247    public final class LoopVersioning extends CompilerPhase {
248      // -oO Debug variables Oo-
249      /**
250       * Flag to optionally print verbose debugging messages
251       */
252      private static final boolean DEBUG = false;
253      /**
254       * Flag to verify computed IR
255       */
256      private static final boolean VERIFY = false;
257    
258      // -oO Debug routines Oo-
259      /**
260       * Human readable report of what goes on
261       *
262       * @param s String to print
263       **/
264      private static void report(String s) {
265        if (DEBUG) {
266          VM.sysWriteln(s);
267        }
268      }
269    
270      /**
271       * Return a string name for this phase.
272       * @return "Loop Versioning"
273       */
274      @Override
275      public String getName() {
276        return "Loop Versioning";
277      }
278    
279      // -oO Variables used throughout the optimisation phase Oo-
280      /**
281       * The phi instruction operand holding the optimized loop variable
282       */
283      private static final int OPTIMIZED_LOOP_OPERAND = 0;
284      /**
285       * The phi instruction operand holding the unoptimized loop variable
286       */
287      private static final int UNOPTIMIZED_LOOP_OPERAND = 1;
288    
289      /**
290       * IR for optimisation
291       */
292      private IR ir;
293    
294      /**
295       * Set used to store the loop related register
296       */
297      private HashSet<Register> loopRegisterSet;
298    
299      /**
300       * SSA options
301       */
302      private SSAOptions desiredSSAOptions;
303      /**
304       * Compiler phases called from this one
305       */
306      private CompilerPhase enterSSA, leaveSSA, domPhase;
307      /**
308       * Run inside SSA sub-phase
309       */
310      private static final boolean inSSAphase = true;
311    
312      // -oO Interface to the rest of the compiler Oo-
313    
314      /**
315       * Constructor for this compiler phase
316       */
317      private static final Constructor<CompilerPhase> constructor =
318          getCompilerPhaseConstructor(LoopVersioning.class);
319    
320      /**
321       * Get a constructor object for this compiler phase
322       * @return compiler phase constructor
323       */
324      @Override
325      public Constructor<CompilerPhase> getClassConstructor() {
326        return constructor;
327      }
328    
329      /**
330       * Constructor
331       */
332      public LoopVersioning() {
333        desiredSSAOptions = new SSAOptions();
334        desiredSSAOptions.setScalarsOnly(true);
335        if (!inSSAphase) {
336          enterSSA = new EnterSSA();
337          leaveSSA = new LeaveSSA();
338        }
339        domPhase = new DominatorsPhase(false);
340      }
341    
342      /**
343       * Should the optimisation be performed
344       */
345      @Override
346      public boolean shouldPerform(OptOptions options) {
347        return options.SSA_LOOP_VERSIONING;
348      }
349    
350      /**
351       * The main entry point
352       *
353       * @param _ir the IR to process
354       */
355      @Override
356      public void perform(IR _ir) {
357        ir = _ir;
358    
359        // Create SSA
360        ir.desiredSSAOptions = desiredSSAOptions;
361        if (!inSSAphase) {
362          enterSSA.perform(ir);
363        }
364    
365        if (DEBUG) {
366          SSA.printInstructions(ir);
367        }
368    
369        // Perform loop annotation
370        if (!ir.hasReachableExceptionHandlers()) {
371          // Build LST tree and dominator info
372          domPhase.perform(ir);
373          DefUse.computeDU(ir);
374          // Build annotated version
375          ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree);
376        }
377        if (VERIFY) {
378          ir.verify(getName(), true);
379        }
380    
381        // Check loop annotation has been performed
382        if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) {
383          report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n");
384        } else {
385          loopRegisterSet = new HashSet<Register>();
386    
387          if (DEBUG) {
388            VM.sysWriteln(ir.getMethod().toString());
389            VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
390            SSA.printInstructions(ir);
391          }
392    
393          while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) {
394            if (DEBUG) {
395              VM.sysWriteln("Successful optimisation of " + ir.getMethod());
396              SSA.printInstructions(ir);
397              VM.sysWriteln(ir.cfg.toString());
398            }
399            // Get IR into shape for next pass
400            DefUse.computeDU(ir);
401            LTDominators.perform(ir, true, true);
402            ir.HIRInfo.dominatorTree = new DominatorTree(ir, true);
403            LSTGraph.perform(ir);
404            AnnotatedLSTGraph.perform(ir);
405    
406            if (VERIFY) {
407              ir.verify(getName(), true);
408            }
409    
410            if (DEBUG) {
411              VM.sysWriteln("after an optimization pass");
412              VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
413              SSA.printInstructions(ir);
414              VM.sysWriteln("Finish optimize: " + ir.getMethod().toString());
415            }
416          }
417          // No longer in use
418          loopRegisterSet = null;
419        }
420    
421        if (!inSSAphase) {
422          // Leave SSA
423          leaveSSA.perform(ir);
424        }
425    
426        if (VERIFY) {
427          ir.verify(getName(), true);
428        }
429      }
430    
431      // -oO Optimisation routines Oo-
432    
433      /**
434       * Find an outermost loop to optimise and optimise it. Focus on
435       * annotated regular loops, LICM should handle possible
436       * optimisation for the non-regular loops
437       *
438       * @param loop  Loop to search
439       * @return was optimisation performed
440       */
441      private boolean findLoopToOptimise(AnnotatedLSTNode loop) {
442        // Has this loop already been optimised?
443        Operand carriedLoopIterator = loop.getCarriedLoopIterator();
444        if ((carriedLoopIterator instanceof RegisterOperand) &&
445            (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) {
446          return false;
447        }
448    
449        // Process inner loops first
450        Enumeration<GraphNode> innerLoops = loop.outNodes();
451        // Iterate over loops
452        while (innerLoops.hasMoreElements()) {
453          AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement();
454          // Try to optimise inner loops first
455          if (findLoopToOptimise(nestedLoop)) {
456            // Exit early if inner loop optimisation succeeded
457            return true;
458          }
459        }
460        // Don't try to optimise irregular loops
461        if (loop.isNonRegularLoop()) {
462          return false;
463        }
464        if (DEBUG) {
465          report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod());
466          VM.sysWriteln("dominator tree:");
467          VM.sysWriteln(ir.HIRInfo.dominatorTree.toString());
468        }
469    
470        // 1) Determine the bound and null checks to be eliminated. The
471        // bound checks are the ones that operate on the loop iterator. If
472        // no checks can be eliminated, stop optimising this loop.
473        ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>();
474        getListOfChecksToEliminate(loop, checksToEliminate);
475        if (checksToEliminate.isEmpty()) {
476          return false;
477        } else {
478          // We found instructions to eliminate
479          if (DEBUG) {
480            VM.sysWriteln("Loop being optimised:");
481            VM.sysWriteln(loop.toString());
482            VM.sysWriteln("Checks to eliminate:");
483            for (Instruction instruction : checksToEliminate) {
484              VM.sysWriteln(instruction.toString());
485            }
486          }
487          // 2) Determine the registers defined in the loop.
488          ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>();
489          ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>();
490          ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>();
491          getRegistersDefinedInLoop(loop,
492                                    registersDefinedInOriginalLoop,
493                                    typesOfRegistersDefinedInOriginalLoop,
494                                    definingInstructionsInOriginalLoop);
495          if (DEBUG) {
496            VM.sysWrite("Registers in original loop:\n{");
497            for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) {
498              VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString());
499              if (definingInstructionsInOriginalLoop.get(i) != null) {
500                VM.sysWrite("(escapes),");
501              } else {
502                VM.sysWrite(",");
503              }
504            }
505            VM.sysWriteln("}");
506          }
507          // 3) Generate phi nodes that define the original register
508          // defined by the loop and use two newly created registers.
509          ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>();
510          HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>();
511          HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>();
512          generatePhiNodes(loop,
513                           registersDefinedInOriginalLoop,
514                           typesOfRegistersDefinedInOriginalLoop,
515                           phiInstructions,
516                           subOptimalRegMap,
517                           optimalRegMap);
518          if (DEBUG) {
519            VM.sysWriteln("subOptimalRegMap");
520            VM.sysWriteln(subOptimalRegMap.toString());
521            VM.sysWriteln("optimalRegMap");
522            VM.sysWriteln(optimalRegMap.toString());
523          }
524    
525          // 4) Create a version of the original loop that uses the first of
526          // the newly created registers instead of the original
527          // registers.
528          HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>();
529          HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap =
530              createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap);
531          if (DEBUG) {
532            VM.sysWriteln("subOptimalLoopMap");
533            VM.sysWriteln(unoptimizedLoopMap.toString());
534          }
535          // 5) Create a second version, this time with the result of the
536          // eliminated checks set to explicit test guards.
537          HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>();
538          HashMap<BasicBlock, BasicBlock> optimizedLoopMap =
539              createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap);
540          if (DEBUG) {
541            VM.sysWriteln("optimalLoopMap");
542            VM.sysWriteln(optimizedLoopMap.toString());
543          }
544          // 6) Work out what the maximum value for all the bounds checks
545          // are and create branches to optimal or suboptimal loops - with
546          // the unoptimized loop possibly being unreachable
547          BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
548          BasicBlock temp = (BasicBlock) loop.header.prev;
549          ir.cfg.breakCodeOrder(temp, loop.header);
550          ir.cfg.linkInCodeOrder(temp, firstBranchBlock);
551          ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header);
552          temp.redirectOuts(loop.header, firstBranchBlock, ir);
553          boolean isUnoptimizedLoopReachable =
554              createBranchBlocks(loop,
555                                 firstBranchBlock,
556                                 checksToEliminate,
557                                 unoptimizedLoopMap.get(loop.predecessor),
558                                 optimizedLoopMap.get(loop.predecessor),
559                                 optimalRegMap);
560          // 7) Fix up the phi node predecessors
561          fixUpPhiPredecessors(phiInstructions,
562                               isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null,
563                               optimizedLoopMap.get(loop.exit));
564          // 8) Remove the unoptimized loop if its redundant
565          if (!isUnoptimizedLoopReachable) {
566            removeUnoptimizedLoop(loop, unoptimizedLoopMap);
567          }
568    
569          // 9) Replace register definitions in the original
570          // loop with phi instructions
571          modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap);
572          // 10) Compact node numbering so that CFG number of nodes
573          // reflects that some basic blocks may have been deleted
574          ir.cfg.compactNodeNumbering();
575          return true;
576        }
577      }
578    
579      /**
580       * Create a list of instructions to be eliminated
581       * @param loop the loop to examine
582       * @param instrToEliminate the instructions to remove
583       */
584      private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) {
585        ArrayList<Instruction> nullChecks = new ArrayList<Instruction>();
586        ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>();
587        BasicBlockEnumeration blocks = loop.getBasicBlocks();
588        while (blocks.hasMoreElements()) {
589          BasicBlock block = blocks.next();
590          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
591          while (instructions.hasMoreElements()) {
592            Instruction instruction = instructions.next();
593            if (NullCheck.conforms(instruction)) {
594              if (loop.isInvariant(NullCheck.getRef(instruction))) {
595                instrToEliminate.add(instruction);
596                nullChecks.add(instruction);
597              }
598            } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) {
599              if (loop.isInvariant(BoundsCheck.getRef(instruction))) {
600                if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) {
601                  if (loop.isInvariant(BoundsCheck.getGuard(instruction))) {
602                    instrToEliminate.add(instruction);
603                  } else {
604                    // Null check isn't invariant but reference was, check
605                    // null check will be eliminated at end of loop
606                    oddBoundChecks.add(instruction);
607                  }
608                }
609              }
610            }
611          }
612        }
613        // Check cases where the null check isn't loop invariant, however,
614        // it will be in the optimized loop as we'll have eliminated it
615        for (Instruction oddBoundCheck : oddBoundChecks) {
616          Operand guard = BoundsCheck.getGuard(oddBoundCheck);
617          for (Instruction nullCheck : nullChecks) {
618            if (guard.similar(NullCheck.getGuardResult(nullCheck))) {
619              instrToEliminate.add(oddBoundCheck);
620              break;
621            }
622          }
623        }
624      }
625    
626      /**
627       * Get registers defined in the given loop. As we're in SSA form
628       * all register definitions must be unique.
629       * @param loop - the loop to examine
630       * @param registers - vector to which defined registers are added
631       */
632      private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers,
633                                             ArrayList<TypeReference> types,
634                                             ArrayList<Instruction> definingInstructions) {
635        BasicBlockEnumeration blocks = loop.getBasicBlocks();
636        while (blocks.hasMoreElements()) {
637          BasicBlock block = blocks.next();
638          // can value escape
639          final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit));
640          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
641          while (instructions.hasMoreElements()) {
642            Instruction instruction = instructions.next();
643            OperandEnumeration operands = instruction.getDefs();
644            while (operands.hasMoreElements()) {
645              Operand operand = operands.next();
646              if (operand.isRegister()) {
647                registers.add(operand.asRegister().getRegister());
648                types.add(operand.asRegister().getType());
649                if (escapes) {
650                  definingInstructions.add(instruction);
651                } else {
652                  definingInstructions.add(null);
653                }
654              }
655            }
656          }
657        }
658      }
659    
660      /**
661       * Generate into a new block phi nodes that define the original
662       * register defined by the loop and use two newly created
663       * registers.
664       * @param registers - vector to which defined registers need to be
665       * created registers.x used in creating phi nodes
666       * @param types - vector of corresponding types of registers.
667       * @param phiInstructions - created phi instructions
668       * @param subOptimalRegMap - mapping of orignal destination to the
669       * newly created destination for the unoptimized loop
670       * @param optimalRegMap - mapping of orignal destination to the
671       * newly created destination for the optimized loop
672       */
673      private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers,
674                                    ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions,
675                                    HashMap<Register, Register> subOptimalRegMap,
676                                    HashMap<Register, Register> optimalRegMap) {
677        // Get the carried loop iterator's register
678        Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister();
679        for (int i = 0; i < registers.size(); i++) {
680          Register register = registers.get(i);
681          TypeReference type = types.get(i);
682          Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2);
683          phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
684    
685          // new operand for optimized loop
686          Operand op0 = ir.regpool.makeTemp(type);
687          Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0);
688          optimalRegMap.put(register, op0.asRegister().getRegister());
689    
690          // new operand for unoptimized loop
691          Operand op1 = ir.regpool.makeTemp(type);
692          Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1);
693          subOptimalRegMap.put(register, op1.asRegister().getRegister());
694    
695          // Add the new created carried loop iterator registers to
696          // internal set to mark the optimized loops
697          if (register == carriedLoopIteratorRegister) {
698            setOptimizedLoop(op0.asRegister().getRegister());
699            setOptimizedLoop(op1.asRegister().getRegister());
700          }
701    
702          phiInstructions.add(phi);
703        }
704        // rename any optimized inner loops registers
705        renameOptimizedLoops(subOptimalRegMap, optimalRegMap);
706      }
707    
708      /**
709       * Create a clone of the loop replacing definitions in the cloned
710       * loop with those found in the register map
711       * @param loop - loop to clone
712       * @param regMap - mapping of original definition to new
713       * definition
714       * @return a mapping from original BBs to created BBs
715       */
716      private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop,
717                                                                      HashMap<Register, Register> regMap,
718                                                                      HashMap<Register, BasicBlock> regToBlockMap) {
719        HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
720        // After the newly created loop goto the old loop header
721        originalToCloneBBMap.put(loop.successor, loop.header);
722        // Create an empty block to be the loop predecessor
723        BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
724        ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
725        originalToCloneBBMap.put(loop.predecessor, new_pred);
726        // Create copy blocks
727        BasicBlockEnumeration blocks = loop.getBasicBlocks();
728        while (blocks.hasMoreElements()) {
729          BasicBlock block = blocks.next();
730          block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts
731          // Create copy and register mapping
732          BasicBlock copy = block.copyWithoutLinks(ir);
733          originalToCloneBBMap.put(block, copy);
734          // Link into code order
735          ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
736          // Alter register definitions and uses in copy
737          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
738          while (instructions.hasMoreElements()) {
739            Instruction instruction = instructions.next();
740            OperandEnumeration operands = instruction.getDefs();
741            while (operands.hasMoreElements()) {
742              Operand operand = operands.next();
743              if (operand.isRegister()) {
744                Register register = operand.asRegister().getRegister();
745                if (regMap.containsKey(register)) {
746                  instruction.replaceRegister(register, regMap.get(register));
747                  regToBlockMap.put(regMap.get(register), copy);
748                }
749              }
750            }
751            operands = instruction.getUses();
752            while (operands.hasMoreElements()) {
753              Operand operand = operands.next();
754              if (operand instanceof RegisterOperand) {
755                Register register = operand.asRegister().getRegister();
756                if (regMap.containsKey(register)) {
757                  instruction.replaceRegister(register, regMap.get(register));
758                }
759              }
760            }
761          }
762        }
763        // Fix up outs
764        // loop predecessor
765        new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
766        // loop blocks
767        blocks = loop.getBasicBlocks();
768        while (blocks.hasMoreElements()) {
769          BasicBlock block = blocks.next();
770          BasicBlock copy = originalToCloneBBMap.get(block);
771          Enumeration<BasicBlock> outs = block.getOutNodes();
772          while (outs.hasMoreElements()) {
773            BasicBlock out = outs.nextElement();
774            if (originalToCloneBBMap.containsKey(out)) {
775              copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
776            }
777          }
778        }
779        // Fix up phis
780        blocks = loop.getBasicBlocks();
781        while (blocks.hasMoreElements()) {
782          BasicBlock block = blocks.next();
783          BasicBlock copy = originalToCloneBBMap.get(block);
784          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
785          while (instructions.hasMoreElements()) {
786            Instruction instruction = instructions.next();
787            if (Phi.conforms(instruction)) {
788              for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
789                BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
790                if (originalToCloneBBMap.containsKey(phi_predecessor)) {
791                  Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
792                } else {
793                  dumpIR(ir, "Error when optimising" + ir.getMethod());
794                  throw new Error("There's > 1 route to this phi node " +
795                                  instruction +
796                                  " from outside the loop: " +
797                                  phi_predecessor);
798                }
799              }
800            }
801          }
802        }
803        return originalToCloneBBMap;
804      }
805    
806      /**
807       * Create a clone of the loop replacing definitions in the cloned
808       * loop with those found in the register map and eliminate
809       * unnecessary bound checks
810       * @param loop - loop to clone
811       * @param regMap - mapping of original definition to new
812       * definition
813       * @param instrToEliminate - instructions to eliminate
814       * @param regToBlockMap - mapping of a register to its defining BB
815       * @return a mapping from original BBs to created BBs
816       */
817      private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop,
818                                                                          HashMap<Register, Register> regMap,
819                                                                          ArrayList<Instruction> instrToEliminate,
820                                                                          HashMap<Register, BasicBlock> regToBlockMap) {
821        HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
822        // After the newly created loop goto the old loop header
823        originalToCloneBBMap.put(loop.successor, loop.header);
824        // Create an empty block to be the loop predecessor
825        BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
826        ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
827        originalToCloneBBMap.put(loop.predecessor, new_pred);
828    
829        // Create copy blocks
830        BasicBlockEnumeration blocks = loop.getBasicBlocks();
831        while (blocks.hasMoreElements()) {
832          BasicBlock block = blocks.next();
833          // N.B. fall through will have been killed by unoptimized loop
834          // Create copy and register mapping
835          BasicBlock copy = block.copyWithoutLinks(ir);
836          originalToCloneBBMap.put(block, copy);
837          // Link into code order
838          ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
839    
840          // Alter register definitions in copy
841          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
842          loop_over_created_instructions:
843          while (instructions.hasMoreElements()) {
844            Instruction instruction = instructions.next();
845            if (BoundsCheck.conforms(instruction)) {
846              for (Instruction anInstrToEliminate : instrToEliminate) {
847                if (instruction.similar(anInstrToEliminate)) {
848                  instruction.remove();
849                  continue loop_over_created_instructions;
850                }
851              }
852            } else if (NullCheck.conforms(instruction)) {
853              for (Instruction anInstrToEliminate : instrToEliminate) {
854                if (instruction.similar(anInstrToEliminate)) {
855                  instruction.remove();
856                  continue loop_over_created_instructions;
857                }
858              }
859            }
860            OperandEnumeration operands = instruction.getDefs();
861            while (operands.hasMoreElements()) {
862              Operand operand = operands.next();
863              if (operand instanceof RegisterOperand) {
864                Register register = operand.asRegister().getRegister();
865                if (regMap.containsKey(register)) {
866                  instruction.replaceRegister(register, regMap.get(register));
867                  regToBlockMap.put(regMap.get(register), copy);
868                }
869              }
870            }
871            operands = instruction.getUses();
872            while (operands.hasMoreElements()) {
873              Operand operand = operands.next();
874              if (operand.isRegister()) {
875                Register register = operand.asRegister().getRegister();
876                if (regMap.containsKey(register)) {
877                  instruction.replaceRegister(register, regMap.get(register));
878                }
879              }
880            }
881          }
882        }
883        // Fix up outs
884        // loop predecessor
885        new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
886        blocks = loop.getBasicBlocks();
887        while (blocks.hasMoreElements()) {
888          BasicBlock block = blocks.next();
889          BasicBlock copy = originalToCloneBBMap.get(block);
890          Enumeration<BasicBlock> outs = block.getOutNodes();
891          while (outs.hasMoreElements()) {
892            BasicBlock out = outs.nextElement();
893            if (originalToCloneBBMap.containsKey(out)) {
894              copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
895            }
896          }
897        }
898        // Fix up phis
899        blocks = loop.getBasicBlocks();
900        while (blocks.hasMoreElements()) {
901          BasicBlock block = blocks.next();
902          BasicBlock copy = originalToCloneBBMap.get(block);
903          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
904          while (instructions.hasMoreElements()) {
905            Instruction instruction = instructions.next();
906            if (Phi.conforms(instruction)) {
907              for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
908                BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
909                if (originalToCloneBBMap.containsKey(phi_predecessor)) {
910                  Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
911                } else {
912                  throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor);
913                }
914              }
915            }
916          }
917        }
918        return originalToCloneBBMap;
919      }
920    
921      /**
922       * When phi nodes were generated the basic blocks weren't known for
923       * the predecessors, fix this up now. (It may also not be possible
924       * to reach the unoptimized loop any more)
925       */
926      private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit,
927                                        BasicBlock optimizedLoopExit) {
928        if (unoptimizedLoopExit != null) {
929          for (Instruction instruction : phiInstructions) {
930            Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
931            Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit));
932          }
933        } else {
934          for (Instruction instruction : phiInstructions) {
935            Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND);
936            Phi.resizeNumberOfPreds(instruction, 1);
937            Phi.resizeNumberOfValues(instruction, 1);
938            Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand);
939            Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
940          }
941        }
942      }
943    
944      /**
945       * Create the block containing explict branches to either the
946       * optimized or unoptimized loops
947       * @param optimalRegMap - mapping used to map eliminated bound and
948       * null check guards to
949       */
950      private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block,
951                                         ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry,
952                                         BasicBlock optimizedLoopEntry,
953                                         HashMap<Register, Register> optimalRegMap) {
954        BasicBlock blockOnEntry = block;
955        // 1) generate null check guards
956        block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry);
957    
958        // 2) generate bound check guards
959        if (loop.isMonotonic()) {
960          // create new operands for values beyond initial and terminal iterator values
961          Operand terminal;
962          Operand terminalLessStrideOnce;
963          Operand terminalPlusStrideOnce;
964    
965          // NB. precomputing these makes life easier and the code easier to read,
966          //     it does create dead code though
967          if (loop.terminalIteratorValue.isIntConstant()) {
968            terminal = loop.terminalIteratorValue;
969            int terminalAsInt = terminal.asIntConstant().value;
970            int stride = loop.strideValue.asIntConstant().value;
971            terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride);
972            terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride);
973          } else {
974            Instruction tempInstr;
975            terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue);
976            terminalLessStrideOnce = ir.regpool.makeTempInt();
977            terminalPlusStrideOnce = ir.regpool.makeTempInt();
978            tempInstr =
979                Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
980            tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
981            block.appendInstruction(tempInstr);
982            DefUse.updateDUForNewInstruction(tempInstr);
983    
984            tempInstr =
985                Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
986            tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
987            block.appendInstruction(tempInstr);
988            DefUse.updateDUForNewInstruction(tempInstr);
989          }
990    
991          // Determine maximum and minimum index values for different loop types
992          Operand phiMinIndexValue;
993          Operand phiMaxIndexValue;
994    
995          if (loop.isMonotonicIncreasing()) {
996            phiMinIndexValue = loop.initialIteratorValue;
997            if ((loop.condition.isLESS() ||
998                 loop.condition.isLOWER() ||
999                 loop.condition.isNOT_EQUAL())) {
1000              phiMaxIndexValue = terminal;
1001            } else if ((loop.condition.isLESS_EQUAL() ||
1002                        loop.condition.isLOWER_EQUAL() ||
1003                        loop.condition.isEQUAL())) {
1004              phiMaxIndexValue = terminalPlusStrideOnce;
1005            } else {
1006              throw new Error("Unrecognised loop for fission " + loop);
1007            }
1008          } else if (loop.isMonotonicDecreasing()) {
1009            phiMaxIndexValue = loop.initialIteratorValue;
1010            if ((loop.condition.isGREATER() ||
1011                 loop.condition.isHIGHER() ||
1012                 loop.condition.isNOT_EQUAL())) {
1013              phiMinIndexValue = terminalPlusStrideOnce;
1014            } else if ((loop.condition.isGREATER_EQUAL() ||
1015                        loop.condition.isHIGHER_EQUAL() ||
1016                        loop.condition.isEQUAL())) {
1017              phiMinIndexValue = terminalLessStrideOnce;
1018            } else {
1019              throw new Error("Unrecognised loop for fission " + loop);
1020            }
1021          } else {
1022            throw new Error("Unrecognised loop for fission " + loop);
1023          }
1024          // Generate tests
1025          for (int i = 0; i < checksToEliminate.size(); i++) {
1026            Instruction instr = checksToEliminate.get(i);
1027            if (BoundsCheck.conforms(instr)) {
1028              // Have we already generated these tests?
1029              boolean alreadyChecked = false;
1030              for (int j = 0; j < i; j++) {
1031                Instruction old_instr = checksToEliminate.get(j);
1032                if (BoundsCheck.conforms(old_instr) &&
1033                    (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) &&
1034                    (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) {
1035                  // yes - just create a guard move
1036                  alreadyChecked = true;
1037                  RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO();
1038                  guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1039                  RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO();
1040                  guardSource.setRegister(optimalRegMap.get(guardSource.getRegister()));
1041                  Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource);
1042                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1043                  block.appendInstruction(tempInstr);
1044                  break;
1045                }
1046              }
1047              if (!alreadyChecked) {
1048                // no - generate tests
1049                Operand index = BoundsCheck.getIndex(instr);
1050                int distance = loop.getFixedDistanceFromPhiIterator(index);
1051                if (distance == 0) {
1052                  block =
1053                      generateExplicitBoundCheck(instr,
1054                                                 phiMinIndexValue,
1055                                                 phiMaxIndexValue,
1056                                                 optimalRegMap,
1057                                                 block,
1058                                                 unoptimizedLoopEntry);
1059                } else {
1060                  Instruction tempInstr;
1061                  RegisterOperand minIndex = ir.regpool.makeTempInt();
1062                  RegisterOperand maxIndex = ir.regpool.makeTempInt();
1063    
1064                  tempInstr =
1065                      Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance));
1066                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1067                  block.appendInstruction(tempInstr);
1068                  DefUse.updateDUForNewInstruction(tempInstr);
1069    
1070                  tempInstr =
1071                      Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance));
1072                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1073                  block.appendInstruction(tempInstr);
1074                  DefUse.updateDUForNewInstruction(tempInstr);
1075    
1076                  block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry);
1077                }
1078              }
1079            }
1080          }
1081        }
1082        // Have we had to create a new basic block since entry => we
1083        // generated a branch to the unoptimized loop
1084        boolean isUnoptimizedLoopReachable = (blockOnEntry != block);
1085        // 3) Finish up with goto and generate true guard value
1086        {
1087          Instruction branch; // the generated branch instruction
1088          branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget());
1089          branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1090          block.appendInstruction(branch);
1091          block.deleteNormalOut();
1092          block.insertOut(optimizedLoopEntry);
1093        }
1094        return isUnoptimizedLoopReachable;
1095      }
1096    
1097      /**
1098       * Generate null check branch blocks
1099       *
1100       * @param loop the current loop where checks are being eliminated
1101       * @param checksToEliminate all of the checks that are being eliminated in the pass
1102       * @param optimalRegMap a map from original register to the register used in the optimal loop
1103       * @param block the block to generate code into
1104       * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1105       * @return the new block to generate code into
1106       */
1107      private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop,
1108                                                           ArrayList<Instruction> checksToEliminate,
1109                                                           HashMap<Register, Register> optimalRegMap,
1110                                                           BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1111        // Map of already generated null check references to their
1112        // corresponding guard result
1113        HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>();
1114        // Iterate over checks
1115        for (Instruction instr : checksToEliminate) {
1116          // Is this a null check
1117          if (NullCheck.conforms(instr)) {
1118            // the generated branch instruction
1119            Instruction branch;
1120            // the reference to compare
1121            Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr));
1122            // the guard result to define
1123            RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO();
1124            guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1125            // check if we've generated this test already
1126            if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) {
1127              // yes - generate just a guard move
1128              branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy());
1129              branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1130              block.appendInstruction(branch);
1131            } else {
1132              // check if we can just move a guard from the loop predecessors
1133              RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr);
1134              if (guard != null) {
1135                // yes - generate just a guard move
1136                branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO());
1137                branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1138                block.appendInstruction(branch);
1139              } else {
1140                // generate explicit null test
1141                branch =
1142                    IfCmp.create(REF_IFCMP,
1143                                 guardResult,
1144                                 ref.copy(),
1145                                 new NullConstantOperand(),
1146                                 ConditionOperand.EQUAL(),
1147                                 unoptimizedLoopEntry.makeJumpTarget(),
1148                                 BranchProfileOperand.unlikely());
1149                if (ref.isRegister()) {
1150                  refToGuardMap.put(ref.asRegister().getRegister(), guardResult);
1151                }
1152                branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1153                block.appendInstruction(branch);
1154                // Adjust block
1155                block.insertOut(unoptimizedLoopEntry);
1156                BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1157                BasicBlock temp = (BasicBlock) block.next;
1158                ir.cfg.breakCodeOrder(block, temp);
1159                ir.cfg.linkInCodeOrder(block, new_block);
1160                ir.cfg.linkInCodeOrder(new_block, temp);
1161                block.insertOut(new_block);
1162                block = new_block;
1163              }
1164            }
1165          }
1166        }
1167        return block;
1168      }
1169    
1170      /**
1171       * Generate bound check branch blocks
1172       *
1173       * @param boundCheckInstr the bound check instruction in question
1174       * @param minIndexValue the min value for an iterator a loop will generate
1175       * @param maxIndexValue the max value for an iterator a loop will generate
1176       * @param optimalRegMap a map from original register to the register used in the optimal loop
1177       * @param block the block to generate code into
1178       * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1179       * @return the new block to generate code into
1180       */
1181      private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue,
1182                                                        Operand maxIndexValue,
1183                                                        HashMap<Register, Register> optimalRegMap,
1184                                                        BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1185        // 1) Work out what tests are necessary. NB we don't optimise for
1186        // the case when exceptions will always be generated
1187        boolean lowerBoundTestRedundant;
1188        boolean upperBoundTestRedundant;
1189        {
1190          // as array lengths must be >= 0 the lower bound test is not
1191          // necessary if:
1192          // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant)
1193          lowerBoundTestRedundant =
1194              ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) ||
1195               ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) &&
1196                (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0)));
1197          // as the upper bound must be <= arraylength the test is not
1198          // necessary if:
1199          // maxIndexValue = (arraylength A) - zeroOrPositiveConstant
1200          Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue);
1201          upperBoundTestRedundant =
1202              ((maxIndexArrayLengthRef != null) &&
1203               maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) &&
1204               (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0));
1205        }
1206    
1207        // 2) Create explicit bound check
1208    
1209        // register to hold result (NB it's a guard for the optimal loop)
1210        RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO();
1211        guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1212    
1213        // the guard on the bound check (mapped from the optimal loop as
1214        // it should already have been generated or may already be out of
1215        // the loop)
1216        Operand origGuard = BoundsCheck.getGuard(boundCheckInstr);
1217        Operand guard = origGuard.copy();
1218        if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) {
1219          guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister()));
1220        }
1221    
1222        if (lowerBoundTestRedundant && upperBoundTestRedundant) {
1223          // both tests redundant so just generate a guard move of the
1224          // bound check guard
1225          Instruction move = Move.create(GUARD_MOVE, guardResult, guard);
1226          move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1227          block.appendInstruction(move);
1228        } else {
1229          // 2.1) Create array length
1230          RegisterOperand array_length = ir.regpool.makeTempInt();
1231          Instruction array_length_instr =
1232              GuardedUnary.create(ARRAYLENGTH,
1233                                  array_length,
1234                                  AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(),
1235                                  guard);
1236          array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1237          block.appendInstruction(array_length_instr);
1238    
1239          // 2.2) Create minimum index test unless test is redundant
1240          if (!lowerBoundTestRedundant) {
1241            RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation();
1242            // Generate bound check
1243            Instruction branch =
1244                IfCmp.create(INT_IFCMP,
1245                             lowerBoundGuard,
1246                             minIndexValue.copy(),
1247                             array_length.copyRO(),
1248                             ConditionOperand.LESS(),
1249                             unoptimizedLoopEntry.makeJumpTarget(),
1250                             BranchProfileOperand.unlikely());
1251            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1252            block.appendInstruction(branch);
1253            // Adjust block
1254            block.insertOut(unoptimizedLoopEntry);
1255            BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1256            BasicBlock temp = (BasicBlock) block.next;
1257            ir.cfg.breakCodeOrder(block, temp);
1258            ir.cfg.linkInCodeOrder(block, new_block);
1259            ir.cfg.linkInCodeOrder(new_block, temp);
1260            block.insertOut(new_block);
1261            block = new_block;
1262          }
1263          // 2.3) Create maximum index test
1264          if (!upperBoundTestRedundant) {
1265            Instruction branch =
1266                IfCmp.create(INT_IFCMP,
1267                             guardResult,
1268                             maxIndexValue.copy(),
1269                             array_length.copyRO(),
1270                             ConditionOperand.GREATER(),
1271                             unoptimizedLoopEntry.makeJumpTarget(),
1272                             BranchProfileOperand.unlikely());
1273            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1274            block.appendInstruction(branch);
1275            // Adjust block
1276            block.insertOut(unoptimizedLoopEntry);
1277            BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1278            BasicBlock temp = (BasicBlock) block.next;
1279            ir.cfg.breakCodeOrder(block, temp);
1280            ir.cfg.linkInCodeOrder(block, new_block);
1281            ir.cfg.linkInCodeOrder(new_block, temp);
1282            block.insertOut(new_block);
1283            block = new_block;
1284          }
1285        }
1286        return block;
1287      }
1288    
1289      /**
1290       * Can we eliminate a null check as it has lready been performed?
1291       * NB SSA guarantees that if a value is null it must always be null
1292       *
1293       * @param instr null check instruction
1294       */
1295      private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) {
1296        if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr));
1297        BasicBlock block = header;
1298        do {
1299          block = ir.HIRInfo.dominatorTree.getParent(block);
1300          Instruction lastInst = block.lastInstruction();
1301          for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst =
1302              itrInst.nextInstructionInCodeOrder()) {
1303            if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) {
1304              return NullCheck.getGuardResult(itrInst);
1305            }
1306          }
1307        } while (block != ir.cfg.entry());
1308        return null;
1309      }
1310    
1311      /**
1312       * Get the array length reference ignoring instructions that adjust
1313       * its result by a fixed amount
1314       *
1315       * @param op operand to chase arraylength opcode to
1316       * constant value from an array length
1317       */
1318      private Operand getConstantAdjustedArrayLengthRef(Operand op) {
1319        Operand result = null;
1320        if (op.isRegister()) {
1321          Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1322          if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1323            result = GuardedUnary.getVal(opInstr);
1324          } else if ((opInstr.operator.opcode == INT_ADD_opcode) || (opInstr.operator.opcode == INT_SUB_opcode)) {
1325            Operand val1 = Binary.getVal1(opInstr);
1326            Operand val2 = Binary.getVal2(opInstr);
1327            if (val1.isConstant()) {
1328              result = getConstantAdjustedArrayLengthRef(val2);
1329            } else if (val2.isConstant()) {
1330              result = getConstantAdjustedArrayLengthRef(val1);
1331            }
1332          }
1333        }
1334        return result;
1335      }
1336    
1337      /**
1338       * Get the distance from an array length by addding up instructions
1339       * that adjust the array length result by a constant amount
1340       *
1341       * @param op operand to chase arraylength opcode to
1342       */
1343      private int getConstantAdjustedArrayLengthDistance(Operand op) {
1344        Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1345        if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1346          return 0;
1347        } else if (opInstr.operator.opcode == INT_ADD_opcode) {
1348          Operand val1 = Binary.getVal1(opInstr);
1349          Operand val2 = Binary.getVal2(opInstr);
1350          if (val1.isConstant()) {
1351            return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2);
1352          } else {
1353            VM._assert(val2.isConstant());
1354            return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value;
1355          }
1356        } else if (opInstr.operator.opcode == INT_SUB_opcode) {
1357          Operand val1 = Binary.getVal1(opInstr);
1358          Operand val2 = Binary.getVal2(opInstr);
1359          if (val1.isConstant()) {
1360            return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2);
1361          } else {
1362            VM._assert(val2.isConstant());
1363            return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value;
1364          }
1365        } else {
1366          throw new Error("Unexpected opcode when computing distance " + op);
1367        }
1368      }
1369    
1370      /**
1371       * Remove loop and replace register definitions in the original loop
1372       * with phi instructions
1373       */
1374      private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions,
1375                                      ArrayList<Instruction> definingInstrInOriginalLoop,
1376                                      HashMap<Register, Register> subOptimalRegMap,
1377                                      HashMap<Register, Register> optimalRegMap) {
1378        // Remove instructions from loop header and exit, remove other
1379        // loop body blocks
1380        BasicBlockEnumeration blocks = loop.getBasicBlocks();
1381        while (blocks.hasMoreElements()) {
1382          BasicBlock block = blocks.next();
1383          if ((block == loop.header) || (block == loop.exit)) {
1384            IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
1385            while (instructions.hasMoreElements()) {
1386              Instruction instruction = instructions.next();
1387              if (!BBend.conforms(instruction) && !Label.conforms(instruction)) {
1388                instruction.remove();
1389              }
1390            }
1391          } else {
1392            ir.cfg.removeFromCFGAndCodeOrder(block);
1393          }
1394        }
1395    
1396        // Place phi instructions in loop header
1397        for (int i = 0; i < phiInstructions.size(); i++) {
1398          Instruction origInstr = definingInstrInOriginalLoop.get(i);
1399          // Did the original instructions value escape the loop?
1400          if (origInstr != null) {
1401            // Was this a phi of a phi?
1402            if (Phi.conforms(origInstr)) {
1403              Instruction phi = phiInstructions.get(i);
1404              boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2;
1405              // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header
1406              boolean fixed = false;
1407              for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) {
1408                BasicBlockOperand predOp = Phi.getPred(origInstr, index);
1409                // Only worry about values who are on the backward branch
1410                if (predOp.block == loop.exit) {
1411                  if (fixed) { // We've tried to do 2 replaces => something wrong
1412                    SSA.printInstructions(ir);
1413                    OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1414                                                                "Phi node in loop header with multiple in loop predecessors");
1415                  }
1416                  Operand rval = Phi.getValue(origInstr, index);
1417                  if (rval.isRegister()) {
1418                    // Sort out registers
1419                    Register origRegPhiRval = rval.asRegister().getRegister();
1420                    Register subOptRegPhiRval;
1421                    Register optRegPhiRval;
1422                    if (!subOptimalRegMap.containsKey(origRegPhiRval)) {
1423                      // Register comes from loop exit but it wasn't defined in the loop
1424                      subOptRegPhiRval = origRegPhiRval;
1425                      optRegPhiRval = origRegPhiRval;
1426                    } else {
1427                      subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval);
1428                      optRegPhiRval = optimalRegMap.get(origRegPhiRval);
1429                    }
1430                    if (phiHasUnoptimizedArg) {
1431                      Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval);
1432                    }
1433                    Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval);
1434                  } else if (rval.isConstant()) {
1435                    // Sort out constants
1436                    if (phiHasUnoptimizedArg) {
1437                      Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy());
1438                    }
1439                    Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy());
1440                  } else if (rval instanceof HeapOperand) {
1441                    // Sort out heap variables
1442                    @SuppressWarnings("unchecked") // Cast to generic type
1443                        HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value;
1444                    HeapVariable<Object> subOptPhiRval;
1445                    HeapVariable<Object> optPhiRval;
1446                    if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) {
1447                      // currently we only expect to optimise scalar SSA
1448                      // form
1449                      subOptPhiRval = origPhiRval;
1450                      optPhiRval = origPhiRval;
1451                    } else {
1452                      /*
1453                      subOptPhiRval   = (HeapVariable)subOptimalRegMap.get(origPhiRval);
1454                      optPhiRval      = (HeapVariable)optimalRegMap.get(origPhiRval);
1455                      */
1456                    }
1457                    if (phiHasUnoptimizedArg) {
1458                      Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval));
1459                    }
1460                    Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval));
1461                  } else {
1462                    OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1463                                                                "Unknown operand type",
1464                                                                rval.toString());
1465                  }
1466                  fixed = true;
1467                }
1468              }
1469            }
1470            // Add back to loop
1471            loop.header.appendInstruction(phiInstructions.get(i));
1472          }
1473        }
1474        // Remove original loop and branch to loop successor
1475        Instruction tempInstr;
1476        if (loop.header != loop.exit) {
1477          tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget());
1478          tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1479          loop.header.appendInstruction(tempInstr);
1480          loop.header.deleteNormalOut();
1481          loop.header.insertOut(loop.exit);
1482    
1483        }
1484        tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget());
1485        tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1486        loop.exit.appendInstruction(tempInstr);
1487        loop.exit.deleteNormalOut();
1488        loop.exit.insertOut(loop.successor);
1489      }
1490    
1491      /**
1492       * Remove unreachable unoptimized loop
1493       */
1494      private void removeUnoptimizedLoop(AnnotatedLSTNode loop,
1495                                         HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) {
1496        BasicBlockEnumeration blocks = loop.getBasicBlocks();
1497        report("removing unoptimized loop");
1498        BasicBlock block = unoptimizedLoopMap.get(loop.predecessor);
1499        report("removing block " + block);
1500        ir.cfg.removeFromCFGAndCodeOrder(block);
1501        while (blocks.hasMoreElements()) {
1502          block = unoptimizedLoopMap.get(blocks.next());
1503          if (!loop.contains(block)) {
1504            report("removing block " + block);
1505            ir.cfg.removeFromCFGAndCodeOrder(block);
1506          } else {
1507            report("not removing block that's in the original loop" + block);
1508          }
1509        }
1510      }
1511    
1512      /**
1513       * Put the optimized loop's iterator register into the hash set
1514       *
1515       * @param reg register
1516       */
1517      private void setOptimizedLoop(Register reg) {
1518        loopRegisterSet.add(reg);
1519      }
1520    
1521      /**
1522       * Check whether the loop that contain such iterator register had
1523       * been optimized
1524       *
1525       * @param reg register
1526       * @return the test result
1527       */
1528      private boolean isOptimizedLoop(Register reg) {
1529        return loopRegisterSet.contains(reg);
1530      }
1531    
1532      /**
1533       * Rename the iterators for optimized loops so we can tell they are still optimized
1534       */
1535      private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap,
1536                                        HashMap<Register, Register> optimalRegMap) {
1537        Iterator<Register> itr = loopRegisterSet.iterator();
1538        while (itr.hasNext()) {
1539          Register reg = itr.next();
1540          if (subOptimalRegMap.containsKey(reg)) {
1541            loopRegisterSet.remove(reg);
1542            loopRegisterSet.add(subOptimalRegMap.get(reg));
1543            loopRegisterSet.add(optimalRegMap.get(reg));
1544            itr = loopRegisterSet.iterator();
1545          }
1546        }
1547      }
1548    }