001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.compilers.opt.ssa;
014
015import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI;
016import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH;
017import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
018import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
019import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE;
020import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
021import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
022import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
023import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
024import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode;
025import static org.jikesrvm.compilers.opt.ir.Operators.PHI;
026import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP;
027
028import java.lang.reflect.Constructor;
029import java.util.ArrayList;
030import java.util.Enumeration;
031import java.util.HashMap;
032import java.util.HashSet;
033import java.util.Iterator;
034
035import org.jikesrvm.VM;
036import org.jikesrvm.classloader.TypeReference;
037import org.jikesrvm.compilers.opt.DefUse;
038import org.jikesrvm.compilers.opt.OptOptions;
039import org.jikesrvm.compilers.opt.OptimizingCompilerException;
040import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph;
041import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode;
042import org.jikesrvm.compilers.opt.controlflow.DominatorTree;
043import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase;
044import org.jikesrvm.compilers.opt.controlflow.LSTGraph;
045import org.jikesrvm.compilers.opt.controlflow.LTDominators;
046import org.jikesrvm.compilers.opt.driver.CompilerPhase;
047import org.jikesrvm.compilers.opt.ir.BBend;
048import org.jikesrvm.compilers.opt.ir.BasicBlock;
049import org.jikesrvm.compilers.opt.ir.Binary;
050import org.jikesrvm.compilers.opt.ir.BoundsCheck;
051import org.jikesrvm.compilers.opt.ir.Goto;
052import org.jikesrvm.compilers.opt.ir.GuardedUnary;
053import org.jikesrvm.compilers.opt.ir.IR;
054import org.jikesrvm.compilers.opt.ir.IREnumeration;
055import org.jikesrvm.compilers.opt.ir.IfCmp;
056import org.jikesrvm.compilers.opt.ir.Instruction;
057import org.jikesrvm.compilers.opt.ir.Label;
058import org.jikesrvm.compilers.opt.ir.Move;
059import org.jikesrvm.compilers.opt.ir.NullCheck;
060import org.jikesrvm.compilers.opt.ir.Phi;
061import org.jikesrvm.compilers.opt.ir.Register;
062import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand;
063import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
064import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
065import org.jikesrvm.compilers.opt.ir.operand.HeapOperand;
066import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
067import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand;
068import org.jikesrvm.compilers.opt.ir.operand.Operand;
069import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
070import org.jikesrvm.compilers.opt.util.GraphNode;
071
072/**
073 * This optimisation works from the outer most loop inward, optimising
074 * loops that conform to being regular {@link
075 * AnnotatedLSTNode}s. The optimisations performs the following
076 * operations:
077 * <ul>
078 * <li>
079 * 1) Determine the bound and null checks to be eliminated. These are
080 * the ones that operate on the loop iterator. If no bound checks can
081 * be eliminated, stop optimising this loop.
082 * <li>
083 * 2) Determine the registers defined in the loop.
084 * <li>
085 * 3) Generate phi nodes that define the original register defined by
086 * the loop and use two newly created registers.
087 * <li>
088 * 4) Create a version of the original loop that uses the first of the
089 * newly created registers instead of the original registers.
090 * <li>
091 * 5) Create a second version, this time with the result of the
092 * eliminated checks set to true.
093 * <li>
094 * 6) Work out what the maximum value for all the bounds checks are
095 * and create branches to optimal or suboptimal loops
096 * <li>
097 * 7) Fix up the phi node predecessors
098 * <li>
099 * 8) Remove the unoptimized loop if its redundant
100 * <li>
101 * 9) Replace register definitions in the original loop with phi
102 * instructions
103 * <li>
104 * 10) Compact node numbering so that CFG number of nodes reflects
105 * that some basic blocks may have been deleted
106 * </ul>
107 * <p>
108 * Example:
109 * <pre>
110 *   for (int t1=0; t1 &lt; 100; t1++) {
111 *      g1 = null_check   l0
112 *      g2 = bounds_check l0, t1
113 *      g3 = guard_combine g1,g2
114 *      t2 = aload l0, t1, g3
115 *      g4 = null_check   l1
116 *      g5 = bounds_check l1, t1
117 *      g6 = guard_combine g4,g5
118 *           astore t2, l1, t1, g6
119 *   }
120 * </pre>
121 *
122 * becomes:
123 *
124 * <pre>
125 *   goto explicit_test_block
126 * successor_to_loops:
127 *   g1 = phi g1_1, g1_2
128 *   g2 = phi g2_1, g2_2
129 *   g3 = phi g3_1, g3_2
130 *   t2 = phi t2_1, t2_2
131 *   g4 = phi g4_1, g4_2
132 *   g5 = phi g5_1, g5_2
133 *   g6 = phi g6_1, g6_2
134 *   goto after_loop
135 * explicit_test_block:
136 *   if l0 == null (unlikely) goto sub_optimal_loop
137 *   if 100 &gt;= l0.length (unlikely) goto sub_optimal_loop
138 *   if l1 == null (unlikely) goto sub_optimal_loop
139 *   if 100 &gt;= l1.length (unlikely) goto sub_optimal_loop
140 *   goto optimal_loop
141 * sub_optimal_loop:
142 *   for (int t1_1=0; t1_1 &lt; 100; t1_1++) {
143 *      g1_1 = null_check   l0
144 *      g2_1 = bounds_check l0, t1_1
145 *      g3_1 = guard_combine g1_1,g2_1
146 *      t2_1 = aload l0, t1_1, g3_1
147 *      g4_1 = null_check   l1
148 *      g5_1 = bounds_check l1, t1_1
149 *      g6_1 = guard_combine g4_1,g5_1
150 *             astore t2_1, l1, t1_1, g6_1
151 *   }
152 *   goto successor_to_loops
153 * optimal_loop:
154 *   for (int t1_2=0; t1_2 &lt; 100; t1_2++) {
155 *      g1_2 = true_guard
156 *      g2_2 = true_guard
157 *      g3_2 = guard_combine g1_2,g2_2
158 *      t2_2 = aload l0, t1_2, g3_2
159 *      g4_2 = null_check   l1
160 *      g5_2 = bounds_check l1, t1_2
161 *      g6_2 = guard_combine g4_2,g5_2
162 *             astore t2_2, l1, t1_2, g6_2
163 *   }
164 *   goto successor_to_loops
165 * after_loop:
166 * </pre>
167 *
168 * The optimisation works on the Heap SSA form. A more accurate
169 * example of the transformation would be:
170 *
171 * <pre>
172 *   heap1 = ...; // previous heap state
173 *   t1_1 = 0;
174 *   if t1_1 &ge; 100 goto label2
175 *   label1:
176 *      t1_2 = phi t1_1, t1_3
177 *      heap2 = phi heap1, heap3
178 *      g1 = null_check   l0
179 *      g2 = bounds_check l0, t1_2
180 *      g3 = guard_combine g1,g2
181 *      t2 = aload l0, t1_2, g3
182 *      g4 = null_check   l1
183 *      g5 = bounds_check l1, t1_2
184 *      g6 = guard_combine g4,g5
185 *      heap3 = astore t2, l1, t1_2, g6
186 *      t1_3 = t1_2 + 1
187 *      if t1_3 &lt; 100 label1 *   label2:
188 * </pre>
189 *
190 * becomes:
191 *
192 * <pre>
193 *   heap1 = ...; // previous heap state
194 *   t1_1 = 0;
195 *   if t1_1 &ge; 100 goto label2
196 *   goto explicit_test_block
197 * successor_to_loops:
198 *   t1_2 = phi t1_2_1, t1_2_2
199 *   heap2 = phi heap2_1, heap2_2
200 *   g1 = phi g1_1, g1_2
201 *   g2 = phi g2_1, g2_2
202 *   g3 = phi g3_1, g3_2
203 *   t2 = phi t2_1, t2_2
204 *   g4 = phi g4_1, g4_2
205 *   g5 = phi g5_1, g5_2
206 *   g6 = phi g6_1, g6_2
207 *   heap3 = phi heap3_1, heap3_2
208 *   t1_3 = phi t1_3_1, t1_3_2
209 *   goto after_loop
210 * explicit_test_block:
211 *   g1_2 = if l0 == null (unlikely) goto sub_optimal_loop
212 *   g2_2 = if 100 &gt;= l0.length (unlikely) goto sub_optimal_loop
213 *   g4_2 = if l1 == null (unlikely) goto sub_optimal_loop
214 *   g5_2 = if 100 &gt;= l1.length (unlikely) goto sub_optimal_loop
215 *   goto optimal_loop
216 * sub_optimal_loop:
217 *   label1_1:
218 *      t1_2_1 = phi t1_1, t1_3_1
219 *      heap2_1 = phi heap1, heap3_1
220 *      g1_1 = null_check   l0
221 *      g2_1 = bounds_check l0, t1_2_1
222 *      g3_1 = guard_combine g1_1,g2_1
223 *      t2_1 = aload l0, t1_2_1, g3_1
224 *      g4_1 = null_check   l1
225 *      g5_1 = bounds_check l1, t1_2_1
226 *      g6_1 = guard_combine g4_1,g5_1
227 *      heap3_1 = astore t2_1, l1, t1_2_1, g6_1
228 *      t1_3_1 = t1_2_1 + 1
229 *      if t1_3_1 &lt; 100 label1_1
230 *   goto successor_to_loops
231 * optimal_loop:
232 *   label1_2:
233 *      t1_2_2 = phi t1_1, t1_3_2
234 *      heap2_2 = phi heap1, heap3_2
235 *      g3_2 = guard_combine g1_2,g2_2
236 *      t2_2 = aload l0, t1_2_2, g3_2
237 *      g6_2 = guard_combine g4_2,g5_2
238 *      heap3_2 = astore t2_2, l1, t1_2_2, g6_2
239 *      t1_3_2 = t1_2_2 + 1
240 *      if t1_3_2 &lt; 100 label1_2
241 *   goto successor_to_loops
242 * after_loop:
243 * label2:
244 * </pre>
245 */
246public final class LoopVersioning extends CompilerPhase {
247  // -oO Debug variables Oo-
248  /**
249   * Flag to optionally print verbose debugging messages
250   */
251  private static final boolean DEBUG = false;
252  /**
253   * Flag to verify computed IR
254   */
255  private static final boolean VERIFY = false;
256
257  // -oO Debug routines Oo-
258  /**
259   * Human readable report of what goes on
260   *
261   * @param s String to print
262   **/
263  private static void report(String s) {
264    if (DEBUG) {
265      VM.sysWriteln(s);
266    }
267  }
268
269  /**
270   * Return a string name for this phase.
271   * @return "Loop Versioning"
272   */
273  @Override
274  public String getName() {
275    return "Loop Versioning";
276  }
277
278  // -oO Variables used throughout the optimisation phase Oo-
279  /**
280   * The phi instruction operand holding the optimized loop variable
281   */
282  private static final int OPTIMIZED_LOOP_OPERAND = 0;
283  /**
284   * The phi instruction operand holding the unoptimized loop variable
285   */
286  private static final int UNOPTIMIZED_LOOP_OPERAND = 1;
287
288  /**
289   * IR for optimisation
290   */
291  private IR ir;
292
293  /**
294   * Set used to store the loop related register
295   */
296  private HashSet<Register> loopRegisterSet;
297
298  /**
299   * SSA options
300   */
301  private final SSAOptions desiredSSAOptions;
302  /**
303   * Compiler phases called from this one
304   */
305  private CompilerPhase enterSSA, leaveSSA;
306  private final CompilerPhase domPhase;
307  /**
308   * Run inside SSA sub-phase
309   */
310  private static final boolean inSSAphase = true;
311
312  // -oO Interface to the rest of the compiler Oo-
313
314  /**
315   * Constructor for this compiler phase
316   */
317  private static final Constructor<CompilerPhase> constructor =
318      getCompilerPhaseConstructor(LoopVersioning.class);
319
320  /**
321   * Get a constructor object for this compiler phase
322   * @return compiler phase constructor
323   */
324  @Override
325  public Constructor<CompilerPhase> getClassConstructor() {
326    return constructor;
327  }
328
329  /**
330   * Constructor
331   */
332  public LoopVersioning() {
333    desiredSSAOptions = new SSAOptions();
334    desiredSSAOptions.setScalarsOnly(true);
335    if (!inSSAphase) {
336      enterSSA = new EnterSSA();
337      leaveSSA = new LeaveSSA();
338    }
339    domPhase = new DominatorsPhase(false);
340  }
341
342  /**
343   * Should loop versioning be performed?
344   */
345  @Override
346  public boolean shouldPerform(OptOptions options) {
347    return options.SSA_LOOP_VERSIONING;
348  }
349
350  /**
351   * @param _ir the IR to process
352   */
353  @Override
354  public void perform(IR _ir) {
355    ir = _ir;
356
357    // Create SSA
358    ir.desiredSSAOptions = desiredSSAOptions;
359    if (!inSSAphase) {
360      enterSSA.perform(ir);
361    }
362
363    if (DEBUG) {
364      SSA.printInstructions(ir);
365    }
366
367    // Perform loop annotation
368    if (!ir.hasReachableExceptionHandlers()) {
369      // Build LST tree and dominator info
370      domPhase.perform(ir);
371      DefUse.computeDU(ir);
372      // Build annotated version
373      ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree);
374    }
375    if (VERIFY) {
376      ir.verify(getName(), true);
377    }
378
379    // Check loop annotation has been performed
380    if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) {
381      report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n");
382    } else {
383      loopRegisterSet = new HashSet<Register>();
384
385      if (DEBUG) {
386        VM.sysWriteln(ir.getMethod().toString());
387        VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
388        SSA.printInstructions(ir);
389      }
390
391      while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) {
392        if (DEBUG) {
393          VM.sysWriteln("Successful optimisation of " + ir.getMethod());
394          SSA.printInstructions(ir);
395          VM.sysWriteln(ir.cfg.toString());
396        }
397        // Get IR into shape for next pass
398        DefUse.computeDU(ir);
399        LTDominators.perform(ir, true, true);
400        ir.HIRInfo.dominatorTree = new DominatorTree(ir, true);
401        LSTGraph.perform(ir);
402        AnnotatedLSTGraph.perform(ir);
403
404        if (VERIFY) {
405          ir.verify(getName(), true);
406        }
407
408        if (DEBUG) {
409          VM.sysWriteln("after an optimization pass");
410          VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
411          SSA.printInstructions(ir);
412          VM.sysWriteln("Finish optimize: " + ir.getMethod().toString());
413        }
414      }
415      // No longer in use
416      loopRegisterSet = null;
417    }
418
419    if (!inSSAphase) {
420      // Leave SSA
421      leaveSSA.perform(ir);
422    }
423
424    if (VERIFY) {
425      ir.verify(getName(), true);
426    }
427  }
428
429  // -oO Optimisation routines Oo-
430
431  /**
432   * Find an outermost loop to optimise and optimise it. Focus on
433   * annotated regular loops, LICM should handle possible
434   * optimisation for the non-regular loops
435   *
436   * @param loop  Loop to search
437   * @return was optimisation performed
438   */
439  private boolean findLoopToOptimise(AnnotatedLSTNode loop) {
440    // Has this loop already been optimised?
441    Operand carriedLoopIterator = loop.getCarriedLoopIterator();
442    if ((carriedLoopIterator instanceof RegisterOperand) &&
443        (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) {
444      return false;
445    }
446
447    // Process inner loops first
448    Enumeration<GraphNode> innerLoops = loop.outNodes();
449    // Iterate over loops
450    while (innerLoops.hasMoreElements()) {
451      AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement();
452      // Try to optimise inner loops first
453      if (findLoopToOptimise(nestedLoop)) {
454        // Exit early if inner loop optimisation succeeded
455        return true;
456      }
457    }
458    // Don't try to optimise irregular loops
459    if (loop.isNonRegularLoop()) {
460      return false;
461    }
462    if (DEBUG) {
463      report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod());
464      VM.sysWriteln("dominator tree:");
465      VM.sysWriteln(ir.HIRInfo.dominatorTree.toString());
466    }
467
468    // 1) Determine the bound and null checks to be eliminated. The
469    // bound checks are the ones that operate on the loop iterator. If
470    // no checks can be eliminated, stop optimising this loop.
471    ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>();
472    getListOfChecksToEliminate(loop, checksToEliminate);
473    if (checksToEliminate.isEmpty()) {
474      return false;
475    } else {
476      // We found instructions to eliminate
477      if (DEBUG) {
478        VM.sysWriteln("Loop being optimised:");
479        VM.sysWriteln(loop.toString());
480        VM.sysWriteln("Checks to eliminate:");
481        for (Instruction instruction : checksToEliminate) {
482          VM.sysWriteln(instruction.toString());
483        }
484      }
485      // 2) Determine the registers defined in the loop.
486      ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>();
487      ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>();
488      ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>();
489      getRegistersDefinedInLoop(loop,
490                                registersDefinedInOriginalLoop,
491                                typesOfRegistersDefinedInOriginalLoop,
492                                definingInstructionsInOriginalLoop);
493      if (DEBUG) {
494        VM.sysWrite("Registers in original loop:\n{");
495        for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) {
496          VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString());
497          if (definingInstructionsInOriginalLoop.get(i) != null) {
498            VM.sysWrite("(escapes),");
499          } else {
500            VM.sysWrite(",");
501          }
502        }
503        VM.sysWriteln("}");
504      }
505      // 3) Generate phi nodes that define the original register
506      // defined by the loop and use two newly created registers.
507      ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>();
508      HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>();
509      HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>();
510      generatePhiNodes(loop,
511                       registersDefinedInOriginalLoop,
512                       typesOfRegistersDefinedInOriginalLoop,
513                       phiInstructions,
514                       subOptimalRegMap,
515                       optimalRegMap);
516      if (DEBUG) {
517        VM.sysWriteln("subOptimalRegMap");
518        VM.sysWriteln(subOptimalRegMap.toString());
519        VM.sysWriteln("optimalRegMap");
520        VM.sysWriteln(optimalRegMap.toString());
521      }
522
523      // 4) Create a version of the original loop that uses the first of
524      // the newly created registers instead of the original
525      // registers.
526      HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>();
527      HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap =
528          createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap);
529      if (DEBUG) {
530        VM.sysWriteln("subOptimalLoopMap");
531        VM.sysWriteln(unoptimizedLoopMap.toString());
532      }
533      // 5) Create a second version, this time with the result of the
534      // eliminated checks set to explicit test guards.
535      HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>();
536      HashMap<BasicBlock, BasicBlock> optimizedLoopMap =
537          createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap);
538      if (DEBUG) {
539        VM.sysWriteln("optimalLoopMap");
540        VM.sysWriteln(optimizedLoopMap.toString());
541      }
542      // 6) Work out what the maximum value for all the bounds checks
543      // are and create branches to optimal or suboptimal loops - with
544      // the unoptimized loop possibly being unreachable
545      BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
546      BasicBlock temp = (BasicBlock) loop.header.prev;
547      ir.cfg.breakCodeOrder(temp, loop.header);
548      ir.cfg.linkInCodeOrder(temp, firstBranchBlock);
549      ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header);
550      temp.redirectOuts(loop.header, firstBranchBlock, ir);
551      boolean isUnoptimizedLoopReachable =
552          createBranchBlocks(loop,
553                             firstBranchBlock,
554                             checksToEliminate,
555                             unoptimizedLoopMap.get(loop.predecessor),
556                             optimizedLoopMap.get(loop.predecessor),
557                             optimalRegMap);
558      // 7) Fix up the phi node predecessors
559      fixUpPhiPredecessors(phiInstructions,
560                           isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null,
561                           optimizedLoopMap.get(loop.exit));
562      // 8) Remove the unoptimized loop if its redundant
563      if (!isUnoptimizedLoopReachable) {
564        removeUnoptimizedLoop(loop, unoptimizedLoopMap);
565      }
566
567      // 9) Replace register definitions in the original
568      // loop with phi instructions
569      modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap);
570      // 10) Compact node numbering so that CFG number of nodes
571      // reflects that some basic blocks may have been deleted
572      ir.cfg.compactNodeNumbering();
573      return true;
574    }
575  }
576
577  /**
578   * Create a list of instructions to be eliminated
579   * @param loop the loop to examine
580   * @param instrToEliminate the instructions to remove
581   */
582  private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) {
583    ArrayList<Instruction> nullChecks = new ArrayList<Instruction>();
584    ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>();
585    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
586    while (blocks.hasMoreElements()) {
587      BasicBlock block = blocks.nextElement();
588      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
589      while (instructions.hasMoreElements()) {
590        Instruction instruction = instructions.nextElement();
591        if (NullCheck.conforms(instruction)) {
592          if (loop.isInvariant(NullCheck.getRef(instruction))) {
593            instrToEliminate.add(instruction);
594            nullChecks.add(instruction);
595          }
596        } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) {
597          if (loop.isInvariant(BoundsCheck.getRef(instruction))) {
598            if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) {
599              if (loop.isInvariant(BoundsCheck.getGuard(instruction))) {
600                instrToEliminate.add(instruction);
601              } else {
602                // Null check isn't invariant but reference was, check
603                // null check will be eliminated at end of loop
604                oddBoundChecks.add(instruction);
605              }
606            }
607          }
608        }
609      }
610    }
611    // Check cases where the null check isn't loop invariant, however,
612    // it will be in the optimized loop as we'll have eliminated it
613    for (Instruction oddBoundCheck : oddBoundChecks) {
614      Operand guard = BoundsCheck.getGuard(oddBoundCheck);
615      for (Instruction nullCheck : nullChecks) {
616        if (guard.similar(NullCheck.getGuardResult(nullCheck))) {
617          instrToEliminate.add(oddBoundCheck);
618          break;
619        }
620      }
621    }
622  }
623
624  /**
625   * Get registers defined in the given loop. As we're in SSA form
626   * all register definitions must be unique.
627   * @param loop - the loop to examine
628   * @param registers - vector to which defined registers are added
629   * @param types list to which the register's types are added
630   * @param definingInstructions list to which the defining instructions for
631   *  the registers are added
632   */
633  private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers,
634                                         ArrayList<TypeReference> types,
635                                         ArrayList<Instruction> definingInstructions) {
636    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
637    while (blocks.hasMoreElements()) {
638      BasicBlock block = blocks.nextElement();
639      // can value escape
640      final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit));
641      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
642      while (instructions.hasMoreElements()) {
643        Instruction instruction = instructions.nextElement();
644        Enumeration<Operand> operands = instruction.getDefs();
645        while (operands.hasMoreElements()) {
646          Operand operand = operands.nextElement();
647          if (operand.isRegister()) {
648            registers.add(operand.asRegister().getRegister());
649            types.add(operand.asRegister().getType());
650            if (escapes) {
651              definingInstructions.add(instruction);
652            } else {
653              definingInstructions.add(null);
654            }
655          }
656        }
657      }
658    }
659  }
660
661  /**
662   * Generate into a new block phi nodes that define the original
663   * register defined by the loop and use two newly created
664   * registers.
665   * @param loop the loop to process
666   * @param registers - vector to which defined registers need to be
667   * created registers.x used in creating phi nodes
668   * @param types - vector of corresponding types of registers.
669   * @param phiInstructions - created phi instructions
670   * @param subOptimalRegMap - mapping of orignal destination to the
671   * newly created destination for the unoptimized loop
672   * @param optimalRegMap - mapping of orignal destination to the
673   * newly created destination for the optimized loop
674   */
675  private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers,
676                                ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions,
677                                HashMap<Register, Register> subOptimalRegMap,
678                                HashMap<Register, Register> optimalRegMap) {
679    // Get the carried loop iterator's register
680    Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister();
681    for (int i = 0; i < registers.size(); i++) {
682      Register register = registers.get(i);
683      TypeReference type = types.get(i);
684      Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2);
685      phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
686
687      // new operand for optimized loop
688      Operand op0 = ir.regpool.makeTemp(type);
689      Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0);
690      optimalRegMap.put(register, op0.asRegister().getRegister());
691
692      // new operand for unoptimized loop
693      Operand op1 = ir.regpool.makeTemp(type);
694      Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1);
695      subOptimalRegMap.put(register, op1.asRegister().getRegister());
696
697      // Add the new created carried loop iterator registers to
698      // internal set to mark the optimized loops
699      if (register == carriedLoopIteratorRegister) {
700        setOptimizedLoop(op0.asRegister().getRegister());
701        setOptimizedLoop(op1.asRegister().getRegister());
702      }
703
704      phiInstructions.add(phi);
705    }
706    // rename any optimized inner loops registers
707    renameOptimizedLoops(subOptimalRegMap, optimalRegMap);
708  }
709
710  /**
711   * Create a clone of the loop replacing definitions in the cloned
712   * loop with those found in the register map
713   * @param loop - loop to clone
714   * @param regMap - mapping of original definition to new
715   * definition
716   * @param regToBlockMap mapping of registers to new, unoptimized blocks. This starts
717   *  empty and will be filled during execution of the method.
718   * @return a mapping from original BBs to created BBs
719   */
720  private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop,
721                                                                  HashMap<Register, Register> regMap,
722                                                                  HashMap<Register, BasicBlock> regToBlockMap) {
723    HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
724    // After the newly created loop goto the old loop header
725    originalToCloneBBMap.put(loop.successor, loop.header);
726    // Create an empty block to be the loop predecessor
727    BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
728    ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
729    originalToCloneBBMap.put(loop.predecessor, new_pred);
730    // Create copy blocks
731    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
732    while (blocks.hasMoreElements()) {
733      BasicBlock block = blocks.nextElement();
734      block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts
735      // Create copy and register mapping
736      BasicBlock copy = block.copyWithoutLinks(ir);
737      originalToCloneBBMap.put(block, copy);
738      // Link into code order
739      ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
740      // Alter register definitions and uses in copy
741      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
742      while (instructions.hasMoreElements()) {
743        Instruction instruction = instructions.nextElement();
744        Enumeration<Operand> operands = instruction.getDefs();
745        while (operands.hasMoreElements()) {
746          Operand operand = operands.nextElement();
747          if (operand.isRegister()) {
748            Register register = operand.asRegister().getRegister();
749            if (regMap.containsKey(register)) {
750              instruction.replaceRegister(register, regMap.get(register));
751              regToBlockMap.put(regMap.get(register), copy);
752            }
753          }
754        }
755        operands = instruction.getUses();
756        while (operands.hasMoreElements()) {
757          Operand operand = operands.nextElement();
758          if (operand instanceof RegisterOperand) {
759            Register register = operand.asRegister().getRegister();
760            if (regMap.containsKey(register)) {
761              instruction.replaceRegister(register, regMap.get(register));
762            }
763          }
764        }
765      }
766    }
767    // Fix up outs
768    // loop predecessor
769    new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
770    // loop blocks
771    blocks = loop.getBasicBlocks();
772    while (blocks.hasMoreElements()) {
773      BasicBlock block = blocks.nextElement();
774      BasicBlock copy = originalToCloneBBMap.get(block);
775      Enumeration<BasicBlock> outs = block.getOutNodes();
776      while (outs.hasMoreElements()) {
777        BasicBlock out = outs.nextElement();
778        if (originalToCloneBBMap.containsKey(out)) {
779          copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
780        }
781      }
782    }
783    // Fix up phis
784    blocks = loop.getBasicBlocks();
785    while (blocks.hasMoreElements()) {
786      BasicBlock block = blocks.nextElement();
787      BasicBlock copy = originalToCloneBBMap.get(block);
788      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
789      while (instructions.hasMoreElements()) {
790        Instruction instruction = instructions.nextElement();
791        if (Phi.conforms(instruction)) {
792          for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
793            BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
794            if (originalToCloneBBMap.containsKey(phi_predecessor)) {
795              Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
796            } else {
797              dumpIR(ir, "Error when optimising" + ir.getMethod());
798              throw new Error("There's > 1 route to this phi node " +
799                              instruction +
800                              " from outside the loop: " +
801                              phi_predecessor);
802            }
803          }
804        }
805      }
806    }
807    return originalToCloneBBMap;
808  }
809
810  /**
811   * Create a clone of the loop replacing definitions in the cloned
812   * loop with those found in the register map and eliminate
813   * unnecessary bound checks
814   * @param loop - loop to clone
815   * @param regMap - mapping of original definition to new
816   * definition
817   * @param instrToEliminate - instructions to eliminate
818   * @param regToBlockMap - mapping of a register to its defining BB
819   * @return a mapping from original BBs to created BBs
820   */
821  private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop,
822                                                                      HashMap<Register, Register> regMap,
823                                                                      ArrayList<Instruction> instrToEliminate,
824                                                                      HashMap<Register, BasicBlock> regToBlockMap) {
825    HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
826    // After the newly created loop goto the old loop header
827    originalToCloneBBMap.put(loop.successor, loop.header);
828    // Create an empty block to be the loop predecessor
829    BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
830    ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
831    originalToCloneBBMap.put(loop.predecessor, new_pred);
832
833    // Create copy blocks
834    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
835    while (blocks.hasMoreElements()) {
836      BasicBlock block = blocks.nextElement();
837      // N.B. fall through will have been killed by unoptimized loop
838      // Create copy and register mapping
839      BasicBlock copy = block.copyWithoutLinks(ir);
840      originalToCloneBBMap.put(block, copy);
841      // Link into code order
842      ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
843
844      // Alter register definitions in copy
845      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
846      loop_over_created_instructions:
847      while (instructions.hasMoreElements()) {
848        Instruction instruction = instructions.nextElement();
849        if (BoundsCheck.conforms(instruction)) {
850          for (Instruction anInstrToEliminate : instrToEliminate) {
851            if (instruction.similar(anInstrToEliminate)) {
852              instruction.remove();
853              continue loop_over_created_instructions;
854            }
855          }
856        } else if (NullCheck.conforms(instruction)) {
857          for (Instruction anInstrToEliminate : instrToEliminate) {
858            if (instruction.similar(anInstrToEliminate)) {
859              instruction.remove();
860              continue loop_over_created_instructions;
861            }
862          }
863        }
864        Enumeration<Operand> operands = instruction.getDefs();
865        while (operands.hasMoreElements()) {
866          Operand operand = operands.nextElement();
867          if (operand instanceof RegisterOperand) {
868            Register register = operand.asRegister().getRegister();
869            if (regMap.containsKey(register)) {
870              instruction.replaceRegister(register, regMap.get(register));
871              regToBlockMap.put(regMap.get(register), copy);
872            }
873          }
874        }
875        operands = instruction.getUses();
876        while (operands.hasMoreElements()) {
877          Operand operand = operands.nextElement();
878          if (operand.isRegister()) {
879            Register register = operand.asRegister().getRegister();
880            if (regMap.containsKey(register)) {
881              instruction.replaceRegister(register, regMap.get(register));
882            }
883          }
884        }
885      }
886    }
887    // Fix up outs
888    // loop predecessor
889    new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
890    blocks = loop.getBasicBlocks();
891    while (blocks.hasMoreElements()) {
892      BasicBlock block = blocks.nextElement();
893      BasicBlock copy = originalToCloneBBMap.get(block);
894      Enumeration<BasicBlock> outs = block.getOutNodes();
895      while (outs.hasMoreElements()) {
896        BasicBlock out = outs.nextElement();
897        if (originalToCloneBBMap.containsKey(out)) {
898          copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
899        }
900      }
901    }
902    // Fix up phis
903    blocks = loop.getBasicBlocks();
904    while (blocks.hasMoreElements()) {
905      BasicBlock block = blocks.nextElement();
906      BasicBlock copy = originalToCloneBBMap.get(block);
907      IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
908      while (instructions.hasMoreElements()) {
909        Instruction instruction = instructions.nextElement();
910        if (Phi.conforms(instruction)) {
911          for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
912            BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
913            if (originalToCloneBBMap.containsKey(phi_predecessor)) {
914              Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
915            } else {
916              throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor);
917            }
918          }
919        }
920      }
921    }
922    return originalToCloneBBMap;
923  }
924
925  /**
926   * When phi nodes were generated the basic blocks weren't known for
927   * the predecessors, fix this up now. (It may also not be possible
928   * to reach the unoptimized loop any more)
929   *
930   * @param phiInstructions a list of phi nodes
931   * @param unoptimizedLoopExit the exit block of the unoptimized loop.
932   *  This may be {@code null} if the unoptimized loop is no longer reachable.
933   * @param optimizedLoopExit the exit block of the optimized loop
934   */
935  private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit,
936                                    BasicBlock optimizedLoopExit) {
937    if (unoptimizedLoopExit != null) {
938      for (Instruction instruction : phiInstructions) {
939        Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
940        Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit));
941      }
942    } else {
943      for (Instruction instruction : phiInstructions) {
944        Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND);
945        Phi.resizeNumberOfPreds(instruction, 1);
946        Phi.resizeNumberOfValues(instruction, 1);
947        Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand);
948        Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
949      }
950    }
951  }
952
953  /*
954   * TODO better JavaDoc comment.
955   * <p>
956   * Create the block containing explict branches to either the
957   * optimized or unoptimized loops
958   * @param optimalRegMap - mapping used to map eliminated bound and
959   * null check guards to
960   */
961  private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block,
962                                     ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry,
963                                     BasicBlock optimizedLoopEntry,
964                                     HashMap<Register, Register> optimalRegMap) {
965    BasicBlock blockOnEntry = block;
966    // 1) generate null check guards
967    block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry);
968
969    // 2) generate bound check guards
970    if (loop.isMonotonic()) {
971      // create new operands for values beyond initial and terminal iterator values
972      Operand terminal;
973      Operand terminalLessStrideOnce;
974      Operand terminalPlusStrideOnce;
975
976      // NB. precomputing these makes life easier and the code easier to read,
977      //     it does create dead code though
978      if (loop.terminalIteratorValue.isIntConstant()) {
979        terminal = loop.terminalIteratorValue;
980        int terminalAsInt = terminal.asIntConstant().value;
981        int stride = loop.strideValue.asIntConstant().value;
982        terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride);
983        terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride);
984      } else {
985        Instruction tempInstr;
986        terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue);
987        terminalLessStrideOnce = ir.regpool.makeTempInt();
988        terminalPlusStrideOnce = ir.regpool.makeTempInt();
989        tempInstr =
990            Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
991        tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
992        block.appendInstruction(tempInstr);
993        DefUse.updateDUForNewInstruction(tempInstr);
994
995        tempInstr =
996            Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
997        tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
998        block.appendInstruction(tempInstr);
999        DefUse.updateDUForNewInstruction(tempInstr);
1000      }
1001
1002      // Determine maximum and minimum index values for different loop types
1003      Operand phiMinIndexValue;
1004      Operand phiMaxIndexValue;
1005
1006      if (loop.isMonotonicIncreasing()) {
1007        phiMinIndexValue = loop.initialIteratorValue;
1008        if ((loop.condition.isLESS() ||
1009             loop.condition.isLOWER() ||
1010             loop.condition.isNOT_EQUAL())) {
1011          phiMaxIndexValue = terminal;
1012        } else if ((loop.condition.isLESS_EQUAL() ||
1013                    loop.condition.isLOWER_EQUAL() ||
1014                    loop.condition.isEQUAL())) {
1015          phiMaxIndexValue = terminalPlusStrideOnce;
1016        } else {
1017          throw new Error("Unrecognised loop for fission " + loop);
1018        }
1019      } else if (loop.isMonotonicDecreasing()) {
1020        phiMaxIndexValue = loop.initialIteratorValue;
1021        if ((loop.condition.isGREATER() ||
1022             loop.condition.isHIGHER() ||
1023             loop.condition.isNOT_EQUAL())) {
1024          phiMinIndexValue = terminalPlusStrideOnce;
1025        } else if ((loop.condition.isGREATER_EQUAL() ||
1026                    loop.condition.isHIGHER_EQUAL() ||
1027                    loop.condition.isEQUAL())) {
1028          phiMinIndexValue = terminalLessStrideOnce;
1029        } else {
1030          throw new Error("Unrecognised loop for fission " + loop);
1031        }
1032      } else {
1033        throw new Error("Unrecognised loop for fission " + loop);
1034      }
1035      // Generate tests
1036      for (int i = 0; i < checksToEliminate.size(); i++) {
1037        Instruction instr = checksToEliminate.get(i);
1038        if (BoundsCheck.conforms(instr)) {
1039          // Have we already generated these tests?
1040          boolean alreadyChecked = false;
1041          for (int j = 0; j < i; j++) {
1042            Instruction old_instr = checksToEliminate.get(j);
1043            if (BoundsCheck.conforms(old_instr) &&
1044                (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) &&
1045                (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) {
1046              // yes - just create a guard move
1047              alreadyChecked = true;
1048              RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO();
1049              guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1050              RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO();
1051              guardSource.setRegister(optimalRegMap.get(guardSource.getRegister()));
1052              Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource);
1053              tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1054              block.appendInstruction(tempInstr);
1055              break;
1056            }
1057          }
1058          if (!alreadyChecked) {
1059            // no - generate tests
1060            Operand index = BoundsCheck.getIndex(instr);
1061            int distance = loop.getFixedDistanceFromPhiIterator(index);
1062            if (distance == 0) {
1063              block =
1064                  generateExplicitBoundCheck(instr,
1065                                             phiMinIndexValue,
1066                                             phiMaxIndexValue,
1067                                             optimalRegMap,
1068                                             block,
1069                                             unoptimizedLoopEntry);
1070            } else {
1071              Instruction tempInstr;
1072              RegisterOperand minIndex = ir.regpool.makeTempInt();
1073              RegisterOperand maxIndex = ir.regpool.makeTempInt();
1074
1075              tempInstr =
1076                  Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance));
1077              tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1078              block.appendInstruction(tempInstr);
1079              DefUse.updateDUForNewInstruction(tempInstr);
1080
1081              tempInstr =
1082                  Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance));
1083              tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1084              block.appendInstruction(tempInstr);
1085              DefUse.updateDUForNewInstruction(tempInstr);
1086
1087              block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry);
1088            }
1089          }
1090        }
1091      }
1092    }
1093    // Have we had to create a new basic block since entry => we
1094    // generated a branch to the unoptimized loop
1095    boolean isUnoptimizedLoopReachable = (blockOnEntry != block);
1096    // 3) Finish up with goto and generate true guard value
1097    {
1098      Instruction branch; // the generated branch instruction
1099      branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget());
1100      branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1101      block.appendInstruction(branch);
1102      block.deleteNormalOut();
1103      block.insertOut(optimizedLoopEntry);
1104    }
1105    return isUnoptimizedLoopReachable;
1106  }
1107
1108  /**
1109   * Generate null check branch blocks
1110   *
1111   * @param loop the current loop where checks are being eliminated
1112   * @param checksToEliminate all of the checks that are being eliminated in the pass
1113   * @param optimalRegMap a map from original register to the register used in the optimal loop
1114   * @param block the block to generate code into
1115   * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1116   * @return the new block to generate code into
1117   */
1118  private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop,
1119                                                       ArrayList<Instruction> checksToEliminate,
1120                                                       HashMap<Register, Register> optimalRegMap,
1121                                                       BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1122    // Map of already generated null check references to their
1123    // corresponding guard result
1124    HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>();
1125    // Iterate over checks
1126    for (Instruction instr : checksToEliminate) {
1127      // Is this a null check
1128      if (NullCheck.conforms(instr)) {
1129        // the generated branch instruction
1130        Instruction branch;
1131        // the reference to compare
1132        Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr));
1133        // the guard result to define
1134        RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO();
1135        guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1136        // check if we've generated this test already
1137        if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) {
1138          // yes - generate just a guard move
1139          branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy());
1140          branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1141          block.appendInstruction(branch);
1142        } else {
1143          // check if we can just move a guard from the loop predecessors
1144          RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr);
1145          if (guard != null) {
1146            // yes - generate just a guard move
1147            branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO());
1148            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1149            block.appendInstruction(branch);
1150          } else {
1151            // generate explicit null test
1152            branch =
1153                IfCmp.create(REF_IFCMP,
1154                             guardResult,
1155                             ref.copy(),
1156                             new NullConstantOperand(),
1157                             ConditionOperand.EQUAL(),
1158                             unoptimizedLoopEntry.makeJumpTarget(),
1159                             BranchProfileOperand.unlikely());
1160            if (ref.isRegister()) {
1161              refToGuardMap.put(ref.asRegister().getRegister(), guardResult);
1162            }
1163            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1164            block.appendInstruction(branch);
1165            // Adjust block
1166            block.insertOut(unoptimizedLoopEntry);
1167            BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1168            BasicBlock temp = (BasicBlock) block.next;
1169            ir.cfg.breakCodeOrder(block, temp);
1170            ir.cfg.linkInCodeOrder(block, new_block);
1171            ir.cfg.linkInCodeOrder(new_block, temp);
1172            block.insertOut(new_block);
1173            block = new_block;
1174          }
1175        }
1176      }
1177    }
1178    return block;
1179  }
1180
1181  /**
1182   * Generate bound check branch blocks
1183   *
1184   * @param boundCheckInstr the bound check instruction in question
1185   * @param minIndexValue the min value for an iterator a loop will generate
1186   * @param maxIndexValue the max value for an iterator a loop will generate
1187   * @param optimalRegMap a map from original register to the register used in the optimal loop
1188   * @param block the block to generate code into
1189   * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1190   * @return the new block to generate code into
1191   */
1192  private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue,
1193                                                    Operand maxIndexValue,
1194                                                    HashMap<Register, Register> optimalRegMap,
1195                                                    BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1196    // 1) Work out what tests are necessary. NB we don't optimise for
1197    // the case when exceptions will always be generated
1198    boolean lowerBoundTestRedundant;
1199    boolean upperBoundTestRedundant;
1200    {
1201      // as array lengths must be >= 0 the lower bound test is not
1202      // necessary if:
1203      // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant)
1204      lowerBoundTestRedundant =
1205          ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) ||
1206           ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) &&
1207            (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0)));
1208      // as the upper bound must be <= arraylength the test is not
1209      // necessary if:
1210      // maxIndexValue = (arraylength A) - zeroOrPositiveConstant
1211      Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue);
1212      upperBoundTestRedundant =
1213          ((maxIndexArrayLengthRef != null) &&
1214           maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) &&
1215           (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0));
1216    }
1217
1218    // 2) Create explicit bound check
1219
1220    // register to hold result (NB it's a guard for the optimal loop)
1221    RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO();
1222    guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1223
1224    // the guard on the bound check (mapped from the optimal loop as
1225    // it should already have been generated or may already be out of
1226    // the loop)
1227    Operand origGuard = BoundsCheck.getGuard(boundCheckInstr);
1228    Operand guard = origGuard.copy();
1229    if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) {
1230      guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister()));
1231    }
1232
1233    if (lowerBoundTestRedundant && upperBoundTestRedundant) {
1234      // both tests redundant so just generate a guard move of the
1235      // bound check guard
1236      Instruction move = Move.create(GUARD_MOVE, guardResult, guard);
1237      move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1238      block.appendInstruction(move);
1239    } else {
1240      // 2.1) Create array length
1241      RegisterOperand array_length = ir.regpool.makeTempInt();
1242      Instruction array_length_instr =
1243          GuardedUnary.create(ARRAYLENGTH,
1244                              array_length,
1245                              AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(),
1246                              guard);
1247      array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1248      block.appendInstruction(array_length_instr);
1249
1250      // 2.2) Create minimum index test unless test is redundant
1251      if (!lowerBoundTestRedundant) {
1252        RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation();
1253        // Generate bound check
1254        Instruction branch =
1255            IfCmp.create(INT_IFCMP,
1256                         lowerBoundGuard,
1257                         minIndexValue.copy(),
1258                         array_length.copyRO(),
1259                         ConditionOperand.LESS(),
1260                         unoptimizedLoopEntry.makeJumpTarget(),
1261                         BranchProfileOperand.unlikely());
1262        branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1263        block.appendInstruction(branch);
1264        // Adjust block
1265        block.insertOut(unoptimizedLoopEntry);
1266        BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1267        BasicBlock temp = (BasicBlock) block.next;
1268        ir.cfg.breakCodeOrder(block, temp);
1269        ir.cfg.linkInCodeOrder(block, new_block);
1270        ir.cfg.linkInCodeOrder(new_block, temp);
1271        block.insertOut(new_block);
1272        block = new_block;
1273      }
1274      // 2.3) Create maximum index test
1275      if (!upperBoundTestRedundant) {
1276        Instruction branch =
1277            IfCmp.create(INT_IFCMP,
1278                         guardResult,
1279                         maxIndexValue.copy(),
1280                         array_length.copyRO(),
1281                         ConditionOperand.GREATER(),
1282                         unoptimizedLoopEntry.makeJumpTarget(),
1283                         BranchProfileOperand.unlikely());
1284        branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1285        block.appendInstruction(branch);
1286        // Adjust block
1287        block.insertOut(unoptimizedLoopEntry);
1288        BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1289        BasicBlock temp = (BasicBlock) block.next;
1290        ir.cfg.breakCodeOrder(block, temp);
1291        ir.cfg.linkInCodeOrder(block, new_block);
1292        ir.cfg.linkInCodeOrder(new_block, temp);
1293        block.insertOut(new_block);
1294        block = new_block;
1295      }
1296    }
1297    return block;
1298  }
1299
1300  /**
1301   * Can we eliminate a null check as it has lready been performed?
1302   * NB SSA guarantees that if a value is null it must always be null
1303   *
1304   * @param header loop header basic block
1305   * @param instr null check instruction
1306   * @return the guard for the null check if it has already been performed,
1307   * {@code null} if the check hasn't been performed yet
1308   */
1309  private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) {
1310    if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr));
1311    BasicBlock block = header;
1312    do {
1313      block = ir.HIRInfo.dominatorTree.getParent(block);
1314      Instruction lastInst = block.lastInstruction();
1315      for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst =
1316          itrInst.nextInstructionInCodeOrder()) {
1317        if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) {
1318          return NullCheck.getGuardResult(itrInst);
1319        }
1320      }
1321    } while (block != ir.cfg.entry());
1322    return null;
1323  }
1324
1325  /**
1326   * Gets the array length reference ignoring instructions that adjust
1327   * its result by a fixed amount.
1328   *
1329   * @param op operand to chase arraylength opcode to
1330   * constant value from an array length
1331   * @return the array length as defined above
1332   */
1333  private Operand getConstantAdjustedArrayLengthRef(Operand op) {
1334    Operand result = null;
1335    if (op.isRegister()) {
1336      Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1337      if (opInstr.getOpcode() == ARRAYLENGTH_opcode) {
1338        result = GuardedUnary.getVal(opInstr);
1339      } else if ((opInstr.getOpcode() == INT_ADD_opcode) || (opInstr.getOpcode() == INT_SUB_opcode)) {
1340        Operand val1 = Binary.getVal1(opInstr);
1341        Operand val2 = Binary.getVal2(opInstr);
1342        if (val1.isConstant()) {
1343          result = getConstantAdjustedArrayLengthRef(val2);
1344        } else if (val2.isConstant()) {
1345          result = getConstantAdjustedArrayLengthRef(val1);
1346        }
1347      }
1348    }
1349    return result;
1350  }
1351
1352  /**
1353   * Get the distance from an array length by addding up instructions
1354   * that adjust the array length result by a constant amount
1355   *
1356   * @param op operand to chase arraylength opcode to
1357   * @return the array length as defined above
1358   */
1359  private int getConstantAdjustedArrayLengthDistance(Operand op) {
1360    Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1361    if (opInstr.getOpcode() == ARRAYLENGTH_opcode) {
1362      return 0;
1363    } else if (opInstr.getOpcode() == INT_ADD_opcode) {
1364      Operand val1 = Binary.getVal1(opInstr);
1365      Operand val2 = Binary.getVal2(opInstr);
1366      if (val1.isConstant()) {
1367        return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2);
1368      } else {
1369        if (VM.VerifyAssertions) VM._assert(val2.isConstant());
1370        return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value;
1371      }
1372    } else if (opInstr.getOpcode() == INT_SUB_opcode) {
1373      Operand val1 = Binary.getVal1(opInstr);
1374      Operand val2 = Binary.getVal2(opInstr);
1375      if (val1.isConstant()) {
1376        return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2);
1377      } else {
1378        if (VM.VerifyAssertions) VM._assert(val2.isConstant());
1379        return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value;
1380      }
1381    } else {
1382      throw new Error("Unexpected opcode when computing distance " + op);
1383    }
1384  }
1385
1386  /*
1387   * TODO Convert to JavaDoc and add missing tags.
1388   * <p>
1389   * Remove loop and replace register definitions in the original loop
1390   * with phi instructions
1391   */
1392  private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions,
1393                                  ArrayList<Instruction> definingInstrInOriginalLoop,
1394                                  HashMap<Register, Register> subOptimalRegMap,
1395                                  HashMap<Register, Register> optimalRegMap) {
1396    // Remove instructions from loop header and exit, remove other
1397    // loop body blocks
1398    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
1399    while (blocks.hasMoreElements()) {
1400      BasicBlock block = blocks.nextElement();
1401      if ((block == loop.header) || (block == loop.exit)) {
1402        IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
1403        while (instructions.hasMoreElements()) {
1404          Instruction instruction = instructions.nextElement();
1405          if (!BBend.conforms(instruction) && !Label.conforms(instruction)) {
1406            instruction.remove();
1407          }
1408        }
1409      } else {
1410        ir.cfg.removeFromCFGAndCodeOrder(block);
1411      }
1412    }
1413
1414    // Place phi instructions in loop header
1415    for (int i = 0; i < phiInstructions.size(); i++) {
1416      Instruction origInstr = definingInstrInOriginalLoop.get(i);
1417      // Did the original instructions value escape the loop?
1418      if (origInstr != null) {
1419        // Was this a phi of a phi?
1420        if (Phi.conforms(origInstr)) {
1421          Instruction phi = phiInstructions.get(i);
1422          boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2;
1423          // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header
1424          boolean fixed = false;
1425          for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) {
1426            BasicBlockOperand predOp = Phi.getPred(origInstr, index);
1427            // Only worry about values who are on the backward branch
1428            if (predOp.block == loop.exit) {
1429              if (fixed) { // We've tried to do 2 replaces => something wrong
1430                SSA.printInstructions(ir);
1431                OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1432                                                            "Phi node in loop header with multiple in loop predecessors");
1433              }
1434              Operand rval = Phi.getValue(origInstr, index);
1435              if (rval.isRegister()) {
1436                // Sort out registers
1437                Register origRegPhiRval = rval.asRegister().getRegister();
1438                Register subOptRegPhiRval;
1439                Register optRegPhiRval;
1440                if (!subOptimalRegMap.containsKey(origRegPhiRval)) {
1441                  // Register comes from loop exit but it wasn't defined in the loop
1442                  subOptRegPhiRval = origRegPhiRval;
1443                  optRegPhiRval = origRegPhiRval;
1444                } else {
1445                  subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval);
1446                  optRegPhiRval = optimalRegMap.get(origRegPhiRval);
1447                }
1448                if (phiHasUnoptimizedArg) {
1449                  Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval);
1450                }
1451                Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval);
1452              } else if (rval.isConstant()) {
1453                // Sort out constants
1454                if (phiHasUnoptimizedArg) {
1455                  Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy());
1456                }
1457                Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy());
1458              } else if (rval instanceof HeapOperand) {
1459                // Sort out heap variables
1460                @SuppressWarnings("unchecked") // Cast to generic type
1461                    HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value;
1462                HeapVariable<Object> subOptPhiRval;
1463                HeapVariable<Object> optPhiRval;
1464                if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) {
1465                  // currently we only expect to optimise scalar SSA
1466                  // form
1467                  subOptPhiRval = origPhiRval;
1468                  optPhiRval = origPhiRval;
1469                } else {
1470                  /*
1471                  subOptPhiRval   = (HeapVariable)subOptimalRegMap.get(origPhiRval);
1472                  optPhiRval      = (HeapVariable)optimalRegMap.get(origPhiRval);
1473                  */
1474                }
1475                if (phiHasUnoptimizedArg) {
1476                  Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval));
1477                }
1478                Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval));
1479              } else {
1480                OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1481                                                            "Unknown operand type",
1482                                                            rval.toString());
1483              }
1484              fixed = true;
1485            }
1486          }
1487        }
1488        // Add back to loop
1489        loop.header.appendInstruction(phiInstructions.get(i));
1490      }
1491    }
1492    // Remove original loop and branch to loop successor
1493    Instruction tempInstr;
1494    if (loop.header != loop.exit) {
1495      tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget());
1496      tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1497      loop.header.appendInstruction(tempInstr);
1498      loop.header.deleteNormalOut();
1499      loop.header.insertOut(loop.exit);
1500
1501    }
1502    tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget());
1503    tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1504    loop.exit.appendInstruction(tempInstr);
1505    loop.exit.deleteNormalOut();
1506    loop.exit.insertOut(loop.successor);
1507  }
1508
1509  private void removeUnoptimizedLoop(AnnotatedLSTNode loop,
1510                                     HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) {
1511    Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
1512    report("removing unoptimized loop");
1513    BasicBlock block = unoptimizedLoopMap.get(loop.predecessor);
1514    report("removing block " + block);
1515    ir.cfg.removeFromCFGAndCodeOrder(block);
1516    while (blocks.hasMoreElements()) {
1517      block = unoptimizedLoopMap.get(blocks.nextElement());
1518      if (!loop.contains(block)) {
1519        report("removing block " + block);
1520        ir.cfg.removeFromCFGAndCodeOrder(block);
1521      } else {
1522        report("not removing block that's in the original loop" + block);
1523      }
1524    }
1525  }
1526
1527  /**
1528   * Put the optimized loop's iterator register into the hash set
1529   *
1530   * @param reg register
1531   */
1532  private void setOptimizedLoop(Register reg) {
1533    loopRegisterSet.add(reg);
1534  }
1535
1536  /**
1537   * Check whether the loop that contain such iterator register had
1538   * been optimized
1539   *
1540   * @param reg register
1541   * @return the test result
1542   */
1543  private boolean isOptimizedLoop(Register reg) {
1544    return loopRegisterSet.contains(reg);
1545  }
1546
1547  /*
1548   * TODO Convert to JavaDoc and add missing tags.
1549   * Rename the iterators for optimized loops so we can tell they are still optimized
1550   */
1551  private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap,
1552                                    HashMap<Register, Register> optimalRegMap) {
1553    Iterator<Register> itr = loopRegisterSet.iterator();
1554    while (itr.hasNext()) {
1555      Register reg = itr.next();
1556      if (subOptimalRegMap.containsKey(reg)) {
1557        loopRegisterSet.remove(reg);
1558        loopRegisterSet.add(subOptimalRegMap.get(reg));
1559        loopRegisterSet.add(optimalRegMap.get(reg));
1560        itr = loopRegisterSet.iterator();
1561      }
1562    }
1563  }
1564}