001 /*
002 * This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 * This file is licensed to You under the Eclipse Public License (EPL);
005 * You may not use this file except in compliance with the License. You
006 * may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 * See the COPYRIGHT.txt file distributed with this work for information
011 * regarding copyright ownership.
012 */
013 package org.jikesrvm.compilers.opt.ssa;
014
015 import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI;
016 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH;
017 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
018 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
019 import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE;
020 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
021 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
022 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
023 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
024 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode;
025 import static org.jikesrvm.compilers.opt.ir.Operators.PHI;
026 import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP;
027
028 import java.lang.reflect.Constructor;
029 import java.util.ArrayList;
030 import java.util.Enumeration;
031 import java.util.HashMap;
032 import java.util.HashSet;
033 import java.util.Iterator;
034
035 import org.jikesrvm.VM;
036 import org.jikesrvm.classloader.TypeReference;
037 import org.jikesrvm.compilers.opt.DefUse;
038 import org.jikesrvm.compilers.opt.OptOptions;
039 import org.jikesrvm.compilers.opt.OptimizingCompilerException;
040 import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph;
041 import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode;
042 import org.jikesrvm.compilers.opt.controlflow.DominatorTree;
043 import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase;
044 import org.jikesrvm.compilers.opt.controlflow.LSTGraph;
045 import org.jikesrvm.compilers.opt.controlflow.LTDominators;
046 import org.jikesrvm.compilers.opt.driver.CompilerPhase;
047 import org.jikesrvm.compilers.opt.ir.BBend;
048 import org.jikesrvm.compilers.opt.ir.BasicBlock;
049 import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
050 import org.jikesrvm.compilers.opt.ir.Binary;
051 import org.jikesrvm.compilers.opt.ir.BoundsCheck;
052 import org.jikesrvm.compilers.opt.ir.Goto;
053 import org.jikesrvm.compilers.opt.ir.GuardedUnary;
054 import org.jikesrvm.compilers.opt.ir.IR;
055 import org.jikesrvm.compilers.opt.ir.IREnumeration;
056 import org.jikesrvm.compilers.opt.ir.IfCmp;
057 import org.jikesrvm.compilers.opt.ir.Instruction;
058 import org.jikesrvm.compilers.opt.ir.Label;
059 import org.jikesrvm.compilers.opt.ir.Move;
060 import org.jikesrvm.compilers.opt.ir.NullCheck;
061 import org.jikesrvm.compilers.opt.ir.OperandEnumeration;
062 import org.jikesrvm.compilers.opt.ir.Phi;
063 import org.jikesrvm.compilers.opt.ir.Register;
064 import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand;
065 import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
066 import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
067 import org.jikesrvm.compilers.opt.ir.operand.HeapOperand;
068 import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
069 import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand;
070 import org.jikesrvm.compilers.opt.ir.operand.Operand;
071 import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
072 import org.jikesrvm.compilers.opt.util.GraphNode;
073
074 /**
075 * This optimisation works from the outer most loop inward, optimising
076 * loops that conform to being regular {@link
077 * AnnotatedLSTNode}s. The optimisations performs the following
078 * operations:
079 *
080 * 1) Determine the bound and null checks to be eliminated. These are
081 * the ones that operate on the loop iterator. If no bound checks can
082 * be eliminated, stop optimising this loop.
083 *
084 * 2) Determine the registers defined in the loop.
085 *
086 * 3) Generate phi nodes that define the original register defined by
087 * the loop and use two newly created registers.
088 *
089 * 4) Create a version of the original loop that uses the first of the
090 * newly created registers instead of the original registers.
091 *
092 * 5) Create a second version, this time with the result of the
093 * eliminated checks set to true.
094 *
095 * 6) Work out what the maximum value for all the bounds checks are
096 * and create branches to optimal or suboptimal loops
097 *
098 * 7) Fix up the phi node predecessors
099 *
100 * 8) Remove the unoptimized loop if its redundant
101 *
102 * 9) Replace register definitions in the original loop with phi
103 * instructions
104 *
105 * 10) Compact node numbering so that CFG number of nodes reflects
106 * that some basic blocks may have been deleted
107 *
108 *
109 * Example:
110 * <listing>
111 * for (int t1=0; t1 < 100; t1++) {
112 * g1 = null_check l0
113 * g2 = bounds_check l0, t1
114 * g3 = guard_combine g1,g2
115 * t2 = aload l0, t1, g3
116 * g4 = null_check l1
117 * g5 = bounds_check l1, t1
118 * g6 = guard_combine g4,g5
119 * astore t2, l1, t1, g6
120 * }
121 * </listing>
122 *
123 * becomes:
124 *
125 * <listing>
126 * goto explicit_test_block
127 * successor_to_loops:
128 * g1 = phi g1_1, g1_2
129 * g2 = phi g2_1, g2_2
130 * g3 = phi g3_1, g3_2
131 * t2 = phi t2_1, t2_2
132 * g4 = phi g4_1, g4_2
133 * g5 = phi g5_1, g5_2
134 * g6 = phi g6_1, g6_2
135 * goto after_loop
136 * explicit_test_block:
137 * if l0 == null (unlikely) goto sub_optimal_loop
138 * if 100 >= l0.length (unlikely) goto sub_optimal_loop
139 * if l1 == null (unlikely) goto sub_optimal_loop
140 * if 100 >= l1.length (unlikely) goto sub_optimal_loop
141 * goto optimal_loop
142 * sub_optimal_loop:
143 * for (int t1_1=0; t1_1 < 100; t1_1++) {
144 * g1_1 = null_check l0
145 * g2_1 = bounds_check l0, t1_1
146 * g3_1 = guard_combine g1_1,g2_1
147 * t2_1 = aload l0, t1_1, g3_1
148 * g4_1 = null_check l1
149 * g5_1 = bounds_check l1, t1_1
150 * g6_1 = guard_combine g4_1,g5_1
151 * astore t2_1, l1, t1_1, g6_1
152 * }
153 * goto successor_to_loops
154 * optimal_loop:
155 * for (int t1_2=0; t1_2 < 100; t1_2++) {
156 * g1_2 = true_guard
157 * g2_2 = true_guard
158 * g3_2 = guard_combine g1_2,g2_2
159 * t2_2 = aload l0, t1_2, g3_2
160 * g4_2 = null_check l1
161 * g5_2 = bounds_check l1, t1_2
162 * g6_2 = guard_combine g4_2,g5_2
163 * astore t2_2, l1, t1_2, g6_2
164 * }
165 * goto successor_to_loops
166 * after_loop:
167 * </listing>
168 *
169 * The optimisation works on the Heap SSA form. A more accurate
170 * example of the transformation would be:
171 *
172 * <listing>
173 * heap1 = ...; // previous heap state
174 * t1_1 = 0;
175 * if t1_1 ≥ 100 goto label2
176 * label1:
177 * t1_2 = phi t1_1, t1_3
178 * heap2 = phi heap1, heap3
179 * g1 = null_check l0
180 * g2 = bounds_check l0, t1_2
181 * g3 = guard_combine g1,g2
182 * t2 = aload l0, t1_2, g3
183 * g4 = null_check l1
184 * g5 = bounds_check l1, t1_2
185 * g6 = guard_combine g4,g5
186 * heap3 = astore t2, l1, t1_2, g6
187 * t1_3 = t1_2 + 1
188 * if t1_3 < 100 label1 * label2:
189 * </listing>
190 *
191 * becomes:
192 *
193 * <listing>
194 * heap1 = ...; // previous heap state
195 * t1_1 = 0;
196 * if t1_1 ≥ 100 goto label2
197 * goto explicit_test_block
198 * successor_to_loops:
199 * t1_2 = phi t1_2_1, t1_2_2
200 * heap2 = phi heap2_1, heap2_2
201 * g1 = phi g1_1, g1_2
202 * g2 = phi g2_1, g2_2
203 * g3 = phi g3_1, g3_2
204 * t2 = phi t2_1, t2_2
205 * g4 = phi g4_1, g4_2
206 * g5 = phi g5_1, g5_2
207 * g6 = phi g6_1, g6_2
208 * heap3 = phi heap3_1, heap3_2
209 * t1_3 = phi t1_3_1, t1_3_2
210 * goto after_loop
211 * explicit_test_block:
212 * g1_2 = if l0 == null (unlikely) goto sub_optimal_loop
213 * g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop
214 * g4_2 = if l1 == null (unlikely) goto sub_optimal_loop
215 * g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop
216 * goto optimal_loop
217 * sub_optimal_loop:
218 * label1_1:
219 * t1_2_1 = phi t1_1, t1_3_1
220 * heap2_1 = phi heap1, heap3_1
221 * g1_1 = null_check l0
222 * g2_1 = bounds_check l0, t1_2_1
223 * g3_1 = guard_combine g1_1,g2_1
224 * t2_1 = aload l0, t1_2_1, g3_1
225 * g4_1 = null_check l1
226 * g5_1 = bounds_check l1, t1_2_1
227 * g6_1 = guard_combine g4_1,g5_1
228 * heap3_1 = astore t2_1, l1, t1_2_1, g6_1
229 * t1_3_1 = t1_2_1 + 1
230 * if t1_3_1 < 100 label1_1
231 * goto successor_to_loops
232 * optimal_loop:
233 * label1_2:
234 * t1_2_2 = phi t1_1, t1_3_2
235 * heap2_2 = phi heap1, heap3_2
236 * g3_2 = guard_combine g1_2,g2_2
237 * t2_2 = aload l0, t1_2_2, g3_2
238 * g6_2 = guard_combine g4_2,g5_2
239 * heap3_2 = astore t2_2, l1, t1_2_2, g6_2
240 * t1_3_2 = t1_2_2 + 1
241 * if t1_3_2 < 100 label1_2
242 * goto successor_to_loops
243 * after_loop:
244 * label2:
245 * </listing>
246 */
247 public final class LoopVersioning extends CompilerPhase {
248 // -oO Debug variables Oo-
249 /**
250 * Flag to optionally print verbose debugging messages
251 */
252 private static final boolean DEBUG = false;
253 /**
254 * Flag to verify computed IR
255 */
256 private static final boolean VERIFY = false;
257
258 // -oO Debug routines Oo-
259 /**
260 * Human readable report of what goes on
261 *
262 * @param s String to print
263 **/
264 private static void report(String s) {
265 if (DEBUG) {
266 VM.sysWriteln(s);
267 }
268 }
269
270 /**
271 * Return a string name for this phase.
272 * @return "Loop Versioning"
273 */
274 @Override
275 public String getName() {
276 return "Loop Versioning";
277 }
278
279 // -oO Variables used throughout the optimisation phase Oo-
280 /**
281 * The phi instruction operand holding the optimized loop variable
282 */
283 private static final int OPTIMIZED_LOOP_OPERAND = 0;
284 /**
285 * The phi instruction operand holding the unoptimized loop variable
286 */
287 private static final int UNOPTIMIZED_LOOP_OPERAND = 1;
288
289 /**
290 * IR for optimisation
291 */
292 private IR ir;
293
294 /**
295 * Set used to store the loop related register
296 */
297 private HashSet<Register> loopRegisterSet;
298
299 /**
300 * SSA options
301 */
302 private SSAOptions desiredSSAOptions;
303 /**
304 * Compiler phases called from this one
305 */
306 private CompilerPhase enterSSA, leaveSSA, domPhase;
307 /**
308 * Run inside SSA sub-phase
309 */
310 private static final boolean inSSAphase = true;
311
312 // -oO Interface to the rest of the compiler Oo-
313
314 /**
315 * Constructor for this compiler phase
316 */
317 private static final Constructor<CompilerPhase> constructor =
318 getCompilerPhaseConstructor(LoopVersioning.class);
319
320 /**
321 * Get a constructor object for this compiler phase
322 * @return compiler phase constructor
323 */
324 @Override
325 public Constructor<CompilerPhase> getClassConstructor() {
326 return constructor;
327 }
328
329 /**
330 * Constructor
331 */
332 public LoopVersioning() {
333 desiredSSAOptions = new SSAOptions();
334 desiredSSAOptions.setScalarsOnly(true);
335 if (!inSSAphase) {
336 enterSSA = new EnterSSA();
337 leaveSSA = new LeaveSSA();
338 }
339 domPhase = new DominatorsPhase(false);
340 }
341
342 /**
343 * Should the optimisation be performed
344 */
345 @Override
346 public boolean shouldPerform(OptOptions options) {
347 return options.SSA_LOOP_VERSIONING;
348 }
349
350 /**
351 * The main entry point
352 *
353 * @param _ir the IR to process
354 */
355 @Override
356 public void perform(IR _ir) {
357 ir = _ir;
358
359 // Create SSA
360 ir.desiredSSAOptions = desiredSSAOptions;
361 if (!inSSAphase) {
362 enterSSA.perform(ir);
363 }
364
365 if (DEBUG) {
366 SSA.printInstructions(ir);
367 }
368
369 // Perform loop annotation
370 if (!ir.hasReachableExceptionHandlers()) {
371 // Build LST tree and dominator info
372 domPhase.perform(ir);
373 DefUse.computeDU(ir);
374 // Build annotated version
375 ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree);
376 }
377 if (VERIFY) {
378 ir.verify(getName(), true);
379 }
380
381 // Check loop annotation has been performed
382 if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) {
383 report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n");
384 } else {
385 loopRegisterSet = new HashSet<Register>();
386
387 if (DEBUG) {
388 VM.sysWriteln(ir.getMethod().toString());
389 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
390 SSA.printInstructions(ir);
391 }
392
393 while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) {
394 if (DEBUG) {
395 VM.sysWriteln("Successful optimisation of " + ir.getMethod());
396 SSA.printInstructions(ir);
397 VM.sysWriteln(ir.cfg.toString());
398 }
399 // Get IR into shape for next pass
400 DefUse.computeDU(ir);
401 LTDominators.perform(ir, true, true);
402 ir.HIRInfo.dominatorTree = new DominatorTree(ir, true);
403 LSTGraph.perform(ir);
404 AnnotatedLSTGraph.perform(ir);
405
406 if (VERIFY) {
407 ir.verify(getName(), true);
408 }
409
410 if (DEBUG) {
411 VM.sysWriteln("after an optimization pass");
412 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
413 SSA.printInstructions(ir);
414 VM.sysWriteln("Finish optimize: " + ir.getMethod().toString());
415 }
416 }
417 // No longer in use
418 loopRegisterSet = null;
419 }
420
421 if (!inSSAphase) {
422 // Leave SSA
423 leaveSSA.perform(ir);
424 }
425
426 if (VERIFY) {
427 ir.verify(getName(), true);
428 }
429 }
430
431 // -oO Optimisation routines Oo-
432
433 /**
434 * Find an outermost loop to optimise and optimise it. Focus on
435 * annotated regular loops, LICM should handle possible
436 * optimisation for the non-regular loops
437 *
438 * @param loop Loop to search
439 * @return was optimisation performed
440 */
441 private boolean findLoopToOptimise(AnnotatedLSTNode loop) {
442 // Has this loop already been optimised?
443 Operand carriedLoopIterator = loop.getCarriedLoopIterator();
444 if ((carriedLoopIterator instanceof RegisterOperand) &&
445 (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) {
446 return false;
447 }
448
449 // Process inner loops first
450 Enumeration<GraphNode> innerLoops = loop.outNodes();
451 // Iterate over loops
452 while (innerLoops.hasMoreElements()) {
453 AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement();
454 // Try to optimise inner loops first
455 if (findLoopToOptimise(nestedLoop)) {
456 // Exit early if inner loop optimisation succeeded
457 return true;
458 }
459 }
460 // Don't try to optimise irregular loops
461 if (loop.isNonRegularLoop()) {
462 return false;
463 }
464 if (DEBUG) {
465 report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod());
466 VM.sysWriteln("dominator tree:");
467 VM.sysWriteln(ir.HIRInfo.dominatorTree.toString());
468 }
469
470 // 1) Determine the bound and null checks to be eliminated. The
471 // bound checks are the ones that operate on the loop iterator. If
472 // no checks can be eliminated, stop optimising this loop.
473 ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>();
474 getListOfChecksToEliminate(loop, checksToEliminate);
475 if (checksToEliminate.isEmpty()) {
476 return false;
477 } else {
478 // We found instructions to eliminate
479 if (DEBUG) {
480 VM.sysWriteln("Loop being optimised:");
481 VM.sysWriteln(loop.toString());
482 VM.sysWriteln("Checks to eliminate:");
483 for (Instruction instruction : checksToEliminate) {
484 VM.sysWriteln(instruction.toString());
485 }
486 }
487 // 2) Determine the registers defined in the loop.
488 ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>();
489 ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>();
490 ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>();
491 getRegistersDefinedInLoop(loop,
492 registersDefinedInOriginalLoop,
493 typesOfRegistersDefinedInOriginalLoop,
494 definingInstructionsInOriginalLoop);
495 if (DEBUG) {
496 VM.sysWrite("Registers in original loop:\n{");
497 for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) {
498 VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString());
499 if (definingInstructionsInOriginalLoop.get(i) != null) {
500 VM.sysWrite("(escapes),");
501 } else {
502 VM.sysWrite(",");
503 }
504 }
505 VM.sysWriteln("}");
506 }
507 // 3) Generate phi nodes that define the original register
508 // defined by the loop and use two newly created registers.
509 ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>();
510 HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>();
511 HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>();
512 generatePhiNodes(loop,
513 registersDefinedInOriginalLoop,
514 typesOfRegistersDefinedInOriginalLoop,
515 phiInstructions,
516 subOptimalRegMap,
517 optimalRegMap);
518 if (DEBUG) {
519 VM.sysWriteln("subOptimalRegMap");
520 VM.sysWriteln(subOptimalRegMap.toString());
521 VM.sysWriteln("optimalRegMap");
522 VM.sysWriteln(optimalRegMap.toString());
523 }
524
525 // 4) Create a version of the original loop that uses the first of
526 // the newly created registers instead of the original
527 // registers.
528 HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>();
529 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap =
530 createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap);
531 if (DEBUG) {
532 VM.sysWriteln("subOptimalLoopMap");
533 VM.sysWriteln(unoptimizedLoopMap.toString());
534 }
535 // 5) Create a second version, this time with the result of the
536 // eliminated checks set to explicit test guards.
537 HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>();
538 HashMap<BasicBlock, BasicBlock> optimizedLoopMap =
539 createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap);
540 if (DEBUG) {
541 VM.sysWriteln("optimalLoopMap");
542 VM.sysWriteln(optimizedLoopMap.toString());
543 }
544 // 6) Work out what the maximum value for all the bounds checks
545 // are and create branches to optimal or suboptimal loops - with
546 // the unoptimized loop possibly being unreachable
547 BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
548 BasicBlock temp = (BasicBlock) loop.header.prev;
549 ir.cfg.breakCodeOrder(temp, loop.header);
550 ir.cfg.linkInCodeOrder(temp, firstBranchBlock);
551 ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header);
552 temp.redirectOuts(loop.header, firstBranchBlock, ir);
553 boolean isUnoptimizedLoopReachable =
554 createBranchBlocks(loop,
555 firstBranchBlock,
556 checksToEliminate,
557 unoptimizedLoopMap.get(loop.predecessor),
558 optimizedLoopMap.get(loop.predecessor),
559 optimalRegMap);
560 // 7) Fix up the phi node predecessors
561 fixUpPhiPredecessors(phiInstructions,
562 isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null,
563 optimizedLoopMap.get(loop.exit));
564 // 8) Remove the unoptimized loop if its redundant
565 if (!isUnoptimizedLoopReachable) {
566 removeUnoptimizedLoop(loop, unoptimizedLoopMap);
567 }
568
569 // 9) Replace register definitions in the original
570 // loop with phi instructions
571 modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap);
572 // 10) Compact node numbering so that CFG number of nodes
573 // reflects that some basic blocks may have been deleted
574 ir.cfg.compactNodeNumbering();
575 return true;
576 }
577 }
578
579 /**
580 * Create a list of instructions to be eliminated
581 * @param loop the loop to examine
582 * @param instrToEliminate the instructions to remove
583 */
584 private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) {
585 ArrayList<Instruction> nullChecks = new ArrayList<Instruction>();
586 ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>();
587 BasicBlockEnumeration blocks = loop.getBasicBlocks();
588 while (blocks.hasMoreElements()) {
589 BasicBlock block = blocks.next();
590 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
591 while (instructions.hasMoreElements()) {
592 Instruction instruction = instructions.next();
593 if (NullCheck.conforms(instruction)) {
594 if (loop.isInvariant(NullCheck.getRef(instruction))) {
595 instrToEliminate.add(instruction);
596 nullChecks.add(instruction);
597 }
598 } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) {
599 if (loop.isInvariant(BoundsCheck.getRef(instruction))) {
600 if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) {
601 if (loop.isInvariant(BoundsCheck.getGuard(instruction))) {
602 instrToEliminate.add(instruction);
603 } else {
604 // Null check isn't invariant but reference was, check
605 // null check will be eliminated at end of loop
606 oddBoundChecks.add(instruction);
607 }
608 }
609 }
610 }
611 }
612 }
613 // Check cases where the null check isn't loop invariant, however,
614 // it will be in the optimized loop as we'll have eliminated it
615 for (Instruction oddBoundCheck : oddBoundChecks) {
616 Operand guard = BoundsCheck.getGuard(oddBoundCheck);
617 for (Instruction nullCheck : nullChecks) {
618 if (guard.similar(NullCheck.getGuardResult(nullCheck))) {
619 instrToEliminate.add(oddBoundCheck);
620 break;
621 }
622 }
623 }
624 }
625
626 /**
627 * Get registers defined in the given loop. As we're in SSA form
628 * all register definitions must be unique.
629 * @param loop - the loop to examine
630 * @param registers - vector to which defined registers are added
631 */
632 private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers,
633 ArrayList<TypeReference> types,
634 ArrayList<Instruction> definingInstructions) {
635 BasicBlockEnumeration blocks = loop.getBasicBlocks();
636 while (blocks.hasMoreElements()) {
637 BasicBlock block = blocks.next();
638 // can value escape
639 final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit));
640 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
641 while (instructions.hasMoreElements()) {
642 Instruction instruction = instructions.next();
643 OperandEnumeration operands = instruction.getDefs();
644 while (operands.hasMoreElements()) {
645 Operand operand = operands.next();
646 if (operand.isRegister()) {
647 registers.add(operand.asRegister().getRegister());
648 types.add(operand.asRegister().getType());
649 if (escapes) {
650 definingInstructions.add(instruction);
651 } else {
652 definingInstructions.add(null);
653 }
654 }
655 }
656 }
657 }
658 }
659
660 /**
661 * Generate into a new block phi nodes that define the original
662 * register defined by the loop and use two newly created
663 * registers.
664 * @param registers - vector to which defined registers need to be
665 * created registers.x used in creating phi nodes
666 * @param types - vector of corresponding types of registers.
667 * @param phiInstructions - created phi instructions
668 * @param subOptimalRegMap - mapping of orignal destination to the
669 * newly created destination for the unoptimized loop
670 * @param optimalRegMap - mapping of orignal destination to the
671 * newly created destination for the optimized loop
672 */
673 private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers,
674 ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions,
675 HashMap<Register, Register> subOptimalRegMap,
676 HashMap<Register, Register> optimalRegMap) {
677 // Get the carried loop iterator's register
678 Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister();
679 for (int i = 0; i < registers.size(); i++) {
680 Register register = registers.get(i);
681 TypeReference type = types.get(i);
682 Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2);
683 phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
684
685 // new operand for optimized loop
686 Operand op0 = ir.regpool.makeTemp(type);
687 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0);
688 optimalRegMap.put(register, op0.asRegister().getRegister());
689
690 // new operand for unoptimized loop
691 Operand op1 = ir.regpool.makeTemp(type);
692 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1);
693 subOptimalRegMap.put(register, op1.asRegister().getRegister());
694
695 // Add the new created carried loop iterator registers to
696 // internal set to mark the optimized loops
697 if (register == carriedLoopIteratorRegister) {
698 setOptimizedLoop(op0.asRegister().getRegister());
699 setOptimizedLoop(op1.asRegister().getRegister());
700 }
701
702 phiInstructions.add(phi);
703 }
704 // rename any optimized inner loops registers
705 renameOptimizedLoops(subOptimalRegMap, optimalRegMap);
706 }
707
708 /**
709 * Create a clone of the loop replacing definitions in the cloned
710 * loop with those found in the register map
711 * @param loop - loop to clone
712 * @param regMap - mapping of original definition to new
713 * definition
714 * @return a mapping from original BBs to created BBs
715 */
716 private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop,
717 HashMap<Register, Register> regMap,
718 HashMap<Register, BasicBlock> regToBlockMap) {
719 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
720 // After the newly created loop goto the old loop header
721 originalToCloneBBMap.put(loop.successor, loop.header);
722 // Create an empty block to be the loop predecessor
723 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
724 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
725 originalToCloneBBMap.put(loop.predecessor, new_pred);
726 // Create copy blocks
727 BasicBlockEnumeration blocks = loop.getBasicBlocks();
728 while (blocks.hasMoreElements()) {
729 BasicBlock block = blocks.next();
730 block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts
731 // Create copy and register mapping
732 BasicBlock copy = block.copyWithoutLinks(ir);
733 originalToCloneBBMap.put(block, copy);
734 // Link into code order
735 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
736 // Alter register definitions and uses in copy
737 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
738 while (instructions.hasMoreElements()) {
739 Instruction instruction = instructions.next();
740 OperandEnumeration operands = instruction.getDefs();
741 while (operands.hasMoreElements()) {
742 Operand operand = operands.next();
743 if (operand.isRegister()) {
744 Register register = operand.asRegister().getRegister();
745 if (regMap.containsKey(register)) {
746 instruction.replaceRegister(register, regMap.get(register));
747 regToBlockMap.put(regMap.get(register), copy);
748 }
749 }
750 }
751 operands = instruction.getUses();
752 while (operands.hasMoreElements()) {
753 Operand operand = operands.next();
754 if (operand instanceof RegisterOperand) {
755 Register register = operand.asRegister().getRegister();
756 if (regMap.containsKey(register)) {
757 instruction.replaceRegister(register, regMap.get(register));
758 }
759 }
760 }
761 }
762 }
763 // Fix up outs
764 // loop predecessor
765 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
766 // loop blocks
767 blocks = loop.getBasicBlocks();
768 while (blocks.hasMoreElements()) {
769 BasicBlock block = blocks.next();
770 BasicBlock copy = originalToCloneBBMap.get(block);
771 Enumeration<BasicBlock> outs = block.getOutNodes();
772 while (outs.hasMoreElements()) {
773 BasicBlock out = outs.nextElement();
774 if (originalToCloneBBMap.containsKey(out)) {
775 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
776 }
777 }
778 }
779 // Fix up phis
780 blocks = loop.getBasicBlocks();
781 while (blocks.hasMoreElements()) {
782 BasicBlock block = blocks.next();
783 BasicBlock copy = originalToCloneBBMap.get(block);
784 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
785 while (instructions.hasMoreElements()) {
786 Instruction instruction = instructions.next();
787 if (Phi.conforms(instruction)) {
788 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
789 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
790 if (originalToCloneBBMap.containsKey(phi_predecessor)) {
791 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
792 } else {
793 dumpIR(ir, "Error when optimising" + ir.getMethod());
794 throw new Error("There's > 1 route to this phi node " +
795 instruction +
796 " from outside the loop: " +
797 phi_predecessor);
798 }
799 }
800 }
801 }
802 }
803 return originalToCloneBBMap;
804 }
805
806 /**
807 * Create a clone of the loop replacing definitions in the cloned
808 * loop with those found in the register map and eliminate
809 * unnecessary bound checks
810 * @param loop - loop to clone
811 * @param regMap - mapping of original definition to new
812 * definition
813 * @param instrToEliminate - instructions to eliminate
814 * @param regToBlockMap - mapping of a register to its defining BB
815 * @return a mapping from original BBs to created BBs
816 */
817 private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop,
818 HashMap<Register, Register> regMap,
819 ArrayList<Instruction> instrToEliminate,
820 HashMap<Register, BasicBlock> regToBlockMap) {
821 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
822 // After the newly created loop goto the old loop header
823 originalToCloneBBMap.put(loop.successor, loop.header);
824 // Create an empty block to be the loop predecessor
825 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
826 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
827 originalToCloneBBMap.put(loop.predecessor, new_pred);
828
829 // Create copy blocks
830 BasicBlockEnumeration blocks = loop.getBasicBlocks();
831 while (blocks.hasMoreElements()) {
832 BasicBlock block = blocks.next();
833 // N.B. fall through will have been killed by unoptimized loop
834 // Create copy and register mapping
835 BasicBlock copy = block.copyWithoutLinks(ir);
836 originalToCloneBBMap.put(block, copy);
837 // Link into code order
838 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
839
840 // Alter register definitions in copy
841 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
842 loop_over_created_instructions:
843 while (instructions.hasMoreElements()) {
844 Instruction instruction = instructions.next();
845 if (BoundsCheck.conforms(instruction)) {
846 for (Instruction anInstrToEliminate : instrToEliminate) {
847 if (instruction.similar(anInstrToEliminate)) {
848 instruction.remove();
849 continue loop_over_created_instructions;
850 }
851 }
852 } else if (NullCheck.conforms(instruction)) {
853 for (Instruction anInstrToEliminate : instrToEliminate) {
854 if (instruction.similar(anInstrToEliminate)) {
855 instruction.remove();
856 continue loop_over_created_instructions;
857 }
858 }
859 }
860 OperandEnumeration operands = instruction.getDefs();
861 while (operands.hasMoreElements()) {
862 Operand operand = operands.next();
863 if (operand instanceof RegisterOperand) {
864 Register register = operand.asRegister().getRegister();
865 if (regMap.containsKey(register)) {
866 instruction.replaceRegister(register, regMap.get(register));
867 regToBlockMap.put(regMap.get(register), copy);
868 }
869 }
870 }
871 operands = instruction.getUses();
872 while (operands.hasMoreElements()) {
873 Operand operand = operands.next();
874 if (operand.isRegister()) {
875 Register register = operand.asRegister().getRegister();
876 if (regMap.containsKey(register)) {
877 instruction.replaceRegister(register, regMap.get(register));
878 }
879 }
880 }
881 }
882 }
883 // Fix up outs
884 // loop predecessor
885 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
886 blocks = loop.getBasicBlocks();
887 while (blocks.hasMoreElements()) {
888 BasicBlock block = blocks.next();
889 BasicBlock copy = originalToCloneBBMap.get(block);
890 Enumeration<BasicBlock> outs = block.getOutNodes();
891 while (outs.hasMoreElements()) {
892 BasicBlock out = outs.nextElement();
893 if (originalToCloneBBMap.containsKey(out)) {
894 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
895 }
896 }
897 }
898 // Fix up phis
899 blocks = loop.getBasicBlocks();
900 while (blocks.hasMoreElements()) {
901 BasicBlock block = blocks.next();
902 BasicBlock copy = originalToCloneBBMap.get(block);
903 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
904 while (instructions.hasMoreElements()) {
905 Instruction instruction = instructions.next();
906 if (Phi.conforms(instruction)) {
907 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
908 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
909 if (originalToCloneBBMap.containsKey(phi_predecessor)) {
910 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
911 } else {
912 throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor);
913 }
914 }
915 }
916 }
917 }
918 return originalToCloneBBMap;
919 }
920
921 /**
922 * When phi nodes were generated the basic blocks weren't known for
923 * the predecessors, fix this up now. (It may also not be possible
924 * to reach the unoptimized loop any more)
925 */
926 private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit,
927 BasicBlock optimizedLoopExit) {
928 if (unoptimizedLoopExit != null) {
929 for (Instruction instruction : phiInstructions) {
930 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
931 Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit));
932 }
933 } else {
934 for (Instruction instruction : phiInstructions) {
935 Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND);
936 Phi.resizeNumberOfPreds(instruction, 1);
937 Phi.resizeNumberOfValues(instruction, 1);
938 Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand);
939 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
940 }
941 }
942 }
943
944 /**
945 * Create the block containing explict branches to either the
946 * optimized or unoptimized loops
947 * @param optimalRegMap - mapping used to map eliminated bound and
948 * null check guards to
949 */
950 private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block,
951 ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry,
952 BasicBlock optimizedLoopEntry,
953 HashMap<Register, Register> optimalRegMap) {
954 BasicBlock blockOnEntry = block;
955 // 1) generate null check guards
956 block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry);
957
958 // 2) generate bound check guards
959 if (loop.isMonotonic()) {
960 // create new operands for values beyond initial and terminal iterator values
961 Operand terminal;
962 Operand terminalLessStrideOnce;
963 Operand terminalPlusStrideOnce;
964
965 // NB. precomputing these makes life easier and the code easier to read,
966 // it does create dead code though
967 if (loop.terminalIteratorValue.isIntConstant()) {
968 terminal = loop.terminalIteratorValue;
969 int terminalAsInt = terminal.asIntConstant().value;
970 int stride = loop.strideValue.asIntConstant().value;
971 terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride);
972 terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride);
973 } else {
974 Instruction tempInstr;
975 terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue);
976 terminalLessStrideOnce = ir.regpool.makeTempInt();
977 terminalPlusStrideOnce = ir.regpool.makeTempInt();
978 tempInstr =
979 Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
980 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
981 block.appendInstruction(tempInstr);
982 DefUse.updateDUForNewInstruction(tempInstr);
983
984 tempInstr =
985 Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
986 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
987 block.appendInstruction(tempInstr);
988 DefUse.updateDUForNewInstruction(tempInstr);
989 }
990
991 // Determine maximum and minimum index values for different loop types
992 Operand phiMinIndexValue;
993 Operand phiMaxIndexValue;
994
995 if (loop.isMonotonicIncreasing()) {
996 phiMinIndexValue = loop.initialIteratorValue;
997 if ((loop.condition.isLESS() ||
998 loop.condition.isLOWER() ||
999 loop.condition.isNOT_EQUAL())) {
1000 phiMaxIndexValue = terminal;
1001 } else if ((loop.condition.isLESS_EQUAL() ||
1002 loop.condition.isLOWER_EQUAL() ||
1003 loop.condition.isEQUAL())) {
1004 phiMaxIndexValue = terminalPlusStrideOnce;
1005 } else {
1006 throw new Error("Unrecognised loop for fission " + loop);
1007 }
1008 } else if (loop.isMonotonicDecreasing()) {
1009 phiMaxIndexValue = loop.initialIteratorValue;
1010 if ((loop.condition.isGREATER() ||
1011 loop.condition.isHIGHER() ||
1012 loop.condition.isNOT_EQUAL())) {
1013 phiMinIndexValue = terminalPlusStrideOnce;
1014 } else if ((loop.condition.isGREATER_EQUAL() ||
1015 loop.condition.isHIGHER_EQUAL() ||
1016 loop.condition.isEQUAL())) {
1017 phiMinIndexValue = terminalLessStrideOnce;
1018 } else {
1019 throw new Error("Unrecognised loop for fission " + loop);
1020 }
1021 } else {
1022 throw new Error("Unrecognised loop for fission " + loop);
1023 }
1024 // Generate tests
1025 for (int i = 0; i < checksToEliminate.size(); i++) {
1026 Instruction instr = checksToEliminate.get(i);
1027 if (BoundsCheck.conforms(instr)) {
1028 // Have we already generated these tests?
1029 boolean alreadyChecked = false;
1030 for (int j = 0; j < i; j++) {
1031 Instruction old_instr = checksToEliminate.get(j);
1032 if (BoundsCheck.conforms(old_instr) &&
1033 (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) &&
1034 (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) {
1035 // yes - just create a guard move
1036 alreadyChecked = true;
1037 RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO();
1038 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1039 RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO();
1040 guardSource.setRegister(optimalRegMap.get(guardSource.getRegister()));
1041 Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource);
1042 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1043 block.appendInstruction(tempInstr);
1044 break;
1045 }
1046 }
1047 if (!alreadyChecked) {
1048 // no - generate tests
1049 Operand index = BoundsCheck.getIndex(instr);
1050 int distance = loop.getFixedDistanceFromPhiIterator(index);
1051 if (distance == 0) {
1052 block =
1053 generateExplicitBoundCheck(instr,
1054 phiMinIndexValue,
1055 phiMaxIndexValue,
1056 optimalRegMap,
1057 block,
1058 unoptimizedLoopEntry);
1059 } else {
1060 Instruction tempInstr;
1061 RegisterOperand minIndex = ir.regpool.makeTempInt();
1062 RegisterOperand maxIndex = ir.regpool.makeTempInt();
1063
1064 tempInstr =
1065 Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance));
1066 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1067 block.appendInstruction(tempInstr);
1068 DefUse.updateDUForNewInstruction(tempInstr);
1069
1070 tempInstr =
1071 Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance));
1072 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1073 block.appendInstruction(tempInstr);
1074 DefUse.updateDUForNewInstruction(tempInstr);
1075
1076 block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry);
1077 }
1078 }
1079 }
1080 }
1081 }
1082 // Have we had to create a new basic block since entry => we
1083 // generated a branch to the unoptimized loop
1084 boolean isUnoptimizedLoopReachable = (blockOnEntry != block);
1085 // 3) Finish up with goto and generate true guard value
1086 {
1087 Instruction branch; // the generated branch instruction
1088 branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget());
1089 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1090 block.appendInstruction(branch);
1091 block.deleteNormalOut();
1092 block.insertOut(optimizedLoopEntry);
1093 }
1094 return isUnoptimizedLoopReachable;
1095 }
1096
1097 /**
1098 * Generate null check branch blocks
1099 *
1100 * @param loop the current loop where checks are being eliminated
1101 * @param checksToEliminate all of the checks that are being eliminated in the pass
1102 * @param optimalRegMap a map from original register to the register used in the optimal loop
1103 * @param block the block to generate code into
1104 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1105 * @return the new block to generate code into
1106 */
1107 private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop,
1108 ArrayList<Instruction> checksToEliminate,
1109 HashMap<Register, Register> optimalRegMap,
1110 BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1111 // Map of already generated null check references to their
1112 // corresponding guard result
1113 HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>();
1114 // Iterate over checks
1115 for (Instruction instr : checksToEliminate) {
1116 // Is this a null check
1117 if (NullCheck.conforms(instr)) {
1118 // the generated branch instruction
1119 Instruction branch;
1120 // the reference to compare
1121 Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr));
1122 // the guard result to define
1123 RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO();
1124 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1125 // check if we've generated this test already
1126 if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) {
1127 // yes - generate just a guard move
1128 branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy());
1129 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1130 block.appendInstruction(branch);
1131 } else {
1132 // check if we can just move a guard from the loop predecessors
1133 RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr);
1134 if (guard != null) {
1135 // yes - generate just a guard move
1136 branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO());
1137 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1138 block.appendInstruction(branch);
1139 } else {
1140 // generate explicit null test
1141 branch =
1142 IfCmp.create(REF_IFCMP,
1143 guardResult,
1144 ref.copy(),
1145 new NullConstantOperand(),
1146 ConditionOperand.EQUAL(),
1147 unoptimizedLoopEntry.makeJumpTarget(),
1148 BranchProfileOperand.unlikely());
1149 if (ref.isRegister()) {
1150 refToGuardMap.put(ref.asRegister().getRegister(), guardResult);
1151 }
1152 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1153 block.appendInstruction(branch);
1154 // Adjust block
1155 block.insertOut(unoptimizedLoopEntry);
1156 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1157 BasicBlock temp = (BasicBlock) block.next;
1158 ir.cfg.breakCodeOrder(block, temp);
1159 ir.cfg.linkInCodeOrder(block, new_block);
1160 ir.cfg.linkInCodeOrder(new_block, temp);
1161 block.insertOut(new_block);
1162 block = new_block;
1163 }
1164 }
1165 }
1166 }
1167 return block;
1168 }
1169
1170 /**
1171 * Generate bound check branch blocks
1172 *
1173 * @param boundCheckInstr the bound check instruction in question
1174 * @param minIndexValue the min value for an iterator a loop will generate
1175 * @param maxIndexValue the max value for an iterator a loop will generate
1176 * @param optimalRegMap a map from original register to the register used in the optimal loop
1177 * @param block the block to generate code into
1178 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1179 * @return the new block to generate code into
1180 */
1181 private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue,
1182 Operand maxIndexValue,
1183 HashMap<Register, Register> optimalRegMap,
1184 BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1185 // 1) Work out what tests are necessary. NB we don't optimise for
1186 // the case when exceptions will always be generated
1187 boolean lowerBoundTestRedundant;
1188 boolean upperBoundTestRedundant;
1189 {
1190 // as array lengths must be >= 0 the lower bound test is not
1191 // necessary if:
1192 // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant)
1193 lowerBoundTestRedundant =
1194 ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) ||
1195 ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) &&
1196 (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0)));
1197 // as the upper bound must be <= arraylength the test is not
1198 // necessary if:
1199 // maxIndexValue = (arraylength A) - zeroOrPositiveConstant
1200 Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue);
1201 upperBoundTestRedundant =
1202 ((maxIndexArrayLengthRef != null) &&
1203 maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) &&
1204 (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0));
1205 }
1206
1207 // 2) Create explicit bound check
1208
1209 // register to hold result (NB it's a guard for the optimal loop)
1210 RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO();
1211 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1212
1213 // the guard on the bound check (mapped from the optimal loop as
1214 // it should already have been generated or may already be out of
1215 // the loop)
1216 Operand origGuard = BoundsCheck.getGuard(boundCheckInstr);
1217 Operand guard = origGuard.copy();
1218 if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) {
1219 guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister()));
1220 }
1221
1222 if (lowerBoundTestRedundant && upperBoundTestRedundant) {
1223 // both tests redundant so just generate a guard move of the
1224 // bound check guard
1225 Instruction move = Move.create(GUARD_MOVE, guardResult, guard);
1226 move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1227 block.appendInstruction(move);
1228 } else {
1229 // 2.1) Create array length
1230 RegisterOperand array_length = ir.regpool.makeTempInt();
1231 Instruction array_length_instr =
1232 GuardedUnary.create(ARRAYLENGTH,
1233 array_length,
1234 AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(),
1235 guard);
1236 array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1237 block.appendInstruction(array_length_instr);
1238
1239 // 2.2) Create minimum index test unless test is redundant
1240 if (!lowerBoundTestRedundant) {
1241 RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation();
1242 // Generate bound check
1243 Instruction branch =
1244 IfCmp.create(INT_IFCMP,
1245 lowerBoundGuard,
1246 minIndexValue.copy(),
1247 array_length.copyRO(),
1248 ConditionOperand.LESS(),
1249 unoptimizedLoopEntry.makeJumpTarget(),
1250 BranchProfileOperand.unlikely());
1251 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1252 block.appendInstruction(branch);
1253 // Adjust block
1254 block.insertOut(unoptimizedLoopEntry);
1255 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1256 BasicBlock temp = (BasicBlock) block.next;
1257 ir.cfg.breakCodeOrder(block, temp);
1258 ir.cfg.linkInCodeOrder(block, new_block);
1259 ir.cfg.linkInCodeOrder(new_block, temp);
1260 block.insertOut(new_block);
1261 block = new_block;
1262 }
1263 // 2.3) Create maximum index test
1264 if (!upperBoundTestRedundant) {
1265 Instruction branch =
1266 IfCmp.create(INT_IFCMP,
1267 guardResult,
1268 maxIndexValue.copy(),
1269 array_length.copyRO(),
1270 ConditionOperand.GREATER(),
1271 unoptimizedLoopEntry.makeJumpTarget(),
1272 BranchProfileOperand.unlikely());
1273 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1274 block.appendInstruction(branch);
1275 // Adjust block
1276 block.insertOut(unoptimizedLoopEntry);
1277 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1278 BasicBlock temp = (BasicBlock) block.next;
1279 ir.cfg.breakCodeOrder(block, temp);
1280 ir.cfg.linkInCodeOrder(block, new_block);
1281 ir.cfg.linkInCodeOrder(new_block, temp);
1282 block.insertOut(new_block);
1283 block = new_block;
1284 }
1285 }
1286 return block;
1287 }
1288
1289 /**
1290 * Can we eliminate a null check as it has lready been performed?
1291 * NB SSA guarantees that if a value is null it must always be null
1292 *
1293 * @param instr null check instruction
1294 */
1295 private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) {
1296 if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr));
1297 BasicBlock block = header;
1298 do {
1299 block = ir.HIRInfo.dominatorTree.getParent(block);
1300 Instruction lastInst = block.lastInstruction();
1301 for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst =
1302 itrInst.nextInstructionInCodeOrder()) {
1303 if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) {
1304 return NullCheck.getGuardResult(itrInst);
1305 }
1306 }
1307 } while (block != ir.cfg.entry());
1308 return null;
1309 }
1310
1311 /**
1312 * Get the array length reference ignoring instructions that adjust
1313 * its result by a fixed amount
1314 *
1315 * @param op operand to chase arraylength opcode to
1316 * constant value from an array length
1317 */
1318 private Operand getConstantAdjustedArrayLengthRef(Operand op) {
1319 Operand result = null;
1320 if (op.isRegister()) {
1321 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1322 if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1323 result = GuardedUnary.getVal(opInstr);
1324 } else if ((opInstr.operator.opcode == INT_ADD_opcode) || (opInstr.operator.opcode == INT_SUB_opcode)) {
1325 Operand val1 = Binary.getVal1(opInstr);
1326 Operand val2 = Binary.getVal2(opInstr);
1327 if (val1.isConstant()) {
1328 result = getConstantAdjustedArrayLengthRef(val2);
1329 } else if (val2.isConstant()) {
1330 result = getConstantAdjustedArrayLengthRef(val1);
1331 }
1332 }
1333 }
1334 return result;
1335 }
1336
1337 /**
1338 * Get the distance from an array length by addding up instructions
1339 * that adjust the array length result by a constant amount
1340 *
1341 * @param op operand to chase arraylength opcode to
1342 */
1343 private int getConstantAdjustedArrayLengthDistance(Operand op) {
1344 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1345 if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1346 return 0;
1347 } else if (opInstr.operator.opcode == INT_ADD_opcode) {
1348 Operand val1 = Binary.getVal1(opInstr);
1349 Operand val2 = Binary.getVal2(opInstr);
1350 if (val1.isConstant()) {
1351 return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2);
1352 } else {
1353 VM._assert(val2.isConstant());
1354 return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value;
1355 }
1356 } else if (opInstr.operator.opcode == INT_SUB_opcode) {
1357 Operand val1 = Binary.getVal1(opInstr);
1358 Operand val2 = Binary.getVal2(opInstr);
1359 if (val1.isConstant()) {
1360 return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2);
1361 } else {
1362 VM._assert(val2.isConstant());
1363 return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value;
1364 }
1365 } else {
1366 throw new Error("Unexpected opcode when computing distance " + op);
1367 }
1368 }
1369
1370 /**
1371 * Remove loop and replace register definitions in the original loop
1372 * with phi instructions
1373 */
1374 private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions,
1375 ArrayList<Instruction> definingInstrInOriginalLoop,
1376 HashMap<Register, Register> subOptimalRegMap,
1377 HashMap<Register, Register> optimalRegMap) {
1378 // Remove instructions from loop header and exit, remove other
1379 // loop body blocks
1380 BasicBlockEnumeration blocks = loop.getBasicBlocks();
1381 while (blocks.hasMoreElements()) {
1382 BasicBlock block = blocks.next();
1383 if ((block == loop.header) || (block == loop.exit)) {
1384 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
1385 while (instructions.hasMoreElements()) {
1386 Instruction instruction = instructions.next();
1387 if (!BBend.conforms(instruction) && !Label.conforms(instruction)) {
1388 instruction.remove();
1389 }
1390 }
1391 } else {
1392 ir.cfg.removeFromCFGAndCodeOrder(block);
1393 }
1394 }
1395
1396 // Place phi instructions in loop header
1397 for (int i = 0; i < phiInstructions.size(); i++) {
1398 Instruction origInstr = definingInstrInOriginalLoop.get(i);
1399 // Did the original instructions value escape the loop?
1400 if (origInstr != null) {
1401 // Was this a phi of a phi?
1402 if (Phi.conforms(origInstr)) {
1403 Instruction phi = phiInstructions.get(i);
1404 boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2;
1405 // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header
1406 boolean fixed = false;
1407 for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) {
1408 BasicBlockOperand predOp = Phi.getPred(origInstr, index);
1409 // Only worry about values who are on the backward branch
1410 if (predOp.block == loop.exit) {
1411 if (fixed) { // We've tried to do 2 replaces => something wrong
1412 SSA.printInstructions(ir);
1413 OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1414 "Phi node in loop header with multiple in loop predecessors");
1415 }
1416 Operand rval = Phi.getValue(origInstr, index);
1417 if (rval.isRegister()) {
1418 // Sort out registers
1419 Register origRegPhiRval = rval.asRegister().getRegister();
1420 Register subOptRegPhiRval;
1421 Register optRegPhiRval;
1422 if (!subOptimalRegMap.containsKey(origRegPhiRval)) {
1423 // Register comes from loop exit but it wasn't defined in the loop
1424 subOptRegPhiRval = origRegPhiRval;
1425 optRegPhiRval = origRegPhiRval;
1426 } else {
1427 subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval);
1428 optRegPhiRval = optimalRegMap.get(origRegPhiRval);
1429 }
1430 if (phiHasUnoptimizedArg) {
1431 Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval);
1432 }
1433 Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval);
1434 } else if (rval.isConstant()) {
1435 // Sort out constants
1436 if (phiHasUnoptimizedArg) {
1437 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy());
1438 }
1439 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy());
1440 } else if (rval instanceof HeapOperand) {
1441 // Sort out heap variables
1442 @SuppressWarnings("unchecked") // Cast to generic type
1443 HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value;
1444 HeapVariable<Object> subOptPhiRval;
1445 HeapVariable<Object> optPhiRval;
1446 if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) {
1447 // currently we only expect to optimise scalar SSA
1448 // form
1449 subOptPhiRval = origPhiRval;
1450 optPhiRval = origPhiRval;
1451 } else {
1452 /*
1453 subOptPhiRval = (HeapVariable)subOptimalRegMap.get(origPhiRval);
1454 optPhiRval = (HeapVariable)optimalRegMap.get(origPhiRval);
1455 */
1456 }
1457 if (phiHasUnoptimizedArg) {
1458 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval));
1459 }
1460 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval));
1461 } else {
1462 OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1463 "Unknown operand type",
1464 rval.toString());
1465 }
1466 fixed = true;
1467 }
1468 }
1469 }
1470 // Add back to loop
1471 loop.header.appendInstruction(phiInstructions.get(i));
1472 }
1473 }
1474 // Remove original loop and branch to loop successor
1475 Instruction tempInstr;
1476 if (loop.header != loop.exit) {
1477 tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget());
1478 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1479 loop.header.appendInstruction(tempInstr);
1480 loop.header.deleteNormalOut();
1481 loop.header.insertOut(loop.exit);
1482
1483 }
1484 tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget());
1485 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1486 loop.exit.appendInstruction(tempInstr);
1487 loop.exit.deleteNormalOut();
1488 loop.exit.insertOut(loop.successor);
1489 }
1490
1491 /**
1492 * Remove unreachable unoptimized loop
1493 */
1494 private void removeUnoptimizedLoop(AnnotatedLSTNode loop,
1495 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) {
1496 BasicBlockEnumeration blocks = loop.getBasicBlocks();
1497 report("removing unoptimized loop");
1498 BasicBlock block = unoptimizedLoopMap.get(loop.predecessor);
1499 report("removing block " + block);
1500 ir.cfg.removeFromCFGAndCodeOrder(block);
1501 while (blocks.hasMoreElements()) {
1502 block = unoptimizedLoopMap.get(blocks.next());
1503 if (!loop.contains(block)) {
1504 report("removing block " + block);
1505 ir.cfg.removeFromCFGAndCodeOrder(block);
1506 } else {
1507 report("not removing block that's in the original loop" + block);
1508 }
1509 }
1510 }
1511
1512 /**
1513 * Put the optimized loop's iterator register into the hash set
1514 *
1515 * @param reg register
1516 */
1517 private void setOptimizedLoop(Register reg) {
1518 loopRegisterSet.add(reg);
1519 }
1520
1521 /**
1522 * Check whether the loop that contain such iterator register had
1523 * been optimized
1524 *
1525 * @param reg register
1526 * @return the test result
1527 */
1528 private boolean isOptimizedLoop(Register reg) {
1529 return loopRegisterSet.contains(reg);
1530 }
1531
1532 /**
1533 * Rename the iterators for optimized loops so we can tell they are still optimized
1534 */
1535 private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap,
1536 HashMap<Register, Register> optimalRegMap) {
1537 Iterator<Register> itr = loopRegisterSet.iterator();
1538 while (itr.hasNext()) {
1539 Register reg = itr.next();
1540 if (subOptimalRegMap.containsKey(reg)) {
1541 loopRegisterSet.remove(reg);
1542 loopRegisterSet.add(subOptimalRegMap.get(reg));
1543 loopRegisterSet.add(optimalRegMap.get(reg));
1544 itr = loopRegisterSet.iterator();
1545 }
1546 }
1547 }
1548 }