001 /*
002 * This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 * This file is licensed to You under the Eclipse Public License (EPL);
005 * You may not use this file except in compliance with the License. You
006 * may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 * See the COPYRIGHT.txt file distributed with this work for information
011 * regarding copyright ownership.
012 */
013 package org.jikesrvm.compilers.opt.controlflow;
014
015 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
016 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
017 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO_opcode;
018 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
019 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
020 import static org.jikesrvm.compilers.opt.ir.Operators.INT_AND;
021 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
022 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP_opcode;
023 import static org.jikesrvm.compilers.opt.ir.Operators.INT_MOVE;
024 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
025
026 import java.lang.reflect.Constructor;
027 import java.util.Enumeration;
028
029 import org.jikesrvm.VM;
030 import org.jikesrvm.compilers.opt.DefUse;
031 import org.jikesrvm.compilers.opt.OptOptions;
032 import org.jikesrvm.compilers.opt.Simple;
033 import org.jikesrvm.compilers.opt.driver.CompilerPhase;
034 import org.jikesrvm.compilers.opt.ir.BasicBlock;
035 import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
036 import org.jikesrvm.compilers.opt.ir.Binary;
037 import org.jikesrvm.compilers.opt.ir.ExceptionHandlerBasicBlock;
038 import org.jikesrvm.compilers.opt.ir.Goto;
039 import org.jikesrvm.compilers.opt.ir.GuardResultCarrier;
040 import org.jikesrvm.compilers.opt.ir.GuardedUnary;
041 import org.jikesrvm.compilers.opt.ir.IR;
042 import org.jikesrvm.compilers.opt.ir.IfCmp;
043 import org.jikesrvm.compilers.opt.ir.Instruction;
044 import org.jikesrvm.compilers.opt.ir.Label;
045 import org.jikesrvm.compilers.opt.ir.Move;
046 import org.jikesrvm.compilers.opt.ir.OperandEnumeration;
047 import org.jikesrvm.compilers.opt.ir.Register;
048 import org.jikesrvm.compilers.opt.ir.RegisterOperandEnumeration;
049 import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
050 import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
051 import org.jikesrvm.compilers.opt.ir.operand.ConstantOperand;
052 import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
053 import org.jikesrvm.compilers.opt.ir.operand.Operand;
054 import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
055 import org.jikesrvm.compilers.opt.util.GraphNode;
056 import org.jikesrvm.util.BitVector;
057
058 /*
059 * Loop unrolling
060 */
061 public class LoopUnrolling extends CompilerPhase {
062
063 static final boolean DEBUG = false;
064 static final int MAX_BLOCKS_FOR_NAIVE_UNROLLING = 20;
065
066 /**
067 * Returns the name of the phase.
068 */
069 public String getName() {
070 return "Loop Unrolling";
071 }
072
073 /**
074 * Constructor for this compiler phase
075 */
076 private static final Constructor<CompilerPhase> constructor =
077 getCompilerPhaseConstructor(LoopUnrolling.class);
078
079 /**
080 * Get a constructor object for this compiler phase
081 * @return compiler phase constructor
082 */
083 public Constructor<CompilerPhase> getClassConstructor() {
084 return constructor;
085 }
086
087 public boolean shouldPerform(OptOptions options) {
088 return ((options.getOptLevel() >= 3) && (options.CONTROL_UNROLL_LOG >= 1) && (!options.SSA_LOOP_VERSIONING));
089 }
090
091 /**
092 * This is the method that actually does the work of the phase.
093 */
094 public void perform(IR ir) {
095 unrollFactor = (1 << ir.options.CONTROL_UNROLL_LOG);
096
097 if (ir.hasReachableExceptionHandlers()) return;
098 DefUse.computeDU(ir);
099 new Simple(1, true, true, true, false).perform(ir);
100 new BranchOptimizations(-1, true, true).perform(ir, true);
101
102 //new CFGTransformations().perform(ir);
103 // Note: the following unfactors the CFG
104 new DominatorsPhase(true).perform(ir);
105 DefUse.computeDU(ir);
106
107 ir.setInstructionScratchWord(0);
108
109 unrollLoops(ir);
110
111 CFGTransformations.splitCriticalEdges(ir);
112 ir.cfg.compactNodeNumbering();
113 }
114
115 /**
116 * unroll the loops in the given IR.
117 */
118 void unrollLoops(IR ir) {
119 LSTGraph lstg = ir.HIRInfo.loopStructureTree;
120
121 for (int i = 1; lstg != null && i <= 1; ++i) {
122 unrollLoopTree((LSTNode) lstg.firstNode(), ir, i);
123 (new BuildLST()).perform(ir);
124 }
125 }
126
127 /**
128 * loop unrolling on a given loop structure sub tree
129 * @param t
130 * @param ir
131 */
132 int unrollLoopTree(LSTNode t, IR ir, int target) {
133 int height = 1;
134 Enumeration<GraphNode> e = t.outNodes();
135 if (!e.hasMoreElements()) {
136 if (t.loop != null) {
137 report("Leaf loop in " + ir.method + ": " + t.loop + "\n");
138 // check infrequency
139 if (t.header.getInfrequent()) {
140 report("no unrolling of infrequent loop\n");
141 } else {
142 boolean doNaiveUnrolling = height == target && unrollLeaf(t, ir);
143 if (doNaiveUnrolling) naiveUnroller(t, ir);
144 }
145 }
146 } else {
147 while (e.hasMoreElements()) {
148 LSTNode n = (LSTNode) e.nextElement();
149 int heightOfTree = unrollLoopTree(n, ir, target);
150 height = Math.max(height, heightOfTree) + 1;
151 }
152 }
153 return height;
154 }
155
156 static final int MaxInstructions = 100;
157 private int unrollFactor = 1;
158
159 boolean unrollLeaf(LSTNode t, IR ir) {
160 int instructionsInLoop = 0;
161 BasicBlock exitBlock = null, backEdgeBlock = null, succBlock = null, predBlock = null;
162 BitVector nloop = t.loop;
163 BasicBlock header = t.header;
164 Instruction tmp;
165
166 if (ir.hasReachableExceptionHandlers()) {
167 report("0 IR may have exception handlers\n");
168 return false;
169 }
170
171 // determine loop structure by looking at its blocks
172 BasicBlockEnumeration loopBlocks = ir.getBasicBlocks(nloop);
173 int blocks = 0;
174 while (loopBlocks.hasMoreElements()) {
175 BasicBlock b = loopBlocks.next();
176 blocks++;
177
178 // check for size
179 instructionsInLoop += b.getNumberOfRealInstructions();
180 if (instructionsInLoop > MaxInstructions) {
181 report("1 is too big\n");
182 return false;
183 }
184
185 // look at the in edges. We want the header to be the only
186 // block with out of loop incoming edges.
187 BasicBlockEnumeration e = b.getIn();
188 if (b != header) {
189 while (e.hasMoreElements()) {
190 BasicBlock o = e.next();
191 if (!CFGTransformations.inLoop(o, nloop)) {
192 report("2 interior pointers.\n");
193 return true;
194 }
195 }
196 } else {
197 // check the headers predecessors: there should be
198 // one out of loop input and one backedge.
199 // We can extend this for loops with several backedges,
200 // if they all have the same conditions.
201 int inEdges = 0;
202 while (e.hasMoreElements()) {
203 inEdges++;
204 BasicBlock o = e.next();
205 if (!CFGTransformations.inLoop(o, nloop)) {
206 if (predBlock == null) {
207 predBlock = o;
208 } else {
209 report("3 multi entry header.\n");
210 return true;
211 }
212 } else {
213 if (backEdgeBlock == null) {
214 backEdgeBlock = o;
215 } else {
216 report("4 multiple back edges.\n");
217 return true;
218 }
219 }
220 }
221 }
222
223 // look at the out edges to find loop exits
224 e = b.getOut();
225 while (e.hasMoreElements()) {
226 BasicBlock out = e.next();
227 if (!CFGTransformations.inLoop(out, nloop)) {
228 if (exitBlock == null) {
229 exitBlock = b;
230 } else {
231 report("5 multiple exit blocks.\n");
232 return true;
233 }
234 }
235 }
236 }
237
238 // exitBlock must equal backEdgeBlock
239 if (exitBlock == null) {
240 report("6 no exit block found...infinite loop?");
241 return true;
242 }
243 if (exitBlock != backEdgeBlock) {
244 report("7 exit block is not immediate predecessor of loop head");
245 return true;
246 }
247
248 // exitBlock must exit (skip over pads in critical edges)
249 while (exitBlock.getNumberOfOut() == 1 && exitBlock.getNumberOfIn() == 1) {
250 exitBlock = exitBlock.getIn().next();
251 }
252
253 if (exitBlock == header && blocks > 1) {
254 report("6 while loop? (" + blocks + ")\n");
255 return true;
256 }
257
258 // So far, so good. Examine the exit test.
259 Instruction origBranch = exitBlock.firstBranchInstruction();
260 if (origBranch != exitBlock.lastRealInstruction()) {
261 Instruction aGoto = origBranch.nextInstructionInCodeOrder();
262 if (aGoto.operator.opcode != GOTO_opcode) {
263 report("7 too complex exit\n");
264 return true;
265 }
266 succBlock = Label.getBlock(Goto.getTarget(aGoto).target).block;
267 if (VM.VerifyAssertions) {
268 VM._assert(aGoto == exitBlock.lastRealInstruction());
269 }
270 } else {
271 succBlock = exitBlock.getFallThroughBlock();
272 }
273
274 if (origBranch.operator.opcode != INT_IFCMP_opcode) {
275 report("8 branch isn't int_ifcmp: " + origBranch.operator + ".\n");
276 return true;
277 }
278
279 // examine operands:
280 Operand op1 = follow(IfCmp.getVal1(origBranch));
281 Operand op2 = follow(IfCmp.getVal2(origBranch));
282 ConditionOperand cond = (ConditionOperand) IfCmp.getCond(origBranch).copy();
283 RegisterOperand ifcmpGuard = IfCmp.getGuardResult(origBranch);
284 float backBranchProbability = IfCmp.getBranchProfile(origBranch).takenProbability;
285 if (!loopInvariant(op2, nloop, 4)) {
286 if (loopInvariant(op1, nloop, 4)) {
287 Operand op = op1;
288 op1 = op2;
289 op2 = op;
290 cond.flipOperands();
291 } else {
292 if (DEBUG) {
293 printDefs(op1, nloop, 4);
294 printDefs(op2, nloop, 4);
295 VM.sysWrite("" + origBranch + "\n");
296 }
297 report("8a op1 and op2 may not be loop invariant\n");
298 return true;
299 }
300 }
301 BasicBlock target = Label.getBlock(IfCmp.getTarget(origBranch).target).block;
302
303 if (!(op1 instanceof RegisterOperand)) {
304 report("9 op1 of ifcmp isn't a register\n");
305 return true;
306 }
307
308 RegisterOperand rop1 = (RegisterOperand) op1;
309
310 Register reg = rop1.getRegister();
311 if (reg.isPhysical()) {
312 report("10 loops over physical register\n");
313 return false;
314 }
315 if (succBlock == header && !CFGTransformations.inLoop(target, nloop)) {
316 succBlock = target;
317 target = header;
318 cond.flipCode();
319 }
320 if (target != header) {
321 report("11 ifcmp doesn't jump to header\n");
322 return true;
323 }
324
325 Instruction iterator = null;
326 OperandEnumeration defs = new RealDefs(rop1);
327 while (defs.hasMoreElements()) {
328 Operand def = defs.next();
329 Instruction inst = def.instruction;
330 BasicBlock block = inst.getBasicBlock();
331 //VM.sysWrite (""+block+": "+inst+"\n");
332 if (CFGTransformations.inLoop(block, nloop)) {
333 if (iterator == null) {
334 iterator = inst;
335 } else {
336 report("12 iterator not unique.\n");
337 return true;
338 }
339 }
340 }
341
342 if (iterator == null) {
343 report("15 iterator not found.\n");
344 return true;
345 }
346
347 if (iterator.operator.opcode != INT_ADD_opcode) {
348 //dumpIR (ir, "malformed");
349 report("16 iterator is no addition: " + iterator.operator + "\n");
350 return true;
351 }
352
353 if (!rop1.similar(follow(Binary.getVal1(iterator)))) {
354 //dumpIR (ir, "malformed");
355 report("17 malformed iterator.\n" + iterator + "\n");
356 return true;
357 }
358
359 Operand strideOp = follow(Binary.getVal2(iterator));
360 if (!(strideOp instanceof IntConstantOperand)) {
361 report("18 stride not constant\n");
362 return true;
363 }
364
365 int stride = ((IntConstantOperand) strideOp).value;
366 if (stride != 1 && stride != -1) {
367 report("18b stride != +/-1 (" + stride + ")\n");
368 return true;
369 }
370
371 if ((stride == 1 &&
372 ((cond.value != ConditionOperand.LESS) &&
373 cond.value != ConditionOperand.LESS_EQUAL &&
374 cond.value != ConditionOperand.NOT_EQUAL)) ||
375 (stride == -1 &&
376 ((cond.value != ConditionOperand.GREATER) &&
377 cond.value != ConditionOperand.GREATER_EQUAL &&
378 cond.value != ConditionOperand.NOT_EQUAL))) {
379 report("19 unexpected condition: " + cond + "\n" + iterator + "\n" + origBranch + "\n\n");
380 return true;
381 }
382
383 RegisterOperand outerGuard;
384 BasicBlock outer = predBlock;
385 while (outer.getNumberOfOut() == 1 && outer.getNumberOfIn() == 1) {
386 outer = outer.getIn().next();
387 }
388 if (outer.getNumberOfIn() > 0 && outer.getNumberOfOut() < 2) {
389 report("23 no suitable outer guard found.\n");
390 return true;
391 }
392
393 tmp = outer.firstBranchInstruction();
394 if (tmp != null && GuardResultCarrier.conforms(tmp)) {
395 outerGuard = GuardResultCarrier.getGuardResult(tmp);
396 } else {
397 outerGuard = ir.regpool.makeTempValidation();
398 }
399
400 ////////////
401 // transfom
402
403 // transform this:
404 //
405 // Orig:
406 // B
407 // if i CC b goto Orig
408 // else goto exit
409 //
410 // exit:
411 //
412 // into this:
413 //
414 //
415 // stride == 1: common: stride == -1:
416 //--------------------------------------------------------------------------
417 // guard0:
418 // limit = b;
419 // if a > b goto Orig if b > a goto Orig
420 // else guard1
421 //
422 //
423 // guard 1:
424 // remainder = b - a; remainder = a - b;
425 // if cond == '<=' if cond == '>='
426 // remainder++; remainder++;
427 // remainder = remainder & 3
428 // limit = a + remainder limit = a - remainder
429 // if cond == '<=' if cond == '>='
430 // limit--; limit++;
431 // if remainder == 0 goto mllp
432 // goto Orig
433 //
434 // Orig:
435 // LOOP;
436 // if i CC limit goto Orig
437 // else guard2
438 //
439 // guard2: if i CC b goto mllp
440 // else exit
441 //
442 // mllp: // landing pad
443 // goto ml
444 //
445 // ml:
446 // LOOP;LOOP;LOOP;LOOP;
447 // if i CC b goto ml
448 // else exit
449 //
450 // exit:
451 //--------------------------------------------------------------------------
452 report("...transforming.\n");
453 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
454 dumpIR(ir, "before unroll");
455 }
456
457 CFGTransformations.killFallThroughs(ir, nloop);
458 BasicBlock[] handles = makeSomeCopies(unrollFactor, ir, nloop, blocks, header, exitBlock, exitBlock);
459 BasicBlock mainHeader = handles[0];
460 BasicBlock mainExit = handles[1];
461
462 // test block for well formed bounds
463 BasicBlock guardBlock0 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
464 predBlock.redirectOuts(header, guardBlock0, ir);
465
466 // test block for iteration alignemnt
467 BasicBlock guardBlock1 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
468
469 // landing pad for orig loop
470 BasicBlock olp = header.createSubBlock(header.firstInstruction().bcIndex, ir);
471 olp.setLandingPad();
472
473 BasicBlock predSucc = predBlock.nextBasicBlockInCodeOrder();
474 if (predSucc != null) {
475 ir.cfg.breakCodeOrder(predBlock, predSucc);
476 ir.cfg.linkInCodeOrder(olp, predSucc);
477 }
478 ir.cfg.linkInCodeOrder(predBlock, guardBlock0);
479 ir.cfg.linkInCodeOrder(guardBlock0, guardBlock1);
480 ir.cfg.linkInCodeOrder(guardBlock1, olp);
481
482 // guard block for main loop
483 BasicBlock guardBlock2 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
484
485 // landing pad for main loop
486 BasicBlock landingPad = header.createSubBlock(header.firstInstruction().bcIndex, ir);
487 landingPad.setLandingPad();
488
489 BasicBlock mainLoop = exitBlock.nextBasicBlockInCodeOrder();
490 ir.cfg.breakCodeOrder(exitBlock, mainLoop);
491 ir.cfg.linkInCodeOrder(exitBlock, guardBlock2);
492 ir.cfg.linkInCodeOrder(guardBlock2, landingPad);
493 ir.cfg.linkInCodeOrder(landingPad, mainLoop);
494
495 RegisterOperand remainder = ir.regpool.makeTemp(rop1.getType());
496 RegisterOperand limit = ir.regpool.makeTemp(rop1.getType());
497
498 // test whether a <= b for stride == 1 and a >= b for stride == -1
499 tmp = guardBlock0.lastInstruction();
500 tmp.insertBefore(Move.create(INT_MOVE, limit, op2.copy()));
501
502 ConditionOperand g0cond = ConditionOperand.GREATER_EQUAL();
503 if (stride == -1) g0cond = ConditionOperand.LESS_EQUAL();
504
505 tmp.insertBefore(IfCmp.create(INT_IFCMP,
506 outerGuard.copyD2D(),
507 rop1.copyD2U(),
508 op2.copy(),
509 g0cond,
510 olp.makeJumpTarget(),
511 BranchProfileOperand.unlikely()));
512 tmp.insertBefore(Goto.create(GOTO, guardBlock1.makeJumpTarget()));
513
514 // align the loop iterations
515 tmp = guardBlock1.lastInstruction();
516 if (stride == 1) {
517 tmp.insertBefore(Binary.create(INT_SUB, remainder, op2.copy(), rop1.copyD2U()));
518 } else {
519 tmp.insertBefore(Binary.create(INT_SUB, remainder, rop1.copyD2U(), op2.copy()));
520 }
521
522 if (cond.isGREATER_EQUAL() || cond.isLESS_EQUAL()) {
523 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
524 }
525
526 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(-1)));
527
528 tmp.insertBefore(Binary.create(INT_AND,
529 remainder.copyD2D(),
530 remainder.copyD2U(),
531 new IntConstantOperand(unrollFactor - 1)));
532
533 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
534
535 if (stride == 1) {
536 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
537 } else {
538 tmp.insertBefore(Binary.create(INT_SUB, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
539 }
540
541 if (cond.isLESS_EQUAL()) {
542 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(-1)));
543 }
544
545 if (cond.isGREATER_EQUAL()) {
546 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(1)));
547 }
548
549 tmp.insertBefore(Goto.create(GOTO, olp.makeJumpTarget()));
550
551 // build landing pad for original loop
552 tmp = olp.lastInstruction();
553 tmp.insertBefore(Goto.create(GOTO, header.makeJumpTarget()));
554
555 // change the back branch in the original loop
556 deleteBranches(exitBlock);
557 tmp = exitBlock.lastInstruction();
558 tmp.insertBefore(IfCmp.create(INT_IFCMP,
559 outerGuard.copyD2D(),
560 rop1.copyU2U(),
561 limit.copyD2U(),
562 (ConditionOperand) cond.copy(),
563 header.makeJumpTarget(),
564 new BranchProfileOperand(1.0f - 1.0f / ((float) (unrollFactor / 2)))));
565 tmp.insertBefore(Goto.create(GOTO, guardBlock2.makeJumpTarget()));
566
567 // only enter main loop if iterations left
568 tmp = guardBlock2.lastInstruction();
569 tmp.insertBefore(IfCmp.create(INT_IFCMP,
570 outerGuard.copyD2D(),
571 rop1.copyU2U(),
572 op2.copy(),
573 (ConditionOperand) cond.copy(),
574 landingPad.makeJumpTarget(),
575 new BranchProfileOperand(backBranchProbability)));
576 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
577
578 // landing pad jumps to mainHeader
579 tmp = landingPad.lastInstruction();
580 tmp.insertBefore(Goto.create(GOTO, mainHeader.makeJumpTarget()));
581
582 // repair back edge in mainExit
583 if (VM.VerifyAssertions) VM._assert(mainExit != null);
584 tmp = mainExit.lastInstruction();
585 if (VM.VerifyAssertions) {
586 VM._assert((mainExit.lastRealInstruction() == null) || !mainExit.lastRealInstruction().isBranch());
587 }
588 tmp.insertBefore(IfCmp.create(INT_IFCMP,
589 ifcmpGuard.copyU2U(),
590 rop1.copyU2U(),
591 op2.copy(),
592 (ConditionOperand) cond.copy(),
593 mainHeader.makeJumpTarget(),
594 new BranchProfileOperand(1.0f - (1.0f - backBranchProbability) * unrollFactor)));
595 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
596
597 // recompute normal outs
598 guardBlock0.recomputeNormalOut(ir);
599 guardBlock1.recomputeNormalOut(ir);
600 olp.recomputeNormalOut(ir);
601 guardBlock2.recomputeNormalOut(ir);
602 exitBlock.recomputeNormalOut(ir);
603 landingPad.recomputeNormalOut(ir);
604 mainExit.recomputeNormalOut(ir);
605 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
606 dumpIR(ir, "after unroll");
607 }
608 return false;
609 }
610
611 private void naiveUnroller(LSTNode t, IR ir) {
612 BitVector nloop = t.loop;
613 BasicBlock seqStart = null;
614 BasicBlockEnumeration bs;
615
616 if (t.loop.populationCount() > MAX_BLOCKS_FOR_NAIVE_UNROLLING) {
617 report("1 is too big\n");
618 return;
619 }
620 report("Naively unrolling\n");
621
622 CFGTransformations.killFallThroughs(ir, nloop);
623
624 // first, capture the blocks in the loop body.
625 int bodyBlocks = nloop.populationCount();
626 BasicBlock[] body = new BasicBlock[bodyBlocks];
627 {
628 int i = 0;
629 bs = ir.getBasicBlocks(nloop);
630 while (bs.hasMoreElements()) {
631 BasicBlock b = bs.next();
632 if (VM.VerifyAssertions) {
633 VM._assert(!(b instanceof ExceptionHandlerBasicBlock));
634 }
635 body[i++] = b;
636 BasicBlock next = b.nextBasicBlockInCodeOrder();
637 if (next == null || !CFGTransformations.inLoop(next, nloop)) {
638 seqStart = b; // end of loop in code order
639 }
640 }
641 }
642
643 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
644 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
645 BasicBlock seqLast = seqStart;
646 BasicBlock firstHeaderCopy = null;
647 BasicBlock currentBlock = seqLast;
648
649 for (int i = 1; i <= unrollFactor; ++i) {
650
651 // copy body
652 for (BasicBlock bb : body) {
653 seqLast = copyAndLinkBlock(ir, seqLast, bb);
654 if (bb == t.header) {
655 if (firstHeaderCopy == null) {
656 firstHeaderCopy = seqLast;
657 }
658 }
659 }
660
661 // redirect internal branches
662 currentBlock = seqLast;
663 for (int j = 0; j < bodyBlocks; ++j) {
664 currentBlock.recomputeNormalOut(ir);
665 BasicBlockEnumeration be = currentBlock.getOut();
666 while (be.hasMoreElements()) {
667 BasicBlock out = be.next();
668 if (out != t.header && CFGTransformations.inLoop(out, nloop)) {
669 BasicBlock outCopy = (BasicBlock) out.scratchObject;
670 currentBlock.redirectOuts(out, outCopy, ir);
671 }
672 }
673 currentBlock.recomputeNormalOut(ir);
674 currentBlock = currentBlock.prevBasicBlockInCodeOrder();
675 }
676
677 if (i != 1) {
678 // redirect the branches to the header in the (i-1)th copy
679 for (int j = 0; j < bodyBlocks; ++j) {
680 BasicBlockEnumeration be = currentBlock.getOut();
681 while (be.hasMoreElements()) {
682 BasicBlock out = be.next();
683 if (out == t.header) {
684 BasicBlock headerCopy;
685 headerCopy = (BasicBlock) t.header.scratchObject;
686 currentBlock.redirectOuts(t.header, headerCopy, ir);
687 }
688 }
689 currentBlock.recomputeNormalOut(ir);
690 currentBlock = currentBlock.prevBasicBlockInCodeOrder();
691 }
692 }
693 }
694 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
695
696 // in the original loop, redirect branches that go to the header
697 // and make them point to the first header copy
698
699 for (int j = 0; j < bodyBlocks; ++j) {
700 BasicBlockEnumeration be = body[j].getOut();
701 while (be.hasMoreElements()) {
702 BasicBlock out = be.next();
703 if (out == t.header) {
704 body[j].redirectOuts(t.header, firstHeaderCopy, ir);
705 }
706 }
707 body[j].recomputeNormalOut(ir);
708 }
709
710 // the following loop redirects backedges that start in the last
711 // copy to point to the first copy instead and not to the original
712 // header.
713 // | |
714 // Thus we get [ ] instead of [ ]<-.
715 // | | |
716 // [ ]<-. [ ] |
717 // | | | |
718 // [ ] | [ ] |
719 // | | | |
720 // [ ] | [ ] |
721 // |\_/ |\_/
722 //
723 // Instead of 2^(unroll_log) we only have 2^(unroll_log-1) bodies
724 // in the unrolled loop, but there is one copy of the loop's body
725 // that dominates the unrolled version. Peeling of this first
726 // version should have benefits for global code placement.
727 currentBlock = seqLast;
728 for (int j = 0; j < bodyBlocks; ++j) {
729 BasicBlockEnumeration be = currentBlock.getOut();
730 while (be.hasMoreElements()) {
731 BasicBlock out = be.next();
732 if (out == t.header) {
733 currentBlock.redirectOuts(t.header, firstHeaderCopy, ir);
734 }
735 }
736 currentBlock.recomputeNormalOut(ir);
737 currentBlock = currentBlock.prevBasicBlockInCodeOrder();
738 }
739 }
740
741 static void report(String s) {
742 if (DEBUG) VM.sysWrite("] " + s);
743 }
744
745 private static int theVisit = 1;
746
747 private static Operand follow(Operand use) {
748 theVisit++;
749 return _follow(use);
750 }
751
752 private static Operand _follow(Operand use) {
753 while (true) {
754 if (!(use instanceof RegisterOperand)) return use;
755 RegisterOperand rop = (RegisterOperand) use;
756 RegisterOperandEnumeration defs = DefUse.defs(rop.getRegister());
757 if (!defs.hasMoreElements()) {return use;}
758 Instruction def = defs.next().instruction;
759 if (!Move.conforms(def)) return use;
760 if (defs.hasMoreElements()) {return use;}
761
762 if (def.scratch == theVisit) return use;
763 def.scratch = theVisit;
764
765 use = Move.getVal(def);
766 }
767 }
768
769 private static Instruction definingInstruction(Operand op) {
770 if (!(op instanceof RegisterOperand)) return op.instruction;
771 RegisterOperandEnumeration defs = DefUse.defs(((RegisterOperand) op).getRegister());
772 if (!defs.hasMoreElements()) {return op.instruction;}
773 Instruction def = defs.next().instruction;
774 if (defs.hasMoreElements()) {return op.instruction;}
775 return def;
776 }
777
778 private static boolean loopInvariant(Operand op, BitVector nloop, int depth) {
779 if (depth <= 0) {
780 return false;
781 } else if (op instanceof ConstantOperand) {
782 return true;
783 } else if (op instanceof RegisterOperand) {
784 Register reg = ((RegisterOperand) op).getRegister();
785 RegisterOperandEnumeration defs = DefUse.defs(reg);
786 // if no definitions of this register (very strange) give up
787 if (!defs.hasMoreElements()) return false;
788 Instruction inst = defs.next().instruction;
789 // if multiple definitions of a register give up as follow may
790 // fail to give the correct invariant
791 return !defs.hasMoreElements() && !CFGTransformations.inLoop(inst.getBasicBlock(), nloop);
792 } else {
793 return false;
794 }
795 }
796
797 private static boolean printDefs(Operand op, BitVector nloop, int depth) {
798 if (depth <= 0) return false;
799 if (op instanceof ConstantOperand) {
800 VM.sysWrite(">> " + op + "\n");
801 return true;
802 }
803 if (op instanceof RegisterOperand) {
804 boolean invariant = true;
805 Register reg = ((RegisterOperand) op).getRegister();
806 RegisterOperandEnumeration defs = DefUse.defs(reg);
807 while (defs.hasMoreElements()) {
808 Instruction inst = defs.next().instruction;
809 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
810 if (CFGTransformations.inLoop(inst.getBasicBlock(), nloop)) {
811 if (Move.conforms(inst)) {
812 invariant &= printDefs(Move.getVal(inst), nloop, depth - 1);
813 } else if (inst.operator.opcode == ARRAYLENGTH_opcode) {
814 invariant &= printDefs(GuardedUnary.getVal(inst), nloop, depth);
815 } else {
816 invariant = false;
817 }
818 }
819 if (!invariant) break;
820 }
821 return invariant;
822 }
823 return false;
824 }
825
826 @SuppressWarnings("unused")
827 // For debugging
828 private static void _printDefs(Operand op) {
829 if (op instanceof RegisterOperand) {
830 Register reg = ((RegisterOperand) op).getRegister();
831 RegisterOperandEnumeration defs = DefUse.defs(reg);
832 defs = DefUse.defs(reg);
833 while (defs.hasMoreElements()) {
834 Instruction inst = defs.next().instruction;
835 if (Move.conforms(inst)) {
836 inst = definingInstruction(follow(Move.getVal(inst)));
837 }
838 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
839 }
840 } else {
841 VM.sysWrite(">> " + op + "\n");
842 }
843 }
844
845 static void linkToLST(IR ir) {
846 BasicBlockEnumeration e = ir.getBasicBlocks();
847 while (e.hasMoreElements()) {
848 e.next().scratchObject = null;
849 e.next().scratch = 0;
850 }
851 LSTGraph lstg = ir.HIRInfo.loopStructureTree;
852 if (lstg != null) markHeaders((LSTNode) lstg.firstNode());
853 }
854
855 // for all loops:
856 // make the header block point to the corresponding loop structure tree node.
857 private static void markHeaders(LSTNode t) {
858 BasicBlock header = t.header;
859 header.scratchObject = t;
860 Enumeration<GraphNode> e = t.outNodes();
861 while (e.hasMoreElements()) {
862 LSTNode n = (LSTNode) e.nextElement();
863 markHeaders(n);
864 }
865 }
866
867 // inserts unrollFactor copies of the loop after seqStart
868 static BasicBlock[] makeSomeCopies(int unrollFactor, IR ir, BitVector nloop, int blocks,
869 BasicBlock header, BasicBlock exitBlock, BasicBlock seqStart) {
870 // make some copies of the original loop
871
872 // first, capture the blocks in the loop body.
873 BitVector loop = new BitVector(nloop);
874 loop.clear(header.getNumber());
875 loop.clear(exitBlock.getNumber());
876 int bodyBlocks = 0;
877 BasicBlockEnumeration bs = ir.getBasicBlocks(loop);
878 while (bs.hasMoreElements()) {
879 bodyBlocks++;
880 bs.next();
881 }
882 BasicBlock[] body = new BasicBlock[bodyBlocks];
883 {
884 int i = 0;
885 bs = ir.getBasicBlocks(loop);
886 while (bs.hasMoreElements()) {
887 body[i++] = bs.next();
888 }
889 }
890
891 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
892 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
893 BasicBlock seqLast = seqStart;
894
895 BasicBlock firstHeader = null;
896 BasicBlock lastHeader = null;
897 BasicBlock lastExit = null;
898 BasicBlock[] handles = new BasicBlock[2];
899
900 for (int i = 0; i < unrollFactor; ++i) {
901
902 // copy header
903 seqLast = copyAndLinkBlock(ir, seqLast, header);
904 lastHeader = seqLast;
905
906 if (i == 0) {
907 firstHeader = seqLast;
908 } else {
909 // link copies by jumps
910 lastExit.appendInstruction(Goto.create(GOTO, seqLast.makeJumpTarget()));
911 lastExit.recomputeNormalOut(ir);
912 }
913
914 // copy body
915 for (BasicBlock bb : body) {
916 seqLast = copyAndLinkBlock(ir, seqLast, bb);
917 }
918
919 // copy exit block
920 if (exitBlock != header) {
921 seqLast = copyAndLinkBlock(ir, seqLast, exitBlock);
922 lastExit = seqLast;
923 } else {
924 lastExit = lastHeader;
925 }
926
927 // delete all branches in the copies of the exit block
928 deleteBranches(lastExit);
929
930 // redirect internal branches
931 BasicBlock cb = seqLast;
932 for (int j = 0; j < blocks; ++j) {
933 cb.recomputeNormalOut(ir);
934 BasicBlockEnumeration be = cb.getOut();
935 while (be.hasMoreElements()) {
936 BasicBlock out = be.next();
937 if (CFGTransformations.inLoop(out, nloop)) {
938 cb.redirectOuts(out, (BasicBlock) out.scratchObject, ir);
939 }
940 }
941 cb.recomputeNormalOut(ir);
942 cb = cb.prevBasicBlockInCodeOrder();
943 }
944 }
945 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
946 handles[0] = firstHeader;
947 handles[1] = lastExit;
948 return handles;
949 }
950
951 static BasicBlock copyAndLinkBlock(IR ir, BasicBlock seqLast, BasicBlock block) {
952 BasicBlock copy = block.copyWithoutLinks(ir);
953 ir.cfg.linkInCodeOrder(seqLast, copy);
954 block.scratchObject = copy;
955 return copy;
956 }
957
958 static void deleteBranches(BasicBlock b) {
959 Instruction branch = b.lastRealInstruction();
960 while (branch.isBranch()) {
961 Instruction nextBranch = branch.prevInstructionInCodeOrder();
962 branch.remove();
963 branch = nextBranch;
964 }
965 }
966
967 static final class RealDefs implements OperandEnumeration {
968 private RegisterOperandEnumeration defs = null;
969 private Operand use;
970 private RealDefs others = null;
971
972 private void init(Operand use) {
973 this.use = use;
974 if (use instanceof RegisterOperand) {
975 RegisterOperand rop = (RegisterOperand) use;
976 defs = DefUse.defs(rop.getRegister());
977 this.use = null;
978 if (!defs.hasMoreElements()) defs = null;
979 }
980 }
981
982 public RealDefs(Operand use) {
983 this.init(use);
984 theVisit++;
985 }
986
987 public RealDefs(Operand use, int visit) {
988 this.init(use);
989 theVisit = visit;
990 }
991
992 public Operand next() {
993 Operand res = use;
994 if (res != null) {
995 use = null;
996 return res;
997 }
998 if (others != null && others.hasMoreElements()) {
999 return others.next();
1000 }
1001
1002 res = defs.next();
1003 Instruction inst = res.instruction;
1004 if (!(Move.conforms(inst)) || inst.scratch == theVisit) {
1005 return res;
1006 }
1007 inst.scratch = theVisit;
1008
1009 others = new RealDefs(Move.getVal(inst), theVisit);
1010 if (!(others.hasMoreElements())) return res;
1011 return others.next();
1012 }
1013
1014 public boolean hasMoreElements() {
1015
1016 return use != null || (others != null && others.hasMoreElements()) || (defs != null && defs.hasMoreElements());
1017 }
1018
1019 public Operand nextElement() {
1020 return next();
1021 }
1022
1023 public Operand nextClear() {
1024 Operand res = next();
1025 res.instruction = null;
1026 return res;
1027 }
1028 }
1029 }