001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.compilers.opt.controlflow;
014
015import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
016import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
017import static org.jikesrvm.compilers.opt.ir.Operators.GOTO_opcode;
018import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
019import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
020import static org.jikesrvm.compilers.opt.ir.Operators.INT_AND;
021import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
022import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP_opcode;
023import static org.jikesrvm.compilers.opt.ir.Operators.INT_MOVE;
024import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
025
026import java.lang.reflect.Constructor;
027import java.util.Enumeration;
028import java.util.HashMap;
029import java.util.Map;
030
031import org.jikesrvm.VM;
032import org.jikesrvm.compilers.opt.DefUse;
033import org.jikesrvm.compilers.opt.OptOptions;
034import org.jikesrvm.compilers.opt.Simple;
035import org.jikesrvm.compilers.opt.driver.CompilerPhase;
036import org.jikesrvm.compilers.opt.ir.BasicBlock;
037import org.jikesrvm.compilers.opt.ir.Binary;
038import org.jikesrvm.compilers.opt.ir.ExceptionHandlerBasicBlock;
039import org.jikesrvm.compilers.opt.ir.Goto;
040import org.jikesrvm.compilers.opt.ir.GuardResultCarrier;
041import org.jikesrvm.compilers.opt.ir.GuardedUnary;
042import org.jikesrvm.compilers.opt.ir.IR;
043import org.jikesrvm.compilers.opt.ir.IfCmp;
044import org.jikesrvm.compilers.opt.ir.Instruction;
045import org.jikesrvm.compilers.opt.ir.Label;
046import org.jikesrvm.compilers.opt.ir.Move;
047import org.jikesrvm.compilers.opt.ir.Register;
048import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
049import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
050import org.jikesrvm.compilers.opt.ir.operand.ConstantOperand;
051import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
052import org.jikesrvm.compilers.opt.ir.operand.Operand;
053import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
054import org.jikesrvm.compilers.opt.util.GraphNode;
055import org.jikesrvm.util.BitVector;
056
057public class LoopUnrolling extends CompilerPhase {
058
059  static final boolean DEBUG = false;
060  static final int MAX_BLOCKS_FOR_NAIVE_UNROLLING = 20;
061
062  private final Map<BasicBlock, BasicBlock> copiedBlocks;
063  private int theVisit = 1;
064  private Map<Instruction, Integer> visitInts;
065
066  public LoopUnrolling() {
067    copiedBlocks = new HashMap<BasicBlock, BasicBlock>();
068  }
069
070  /**
071   * Returns the name of the phase.
072   */
073  @Override
074  public String getName() {
075    return "Loop Unrolling";
076  }
077
078  /**
079   * Constructor for this compiler phase
080   */
081  private static final Constructor<CompilerPhase> constructor =
082      getCompilerPhaseConstructor(LoopUnrolling.class);
083
084  /**
085   * Get a constructor object for this compiler phase
086   * @return compiler phase constructor
087   */
088  @Override
089  public Constructor<CompilerPhase> getClassConstructor() {
090    return constructor;
091  }
092
093  /**
094   * This phase is disabled by default.
095   * <p>
096   * It will run only on O3 but O2 is the default maximum optimization level.
097   */
098  @Override
099  public boolean shouldPerform(OptOptions options) {
100    return ((options.getOptLevel() >= 3) && (options.CONTROL_UNROLL_LOG >= 1) && (!options.SSA_LOOP_VERSIONING));
101  }
102
103  @Override
104  public void perform(IR ir) {
105    unrollFactor = (1 << ir.options.CONTROL_UNROLL_LOG);
106
107    if (ir.hasReachableExceptionHandlers()) return;
108    DefUse.computeDU(ir);
109    new Simple(1, true, true, true, false).perform(ir);
110    new BranchOptimizations(-1, true, true).perform(ir, true);
111
112    //new CFGTransformations().perform(ir);
113    // Note: the following unfactors the CFG
114    new DominatorsPhase(true).perform(ir);
115    DefUse.computeDU(ir);
116
117    visitInts = new HashMap<Instruction, Integer>();
118    Enumeration<Instruction> instEnum = ir.forwardInstrEnumerator();
119    while (instEnum.hasMoreElements()) {
120      Instruction inst = instEnum.nextElement();
121      visitInts.put(inst, Integer.valueOf(0));
122    }
123
124    unrollLoops(ir);
125
126    CFGTransformations.splitCriticalEdges(ir);
127    ir.cfg.compactNodeNumbering();
128  }
129
130  void unrollLoops(IR ir) {
131    LSTGraph lstg = ir.HIRInfo.loopStructureTree;
132
133    for (int i = 1; lstg != null && i <= 1; ++i) {
134      unrollLoopTree((LSTNode) lstg.firstNode(), ir, i);
135      (new BuildLST()).perform(ir);
136    }
137  }
138
139  int unrollLoopTree(LSTNode t, IR ir, int target) {
140    int height = 1;
141    Enumeration<GraphNode> e = t.outNodes();
142    if (!e.hasMoreElements()) {
143      if (t.loop != null) {
144        report("Leaf loop in " + ir.method + ": " + t.loop + "\n");
145        // check infrequency
146        if (t.header.getInfrequent()) {
147          report("no unrolling of infrequent loop\n");
148        } else {
149          boolean doNaiveUnrolling = height == target && unrollLeaf(t, ir);
150          if (doNaiveUnrolling) naiveUnroller(t, ir);
151        }
152      }
153    } else {
154      while (e.hasMoreElements()) {
155        LSTNode n = (LSTNode) e.nextElement();
156        int heightOfTree = unrollLoopTree(n, ir, target);
157        height = Math.max(height, heightOfTree) + 1;
158      }
159    }
160    return height;
161  }
162
163  static final int MaxInstructions = 100;
164  private int unrollFactor = 1;
165
166  boolean unrollLeaf(LSTNode t, IR ir) {
167    int instructionsInLoop = 0;
168    BasicBlock exitBlock = null, backEdgeBlock = null, succBlock = null, predBlock = null;
169    BitVector nloop = t.loop;
170    BasicBlock header = t.header;
171    Instruction tmp;
172
173    if (ir.hasReachableExceptionHandlers()) {
174      report("0 IR may have exception handlers\n");
175      return false;
176    }
177
178    // determine loop structure by looking at its blocks
179    Enumeration<BasicBlock> loopBlocks = ir.getBasicBlocks(nloop);
180    int blocks = 0;
181    while (loopBlocks.hasMoreElements()) {
182      BasicBlock b = loopBlocks.nextElement();
183      blocks++;
184
185      // check for size
186      instructionsInLoop += b.getNumberOfRealInstructions();
187      if (instructionsInLoop > MaxInstructions) {
188        report("1 is too big\n");
189        return false;
190      }
191
192      // look at the in edges. We want the header to be the only
193      // block with out of loop incoming edges.
194      Enumeration<BasicBlock> e = b.getIn();
195      if (b != header) {
196        while (e.hasMoreElements()) {
197          BasicBlock o = e.nextElement();
198          if (!CFGTransformations.inLoop(o, nloop)) {
199            report("2 interior pointers.\n");
200            return true;
201          }
202        }
203      } else {
204        // check the headers predecessors: there should be
205        // one out of loop input and one backedge.
206        // We can extend this for loops with several backedges,
207        // if they all have the same conditions.
208        int inEdges = 0;
209        while (e.hasMoreElements()) {
210          inEdges++;
211          BasicBlock o = e.nextElement();
212          if (!CFGTransformations.inLoop(o, nloop)) {
213            if (predBlock == null) {
214              predBlock = o;
215            } else {
216              report("3 multi entry header.\n");
217              return true;
218            }
219          } else {
220            if (backEdgeBlock == null) {
221              backEdgeBlock = o;
222            } else {
223              report("4 multiple back edges.\n");
224              return true;
225            }
226          }
227        }
228      }
229
230      // look at the out edges to find loop exits
231      e = b.getOut();
232      while (e.hasMoreElements()) {
233        BasicBlock out = e.nextElement();
234        if (!CFGTransformations.inLoop(out, nloop)) {
235          if (exitBlock == null) {
236            exitBlock = b;
237          } else {
238            report("5 multiple exit blocks.\n");
239            return true;
240          }
241        }
242      }
243    }
244
245    // exitBlock must equal backEdgeBlock
246    if (exitBlock == null) {
247      report("6 no exit block found...infinite loop?");
248      return true;
249    }
250    if (exitBlock != backEdgeBlock) {
251      report("7 exit block is not immediate predecessor of loop head");
252      return true;
253    }
254
255    // exitBlock must exit (skip over pads in critical edges)
256    while (exitBlock.getNumberOfOut() == 1 && exitBlock.getNumberOfIn() == 1) {
257      exitBlock = exitBlock.getIn().nextElement();
258    }
259
260    if (exitBlock == header && blocks > 1) {
261      report("6 while loop? (" + blocks + ")\n");
262      return true;
263    }
264
265    // So far, so good. Examine the exit test.
266    Instruction origBranch = exitBlock.firstBranchInstruction();
267    if (origBranch != exitBlock.lastRealInstruction()) {
268      Instruction aGoto = origBranch.nextInstructionInCodeOrder();
269      if (aGoto.getOpcode() != GOTO_opcode) {
270        report("7 too complex exit\n");
271        return true;
272      }
273      succBlock = Label.getBlock(Goto.getTarget(aGoto).target).block;
274      if (VM.VerifyAssertions) {
275        VM._assert(aGoto == exitBlock.lastRealInstruction());
276      }
277    } else {
278      succBlock = exitBlock.getFallThroughBlock();
279    }
280
281    if (origBranch.getOpcode() != INT_IFCMP_opcode) {
282      report("8 branch isn't int_ifcmp: " + origBranch.operator() + ".\n");
283      return true;
284    }
285
286    // examine operands:
287    Operand op1 = follow(IfCmp.getVal1(origBranch));
288    Operand op2 = follow(IfCmp.getVal2(origBranch));
289    ConditionOperand cond = (ConditionOperand) IfCmp.getCond(origBranch).copy();
290    RegisterOperand ifcmpGuard = IfCmp.getGuardResult(origBranch);
291    float backBranchProbability = IfCmp.getBranchProfile(origBranch).takenProbability;
292    if (!loopInvariant(op2, nloop, 4)) {
293      if (loopInvariant(op1, nloop, 4)) {
294        Operand op = op1;
295        op1 = op2;
296        op2 = op;
297        cond.flipOperands();
298      } else {
299        if (DEBUG) {
300          printDefs(op1, nloop, 4);
301          printDefs(op2, nloop, 4);
302          VM.sysWrite("" + origBranch + "\n");
303        }
304        report("8a op1 and op2 may not be loop invariant\n");
305        return true;
306      }
307    }
308    BasicBlock target = Label.getBlock(IfCmp.getTarget(origBranch).target).block;
309
310    if (!(op1 instanceof RegisterOperand)) {
311      report("9 op1 of ifcmp isn't a register\n");
312      return true;
313    }
314
315    RegisterOperand rop1 = (RegisterOperand) op1;
316
317    Register reg = rop1.getRegister();
318    if (reg.isPhysical()) {
319      report("10 loops over physical register\n");
320      return false;
321    }
322    if (succBlock == header && !CFGTransformations.inLoop(target, nloop)) {
323      succBlock = target;
324      target = header;
325      cond.flipCode();
326    }
327    if (target != header) {
328      report("11 ifcmp doesn't jump to header\n");
329      return true;
330    }
331
332    Instruction iterator = null;
333    Enumeration<Operand> defs = new RealDefs(rop1);
334    while (defs.hasMoreElements()) {
335      Operand def = defs.nextElement();
336      Instruction inst = def.instruction;
337      BasicBlock block = inst.getBasicBlock();
338      //VM.sysWrite (""+block+": "+inst+"\n");
339      if (CFGTransformations.inLoop(block, nloop)) {
340        if (iterator == null) {
341          iterator = inst;
342        } else {
343          report("12 iterator not unique.\n");
344          return true;
345        }
346      }
347    }
348
349    if (iterator == null) {
350      report("15 iterator not found.\n");
351      return true;
352    }
353
354    if (iterator.getOpcode() != INT_ADD_opcode) {
355      //dumpIR (ir, "malformed");
356      report("16 iterator is no addition: " + iterator.operator() + "\n");
357      return true;
358    }
359
360    if (!rop1.similar(follow(Binary.getVal1(iterator)))) {
361      //dumpIR (ir, "malformed");
362      report("17 malformed iterator.\n" + iterator + "\n");
363      return true;
364    }
365
366    Operand strideOp = follow(Binary.getVal2(iterator));
367    if (!(strideOp instanceof IntConstantOperand)) {
368      report("18 stride not constant\n");
369      return true;
370    }
371
372    int stride = ((IntConstantOperand) strideOp).value;
373    if (stride != 1 && stride != -1) {
374      report("18b stride != +/-1 (" + stride + ")\n");
375      return true;
376    }
377
378    if ((stride == 1 &&
379         ((cond.value != ConditionOperand.LESS) &&
380          cond.value != ConditionOperand.LESS_EQUAL &&
381          cond.value != ConditionOperand.NOT_EQUAL)) ||
382                                                         (stride == -1 &&
383                                                          ((cond.value != ConditionOperand.GREATER) &&
384                                                           cond.value != ConditionOperand.GREATER_EQUAL &&
385                                                           cond.value != ConditionOperand.NOT_EQUAL))) {
386      report("19 unexpected condition: " + cond + "\n" + iterator + "\n" + origBranch + "\n\n");
387      return true;
388    }
389
390    RegisterOperand outerGuard;
391    BasicBlock outer = predBlock;
392    while (outer.getNumberOfOut() == 1 && outer.getNumberOfIn() == 1) {
393      outer = outer.getIn().nextElement();
394    }
395    if (outer.getNumberOfIn() > 0 && outer.getNumberOfOut() < 2) {
396      report("23 no suitable outer guard found.\n");
397      return true;
398    }
399
400    tmp = outer.firstBranchInstruction();
401    if (tmp != null && GuardResultCarrier.conforms(tmp)) {
402      outerGuard = GuardResultCarrier.getGuardResult(tmp);
403    } else {
404      outerGuard = ir.regpool.makeTempValidation();
405    }
406
407    ////////////
408    // transfom
409
410    // transform this:
411    //
412    // Orig:
413    //  B
414    //  if i CC b goto Orig
415    //  else goto exit
416    //
417    // exit:
418    //
419    // into this:
420    //
421//
422//  stride == 1:           common:                      stride == -1:
423//--------------------------------------------------------------------------
424//                          guard0:
425//                           limit = b;
426//   if a > b goto Orig                                  if b > a goto Orig
427//                           else guard1
428//
429//
430//                          guard 1:
431//   remainder = b - a;                                  remainder = a - b;
432// if cond == '<='                                    if cond == '>='
433//   remainder++;                                         remainder++;
434//                           remainder = remainder & 3
435//   limit = a + remainder                               limit = a - remainder
436// if cond == '<='                                    if cond == '>='
437//   limit--;                                            limit++;
438//                           if remainder == 0 goto mllp
439//                           goto Orig
440//
441//                          Orig:
442//                           LOOP;
443//                           if i CC limit goto Orig
444//                           else guard2
445//
446//                          guard2: if i CC b goto mllp
447//                           else exit
448//
449//                           mllp: // landing pad
450//                           goto ml
451//
452//                          ml:
453//                           LOOP;LOOP;LOOP;LOOP;
454//                           if i CC b goto ml
455//                           else exit
456//
457//                          exit:
458//--------------------------------------------------------------------------
459    report("...transforming.\n");
460    if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
461      dumpIR(ir, "before unroll");
462    }
463
464    CFGTransformations.killFallThroughs(ir, nloop);
465    BasicBlock[] handles = makeSomeCopies(unrollFactor, ir, nloop, blocks, header, exitBlock, exitBlock);
466    BasicBlock mainHeader = handles[0];
467    BasicBlock mainExit = handles[1];
468
469    // test block for well formed bounds
470    BasicBlock guardBlock0 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
471    predBlock.redirectOuts(header, guardBlock0, ir);
472
473    // test block for iteration alignemnt
474    BasicBlock guardBlock1 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
475
476    // landing pad for orig loop
477    BasicBlock olp = header.createSubBlock(header.firstInstruction().bcIndex, ir);
478    olp.setLandingPad();
479
480    BasicBlock predSucc = predBlock.nextBasicBlockInCodeOrder();
481    if (predSucc != null) {
482      ir.cfg.breakCodeOrder(predBlock, predSucc);
483      ir.cfg.linkInCodeOrder(olp, predSucc);
484    }
485    ir.cfg.linkInCodeOrder(predBlock, guardBlock0);
486    ir.cfg.linkInCodeOrder(guardBlock0, guardBlock1);
487    ir.cfg.linkInCodeOrder(guardBlock1, olp);
488
489    // guard block for main loop
490    BasicBlock guardBlock2 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
491
492    // landing pad for main loop
493    BasicBlock landingPad = header.createSubBlock(header.firstInstruction().bcIndex, ir);
494    landingPad.setLandingPad();
495
496    BasicBlock mainLoop = exitBlock.nextBasicBlockInCodeOrder();
497    ir.cfg.breakCodeOrder(exitBlock, mainLoop);
498    ir.cfg.linkInCodeOrder(exitBlock, guardBlock2);
499    ir.cfg.linkInCodeOrder(guardBlock2, landingPad);
500    ir.cfg.linkInCodeOrder(landingPad, mainLoop);
501
502    RegisterOperand remainder = ir.regpool.makeTemp(rop1.getType());
503    RegisterOperand limit = ir.regpool.makeTemp(rop1.getType());
504
505    // test whether a <= b for stride == 1 and a >= b for stride == -1
506    tmp = guardBlock0.lastInstruction();
507    tmp.insertBefore(Move.create(INT_MOVE, limit, op2.copy()));
508
509    ConditionOperand g0cond = ConditionOperand.GREATER_EQUAL();
510    if (stride == -1) g0cond = ConditionOperand.LESS_EQUAL();
511
512    tmp.insertBefore(IfCmp.create(INT_IFCMP,
513                                  outerGuard.copyD2D(),
514                                  rop1.copyD2U(),
515                                  op2.copy(),
516                                  g0cond,
517                                  olp.makeJumpTarget(),
518                                  BranchProfileOperand.unlikely()));
519    tmp.insertBefore(Goto.create(GOTO, guardBlock1.makeJumpTarget()));
520
521    // align the loop iterations
522    tmp = guardBlock1.lastInstruction();
523    if (stride == 1) {
524      tmp.insertBefore(Binary.create(INT_SUB, remainder, op2.copy(), rop1.copyD2U()));
525    } else {
526      tmp.insertBefore(Binary.create(INT_SUB, remainder, rop1.copyD2U(), op2.copy()));
527    }
528
529    if (cond.isGREATER_EQUAL() || cond.isLESS_EQUAL()) {
530      tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
531    }
532
533    tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(-1)));
534
535    tmp.insertBefore(Binary.create(INT_AND,
536                                   remainder.copyD2D(),
537                                   remainder.copyD2U(),
538                                   new IntConstantOperand(unrollFactor - 1)));
539
540    tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
541
542    if (stride == 1) {
543      tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
544    } else {
545      tmp.insertBefore(Binary.create(INT_SUB, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
546    }
547
548    if (cond.isLESS_EQUAL()) {
549      tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(-1)));
550    }
551
552    if (cond.isGREATER_EQUAL()) {
553      tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(1)));
554    }
555
556    tmp.insertBefore(Goto.create(GOTO, olp.makeJumpTarget()));
557
558    // build landing pad for original loop
559    tmp = olp.lastInstruction();
560    tmp.insertBefore(Goto.create(GOTO, header.makeJumpTarget()));
561
562    // change the back branch in the original loop
563    deleteBranches(exitBlock);
564    tmp = exitBlock.lastInstruction();
565    tmp.insertBefore(IfCmp.create(INT_IFCMP,
566                                  outerGuard.copyD2D(),
567                                  rop1.copyU2U(),
568                                  limit.copyD2U(),
569                                  (ConditionOperand) cond.copy(),
570                                  header.makeJumpTarget(),
571                                  new BranchProfileOperand(1.0f - 1.0f / (unrollFactor / 2))));
572    tmp.insertBefore(Goto.create(GOTO, guardBlock2.makeJumpTarget()));
573
574    // only enter main loop if iterations left
575    tmp = guardBlock2.lastInstruction();
576    tmp.insertBefore(IfCmp.create(INT_IFCMP,
577                                  outerGuard.copyD2D(),
578                                  rop1.copyU2U(),
579                                  op2.copy(),
580                                  (ConditionOperand) cond.copy(),
581                                  landingPad.makeJumpTarget(),
582                                  new BranchProfileOperand(backBranchProbability)));
583    tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
584
585    // landing pad jumps to mainHeader
586    tmp = landingPad.lastInstruction();
587    tmp.insertBefore(Goto.create(GOTO, mainHeader.makeJumpTarget()));
588
589    // repair back edge in mainExit
590    if (VM.VerifyAssertions) VM._assert(mainExit != null);
591    tmp = mainExit.lastInstruction();
592    if (VM.VerifyAssertions) {
593      VM._assert((mainExit.lastRealInstruction() == null) || !mainExit.lastRealInstruction().isBranch());
594    }
595    tmp.insertBefore(IfCmp.create(INT_IFCMP,
596                                  ifcmpGuard.copyU2U(),
597                                  rop1.copyU2U(),
598                                  op2.copy(),
599                                  (ConditionOperand) cond.copy(),
600                                  mainHeader.makeJumpTarget(),
601                                  new BranchProfileOperand(1.0f - (1.0f - backBranchProbability) * unrollFactor)));
602    tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
603
604    // recompute normal outs
605    guardBlock0.recomputeNormalOut(ir);
606    guardBlock1.recomputeNormalOut(ir);
607    olp.recomputeNormalOut(ir);
608    guardBlock2.recomputeNormalOut(ir);
609    exitBlock.recomputeNormalOut(ir);
610    landingPad.recomputeNormalOut(ir);
611    mainExit.recomputeNormalOut(ir);
612    if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
613      dumpIR(ir, "after unroll");
614    }
615    return false;
616  }
617
618  private void naiveUnroller(LSTNode t, IR ir) {
619    BitVector nloop = t.loop;
620    BasicBlock seqStart = null;
621    Enumeration<BasicBlock> bs;
622
623    if (t.loop.populationCount() > MAX_BLOCKS_FOR_NAIVE_UNROLLING) {
624      report("1 is too big\n");
625      return;
626    }
627    report("Naively unrolling\n");
628
629    CFGTransformations.killFallThroughs(ir, nloop);
630
631    // first, capture the blocks in the loop body.
632    int bodyBlocks = nloop.populationCount();
633    BasicBlock[] body = new BasicBlock[bodyBlocks];
634    {
635      int i = 0;
636      bs = ir.getBasicBlocks(nloop);
637      while (bs.hasMoreElements()) {
638        BasicBlock b = bs.nextElement();
639        if (VM.VerifyAssertions) {
640          VM._assert(!(b instanceof ExceptionHandlerBasicBlock));
641        }
642        body[i++] = b;
643        BasicBlock next = b.nextBasicBlockInCodeOrder();
644        if (next == null || !CFGTransformations.inLoop(next, nloop)) {
645          seqStart = b; // end of loop in code order
646        }
647      }
648    }
649
650    BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
651    if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
652    BasicBlock seqLast = seqStart;
653    BasicBlock firstHeaderCopy = null;
654    BasicBlock currentBlock = seqLast;
655
656    for (int i = 1; i <= unrollFactor; ++i) {
657
658      // copy body
659      for (BasicBlock bb : body) {
660        seqLast = copyAndLinkBlock(ir, seqLast, bb);
661        if (bb == t.header) {
662          if (firstHeaderCopy == null) {
663            firstHeaderCopy = seqLast;
664          }
665        }
666      }
667
668      // redirect internal branches
669      currentBlock = seqLast;
670      for (int j = 0; j < bodyBlocks; ++j) {
671        currentBlock.recomputeNormalOut(ir);
672        Enumeration<BasicBlock> be = currentBlock.getOut();
673        while (be.hasMoreElements()) {
674          BasicBlock out = be.nextElement();
675          if (out != t.header && CFGTransformations.inLoop(out, nloop)) {
676            BasicBlock outCopy = copiedBlocks.get(out);
677            currentBlock.redirectOuts(out, outCopy, ir);
678          }
679        }
680        currentBlock.recomputeNormalOut(ir);
681        currentBlock = currentBlock.prevBasicBlockInCodeOrder();
682      }
683
684      if (i != 1) {
685        // redirect the branches to the header in the (i-1)th copy
686        for (int j = 0; j < bodyBlocks; ++j) {
687          Enumeration<BasicBlock> be = currentBlock.getOut();
688          while (be.hasMoreElements()) {
689            BasicBlock out = be.nextElement();
690            if (out == t.header) {
691              BasicBlock headerCopy;
692              headerCopy = copiedBlocks.get(t.header);
693              currentBlock.redirectOuts(t.header, headerCopy, ir);
694            }
695          }
696          currentBlock.recomputeNormalOut(ir);
697          currentBlock = currentBlock.prevBasicBlockInCodeOrder();
698        }
699      }
700    }
701    if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
702
703    // in the original loop, redirect branches that go to the header
704    // and make them point to the first header copy
705
706    for (int j = 0; j < bodyBlocks; ++j) {
707      Enumeration<BasicBlock> be = body[j].getOut();
708      while (be.hasMoreElements()) {
709        BasicBlock out = be.nextElement();
710        if (out == t.header) {
711          body[j].redirectOuts(t.header, firstHeaderCopy, ir);
712        }
713      }
714      body[j].recomputeNormalOut(ir);
715    }
716
717    // the following loop redirects backedges that start in the last
718    // copy to point to the first copy instead and not to the original
719    // header.
720    //                |                    |
721    // Thus we get   [ ]     instead of   [ ]<-.
722    //                |                    |   |
723    //               [ ]<-.               [ ]  |
724    //                |   |                |   |
725    //               [ ]  |               [ ]  |
726    //                |   |                |   |
727    //               [ ]  |               [ ]  |
728    //                |\_/                 |\_/
729    //
730    // Instead of 2^(unroll_log) we only have 2^(unroll_log-1) bodies
731    // in the unrolled loop, but there is one copy of the loop's body
732    // that dominates the unrolled version. Peeling of this first
733    // version should have benefits for global code placement.
734    currentBlock = seqLast;
735    for (int j = 0; j < bodyBlocks; ++j) {
736      Enumeration<BasicBlock> be = currentBlock.getOut();
737      while (be.hasMoreElements()) {
738        BasicBlock out = be.nextElement();
739        if (out == t.header) {
740          currentBlock.redirectOuts(t.header, firstHeaderCopy, ir);
741        }
742      }
743      currentBlock.recomputeNormalOut(ir);
744      currentBlock = currentBlock.prevBasicBlockInCodeOrder();
745    }
746  }
747
748  static void report(String s) {
749    if (DEBUG) VM.sysWrite("] " + s);
750  }
751
752  private Operand follow(Operand use) {
753    theVisit++;
754    return _follow(use);
755  }
756
757  private Operand _follow(Operand use) {
758    while (true) {
759      if (!(use instanceof RegisterOperand)) return use;
760      RegisterOperand rop = (RegisterOperand) use;
761      Enumeration<RegisterOperand> defs = DefUse.defs(rop.getRegister());
762      if (!defs.hasMoreElements()) {
763        return use;
764      }
765      Instruction def = defs.nextElement().instruction;
766      if (!Move.conforms(def)) return use;
767      if (defs.hasMoreElements()) {
768        return use;
769      }
770
771      Integer defInt = visitInts.get(def);
772      if (defInt.intValue() == theVisit) {
773        return use;
774      }
775      visitInts.put(def, Integer.valueOf(theVisit));
776
777      use = Move.getVal(def);
778    }
779  }
780
781  private static Instruction definingInstruction(Operand op) {
782    if (!(op instanceof RegisterOperand)) return op.instruction;
783    Enumeration<RegisterOperand> defs = DefUse.defs(((RegisterOperand) op).getRegister());
784    if (!defs.hasMoreElements()) {
785      return op.instruction;
786    }
787    Instruction def = defs.nextElement().instruction;
788    if (defs.hasMoreElements()) {
789      return op.instruction;
790    }
791    return def;
792  }
793
794  private static boolean loopInvariant(Operand op, BitVector nloop, int depth) {
795    if (depth <= 0) {
796      return false;
797    } else if (op instanceof ConstantOperand) {
798      return true;
799    } else if (op instanceof RegisterOperand) {
800      Register reg = ((RegisterOperand) op).getRegister();
801      Enumeration<RegisterOperand> defs = DefUse.defs(reg);
802      // if no definitions of this register (very strange) give up
803      if (!defs.hasMoreElements()) return false;
804      Instruction inst = defs.nextElement().instruction;
805      // if multiple definitions of a register give up as follow may
806      // fail to give the correct invariant
807      return !defs.hasMoreElements() && !CFGTransformations.inLoop(inst.getBasicBlock(), nloop);
808    } else {
809      return false;
810    }
811  }
812
813  private static boolean printDefs(Operand op, BitVector nloop, int depth) {
814    if (depth <= 0) return false;
815    if (op instanceof ConstantOperand) {
816      VM.sysWrite(">> " + op + "\n");
817      return true;
818    }
819    if (op instanceof RegisterOperand) {
820      boolean invariant = true;
821      Register reg = ((RegisterOperand) op).getRegister();
822      Enumeration<RegisterOperand> defs = DefUse.defs(reg);
823      while (defs.hasMoreElements()) {
824        Instruction inst = defs.nextElement().instruction;
825        VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
826        if (CFGTransformations.inLoop(inst.getBasicBlock(), nloop)) {
827          if (Move.conforms(inst)) {
828            invariant &= printDefs(Move.getVal(inst), nloop, depth - 1);
829          } else if (inst.getOpcode() == ARRAYLENGTH_opcode) {
830            invariant &= printDefs(GuardedUnary.getVal(inst), nloop, depth);
831          } else {
832            invariant = false;
833          }
834        }
835        if (!invariant) break;
836      }
837      return invariant;
838    }
839    return false;
840  }
841
842  @SuppressWarnings("unused")
843  // For debugging
844  private void _printDefs(Operand op) {
845    if (op instanceof RegisterOperand) {
846      Register reg = ((RegisterOperand) op).getRegister();
847      Enumeration<RegisterOperand> defs = DefUse.defs(reg);
848      defs = DefUse.defs(reg);
849      while (defs.hasMoreElements()) {
850        Instruction inst = defs.nextElement().instruction;
851        if (Move.conforms(inst)) {
852          inst = definingInstruction(follow(Move.getVal(inst)));
853        }
854        VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
855      }
856    } else {
857      VM.sysWrite(">> " + op + "\n");
858    }
859  }
860
861  // inserts unrollFactor copies of the loop after seqStart
862  BasicBlock[] makeSomeCopies(int unrollFactor, IR ir, BitVector nloop, int blocks,
863                                         BasicBlock header, BasicBlock exitBlock, BasicBlock seqStart) {
864    // make some copies of the original loop
865
866    // first, capture the blocks in the loop body.
867    BitVector loop = new BitVector(nloop);
868    loop.clear(header.getNumber());
869    loop.clear(exitBlock.getNumber());
870    int bodyBlocks = 0;
871    Enumeration<BasicBlock> bs = ir.getBasicBlocks(loop);
872    while (bs.hasMoreElements()) {
873      bodyBlocks++;
874      bs.nextElement();
875    }
876    BasicBlock[] body = new BasicBlock[bodyBlocks];
877    {
878      int i = 0;
879      bs = ir.getBasicBlocks(loop);
880      while (bs.hasMoreElements()) {
881        body[i++] = bs.nextElement();
882      }
883    }
884
885    BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
886    if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
887    BasicBlock seqLast = seqStart;
888
889    BasicBlock firstHeader = null;
890    BasicBlock lastHeader = null;
891    BasicBlock lastExit = null;
892    BasicBlock[] handles = new BasicBlock[2];
893
894    for (int i = 0; i < unrollFactor; ++i) {
895
896      // copy header
897      seqLast = copyAndLinkBlock(ir, seqLast, header);
898      lastHeader = seqLast;
899
900      if (i == 0) {
901        firstHeader = seqLast;
902      } else {
903        // link copies by jumps
904        lastExit.appendInstruction(Goto.create(GOTO, seqLast.makeJumpTarget()));
905        lastExit.recomputeNormalOut(ir);
906      }
907
908      // copy body
909      for (BasicBlock bb : body) {
910        seqLast = copyAndLinkBlock(ir, seqLast, bb);
911      }
912
913      // copy exit block
914      if (exitBlock != header) {
915        seqLast = copyAndLinkBlock(ir, seqLast, exitBlock);
916        lastExit = seqLast;
917      } else {
918        lastExit = lastHeader;
919      }
920
921      // delete all branches in the copies of the exit block
922      deleteBranches(lastExit);
923
924      // redirect internal branches
925      BasicBlock cb = seqLast;
926      for (int j = 0; j < blocks; ++j) {
927        cb.recomputeNormalOut(ir);
928        Enumeration<BasicBlock> be = cb.getOut();
929        while (be.hasMoreElements()) {
930          BasicBlock out = be.nextElement();
931          if (CFGTransformations.inLoop(out, nloop)) {
932            cb.redirectOuts(out, copiedBlocks.get(out), ir);
933          }
934        }
935        cb.recomputeNormalOut(ir);
936        cb = cb.prevBasicBlockInCodeOrder();
937      }
938    }
939    if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
940    handles[0] = firstHeader;
941    handles[1] = lastExit;
942    return handles;
943  }
944
945  BasicBlock copyAndLinkBlock(IR ir, BasicBlock seqLast, BasicBlock block) {
946    BasicBlock copy = block.copyWithoutLinks(ir);
947    ir.cfg.linkInCodeOrder(seqLast, copy);
948    copiedBlocks.put(block, copy);
949    return copy;
950  }
951
952  static void deleteBranches(BasicBlock b) {
953    Instruction branch = b.lastRealInstruction();
954    while (branch.isBranch()) {
955      Instruction nextBranch = branch.prevInstructionInCodeOrder();
956      branch.remove();
957      branch = nextBranch;
958    }
959  }
960
961  final class RealDefs implements Enumeration<Operand> {
962    private Enumeration<RegisterOperand> defs = null;
963    private Operand use;
964    private RealDefs others = null;
965
966    private void init(Operand use) {
967      this.use = use;
968      if (use instanceof RegisterOperand) {
969        RegisterOperand rop = (RegisterOperand) use;
970        defs = DefUse.defs(rop.getRegister());
971        this.use = null;
972        if (!defs.hasMoreElements()) defs = null;
973      }
974    }
975
976    RealDefs(Operand use) {
977      this.init(use);
978      theVisit++;
979    }
980
981    RealDefs(Operand use, int visit) {
982      this.init(use);
983      theVisit = visit;
984    }
985
986    @Override
987    public boolean hasMoreElements() {
988      return use != null || (others != null && others.hasMoreElements()) || (defs != null && defs.hasMoreElements());
989    }
990
991    @Override
992    public Operand nextElement() {
993      Operand res = use;
994      if (res != null) {
995        use = null;
996        return res;
997      }
998      if (others != null && others.hasMoreElements()) {
999        return others.nextElement();
1000      }
1001
1002      res = defs.nextElement();
1003      Instruction inst = res.instruction;
1004      if (!(Move.conforms(inst)) || visitInts.get(inst).intValue() == theVisit) {
1005        return res;
1006      }
1007      visitInts.put(inst, Integer.valueOf(theVisit));
1008
1009      others = new RealDefs(Move.getVal(inst), theVisit);
1010      if (!(others.hasMoreElements())) return res;
1011      return others.nextElement();
1012    }
1013
1014    public Operand nextClear() {
1015      Operand res = nextElement();
1016      res.instruction = null;
1017      return res;
1018    }
1019  }
1020}