001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.controlflow;
014    
015    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
016    
017    import java.util.HashMap;
018    import java.util.HashSet;
019    import java.util.TreeSet;
020    
021    import org.jikesrvm.VM;
022    import org.jikesrvm.compilers.opt.OptOptions;
023    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
024    import org.jikesrvm.compilers.opt.ir.BasicBlock;
025    import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
026    import org.jikesrvm.compilers.opt.ir.Goto;
027    import org.jikesrvm.compilers.opt.ir.IR;
028    import org.jikesrvm.compilers.opt.ir.Instruction;
029    import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
030    import org.jikesrvm.compilers.opt.ir.operand.BranchOperand;
031    
032    /**
033     * Reorder code layout of basic blocks for improved I-cache locality and
034     * branch prediction. This code assumes that basic block frequencies have
035     * been computed and blocks have been marked infrequent.
036     * This pass actually implements two code placement algorithms:
037     * (1) A simple 'fluff' removal pass that moves all infrequent basic blocks
038     *     to the end of the code order.
039     * (2) Pettis and Hansen Algo2.
040     */
041    public final class ReorderingPhase extends CompilerPhase {
042    
043      private static final boolean DEBUG = false;
044    
045      /**
046       * Return this instance of this phase. This phase contains no
047       * per-compilation instance fields.
048       * @param ir not used
049       * @return this
050       */
051      public CompilerPhase newExecution(IR ir) {
052        return this;
053      }
054    
055      public boolean shouldPerform(OptOptions options) {
056        return options.REORDER_CODE;
057      }
058    
059      public boolean printingEnabled(OptOptions options, boolean before) {
060        return DEBUG;
061      }
062    
063      public String getName() {
064        return "Code Reordering";
065      }
066    
067      /**
068       * Reorder basic blocks either by trivially moving infrequent blocks
069       * to the end of the code order or by applying Pettis and Hansen Algo2.
070       *
071       * We will rearrange basic blocks and insert/remove
072       * unconditional GOTO's if needed.  It does not clean up branches,
073       * by reversing the branch condition, however.  That is saved for
074       * BranchOptimizations.
075       */
076      public void perform(IR ir) {
077        ir.cfg.entry().clearInfrequent();
078        if (ir.options.REORDER_CODE_PH) {
079          // Do Pettis and Hansen PLDI'90 Algo2
080          doPettisHansenAlgo2(ir);
081        } else {
082          // Simple algorithm: just move infrequent code to the end
083          exileInfrequentBlocks(ir);
084        }
085      }
086    
087      /////////////////////////
088      // Code for trivial algorithm
089      /////////////////////////
090    
091      /**
092       * Select a new basic block ordering via a simple heuristic
093       * that moves all infrequent basic blocks to the end.
094       * @param ir the IR object to reorder
095       */
096      private void exileInfrequentBlocks(IR ir) {
097        // (1) Look to see if there are infrequent blocks
098        //     Also count how many blocks there are.
099        int numBlocks = 0;
100        boolean foundSome = false;
101        for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
102          BasicBlock bb = e.next();
103          numBlocks++;
104          foundSome |= bb.getInfrequent();
105        }
106        if (!foundSome) return; // Nothing to do
107    
108        // Reorder the blocks to exile the infrequent blocks.
109        // Relative order within the set of frequent and infrequent is unchanged.
110        BasicBlock[] newOrdering = new BasicBlock[numBlocks];
111        int idx = 0;
112        // First append frequent blocks to newOrdering
113        for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
114          if (!bb.getInfrequent()) {
115            newOrdering[idx++] = bb;
116          }
117        }
118        // Next append infrequent blocks to newOrdering
119        for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
120          if (bb.getInfrequent()) {
121            newOrdering[idx++] = bb;
122          }
123        }
124    
125        if (VM.VerifyAssertions) VM._assert(idx == numBlocks); // don't lose blocks!
126        if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == newOrdering[0]);
127    
128        // Add/remove unconditional goto's as needed.
129        for (int i = 0; i < newOrdering.length; i++) {
130          Instruction lastInstr = newOrdering[i].lastRealInstruction();
131          // Append a GOTO if needed to maintain old fallthrough semantics.
132          BasicBlock fallthroughBlock = newOrdering[i].getFallThroughBlock();
133          if (fallthroughBlock != null) {
134            if (i == newOrdering.length - 1 || fallthroughBlock != newOrdering[i + 1]) {
135              // Add unconditional goto to preserve old fallthrough semantics
136              newOrdering[i].appendInstruction(fallthroughBlock.makeGOTO());
137            }
138          }
139          // Remove last instruction if it is a redundant GOTO that
140          // can be implemented by a fallthrough edge in the new ordering.
141          // (Only possible if newOrdering[i] is not the last basic block.)
142          if (i < newOrdering.length - 1 && lastInstr != null && lastInstr.operator() == GOTO) {
143            BranchOperand op = Goto.getTarget(lastInstr);
144            if (op.target.getBasicBlock() == newOrdering[i + 1]) {
145              // unconditional goto is redundant in new ordering
146              lastInstr.remove();
147            }
148          }
149        }
150    
151        // Re-insert all basic blocks according to new ordering
152        ir.cfg.clearCodeOrder();
153        for (BasicBlock bb : newOrdering) {
154          ir.cfg.addLastInCodeOrder(bb);
155        }
156      }
157    
158      /////////////////////////
159      // Code for P&H Algo2
160      /////////////////////////
161    
162      /**
163       * Reorder code using Algo2 (Bottom-Up Positioning) from
164       * Pettis and Hansen PLDI'90.
165       * @param ir the IR to reorder.
166       */
167      private void doPettisHansenAlgo2(IR ir) {
168        // (1) Setup:
169        //     (a) Count the blocks
170        //     (b) Create a sorted set of CFG edges
171        //     (c) Create a set of blocks
172        //     (d) Make fallthroughs explict by adding GOTOs
173        int numBlocks = 0;
174        TreeSet<Edge> edges = new TreeSet<Edge>();
175        HashSet<BasicBlock> chainHeads = new HashSet<BasicBlock>();
176        BasicBlock entry = ir.cfg.entry();
177        if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == ir.cfg.firstInCodeOrder());
178    
179        for (BasicBlock bb = entry; bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
180          numBlocks++;
181          chainHeads.add(bb);
182          bb.scratchObject = bb;
183          BasicBlock ft = bb.getFallThroughBlock();
184          if (ft != null) {
185            bb.appendInstruction(Goto.create(GOTO, ft.makeJumpTarget()));
186          }
187          float bw = bb.getExecutionFrequency();
188          for (WeightedBranchTargets wbt = new WeightedBranchTargets(bb); wbt.hasMoreElements(); wbt.advance()) {
189            edges.add(new Edge(bb, wbt.curBlock(), wbt.curWeight() * bw));
190          }
191        }
192    
193        if (DEBUG) VM.sysWriteln("Edges = " + edges);
194    
195        // (2) Build chains
196        ir.cfg.clearCodeOrder();
197        for (Edge e : edges) {
198          // If the source of the edge is the last block in its chain
199          // and the target of the edge is the first block in its chain
200          // then merge the chains.
201          if (DEBUG) VM.sysWriteln("Processing edge " + e);
202          if (e.target == entry) {
203            if (DEBUG) VM.sysWriteln("\tCan't put entry block in interior of chain");
204            continue;
205          }
206          if (e.source.nextBasicBlockInCodeOrder() != null) {
207            if (DEBUG) VM.sysWriteln("\tSource is not at end of a chain");
208            continue;
209          }
210          if (e.target.prevBasicBlockInCodeOrder() != null) {
211            if (DEBUG) VM.sysWriteln("\tTarget is not at start of a chain");
212            continue;
213          }
214          if (e.source.scratchObject == e.target.scratchObject) {
215            if (DEBUG) VM.sysWriteln("\tSource and target are in same chain");
216            continue;
217          }
218          if (DEBUG) VM.sysWriteln("\tMerging chains");
219          chainHeads.remove(e.target);
220          ir.cfg.linkInCodeOrder(e.source, e.target);
221          // Yuck....we should really use near-linear time union find here
222          // Doing this crappy thing makes us O(N^2) in the worst case.
223          BasicBlock newChain = (BasicBlock) e.source.scratchObject;
224          for (BasicBlock ptr = e.target; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
225            ptr.scratchObject = newChain;
226          }
227        }
228    
229        if (DEBUG) VM.sysWriteln("Chains constructed ");
230        HashMap<BasicBlock, ChainInfo> chainInfo = new HashMap<BasicBlock, ChainInfo>();
231        for (BasicBlock head : chainHeads) {
232          if (DEBUG) dumpChain(head);
233          chainInfo.put(head, new ChainInfo(head));
234        }
235    
236        // (3) Summarize inter-chain edges.
237        for (Edge e : edges) {
238          if (e.source.scratchObject != e.target.scratchObject) {
239            Object sourceChain = e.source.scratchObject;
240            Object targetChain = e.target.scratchObject;
241            ChainInfo sourceInfo = chainInfo.get(sourceChain);
242            ChainInfo targetInfo = chainInfo.get(targetChain);
243            if (DEBUG) VM.sysWriteln("Inter-chain edge " + sourceChain + "->" + targetChain + " (" + e.weight + ")");
244            Object value = sourceInfo.outWeights.get(targetInfo);
245            float weight = e.weight;
246            if (value != null) {
247              weight += (Float) value;
248            }
249            sourceInfo.outWeights.put(targetInfo, weight);
250            targetInfo.inWeight += e.weight;
251            if (DEBUG) VM.sysWriteln("\t" + targetInfo + "," + sourceInfo.outWeights.get(targetInfo));
252          }
253        }
254    
255        if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
256    
257        // (4) Construct a total order of the chains, guided by the interchain edge weights.
258        //     Constructing an optimal order is NP-Hard, so we apply the following heuristic.
259        //     The chain that starts with the entry node is placed first.
260        //     At each step, pick the chain with the maximal placedWeight (incoming edges from chains
261        //     that are already placed) and minimal inWeight (incoming edges from chains that are not
262        //     already placed). Prefer a node with non-zero placedWeight and inWeight to one that has
263        //     zeros for both. (A node with both zero placedWeight and zero inWeight is something that
264        //     the profile data predicts is not reachable via normal control flow from the entry node).
265        BasicBlock lastNode = null;
266        ChainInfo nextChoice = chainInfo.get(entry);
267        int numPlaced = 0;
268        ir.cfg.setFirstNode(entry);
269        while (true) {
270          if (DEBUG) VM.sysWriteln("Placing chain " + nextChoice);
271          // Append nextChoice to the previous chain
272          if (lastNode != null) ir.cfg.linkInCodeOrder(lastNode, nextChoice.head);
273          for (BasicBlock ptr = nextChoice.head; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
274            numPlaced++;
275            lastNode = ptr;
276          }
277          // update ChainInfo
278          chainInfo.remove(nextChoice.head);
279          if (chainInfo.isEmpty()) break; // no chains left to place.
280          for (ChainInfo target : nextChoice.outWeights.keySet()) {
281            if (DEBUG) VM.sysWrite("\toutedge " + target);
282            float weight = (Float) nextChoice.outWeights.get(target);
283            if (DEBUG) VM.sysWriteln(" = " + weight);
284            target.placedWeight += weight;
285            target.inWeight -= weight;
286          }
287    
288          if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
289    
290          // Find the next chain to append.
291          nextChoice = null;
292          for (ChainInfo cand : chainInfo.values()) {
293            if (cand.placedWeight > 0f) {
294              if (nextChoice == null) {
295                if (DEBUG) VM.sysWriteln("First reachable candidate " + cand);
296                nextChoice = cand;
297              } else if (cand.inWeight > nextChoice.inWeight ||
298                         (cand.inWeight == nextChoice.inWeight && cand.placedWeight > nextChoice.placedWeight)) {
299                if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
300                nextChoice = cand;
301              }
302            }
303          }
304          if (nextChoice != null) continue;
305    
306          // All remaining chains are fluff (not reachable from entry).
307          // Pick one with minimal inWeight and continue.
308          for (ChainInfo cand : chainInfo.values()) {
309            if (nextChoice == null) {
310              if (DEBUG) VM.sysWriteln("First candidate " + cand);
311              nextChoice = cand;
312            } else if (cand.inWeight < nextChoice.inWeight) {
313              if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
314              nextChoice = cand;
315            }
316          }
317        }
318    
319        if (VM.VerifyAssertions) VM._assert(numPlaced == numBlocks); // Don't lose blocks!!
320        ir.cfg.setLastNode(lastNode);
321      }
322    
323      private void dumpChain(BasicBlock head) {
324        VM.sysWrite("{" + head);
325        for (BasicBlock next = head.nextBasicBlockInCodeOrder(); next != null; next = next.nextBasicBlockInCodeOrder())
326        {
327          VM.sysWrite(", " + next);
328        }
329        VM.sysWriteln("}");
330      }
331    
332      private static class ChainInfo {
333        final BasicBlock head;
334        float placedWeight;
335        float inWeight;
336        final HashMap<ChainInfo, Object> outWeights = new HashMap<ChainInfo, Object>();
337    
338        ChainInfo(BasicBlock h) {
339          head = h;
340        }
341    
342        public String toString() {
343          return "[" + head + "," + placedWeight + "," + inWeight + "] " + outWeights.size();
344        }
345      }
346    
347      private static final class Edge implements Comparable<Edge> {
348        final BasicBlock source;
349        final BasicBlock target;
350        final float weight;
351    
352        Edge(BasicBlock s, BasicBlock t, float w) {
353          source = s;
354          target = t;
355          weight = w;
356        }
357    
358        public String toString() {
359          return weight + ": " + source.toString() + " -> " + target.toString();
360        }
361    
362        public int compareTo(Edge that) {
363          if (weight < that.weight) {
364            return 1;
365          } else if (weight > that.weight) {
366            return -1;
367          } else {
368            // Equal weights.
369            // Sort based on original code ordering, which is implied by block number
370            if (source.getNumber() < that.source.getNumber()) {
371              return 1;
372            } else if (source.getNumber() > that.source.getNumber()) {
373              return -1;
374            } else {
375              if (target.getNumber() > that.target.getNumber()) {
376                return 1;
377              } else if (source.getNumber() < that.target.getNumber()) {
378                return -1;
379              } else {
380                return 0;
381              }
382            }
383          }
384        }
385      }
386    }