001 /*
002 * This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 * This file is licensed to You under the Eclipse Public License (EPL);
005 * You may not use this file except in compliance with the License. You
006 * may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 * See the COPYRIGHT.txt file distributed with this work for information
011 * regarding copyright ownership.
012 */
013 package org.jikesrvm.compilers.opt.controlflow;
014
015 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
016
017 import java.util.HashMap;
018 import java.util.HashSet;
019 import java.util.TreeSet;
020
021 import org.jikesrvm.VM;
022 import org.jikesrvm.compilers.opt.OptOptions;
023 import org.jikesrvm.compilers.opt.driver.CompilerPhase;
024 import org.jikesrvm.compilers.opt.ir.BasicBlock;
025 import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
026 import org.jikesrvm.compilers.opt.ir.Goto;
027 import org.jikesrvm.compilers.opt.ir.IR;
028 import org.jikesrvm.compilers.opt.ir.Instruction;
029 import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
030 import org.jikesrvm.compilers.opt.ir.operand.BranchOperand;
031
032 /**
033 * Reorder code layout of basic blocks for improved I-cache locality and
034 * branch prediction. This code assumes that basic block frequencies have
035 * been computed and blocks have been marked infrequent.
036 * This pass actually implements two code placement algorithms:
037 * (1) A simple 'fluff' removal pass that moves all infrequent basic blocks
038 * to the end of the code order.
039 * (2) Pettis and Hansen Algo2.
040 */
041 public final class ReorderingPhase extends CompilerPhase {
042
043 private static final boolean DEBUG = false;
044
045 /**
046 * Return this instance of this phase. This phase contains no
047 * per-compilation instance fields.
048 * @param ir not used
049 * @return this
050 */
051 public CompilerPhase newExecution(IR ir) {
052 return this;
053 }
054
055 public boolean shouldPerform(OptOptions options) {
056 return options.REORDER_CODE;
057 }
058
059 public boolean printingEnabled(OptOptions options, boolean before) {
060 return DEBUG;
061 }
062
063 public String getName() {
064 return "Code Reordering";
065 }
066
067 /**
068 * Reorder basic blocks either by trivially moving infrequent blocks
069 * to the end of the code order or by applying Pettis and Hansen Algo2.
070 *
071 * We will rearrange basic blocks and insert/remove
072 * unconditional GOTO's if needed. It does not clean up branches,
073 * by reversing the branch condition, however. That is saved for
074 * BranchOptimizations.
075 */
076 public void perform(IR ir) {
077 ir.cfg.entry().clearInfrequent();
078 if (ir.options.REORDER_CODE_PH) {
079 // Do Pettis and Hansen PLDI'90 Algo2
080 doPettisHansenAlgo2(ir);
081 } else {
082 // Simple algorithm: just move infrequent code to the end
083 exileInfrequentBlocks(ir);
084 }
085 }
086
087 /////////////////////////
088 // Code for trivial algorithm
089 /////////////////////////
090
091 /**
092 * Select a new basic block ordering via a simple heuristic
093 * that moves all infrequent basic blocks to the end.
094 * @param ir the IR object to reorder
095 */
096 private void exileInfrequentBlocks(IR ir) {
097 // (1) Look to see if there are infrequent blocks
098 // Also count how many blocks there are.
099 int numBlocks = 0;
100 boolean foundSome = false;
101 for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
102 BasicBlock bb = e.next();
103 numBlocks++;
104 foundSome |= bb.getInfrequent();
105 }
106 if (!foundSome) return; // Nothing to do
107
108 // Reorder the blocks to exile the infrequent blocks.
109 // Relative order within the set of frequent and infrequent is unchanged.
110 BasicBlock[] newOrdering = new BasicBlock[numBlocks];
111 int idx = 0;
112 // First append frequent blocks to newOrdering
113 for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
114 if (!bb.getInfrequent()) {
115 newOrdering[idx++] = bb;
116 }
117 }
118 // Next append infrequent blocks to newOrdering
119 for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
120 if (bb.getInfrequent()) {
121 newOrdering[idx++] = bb;
122 }
123 }
124
125 if (VM.VerifyAssertions) VM._assert(idx == numBlocks); // don't lose blocks!
126 if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == newOrdering[0]);
127
128 // Add/remove unconditional goto's as needed.
129 for (int i = 0; i < newOrdering.length; i++) {
130 Instruction lastInstr = newOrdering[i].lastRealInstruction();
131 // Append a GOTO if needed to maintain old fallthrough semantics.
132 BasicBlock fallthroughBlock = newOrdering[i].getFallThroughBlock();
133 if (fallthroughBlock != null) {
134 if (i == newOrdering.length - 1 || fallthroughBlock != newOrdering[i + 1]) {
135 // Add unconditional goto to preserve old fallthrough semantics
136 newOrdering[i].appendInstruction(fallthroughBlock.makeGOTO());
137 }
138 }
139 // Remove last instruction if it is a redundant GOTO that
140 // can be implemented by a fallthrough edge in the new ordering.
141 // (Only possible if newOrdering[i] is not the last basic block.)
142 if (i < newOrdering.length - 1 && lastInstr != null && lastInstr.operator() == GOTO) {
143 BranchOperand op = Goto.getTarget(lastInstr);
144 if (op.target.getBasicBlock() == newOrdering[i + 1]) {
145 // unconditional goto is redundant in new ordering
146 lastInstr.remove();
147 }
148 }
149 }
150
151 // Re-insert all basic blocks according to new ordering
152 ir.cfg.clearCodeOrder();
153 for (BasicBlock bb : newOrdering) {
154 ir.cfg.addLastInCodeOrder(bb);
155 }
156 }
157
158 /////////////////////////
159 // Code for P&H Algo2
160 /////////////////////////
161
162 /**
163 * Reorder code using Algo2 (Bottom-Up Positioning) from
164 * Pettis and Hansen PLDI'90.
165 * @param ir the IR to reorder.
166 */
167 private void doPettisHansenAlgo2(IR ir) {
168 // (1) Setup:
169 // (a) Count the blocks
170 // (b) Create a sorted set of CFG edges
171 // (c) Create a set of blocks
172 // (d) Make fallthroughs explict by adding GOTOs
173 int numBlocks = 0;
174 TreeSet<Edge> edges = new TreeSet<Edge>();
175 HashSet<BasicBlock> chainHeads = new HashSet<BasicBlock>();
176 BasicBlock entry = ir.cfg.entry();
177 if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == ir.cfg.firstInCodeOrder());
178
179 for (BasicBlock bb = entry; bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
180 numBlocks++;
181 chainHeads.add(bb);
182 bb.scratchObject = bb;
183 BasicBlock ft = bb.getFallThroughBlock();
184 if (ft != null) {
185 bb.appendInstruction(Goto.create(GOTO, ft.makeJumpTarget()));
186 }
187 float bw = bb.getExecutionFrequency();
188 for (WeightedBranchTargets wbt = new WeightedBranchTargets(bb); wbt.hasMoreElements(); wbt.advance()) {
189 edges.add(new Edge(bb, wbt.curBlock(), wbt.curWeight() * bw));
190 }
191 }
192
193 if (DEBUG) VM.sysWriteln("Edges = " + edges);
194
195 // (2) Build chains
196 ir.cfg.clearCodeOrder();
197 for (Edge e : edges) {
198 // If the source of the edge is the last block in its chain
199 // and the target of the edge is the first block in its chain
200 // then merge the chains.
201 if (DEBUG) VM.sysWriteln("Processing edge " + e);
202 if (e.target == entry) {
203 if (DEBUG) VM.sysWriteln("\tCan't put entry block in interior of chain");
204 continue;
205 }
206 if (e.source.nextBasicBlockInCodeOrder() != null) {
207 if (DEBUG) VM.sysWriteln("\tSource is not at end of a chain");
208 continue;
209 }
210 if (e.target.prevBasicBlockInCodeOrder() != null) {
211 if (DEBUG) VM.sysWriteln("\tTarget is not at start of a chain");
212 continue;
213 }
214 if (e.source.scratchObject == e.target.scratchObject) {
215 if (DEBUG) VM.sysWriteln("\tSource and target are in same chain");
216 continue;
217 }
218 if (DEBUG) VM.sysWriteln("\tMerging chains");
219 chainHeads.remove(e.target);
220 ir.cfg.linkInCodeOrder(e.source, e.target);
221 // Yuck....we should really use near-linear time union find here
222 // Doing this crappy thing makes us O(N^2) in the worst case.
223 BasicBlock newChain = (BasicBlock) e.source.scratchObject;
224 for (BasicBlock ptr = e.target; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
225 ptr.scratchObject = newChain;
226 }
227 }
228
229 if (DEBUG) VM.sysWriteln("Chains constructed ");
230 HashMap<BasicBlock, ChainInfo> chainInfo = new HashMap<BasicBlock, ChainInfo>();
231 for (BasicBlock head : chainHeads) {
232 if (DEBUG) dumpChain(head);
233 chainInfo.put(head, new ChainInfo(head));
234 }
235
236 // (3) Summarize inter-chain edges.
237 for (Edge e : edges) {
238 if (e.source.scratchObject != e.target.scratchObject) {
239 Object sourceChain = e.source.scratchObject;
240 Object targetChain = e.target.scratchObject;
241 ChainInfo sourceInfo = chainInfo.get(sourceChain);
242 ChainInfo targetInfo = chainInfo.get(targetChain);
243 if (DEBUG) VM.sysWriteln("Inter-chain edge " + sourceChain + "->" + targetChain + " (" + e.weight + ")");
244 Object value = sourceInfo.outWeights.get(targetInfo);
245 float weight = e.weight;
246 if (value != null) {
247 weight += (Float) value;
248 }
249 sourceInfo.outWeights.put(targetInfo, weight);
250 targetInfo.inWeight += e.weight;
251 if (DEBUG) VM.sysWriteln("\t" + targetInfo + "," + sourceInfo.outWeights.get(targetInfo));
252 }
253 }
254
255 if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
256
257 // (4) Construct a total order of the chains, guided by the interchain edge weights.
258 // Constructing an optimal order is NP-Hard, so we apply the following heuristic.
259 // The chain that starts with the entry node is placed first.
260 // At each step, pick the chain with the maximal placedWeight (incoming edges from chains
261 // that are already placed) and minimal inWeight (incoming edges from chains that are not
262 // already placed). Prefer a node with non-zero placedWeight and inWeight to one that has
263 // zeros for both. (A node with both zero placedWeight and zero inWeight is something that
264 // the profile data predicts is not reachable via normal control flow from the entry node).
265 BasicBlock lastNode = null;
266 ChainInfo nextChoice = chainInfo.get(entry);
267 int numPlaced = 0;
268 ir.cfg.setFirstNode(entry);
269 while (true) {
270 if (DEBUG) VM.sysWriteln("Placing chain " + nextChoice);
271 // Append nextChoice to the previous chain
272 if (lastNode != null) ir.cfg.linkInCodeOrder(lastNode, nextChoice.head);
273 for (BasicBlock ptr = nextChoice.head; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
274 numPlaced++;
275 lastNode = ptr;
276 }
277 // update ChainInfo
278 chainInfo.remove(nextChoice.head);
279 if (chainInfo.isEmpty()) break; // no chains left to place.
280 for (ChainInfo target : nextChoice.outWeights.keySet()) {
281 if (DEBUG) VM.sysWrite("\toutedge " + target);
282 float weight = (Float) nextChoice.outWeights.get(target);
283 if (DEBUG) VM.sysWriteln(" = " + weight);
284 target.placedWeight += weight;
285 target.inWeight -= weight;
286 }
287
288 if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
289
290 // Find the next chain to append.
291 nextChoice = null;
292 for (ChainInfo cand : chainInfo.values()) {
293 if (cand.placedWeight > 0f) {
294 if (nextChoice == null) {
295 if (DEBUG) VM.sysWriteln("First reachable candidate " + cand);
296 nextChoice = cand;
297 } else if (cand.inWeight > nextChoice.inWeight ||
298 (cand.inWeight == nextChoice.inWeight && cand.placedWeight > nextChoice.placedWeight)) {
299 if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
300 nextChoice = cand;
301 }
302 }
303 }
304 if (nextChoice != null) continue;
305
306 // All remaining chains are fluff (not reachable from entry).
307 // Pick one with minimal inWeight and continue.
308 for (ChainInfo cand : chainInfo.values()) {
309 if (nextChoice == null) {
310 if (DEBUG) VM.sysWriteln("First candidate " + cand);
311 nextChoice = cand;
312 } else if (cand.inWeight < nextChoice.inWeight) {
313 if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
314 nextChoice = cand;
315 }
316 }
317 }
318
319 if (VM.VerifyAssertions) VM._assert(numPlaced == numBlocks); // Don't lose blocks!!
320 ir.cfg.setLastNode(lastNode);
321 }
322
323 private void dumpChain(BasicBlock head) {
324 VM.sysWrite("{" + head);
325 for (BasicBlock next = head.nextBasicBlockInCodeOrder(); next != null; next = next.nextBasicBlockInCodeOrder())
326 {
327 VM.sysWrite(", " + next);
328 }
329 VM.sysWriteln("}");
330 }
331
332 private static class ChainInfo {
333 final BasicBlock head;
334 float placedWeight;
335 float inWeight;
336 final HashMap<ChainInfo, Object> outWeights = new HashMap<ChainInfo, Object>();
337
338 ChainInfo(BasicBlock h) {
339 head = h;
340 }
341
342 public String toString() {
343 return "[" + head + "," + placedWeight + "," + inWeight + "] " + outWeights.size();
344 }
345 }
346
347 private static final class Edge implements Comparable<Edge> {
348 final BasicBlock source;
349 final BasicBlock target;
350 final float weight;
351
352 Edge(BasicBlock s, BasicBlock t, float w) {
353 source = s;
354 target = t;
355 weight = w;
356 }
357
358 public String toString() {
359 return weight + ": " + source.toString() + " -> " + target.toString();
360 }
361
362 public int compareTo(Edge that) {
363 if (weight < that.weight) {
364 return 1;
365 } else if (weight > that.weight) {
366 return -1;
367 } else {
368 // Equal weights.
369 // Sort based on original code ordering, which is implied by block number
370 if (source.getNumber() < that.source.getNumber()) {
371 return 1;
372 } else if (source.getNumber() > that.source.getNumber()) {
373 return -1;
374 } else {
375 if (target.getNumber() > that.target.getNumber()) {
376 return 1;
377 } else if (source.getNumber() < that.target.getNumber()) {
378 return -1;
379 } else {
380 return 0;
381 }
382 }
383 }
384 }
385 }
386 }