001 /*
002 * This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 * This file is licensed to You under the Eclipse Public License (EPL);
005 * You may not use this file except in compliance with the License. You
006 * may obtain a copy of the License at
007 *
008 * http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 * See the COPYRIGHT.txt file distributed with this work for information
011 * regarding copyright ownership.
012 */
013 package org.jikesrvm.compilers.opt.controlflow;
014
015 import java.lang.reflect.Constructor;
016 import java.util.Arrays;
017 import java.util.Enumeration;
018
019 import org.jikesrvm.VM;
020 import org.jikesrvm.compilers.opt.OptOptions;
021 import org.jikesrvm.compilers.opt.driver.CompilerPhase;
022 import org.jikesrvm.compilers.opt.ir.BasicBlock;
023 import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
024 import org.jikesrvm.compilers.opt.ir.IR;
025 import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
026
027 /**
028 * Derive relative basic block execution frequencies from branch probabilities.<p>
029 *
030 * This code assumes that the loop structure tree can be constructed for
031 * the CFG in question. This implies that the CFG is reducible. <p>
032 *
033 * The basic algorithm is as follows:
034 * <ul>
035 * <li> Construct the loop structure tree for the CFG. </li>
036 * <li> In a postorder traversal, compute the loop multiplier for each loop.
037 * The loop multiplier is a number such that the execution frequency of
038 * the loop pre-header times the loop multiplier is equal to the
039 * execution frequency of the loop head. This can be derived by computing
040 * the loop exit weight (the probability of exiting the loop) and applying
041 * Kirchoff's law that flow in is equal to flow out. Loop exit weight
042 * can be computed in a single topological (ignoring backedges) traversal
043 * of the nodes in the loop. </li>
044 * <li> Assign the entry node weight 1. In a topological traversal of the CFG
045 * (ignoring backedges), propagate the weights. When processing a loop head,
046 * multiply the incoming weight by the loop multiplier.</li>
047 * </ul>
048 */
049 public class EstimateBlockFrequencies extends CompilerPhase {
050
051 /**
052 * The IR on which to operate.
053 */
054 private IR ir;
055
056 /**
057 * The loop structure tree of said IR
058 */
059 private LSTGraph lst;
060
061 /**
062 * Constructor for this compiler phase
063 */
064 private static final Constructor<CompilerPhase> constructor =
065 getCompilerPhaseConstructor(EstimateBlockFrequencies.class);
066
067 /**
068 * Get a constructor object for this compiler phase
069 * @return compiler phase constructor
070 */
071 public Constructor<CompilerPhase> getClassConstructor() {
072 return constructor;
073 }
074
075 /**
076 * Topological ordering (ignoring backedges) of CFG
077 */
078 private BasicBlock[] topOrder;
079
080 public String getName() { return "Estimate Block Frequencies"; }
081
082 public void reportAdditionalStats() {
083 VM.sysWrite(" ");
084 VM.sysWrite(container.counter1 / container.counter2 * 100, 2);
085 VM.sysWrite("% Infrequent BBs");
086 }
087
088 /**
089 * Compute relative basic block frequencies for the argument IR based on the
090 * branch probability information on each conditional and multiway branch.
091 * Assumptions: (1) LST is valid
092 * (2) basic block numbering is dense (compact has been called).
093 * @param _ir the IR on which to apply the phase
094 */
095 public void perform(IR _ir) {
096 // Prepare
097 ir = _ir;
098
099 if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) {
100 setDumbFrequencies(ir);
101 return;
102 }
103
104 ir.cfg.resetTopSorted();
105 ir.cfg.buildTopSort();
106 topOrder = new BasicBlock[ir.cfg.numberOfNodes()];
107 int idx = 0;
108 for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) {
109 topOrder[idx++] = ptr;
110 ptr.setExecutionFrequency(0f);
111 ptr.clearScratchFlag();
112 }
113
114 // Get pre-computed LST from IR.
115 lst = ir.HIRInfo.loopStructureTree;
116
117 // Compute loop multipliers
118 if (lst != null) {
119 computeLoopMultipliers(lst.getRoot());
120 for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
121 BasicBlock bb = e.next();
122 bb.setExecutionFrequency(0f);
123 bb.clearScratchFlag();
124 }
125 }
126
127 // Compute execution frequency of each basic block
128 computeBlockFrequencies();
129
130 // Set infrequent bits on basic blocks
131 computeInfrequentBlocks(ir);
132 }
133
134 /**
135 * Set the frequency of each basic block to 1.0f.
136 */
137 private void setDumbFrequencies(IR ir) {
138 for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
139 BasicBlock bb = e.next();
140 bb.setExecutionFrequency(1f);
141 }
142 }
143
144 /**
145 * Compute which blocks are infrequent.
146 * Algorithm: let f = INFREQUENT_THRESHOLD.
147 * Start with S = {all basic blocks}.
148 * Sort the blocks by frequency. Starting with the most frequent
149 * blocks, remove blocks from S until the sum of block frequencies in S
150 * <= f. Then blocks in S are infrequent.
151 *
152 * @param ir the governing IR.
153 */
154 private void computeInfrequentBlocks(IR ir) {
155 int i = 0;
156 float[] freq = new float[ir.getMaxBasicBlockNumber()];
157 float total = 0f;
158 // count the total frequency of all blocks
159 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
160 BasicBlock bb = e.nextElement();
161 freq[i] = bb.getExecutionFrequency();
162 total += freq[i];
163 i++;
164 }
165 // sort the frequencies (ascending);
166 Arrays.sort(freq);
167 float f = ir.options.PROFILE_INFREQUENT_THRESHOLD;
168 float goal = (1f - f) * total;
169 total = 0f;
170 float threshold = 0f;
171 // add up the frequencies (descending) until we real the goal.
172 for (i = freq.length - 1; i >= 0 && total < goal; i--) {
173 threshold = freq[i];
174 total += threshold;
175 }
176 // go back and set infrequent bits.
177 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
178 BasicBlock bb = e.nextElement();
179 if (bb.getExecutionFrequency() < threshold) {
180 bb.setInfrequent();
181 container.counter1++;
182 } else {
183 bb.clearInfrequent();
184 }
185 container.counter2++;
186 }
187 }
188
189 /**
190 * Postorder traversal of LST computing loop multiplier and loop exits
191 * for each loop.
192 */
193 private void computeLoopMultipliers(LSTNode n) {
194 for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) {
195 computeLoopMultipliers(e.nextElement());
196 }
197 if (n != lst.getRoot()) {
198 computeMultiplier(n);
199 n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop
200 }
201 }
202
203 /**
204 * Compute the loop multiplier for this loop nest
205 */
206 private void computeMultiplier(LSTNode n) {
207 n.initializeLoopExits();
208 computeNodeWeights(n);
209 float loopExitWeight = computeLoopExitWeight(n);
210 n.loopMultiplier = 1.0f / loopExitWeight;
211 }
212
213 /**
214 * Propagate execution frequencies through the loop.
215 * Also records loop exit edges in loopExits.
216 */
217 private void computeNodeWeights(LSTNode n) {
218 n.header.setExecutionFrequency(1f);
219 int idx = 0;
220 while (topOrder[idx] != n.header) idx++;
221 for (int numNodes = n.loop.populationCount(); numNodes > 0;) {
222 if (idx >= topOrder.length) {
223 numNodes--;
224 continue;
225 }
226 BasicBlock cur = topOrder[idx++];
227 if (cur == null) {
228 numNodes--;
229 continue;
230 }
231 if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed.
232 LSTNode other = lst.getLoop(cur);
233 if (other != n) {
234 if (cur == other.header) {
235 // loop header of nested loop
236 numNodes -= other.loop.populationCount();
237 }
238 continue; // skip over nodes in nested loop.
239 }
240
241 numNodes--;
242 cur.setScratchFlag();
243 float weight = cur.getExecutionFrequency();
244 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
245 processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight);
246 }
247 }
248 }
249
250 private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) {
251 if (target.getScratchFlag()) return; // ignore backedge
252 if (n.loop.get(target.getNumber())) {
253 LSTNode other = lst.getLoop(target);
254 if (other == n) {
255 target.augmentExecutionFrequency(prob * weight);
256 } else {
257 // header of nested loop; pass prob and weight through to targets of loop exits
258 // Algorithm: find the total loopExitWeight, then distribute prob and weight
259 // in ratio to the exit weight for each exit.
260 // Effectively we are treating the nested loop as an n-way branch to its loop exits.
261 target.setScratchFlag();
262 float exitWeight = computeLoopExitWeight(other);
263 for (LSTNode.Edge exit : other.loopExits) {
264 float myWeight = exit.source.getExecutionFrequency() * exit.probability;
265 float myFraction = myWeight / exitWeight;
266 processEdge(n, source, exit.target, prob * myFraction, weight);
267 }
268 target.clearScratchFlag();
269 }
270 } else {
271 n.addLoopExit(source, target, prob);
272 }
273 }
274
275 private float computeLoopExitWeight(LSTNode n) {
276 float exitWeight = 0f;
277 for (LSTNode.Edge exit : n.loopExits) {
278 exitWeight += (exit.source.getExecutionFrequency() * exit.probability);
279 }
280 // Kludge: if we think the loop has no exits, lets pretend that there is a 1%
281 // chance of exiting to avoid getting NaN's in our computation.
282 return exitWeight == 0f ? 0.01f : exitWeight;
283 }
284
285 private void computeBlockFrequencies() {
286 ir.cfg.entry().setExecutionFrequency(1f);
287 for (BasicBlock cur : topOrder) {
288 if (cur == null || cur.isExit()) continue; // ignore exit node.
289 if (lst != null) {
290 LSTNode loop = lst.getLoop(cur);
291 if (loop != null && loop.header == cur) {
292 cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier);
293 }
294 }
295 float weight = cur.getExecutionFrequency();
296 cur.setScratchFlag();
297
298 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
299 BasicBlock target = wbt.curBlock();
300 if (!target.getScratchFlag()) {
301 target.augmentExecutionFrequency(wbt.curWeight() * weight);
302 }
303 }
304 }
305 }
306 }