001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.controlflow;
014    
015    import java.lang.reflect.Constructor;
016    import java.util.Arrays;
017    import java.util.Enumeration;
018    
019    import org.jikesrvm.VM;
020    import org.jikesrvm.compilers.opt.OptOptions;
021    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
022    import org.jikesrvm.compilers.opt.ir.BasicBlock;
023    import org.jikesrvm.compilers.opt.ir.BasicBlockEnumeration;
024    import org.jikesrvm.compilers.opt.ir.IR;
025    import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
026    
027    /**
028     * Derive relative basic block execution frequencies from branch probabilities.<p>
029     *
030     * This code assumes that the loop structure tree can be constructed for
031     * the CFG in question.  This implies that the CFG is reducible. <p>
032     *
033     * The basic algorithm is as follows:
034     * <ul>
035     * <li> Construct the loop structure tree for the CFG. </li>
036     * <li> In a postorder traversal, compute the loop multiplier for each loop.
037     *      The loop multiplier is a number such that the execution frequency of
038     *      the loop pre-header times the loop multiplier is equal to the
039     *      execution frequency of the loop head.  This can be derived by computing
040     *      the loop exit weight (the probability of exiting the loop) and applying
041     *      Kirchoff's law that flow in is equal to flow out.  Loop exit weight
042     *      can be computed in a single topological (ignoring backedges) traversal
043     *      of the nodes in the loop. </li>
044     * <li> Assign the entry node weight 1.  In a topological traversal of the CFG
045     *      (ignoring backedges), propagate the weights.  When processing a loop head,
046     *      multiply the incoming weight by the loop multiplier.</li>
047     * </ul>
048     */
049    public class EstimateBlockFrequencies extends CompilerPhase {
050    
051      /**
052       * The IR on which to operate.
053       */
054      private IR ir;
055    
056      /**
057       * The loop structure tree of said IR
058       */
059      private LSTGraph lst;
060    
061      /**
062       * Constructor for this compiler phase
063       */
064      private static final Constructor<CompilerPhase> constructor =
065          getCompilerPhaseConstructor(EstimateBlockFrequencies.class);
066    
067      /**
068       * Get a constructor object for this compiler phase
069       * @return compiler phase constructor
070       */
071      public Constructor<CompilerPhase> getClassConstructor() {
072        return constructor;
073      }
074    
075      /**
076       * Topological ordering (ignoring backedges) of CFG
077       */
078      private BasicBlock[] topOrder;
079    
080      public String getName() { return "Estimate Block Frequencies"; }
081    
082      public void reportAdditionalStats() {
083        VM.sysWrite("  ");
084        VM.sysWrite(container.counter1 / container.counter2 * 100, 2);
085        VM.sysWrite("% Infrequent BBs");
086      }
087    
088      /**
089       * Compute relative basic block frequencies for the argument IR based on the
090       * branch probability information on each conditional and multiway branch.
091       * Assumptions: (1) LST is valid
092       *              (2) basic block numbering is dense (compact has been called).
093       * @param _ir the IR on which to apply the phase
094       */
095      public void perform(IR _ir) {
096        // Prepare
097        ir = _ir;
098    
099        if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) {
100          setDumbFrequencies(ir);
101          return;
102        }
103    
104        ir.cfg.resetTopSorted();
105        ir.cfg.buildTopSort();
106        topOrder = new BasicBlock[ir.cfg.numberOfNodes()];
107        int idx = 0;
108        for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) {
109          topOrder[idx++] = ptr;
110          ptr.setExecutionFrequency(0f);
111          ptr.clearScratchFlag();
112        }
113    
114        // Get pre-computed LST from IR.
115        lst = ir.HIRInfo.loopStructureTree;
116    
117        // Compute loop multipliers
118        if (lst != null) {
119          computeLoopMultipliers(lst.getRoot());
120          for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
121            BasicBlock bb = e.next();
122            bb.setExecutionFrequency(0f);
123            bb.clearScratchFlag();
124          }
125        }
126    
127        // Compute execution frequency of each basic block
128        computeBlockFrequencies();
129    
130        // Set infrequent bits on basic blocks
131        computeInfrequentBlocks(ir);
132      }
133    
134      /**
135       * Set the frequency of each basic block to 1.0f.
136       */
137      private void setDumbFrequencies(IR ir) {
138        for (BasicBlockEnumeration e = ir.getBasicBlocks(); e.hasMoreElements();) {
139          BasicBlock bb = e.next();
140          bb.setExecutionFrequency(1f);
141        }
142      }
143    
144      /**
145       * Compute which blocks are infrequent.
146       * Algorithm: let f = INFREQUENT_THRESHOLD.
147       * Start with S = {all basic blocks}.
148       * Sort the blocks by frequency.  Starting with the most frequent
149       * blocks, remove blocks from S until the sum of block frequencies in S
150       * <= f.  Then blocks in S are infrequent.
151       *
152       * @param ir the governing IR.
153       */
154      private void computeInfrequentBlocks(IR ir) {
155        int i = 0;
156        float[] freq = new float[ir.getMaxBasicBlockNumber()];
157        float total = 0f;
158        // count the total frequency of all blocks
159        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
160          BasicBlock bb = e.nextElement();
161          freq[i] = bb.getExecutionFrequency();
162          total += freq[i];
163          i++;
164        }
165        // sort the frequencies (ascending);
166        Arrays.sort(freq);
167        float f = ir.options.PROFILE_INFREQUENT_THRESHOLD;
168        float goal = (1f - f) * total;
169        total = 0f;
170        float threshold = 0f;
171        // add up the frequencies (descending) until we real the goal.
172        for (i = freq.length - 1; i >= 0 && total < goal; i--) {
173          threshold = freq[i];
174          total += threshold;
175        }
176        // go back and set infrequent bits.
177        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
178          BasicBlock bb = e.nextElement();
179          if (bb.getExecutionFrequency() < threshold) {
180            bb.setInfrequent();
181            container.counter1++;
182          } else {
183            bb.clearInfrequent();
184          }
185          container.counter2++;
186        }
187      }
188    
189      /**
190       * Postorder traversal of LST computing loop multiplier and loop exits
191       * for each loop.
192       */
193      private void computeLoopMultipliers(LSTNode n) {
194        for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) {
195          computeLoopMultipliers(e.nextElement());
196        }
197        if (n != lst.getRoot()) {
198          computeMultiplier(n);
199          n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop
200        }
201      }
202    
203      /**
204       * Compute the loop multiplier for this loop nest
205       */
206      private void computeMultiplier(LSTNode n) {
207        n.initializeLoopExits();
208        computeNodeWeights(n);
209        float loopExitWeight = computeLoopExitWeight(n);
210        n.loopMultiplier = 1.0f / loopExitWeight;
211      }
212    
213      /**
214       * Propagate execution frequencies through the loop.
215       * Also records loop exit edges in loopExits.
216       */
217      private void computeNodeWeights(LSTNode n) {
218        n.header.setExecutionFrequency(1f);
219        int idx = 0;
220        while (topOrder[idx] != n.header) idx++;
221        for (int numNodes = n.loop.populationCount(); numNodes > 0;) {
222          if (idx >= topOrder.length) {
223            numNodes--;
224            continue;
225          }
226          BasicBlock cur = topOrder[idx++];
227          if (cur == null) {
228            numNodes--;
229            continue;
230          }
231          if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed.
232          LSTNode other = lst.getLoop(cur);
233          if (other != n) {
234            if (cur == other.header) {
235              // loop header of nested loop
236              numNodes -= other.loop.populationCount();
237            }
238            continue; // skip over nodes in nested loop.
239          }
240    
241          numNodes--;
242          cur.setScratchFlag();
243          float weight = cur.getExecutionFrequency();
244          for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
245            processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight);
246          }
247        }
248      }
249    
250      private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) {
251        if (target.getScratchFlag()) return; // ignore backedge
252        if (n.loop.get(target.getNumber())) {
253          LSTNode other = lst.getLoop(target);
254          if (other == n) {
255            target.augmentExecutionFrequency(prob * weight);
256          } else {
257            // header of nested loop; pass prob and weight through to targets of loop exits
258            // Algorithm: find the total loopExitWeight, then distribute prob and weight
259            //            in ratio to the exit weight for each exit.
260            //            Effectively we are treating the nested loop as an n-way branch to its loop exits.
261            target.setScratchFlag();
262            float exitWeight = computeLoopExitWeight(other);
263            for (LSTNode.Edge exit : other.loopExits) {
264              float myWeight = exit.source.getExecutionFrequency() * exit.probability;
265              float myFraction = myWeight / exitWeight;
266              processEdge(n, source, exit.target, prob * myFraction, weight);
267            }
268            target.clearScratchFlag();
269          }
270        } else {
271          n.addLoopExit(source, target, prob);
272        }
273      }
274    
275      private float computeLoopExitWeight(LSTNode n) {
276        float exitWeight = 0f;
277        for (LSTNode.Edge exit : n.loopExits) {
278          exitWeight += (exit.source.getExecutionFrequency() * exit.probability);
279        }
280        // Kludge: if we think the loop has no exits, lets pretend that there is a 1%
281        //         chance of exiting to avoid getting NaN's in our computation.
282        return exitWeight == 0f ? 0.01f : exitWeight;
283      }
284    
285      private void computeBlockFrequencies() {
286        ir.cfg.entry().setExecutionFrequency(1f);
287        for (BasicBlock cur : topOrder) {
288          if (cur == null || cur.isExit()) continue; // ignore exit node.
289          if (lst != null) {
290            LSTNode loop = lst.getLoop(cur);
291            if (loop != null && loop.header == cur) {
292              cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier);
293            }
294          }
295          float weight = cur.getExecutionFrequency();
296          cur.setScratchFlag();
297    
298          for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
299            BasicBlock target = wbt.curBlock();
300            if (!target.getScratchFlag()) {
301              target.augmentExecutionFrequency(wbt.curWeight() * weight);
302            }
303          }
304        }
305      }
306    }