001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.regalloc.ia32;
014    
015    import java.util.Enumeration;
016    import java.util.Iterator;
017    import org.jikesrvm.classloader.TypeReference;
018    import org.jikesrvm.compilers.opt.OptimizingCompilerException;
019    import org.jikesrvm.compilers.opt.ir.Empty;
020    import org.jikesrvm.compilers.opt.ir.MIR_BinaryAcc;
021    import org.jikesrvm.compilers.opt.ir.MIR_FSave;
022    import org.jikesrvm.compilers.opt.ir.MIR_Lea;
023    import org.jikesrvm.compilers.opt.ir.MIR_Move;
024    import org.jikesrvm.compilers.opt.ir.MIR_Nullary;
025    import org.jikesrvm.compilers.opt.ir.MIR_TrapIf;
026    import org.jikesrvm.compilers.opt.ir.MIR_UnaryNoRes;
027    import org.jikesrvm.compilers.opt.ir.IR;
028    import org.jikesrvm.compilers.opt.ir.Instruction;
029    import org.jikesrvm.compilers.opt.ir.InstructionEnumeration;
030    import org.jikesrvm.compilers.opt.ir.OperandEnumeration;
031    import org.jikesrvm.compilers.opt.ir.Operator;
032    import static org.jikesrvm.compilers.opt.ir.Operators.ADVISE_ESP;
033    import static org.jikesrvm.compilers.opt.ir.Operators.BBEND;
034    import static org.jikesrvm.compilers.opt.ir.Operators.CALL_SAVE_VOLATILE;
035    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_ADD;
036    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FCLEAR;
037    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FMOV;
038    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FMOV_opcode;
039    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FNINIT;
040    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FNSAVE;
041    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FRSTOR;
042    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_LEA;
043    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOV;
044    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVQ;
045    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVSD;
046    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVSD_opcode;
047    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVSS;
048    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVSS_opcode;
049    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOV_opcode;
050    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_POP;
051    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_PUSH;
052    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_RET_opcode;
053    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_SYSCALL;
054    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_TRAPIF;
055    import static org.jikesrvm.compilers.opt.ir.Operators.NOP;
056    import static org.jikesrvm.compilers.opt.ir.Operators.REQUIRE_ESP;
057    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_BACKEDGE;
058    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_EPILOGUE;
059    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_OSR;
060    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_PROLOGUE;
061    import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.DOUBLE_REG;
062    import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.DOUBLE_VALUE;
063    import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.FLOAT_VALUE;
064    import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.INT_REG;
065    import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.INT_VALUE;
066    
067    import org.jikesrvm.compilers.opt.ir.Register;
068    import org.jikesrvm.compilers.opt.ir.ia32.PhysicalDefUse;
069    import org.jikesrvm.compilers.opt.ir.ia32.PhysicalRegisterSet;
070    import org.jikesrvm.compilers.opt.ir.operand.MemoryOperand;
071    import org.jikesrvm.compilers.opt.ir.operand.Operand;
072    import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
073    import org.jikesrvm.compilers.opt.ir.operand.StackLocationOperand;
074    import org.jikesrvm.compilers.opt.ir.operand.TrapCodeOperand;
075    import org.jikesrvm.compilers.opt.ir.operand.ia32.IA32ConditionOperand;
076    import org.jikesrvm.compilers.opt.regalloc.GenericStackManager;
077    import org.jikesrvm.compilers.opt.regalloc.RegisterAllocatorState;
078    import org.jikesrvm.ia32.ArchConstants;
079    import static org.jikesrvm.ia32.StackframeLayoutConstants.STACKFRAME_ALIGNMENT;
080    import org.jikesrvm.runtime.ArchEntrypoints;
081    import org.jikesrvm.runtime.Entrypoints;
082    import org.vmmagic.unboxed.Offset;
083    
/**
 * Class to manage the allocation of the "compiler-specific" portion of
 * the stackframe.  This class holds only the architecture-specific
 * functions.
 */
090    public abstract class StackManager extends GenericStackManager {
091    
  /**
   * A frame offset for the stack space that stores the floating point
   * state in the SaveVolatile protocol.  It is assigned in
   * {@link #computeNonVolatileArea()}: without SSE2 it is the last of 27
   * {@code INT_REG} spill slots (the 108 bytes used by FSAVE); when
   * {@code ArchConstants.SSE2_FULL} is set it is the last of eight
   * {@code DOUBLE_REG} spill slots.
   */
  private int fsaveLocation;

  /**
   * We allow the stack pointer to float from its normal position at the
   * bottom of the frame.  This field holds the 'current' offset of the
   * SP.
   */
  private int ESPOffset = 0;

  /**
   * Should we allow the stack pointer to float in order to avoid scratch
   * registers in move instructions?  Note: as of Feb. 02, we think this
   * is a bad idea.  While this is {@code false},
   * {@code isScratchFreeMove} answers {@code false} for every instruction.
   */
  private static boolean FLOAT_ESP = false;
111    
112      /**
113       * Return the size of the fixed portion of the stack.
114       * (in other words, the difference between the framepointer and
115       * the stackpointer after the prologue of the method completes).
116       * @return size in bytes of the fixed portion of the stackframe
117       */
118      public final int getFrameFixedSize() {
119        return frameSize - WORDSIZE;
120      }
121    
122      /**
123       * Return the size of a type of value, in bytes.
124       * NOTE: For the purpose of register allocation, an x87 FLOAT_VALUE is 64 bits!
125       *
126       * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
127       */
128      private static byte getSizeOfType(byte type) {
129        switch (type) {
130          case INT_VALUE:
131            return (byte) (WORDSIZE);
132          case FLOAT_VALUE:
133            if (ArchConstants.SSE2_FULL) return (byte) WORDSIZE;
134          case DOUBLE_VALUE:
135            return (byte) (2 * WORDSIZE);
136          default:
137            OptimizingCompilerException.TODO("getSizeOfValue: unsupported");
138            return 0;
139        }
140      }
141    
142      /**
143       * Return the move operator for a type of value.
144       *
145       * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
146       */
147      private static Operator getMoveOperator(byte type) {
148        switch (type) {
149          case INT_VALUE:
150            return IA32_MOV;
151          case DOUBLE_VALUE:
152            if (ArchConstants.SSE2_FULL) return IA32_MOVSD;
153          case FLOAT_VALUE:
154            if (ArchConstants.SSE2_FULL) return IA32_MOVSS;
155            return IA32_FMOV;
156          default:
157            OptimizingCompilerException.TODO("getMoveOperator: unsupported");
158            return null;
159        }
160      }
161    
162      /**
163       * Allocate a new spill location and grow the
164       * frame size to reflect the new layout.
165       *
166       * @param type the type to spill
167       * @return the spill location
168       */
169      public final int allocateNewSpillLocation(int type) {
170    
171        // increment by the spill size
172        spillPointer += PhysicalRegisterSet.getSpillSize(type);
173    
174        if (spillPointer + WORDSIZE > frameSize) {
175          frameSize = spillPointer + WORDSIZE;
176        }
177        return spillPointer;
178      }
179    
180      /**
181       * Insert a spill of a physical register before instruction s.
182       *
183       * @param s the instruction before which the spill should occur
184       * @param r the register (should be physical) to spill
185       * @param type one of INT_VALUE, FLOAT_VALUE, DOUBLE_VALUE, or
186       *                    CONDITION_VALUE
187       * @param location the spill location, as an offset from the frame
188       * pointer
189       */
190      public final void insertSpillBefore(Instruction s, Register r, byte type, int location) {
191    
192        Operator move = getMoveOperator(type);
193        byte size = getSizeOfType(type);
194        RegisterOperand rOp;
195        switch (type) {
196          case FLOAT_VALUE:
197            rOp = F(r);
198            break;
199          case DOUBLE_VALUE:
200            rOp = D(r);
201            break;
202          default:
203            rOp = new RegisterOperand(r, TypeReference.Int);
204            break;
205        }
206        StackLocationOperand spill = new StackLocationOperand(true, -location, size);
207        s.insertBefore(MIR_Move.create(move, spill, rOp));
208      }
209    
210      /**
211       * Insert a load of a physical register from a spill location before
212       * instruction s.
213       *
214       * @param s the instruction before which the spill should occur
215       * @param r the register (should be physical) to spill
216       * @param type one of INT_VALUE, FLOAT_VALUE, DOUBLE_VALUE, or
217       *                    CONDITION_VALUE
218       * @param location the spill location
219       */
220      public final void insertUnspillBefore(Instruction s, Register r, byte type, int location) {
221        Operator move = getMoveOperator(type);
222        byte size = getSizeOfType(type);
223        RegisterOperand rOp;
224        switch (type) {
225          case FLOAT_VALUE:
226            rOp = F(r);
227            break;
228          case DOUBLE_VALUE:
229            rOp = D(r);
230            break;
231          default:
232            rOp = new RegisterOperand(r, TypeReference.Int);
233            break;
234        }
235        StackLocationOperand spill = new StackLocationOperand(true, -location, size);
236        s.insertBefore(MIR_Move.create(move, rOp, spill));
237      }
238    
239      /**
240       * Compute the number of stack words needed to hold nonvolatile
241       * registers.
242       *
243       * Side effects:
244       * <ul>
245       * <li> updates the OptCompiler structure
246       * <li> updates the <code>frameSize</code> field of this object
247       * <li> updates the <code>frameRequired</code> field of this object
248       * </ul>
249       */
250      public void computeNonVolatileArea() {
251        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
252    
253        if (ir.compiledMethod.isSaveVolatile()) {
254          // Record that we use every nonvolatile GPR
255          int numGprNv = PhysicalRegisterSet.getNumberOfNonvolatileGPRs();
256          ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);
257    
258          // set the frame size
259          frameSize += numGprNv * WORDSIZE;
260          frameSize = align(frameSize, STACKFRAME_ALIGNMENT);
261    
262          // TODO!!
263          ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);
264    
265          // Record that we need a stack frame.
266          setFrameRequired();
267    
268          if (ArchConstants.SSE2_FULL) {
269            for(int i=0; i < 8; i++) {
270              fsaveLocation = allocateNewSpillLocation(DOUBLE_REG);
271            }
272          } else {
273            // Grab 108 bytes (same as 27 4-byte spills) in the stack
274            // frame, as a place to store the floating-point state with FSAVE
275            for (int i = 0; i < 27; i++) {
276              fsaveLocation = allocateNewSpillLocation(INT_REG);
277            }
278          }
279    
280          // Map each volatile register to a spill location.
281          int i = 0;
282          for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
283            e.nextElement();
284            // Note that as a side effect, the following call bumps up the
285            // frame size.
286            saveVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
287          }
288    
289          // Map each non-volatile register to a spill location.
290          i = 0;
291          for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements(); i++) {
292            e.nextElement();
293            // Note that as a side effect, the following call bumps up the
294            // frame size.
295            nonVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
296          }
297    
298          // Set the offset to find non-volatiles.
299          int gprOffset = getNonvolatileGPROffset(0);
300          ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
301    
302        } else {
303          // Count the number of nonvolatiles used.
304          int numGprNv = 0;
305          int i = 0;
306          for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements();) {
307            Register r = e.nextElement();
308            if (r.isTouched()) {
309              // Note that as a side effect, the following call bumps up the
310              // frame size.
311              nonVolatileGPRLocation[i++] = allocateNewSpillLocation(INT_REG);
312              numGprNv++;
313            }
314          }
315          // Update the OptCompiledMethod object.
316          ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);
317          if (numGprNv > 0) {
318            int gprOffset = getNonvolatileGPROffset(0);
319            ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
320            // record that we need a stack frame
321            setFrameRequired();
322          } else {
323            ir.compiledMethod.setUnsignedNonVolatileOffset(0);
324          }
325    
326          ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);
327    
328        }
329      }
330    
331      /**
332       * Clean up some junk that's left in the IR after register allocation,
333       * and add epilogue code.
334       */
335      public void cleanUpAndInsertEpilogue() {
336    
337        Instruction inst = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
338        for (; inst != null; inst = inst.nextInstructionInCodeOrder()) {
339          switch (inst.getOpcode()) {
340            case IA32_MOV_opcode:
341              // remove frivolous moves
342              Operand result = MIR_Move.getResult(inst);
343              Operand val = MIR_Move.getValue(inst);
344              if (result.similar(val)) {
345                inst = inst.remove();
346              }
347              break;
348            case IA32_FMOV_opcode:
349            case IA32_MOVSS_opcode:
350            case IA32_MOVSD_opcode:
351              // remove frivolous moves
352              result = MIR_Move.getResult(inst);
353              val = MIR_Move.getValue(inst);
354              if (result.similar(val)) {
355                inst = inst.remove();
356              }
357              break;
358            case IA32_RET_opcode:
359              if (frameIsRequired()) {
360                insertEpilogue(inst);
361              }
362            default:
363              break;
364          }
365        }
366        // now that the frame size is fixed, fix up the spill location code
367        rewriteStackLocations();
368      }
369    
370      /**
371       * Insert an explicit stack overflow check in the prologue <em>after</em>
372       * buying the stack frame.
373       * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
374       * mutate so that the trap instruction is in the GC map data structures.
375       *
376       * @param plg the prologue instruction
377       */
378      private void insertNormalStackOverflowCheck(Instruction plg) {
379        if (!ir.method.isInterruptible()) {
380          plg.remove();
381          return;
382        }
383    
384        if (ir.compiledMethod.isSaveVolatile()) {
385          return;
386        }
387    
388        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
389        Register ESP = phys.getESP();
390        MemoryOperand M =
391            MemoryOperand.BD(ir.regpool.makeTROp(),
392                                 Entrypoints.stackLimitField.getOffset(),
393                                 (byte) WORDSIZE,
394                                 null,
395                                 null);
396    
397        //    Trap if ESP <= active Thread Stack Limit
398        MIR_TrapIf.mutate(plg,
399                          IA32_TRAPIF,
400                          null,
401                          new RegisterOperand(ESP, TypeReference.Int),
402                          M,
403                          IA32ConditionOperand.LE(),
404                          TrapCodeOperand.StackOverflow());
405      }
406    
407      /**
408       * Insert an explicit stack overflow check in the prologue <em>before</em>
409       * buying the stack frame.
410       * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
411       * mutate so that the trap instruction is in the GC map data structures.
412       *
413       * @param plg the prologue instruction
414       */
415      private void insertBigFrameStackOverflowCheck(Instruction plg) {
416        if (!ir.method.isInterruptible()) {
417          plg.remove();
418          return;
419        }
420    
421        if (ir.compiledMethod.isSaveVolatile()) {
422          return;
423        }
424    
425        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
426        Register ESP = phys.getESP();
427        Register ECX = phys.getECX();
428    
429        //    ECX := active Thread Stack Limit
430        MemoryOperand M =
431            MemoryOperand.BD(ir.regpool.makeTROp(),
432                                 Entrypoints.stackLimitField.getOffset(),
433                                 (byte) WORDSIZE,
434                                 null,
435                                 null);
436        plg.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand((ECX), TypeReference.Int), M));
437    
438        //    ECX += frame Size
439        int frameSize = getFrameFixedSize();
440        plg.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ECX, TypeReference.Int), IC(frameSize)));
441        //    Trap if ESP <= ECX
442        MIR_TrapIf.mutate(plg,
443                          IA32_TRAPIF,
444                          null,
445                          new RegisterOperand(ESP, TypeReference.Int),
446                          new RegisterOperand(ECX, TypeReference.Int),
447                          IA32ConditionOperand.LE(),
448                          TrapCodeOperand.StackOverflow());
449      }
450    
451      /**
452       * Insert the prologue for a normal method.
453       *
454       * Assume we are inserting the prologue for method B called from method
455       * A.
456       *    <ul>
457       *    <li> Perform a stack overflow check.
458       *    <li> Store a back pointer to A's frame
459       *    <li> Store B's compiled method id
460       *    <li> Adjust frame pointer to point to B's frame
461       *    <li> Save any used non-volatile registers
462       *    </ul>
463       */
464      public void insertNormalPrologue() {
465        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
466        Register ESP = phys.getESP();
467        MemoryOperand fpHome =
468            MemoryOperand.BD(ir.regpool.makeTROp(),
469                                 ArchEntrypoints.framePointerField.getOffset(),
470                                 (byte) WORDSIZE,
471                                 null,
472                                 null);
473    
474        // the prologue instruction
475        Instruction plg = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
476        // inst is the instruction immediately after the IR_PROLOGUE
477        // instruction
478        Instruction inst = plg.nextInstructionInCodeOrder();
479    
480        int frameFixedSize = getFrameFixedSize();
481        ir.compiledMethod.setFrameFixedSize(frameFixedSize);
482    
483        // I. Buy a stackframe (including overflow check)
484        // NOTE: We play a little game here.  If the frame we are buying is
485        //       very small (less than 256) then we can be sloppy with the
486        //       stackoverflow check and actually allocate the frame in the guard
487        //       region.  We'll notice when this frame calls someone and take the
488        //       stackoverflow in the callee. We can't do this if the frame is too big,
489        //       because growing the stack in the callee and/or handling a hardware trap
490        //       in this frame will require most of the guard region to complete.
491        //       See libvm.C.
492        if (frameFixedSize >= 256) {
493          // 1. Insert Stack overflow check.
494          insertBigFrameStackOverflowCheck(plg);
495    
496          // 2. Save caller's frame pointer
497          inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));
498    
499          // 3. Set my frame pointer to current value of stackpointer
500          inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, TypeReference.Int)));
501    
502          // 4. Store my compiled method id
503          int cmid = ir.compiledMethod.getId();
504          inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, IC(cmid)));
505        } else {
506          // 1. Save caller's frame pointer
507          inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));
508    
509          // 2. Set my frame pointer to current value of stackpointer
510          inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, TypeReference.Int)));
511    
512          // 3. Store my compiled method id
513          int cmid = ir.compiledMethod.getId();
514          inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, IC(cmid)));
515    
516          // 4. Insert Stack overflow check.
517          insertNormalStackOverflowCheck(plg);
518        }
519    
520        // II. Save any used volatile and non-volatile registers
521        if (ir.compiledMethod.isSaveVolatile()) {
522          saveVolatiles(inst);
523          saveFloatingPointState(inst);
524        }
525        saveNonVolatiles(inst);
526      }
527    
528      /**
529       * Insert code into the prologue to save any used non-volatile
530       * registers.
531       *
532       * @param inst the first instruction after the prologue.
533       */
534      private void saveNonVolatiles(Instruction inst) {
535        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
536        int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();
537    
538        // Save each non-volatile GPR used by this method.
539        int n = nNonvolatileGPRS - 1;
540        for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
541          Register nv = e.nextElement();
542          int offset = getNonvolatileGPROffset(n);
543          Operand M = new StackLocationOperand(true, -offset, 4);
544          inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(nv, TypeReference.Int)));
545        }
546      }
547    
548      /**
549       * Insert code before a return instruction to restore the nonvolatile
550       * registers.
551       *
552       * @param inst the return instruction
553       */
554      private void restoreNonVolatiles(Instruction inst) {
555        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
556        int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();
557    
558        int n = nNonvolatileGPRS - 1;
559        for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
560          Register nv = e.nextElement();
561          int offset = getNonvolatileGPROffset(n);
562          Operand M = new StackLocationOperand(true, -offset, 4);
563          inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(nv, TypeReference.Int), M));
564        }
565      }
566    
567      /**
568       * Insert code into the prologue to save the floating point state.
569       *
570       * @param inst the first instruction after the prologue.
571       */
572      private void saveFloatingPointState(Instruction inst) {
573    
574        if (ArchConstants.SSE2_FULL) {
575          PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
576          for (int i=0; i < 8; i++) {
577            inst.insertBefore(MIR_Move.create(IA32_MOVQ,
578                new StackLocationOperand(true, -fsaveLocation + (i * 8), 8),
579                new RegisterOperand(phys.getFPR(i), TypeReference.Double)));
580          }
581        } else {
582          Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
583          inst.insertBefore(MIR_FSave.create(IA32_FNSAVE, M));
584        }
585      }
586    
587      /**
588       * Insert code into the epilogue to restore the floating point state.
589       *
590       * @param inst the return instruction after the epilogue.
591       */
592      private void restoreFloatingPointState(Instruction inst) {
593        if (ArchConstants.SSE2_FULL) {
594          PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
595          for (int i=0; i < 8; i++) {
596            inst.insertBefore(MIR_Move.create(IA32_MOVQ,
597                new RegisterOperand(phys.getFPR(i), TypeReference.Double),
598                new StackLocationOperand(true, -fsaveLocation + (i * 8), 8)));
599          }
600        } else {
601          Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
602          inst.insertBefore(MIR_FSave.create(IA32_FRSTOR, M));
603        }
604      }
605    
606      /**
607       * Insert code into the prologue to save all volatile
608       * registers.
609       *
610       * @param inst the first instruction after the prologue.
611       */
612      private void saveVolatiles(Instruction inst) {
613        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
614    
615        // Save each GPR.
616        int i = 0;
617        for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
618          Register r = e.nextElement();
619          int location = saveVolatileGPRLocation[i];
620          Operand M = new StackLocationOperand(true, -location, 4);
621          inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(r, TypeReference.Int)));
622        }
623      }
624    
625      /**
626       * Insert code before a return instruction to restore the volatile
627       * and volatile registers.
628       *
629       * @param inst the return instruction
630       */
631      private void restoreVolatileRegisters(Instruction inst) {
632        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
633    
634        // Restore every GPR
635        int i = 0;
636        for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
637          Register r = e.nextElement();
638          int location = saveVolatileGPRLocation[i];
639          Operand M = new StackLocationOperand(true, -location, 4);
640          inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(r, TypeReference.Int), M));
641        }
642      }
643    
644      /**
645       * Insert the epilogue before a particular return instruction.
646       *
647       * @param ret the return instruction.
648       */
649      private void insertEpilogue(Instruction ret) {
650        // 1. Restore any saved registers
651        if (ir.compiledMethod.isSaveVolatile()) {
652          restoreVolatileRegisters(ret);
653          restoreFloatingPointState(ret);
654        }
655        restoreNonVolatiles(ret);
656    
657        // 2. Restore caller's stackpointer and framepointer
658        int frameSize = getFrameFixedSize();
659        ret.insertBefore(MIR_UnaryNoRes.create(REQUIRE_ESP, IC(frameSize)));
660        MemoryOperand fpHome =
661            MemoryOperand.BD(ir.regpool.makeTROp(),
662                                 ArchEntrypoints.framePointerField.getOffset(),
663                                 (byte) WORDSIZE,
664                                 null,
665                                 null);
666        ret.insertBefore(MIR_Nullary.create(IA32_POP, fpHome));
667      }
668    
669      /**
670       * In instruction s, replace all appearances of a symbolic register
671       * operand with uses of the appropriate spill location, as cached by the
672       * register allocator.
673       *
674       * @param s the instruction to mutate.
675       * @param symb the symbolic register operand to replace
676       */
677      public void replaceOperandWithSpillLocation(Instruction s, RegisterOperand symb) {
678    
679        // Get the spill location previously assigned to the symbolic
680        // register.
681        int location = RegisterAllocatorState.getSpill(symb.getRegister());
682    
683        // Create a memory operand M representing the spill location.
684        int size;
685        if (ArchConstants.SSE2_FULL) {
686          size = symb.getType().getMemoryBytes();
687          if (size < 4)
688            size = 4;
689        } else {
690          int type = PhysicalRegisterSet.getPhysicalRegisterType(symb.getRegister());
691          size = PhysicalRegisterSet.getSpillSize(type);
692        }
693        StackLocationOperand M = new StackLocationOperand(true, -location, (byte) size);
694    
695        M = new StackLocationOperand(true, -location, (byte) size);
696    
697        // replace the register operand with the memory operand
698        s.replaceOperand(symb, M);
699      }
700    
701      /**
702       * Does a memory operand hold a symbolic register?
703       */
704      private boolean hasSymbolicRegister(MemoryOperand M) {
705        if (M.base != null && !M.base.getRegister().isPhysical()) return true;
706        if (M.index != null && !M.index.getRegister().isPhysical()) return true;
707        return false;
708      }
709    
710      /**
711       * Is s a MOVE instruction that can be generated without resorting to
712       * scratch registers?
713       */
714      private boolean isScratchFreeMove(Instruction s) {
715        if (s.operator() != IA32_MOV) return false;
716    
717        // if we don't allow ESP to float, we will always use scratch
718        // registers in these move instructions.
719        if (!FLOAT_ESP) return false;
720    
721        Operand result = MIR_Move.getResult(s);
722        Operand value = MIR_Move.getValue(s);
723    
724        // We need scratch registers for spilled registers that appear in
725        // memory operands.
726        if (result.isMemory()) {
727          MemoryOperand M = result.asMemory();
728          if (hasSymbolicRegister(M)) return false;
729          // We will perform this transformation by changing the MOV to a PUSH
730          // or POP.  Note that IA32 cannot PUSH/POP 8-bit quantities, so
731          // disable the transformation for that case.  Also, (TODO), our
732          // assembler does not emit the prefix to allow 16-bit push/pops, so
733          // disable these too.  What's left?  32-bit only.
734          if (M.size != 4) return false;
735        }
736        if (value.isMemory()) {
737          MemoryOperand M = value.asMemory();
738          if (hasSymbolicRegister(M)) return false;
739          // We will perform this transformation by changing the MOV to a PUSH
740          // or POP.  Note that IA32 cannot PUSH/POP 8-bit quantities, so
741          // disable the transformation for that case.  Also, (TODO), our
742          // assembler does not emit the prefix to allow 16-bit push/pops, so
743          // disable these too.  What's left?  32-bit only.
744          if (M.size != 4) return false;
745        }
746        // If we get here, all is kosher.
747        return true;
748      }
749    
750      /**
751       * Given symbolic register r in instruction s, do we need to ensure that
752       * r is in a scratch register is s (as opposed to a memory operand)
753       */
754      public boolean needScratch(Register r, Instruction s) {
755        // We never need a scratch register for a floating point value in an
756        // FMOV instruction.
757        if (r.isFloatingPoint() && s.operator == IA32_FMOV) return false;
758    
759        // never need a scratch register for a YIELDPOINT_OSR
760        if (s.operator == YIELDPOINT_OSR) return false;
761    
762        // Some MOVEs never need scratch registers
763        if (isScratchFreeMove(s)) return false;
764    
765        // If s already has a memory operand, it is illegal to introduce
766        // another.
767        if (s.hasMemoryOperand()) return true;
768    
769        // Check the architecture restrictions.
770        if (RegisterRestrictions.mustBeInRegister(r, s)) return true;
771    
772        // Otherwise, everything is OK.
773        return false;
774      }
775    
776      /**
777       * Before instruction s, insert code to adjust ESP so that it lies at a
778       * particular offset from its usual location.
779       */
780      private void moveESPBefore(Instruction s, int desiredOffset) {
781        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
782        Register ESP = phys.getESP();
783        int delta = desiredOffset - ESPOffset;
784        if (delta != 0) {
785          if (canModifyEFLAGS(s)) {
786            s.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ESP, TypeReference.Int), IC(delta)));
787          } else {
788            MemoryOperand M =
789                MemoryOperand.BD(new RegisterOperand(ESP, TypeReference.Int),
790                                     Offset.fromIntSignExtend(delta),
791                                     (byte) 4,
792                                     null,
793                                     null);
794            s.insertBefore(MIR_Lea.create(IA32_LEA, new RegisterOperand(ESP, TypeReference.Int), M));
795          }
796          ESPOffset = desiredOffset;
797        }
798      }
799    
800      private boolean canModifyEFLAGS(Instruction s) {
801        if (PhysicalDefUse.usesEFLAGS(s.operator())) {
802          return false;
803        }
804        if (PhysicalDefUse.definesEFLAGS(s.operator())) {
805          return true;
806        }
807        if (s.operator == BBEND) return true;
808        return canModifyEFLAGS(s.nextInstructionInCodeOrder());
809      }
810    
811      /**
812       * Attempt to rewrite a move instruction to a NOP.
813       *
814       * @return true iff the transformation applies
815       */
816      private boolean mutateMoveToNop(Instruction s) {
817        Operand result = MIR_Move.getResult(s);
818        Operand val = MIR_Move.getValue(s);
819        if (result.isStackLocation() && val.isStackLocation()) {
820          if (result.similar(val)) {
821            Empty.mutate(s, NOP);
822            return true;
823          }
824        }
825        return false;
826      }
827    
828      /**
829       * Rewrite a move instruction if it has 2 memory operands.
830       * One of the 2 memory operands must be a stack location operand.  Move
831       * the SP to the appropriate location and use a push or pop instruction.
832       */
833      private void rewriteMoveInstruction(Instruction s) {
834        // first attempt to mutate the move into a noop
835        if (mutateMoveToNop(s)) return;
836    
837        Operand result = MIR_Move.getResult(s);
838        Operand val = MIR_Move.getValue(s);
839        if (result instanceof StackLocationOperand) {
840          if (val instanceof MemoryOperand || val instanceof StackLocationOperand) {
841            int offset = ((StackLocationOperand) result).getOffset();
842            byte size = ((StackLocationOperand) result).getSize();
843            offset = FPOffset2SPOffset(offset) + size;
844            moveESPBefore(s, offset);
845            MIR_UnaryNoRes.mutate(s, IA32_PUSH, val);
846          }
847        } else {
848          if (result instanceof MemoryOperand) {
849            if (val instanceof StackLocationOperand) {
850              int offset = ((StackLocationOperand) val).getOffset();
851              offset = FPOffset2SPOffset(offset);
852              moveESPBefore(s, offset);
853              MIR_Nullary.mutate(s, IA32_POP, result);
854            }
855          }
856        }
857      }
858    
  /**
   * Walk through the IR.  For each StackLocationOperand, replace the
   * operand with the appropriate MemoryOperand.
   *
   * <p>The walk keeps ESPOffset up to date as it goes, since the
   * displacement of every rewritten operand is taken relative to the
   * current ESP position.  Along the way it inserts code (via
   * moveESPBefore) that brings ESP back to offset 0 before branches,
   * yieldpoints and the end of basic blocks, and to the requested offset
   * before REQUIRE_ESP.  MOVs are handed to rewriteMoveInstruction, which
   * may mutate them into PUSH, POP or NOP.
   */
  private void rewriteStackLocations() {
    // ESP is initially 4 bytes above where the framepointer is going to be.
    ESPOffset = getFrameFixedSize() + 4;
    Register ESP = ir.regpool.getPhysicalRegisterSet().getESP();

    // Set when a return is seen; consumed by the next BBEND.
    boolean seenReturn = false;
    for (InstructionEnumeration e = ir.forwardInstrEnumerator(); e.hasMoreElements();) {
      Instruction s = e.next();

      if (s.isReturn()) {
        // Operands of the return itself are not rewritten here.
        seenReturn = true;
        continue;
      }

      if (s.isBranch()) {
        // restore ESP to home location at end of basic block.
        moveESPBefore(s, 0);
        continue;
      }

      if (s.operator() == BBEND) {
        if (seenReturn) {
          // at a return ESP will be at FrameFixedSize,
          // so no adjustment code is inserted; only the tracked offset is
          // reset for the next basic block.
          seenReturn = false;
          ESPOffset = 0;
        } else {
          moveESPBefore(s, 0);
        }
        continue;
      }

      if (s.operator() == ADVISE_ESP) {
        // Only record where ESP is; no code is inserted.
        ESPOffset = MIR_UnaryNoRes.getVal(s).asIntConstant().value;
        continue;
      }

      if (s.operator() == REQUIRE_ESP) {
        // ESP is required to be at the given offset from the bottom of the frame
        moveESPBefore(s, MIR_UnaryNoRes.getVal(s).asIntConstant().value);
        continue;
      }

      if (s.operator() == YIELDPOINT_PROLOGUE ||
          s.operator() == YIELDPOINT_BACKEDGE ||
          s.operator() == YIELDPOINT_EPILOGUE) {
        // ESP is brought back to offset 0 before every yieldpoint.
        moveESPBefore(s, 0);
        continue;
      }

      if (s.operator() == IA32_MOV) {
        // May mutate s in place into a PUSH, POP or NOP; the operator
        // tests below therefore re-read s.operator().
        rewriteMoveInstruction(s);
      }

      // pop computes the effective address of its operand after ESP
      // is incremented.  Therefore update ESPOffset before rewriting
      // stacklocation and memory operands.
      if (s.operator() == IA32_POP) {
        ESPOffset += 4;
      }

      for (OperandEnumeration ops = s.getOperands(); ops.hasMoreElements();) {
        Operand op = ops.next();
        if (op instanceof StackLocationOperand) {
          StackLocationOperand sop = (StackLocationOperand) op;
          int offset = sop.getOffset();
          // Only offsets measured from the top of the frame are converted;
          // others are used as they are.
          if (sop.isFromTop()) {
            offset = FPOffset2SPOffset(offset);
          }
          // Make the offset relative to where ESP currently is.
          offset -= ESPOffset;
          byte size = sop.getSize();
          MemoryOperand M =
              MemoryOperand.BD(new RegisterOperand(ESP, TypeReference.Int),
                                   Offset.fromIntSignExtend(offset),
                                   size,
                                   null,
                                   null);
          s.replaceOperand(op, M);
        } else if (op instanceof MemoryOperand) {
          MemoryOperand M = op.asMemory();
          // An existing memory operand that addresses through ESP gets its
          // displacement corrected (in place) for the current ESP position.
          if ((M.base != null && M.base.getRegister() == ESP) || (M.index != null && M.index.getRegister() == ESP)) {
            M.disp = M.disp.minus(ESPOffset);
          }
        }
      }

      // push computes the effective address of its operand after ESP
      // is decremented.  Therefore update ESPOffset after rewriting
      // stacklocation and memory operands.
      if (s.operator() == IA32_PUSH) {
        ESPOffset -= 4;
      }
    }
  }
956    
957      /**
958       * @param fpOffset offset in bytes from the top of the stack frame
959       * @return offset in bytes from the stack pointer.
960       *
961       * PRECONDITION: The final frameSize is calculated before calling this
962       * routine.
963       */
964      private int FPOffset2SPOffset(int fpOffset) {
965        // Note that SP = FP - frameSize + WORDSIZE;
966        // So, FP + fpOffset = SP + frameSize - WORDSIZE
967        // + fpOffset
968        return frameSize + fpOffset - WORDSIZE;
969      }
970    
  /**
   * Walk over the currently available scratch registers.
   *
   * <p>For any scratch register r which is def'ed by instruction s,
   * spill r before s and remove r from the pool of available scratch
   * registers.
   *
   * <p>For any scratch register r which is used by instruction s,
   * restore r before s and remove r from the pool of available scratch
   * registers.
   *
   * <p>For any scratch register r which has current contents symb, and
   * symb is spilled to location M, and s defs M: the old value of symb is
   * dead.  Mark this.
   *
   * <p>Invalidate any scratch register assignments that are illegal in s.
   *
   * <p>Scratch registers whose currentContents is null are skipped.
   *
   * @param s the instruction before which scratch registers are unloaded
   *          and/or reloaded
   */
  public void restoreScratchRegistersBefore(Instruction s) {
    // An explicit Iterator is used so that entries can be removed from
    // scratchInUse while walking it.
    for (Iterator<ScratchRegister> i = scratchInUse.iterator(); i.hasNext();) {
      ScratchRegister scratch = i.next();

      // Nothing to save or restore for a scratch register with no contents.
      if (scratch.currentContents == null) continue;
      if (VERBOSE_DEBUG) {
        System.out.println("RESTORE: consider " + scratch);
      }
      // removed:  this entry has already been taken out of scratchInUse.
      // unloaded: unloadScratchRegisterBefore has already been called for
      //           this entry, so the second phase must not repeat it.
      boolean removed = false;
      boolean unloaded = false;
      // Phase 1: s overwrites the scratch register -- either explicitly, or
      // because s is a call (other than CALL_SAVE_VOLATILE) and the register
      // is volatile, or because s is FNINIT/FCLEAR and the register is
      // floating point.
      if (definedIn(scratch.scratch, s) ||
          (s.isCall() && s.operator != CALL_SAVE_VOLATILE && scratch.scratch.isVolatile()) ||
          (s.operator == IA32_FNINIT && scratch.scratch.isFloatingPoint()) ||
          (s.operator == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
        // s defines the scratch register, so save its contents before they
        // are killed.
        if (VERBOSE_DEBUG) {
          System.out.println("RESTORE : unload because defined " + scratch);
        }
        unloadScratchRegisterBefore(s, scratch);

        // update mapping information
        if (VERBOSE_DEBUG) {
          System.out.println("RSRB: End scratch interval " + scratch.scratch + " " + s);
        }
        scratchMap.endScratchInterval(scratch.scratch, s);
        // Re-read currentContents after the unload before ending the
        // symbolic interval.
        Register scratchContents = scratch.currentContents;
        if (scratchContents != null) {
          if (VERBOSE_DEBUG) {
            System.out.println("RSRB: End symbolic interval " + scratch.currentContents + " " + s);
          }
          scratchMap.endSymbolicInterval(scratch.currentContents, s);
        }

        i.remove();
        removed = true;
        unloaded = true;
      }

      // Phase 2: s reads the scratch register, the current assignment is
      // not legal in s, or s is FCLEAR and the register is floating point.
      // Note that the FCLEAR case also satisfies phase 1, so such a
      // register is both unloaded (above) and reloaded (below).
      if (usedIn(scratch.scratch, s) ||
          !isLegal(scratch.currentContents, scratch.scratch, s) ||
          (s.operator == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
        // first spill the currents contents of the scratch register to
        // memory
        if (!unloaded) {
          if (VERBOSE_DEBUG) {
            System.out.println("RESTORE : unload because used " + scratch);
          }
          unloadScratchRegisterBefore(s, scratch);

          // update mapping information
          if (VERBOSE_DEBUG) {
            System.out.println("RSRB2: End scratch interval " + scratch.scratch + " " + s);
          }
          scratchMap.endScratchInterval(scratch.scratch, s);
          Register scratchContents = scratch.currentContents;
          if (scratchContents != null) {
            if (VERBOSE_DEBUG) {
              System.out.println("RSRB2: End symbolic interval " + scratch.currentContents + " " + s);
            }
            scratchMap.endSymbolicInterval(scratch.currentContents, s);
          }

        }
        // s or some future instruction uses the scratch register,
        // so restore the correct contents.
        if (VERBOSE_DEBUG) {
          System.out.println("RESTORE : reload because used " + scratch);
        }
        reloadScratchRegisterBefore(s, scratch);

        // Iterator.remove() may be called only once per element, hence the
        // guard on the flag set in phase 1.
        if (!removed) {
          i.remove();
          // NOTE(review): this assignment is not read again in this
          // iteration.
          removed = true;
        }
      }
    }
  }
1066    
1067      /**
1068       * Initialize some architecture-specific state needed for register
1069       * allocation.
1070       */
1071      public void initForArch(IR ir) {
1072        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
1073    
1074        // We reserve the last (bottom) slot in the FPR stack as a scratch register.
1075        // This allows us to do one push/pop sequence in order to use the
1076        // top of the stack as a scratch location
1077        phys.getFPR(7).reserveRegister();
1078      }
1079    
1080      /**
1081       * Is a particular instruction a system call?
1082       */
1083      public boolean isSysCall(Instruction s) {
1084        return s.operator == IA32_SYSCALL;
1085      }
1086    }