001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.mir2mc.ia32;
014    
015    import org.jikesrvm.VM;
016    import org.jikesrvm.classloader.RVMMethod;
017    import org.jikesrvm.compilers.opt.ir.BBend;
018    import org.jikesrvm.compilers.opt.ir.Label;
019    import org.jikesrvm.compilers.opt.ir.MIR_BinaryAcc;
020    import org.jikesrvm.compilers.opt.ir.MIR_Branch;
021    import org.jikesrvm.compilers.opt.ir.MIR_Call;
022    import org.jikesrvm.compilers.opt.ir.MIR_Compare;
023    import org.jikesrvm.compilers.opt.ir.MIR_CondBranch;
024    import org.jikesrvm.compilers.opt.ir.MIR_CondBranch2;
025    import org.jikesrvm.compilers.opt.ir.MIR_Empty;
026    import org.jikesrvm.compilers.opt.ir.MIR_Lea;
027    import org.jikesrvm.compilers.opt.ir.MIR_Move;
028    import org.jikesrvm.compilers.opt.ir.MIR_Nullary;
029    import org.jikesrvm.compilers.opt.ir.MIR_Set;
030    import org.jikesrvm.compilers.opt.ir.MIR_Test;
031    import org.jikesrvm.compilers.opt.ir.MIR_Trap;
032    import org.jikesrvm.compilers.opt.ir.MIR_TrapIf;
033    import org.jikesrvm.compilers.opt.ir.MIR_Unary;
034    import org.jikesrvm.compilers.opt.ir.MIR_UnaryNoRes;
035    import org.jikesrvm.compilers.opt.ir.MIR_XChng;
036    import org.jikesrvm.compilers.opt.ir.NullCheck;
037    import org.jikesrvm.compilers.opt.ir.BasicBlock;
038    import org.jikesrvm.compilers.opt.ir.IR;
039    import org.jikesrvm.compilers.opt.ir.IRTools;
040    import org.jikesrvm.compilers.opt.ir.Instruction;
041    import org.jikesrvm.compilers.opt.ir.OperandEnumeration;
042    
043    import static org.jikesrvm.compilers.opt.ir.Operators.ADVISE_ESP_opcode;
044    import static org.jikesrvm.compilers.opt.ir.Operators.CALL_SAVE_VOLATILE;
045    import static org.jikesrvm.compilers.opt.ir.Operators.CALL_SAVE_VOLATILE_opcode;
046    import static org.jikesrvm.compilers.opt.ir.Operators.DUMMY_DEF_opcode;
047    import static org.jikesrvm.compilers.opt.ir.Operators.DUMMY_USE_opcode;
048    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_ADD;
049    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_CALL;
050    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_CMP;
051    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_CMPXCHG;
052    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_CMPXCHG8B;
053    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FCLEAR_opcode;
054    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FFREE;
055    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FLD;
056    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FMOV_ENDING_LIVE_RANGE_opcode;
057    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FMOV_opcode;
058    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FST;
059    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FSTP;
060    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_FXCH;
061    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_INT;
062    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_JCC;
063    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_JCC2_opcode;
064    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_JMP;
065    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_LEA_opcode;
066    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_LOCK;
067    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_LOCK_CMPXCHG8B_opcode;
068    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_LOCK_CMPXCHG_opcode;
069    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOV;
070    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOV_opcode;
071    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_MOVZX__B;
072    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_SET__B_opcode;
073    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_SHL;
074    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_TEST_opcode;
075    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_TRAPIF;
076    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_TRAPIF_opcode;
077    import static org.jikesrvm.compilers.opt.ir.Operators.IA32_XOR;
078    import static org.jikesrvm.compilers.opt.ir.Operators.NULL_CHECK_opcode;
079    import static org.jikesrvm.compilers.opt.ir.Operators.REQUIRE_ESP_opcode;
080    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_BACKEDGE_opcode;
081    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_EPILOGUE_opcode;
082    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_OSR_opcode;
083    import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_PROLOGUE_opcode;
084    import org.jikesrvm.compilers.opt.ir.Register;
085    import org.jikesrvm.compilers.opt.ir.ia32.PhysicalDefUse;
086    import org.jikesrvm.compilers.opt.ir.ia32.PhysicalRegisterSet;
087    import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
088    import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
089    import org.jikesrvm.compilers.opt.ir.operand.LocationOperand;
090    import org.jikesrvm.compilers.opt.ir.operand.MemoryOperand;
091    import org.jikesrvm.compilers.opt.ir.operand.MethodOperand;
092    import org.jikesrvm.compilers.opt.ir.operand.Operand;
093    import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
094    import org.jikesrvm.compilers.opt.ir.operand.TrapCodeOperand;
095    import org.jikesrvm.compilers.opt.ir.operand.ia32.IA32ConditionOperand;
096    import org.jikesrvm.runtime.ArchEntrypoints;
097    import org.jikesrvm.runtime.Entrypoints;
098    import org.jikesrvm.runtime.Magic;
099    import org.vmmagic.unboxed.Offset;
100    
101    /**
102     * Final acts of MIR expansion for the IA32 architecture.
103     * Things that are expanded here (immediately before final assembly)
104     * should only be those sequences that cannot be expanded earlier
105     * due to difficulty in keeping optimizations from interfering with them.
106     *
107     * One job of this phase is to handle the expansion of the remains of
108     * table switch.  The code looks like a mess (which it is), but there
109     * is little choice for relocatable IA32 code that does this.  And the
110     * details of this code are shared with the baseline compiler and
111     * dependent in detail on the Assembler (see {@link
112     * org.jikesrvm.compilers.common.assembler.ia32.Assembler#emitOFFSET_Imm_ImmOrLabel}).  If you want to mess with
113     * it, you will probably need to mess with them as well.
114     */
115    public class FinalMIRExpansion extends IRTools {
116    
117      /**
          * Runs the final, IA32-specific rewriting pass over the MIR.
          * <p>
          * Every instruction is visited once in code order. Its machine code offset
          * is reset to -1 and its scratch object is cleared; the opcode then selects
          * one of the rewrites in the switch below. Opcodes without a case are left
          * unchanged.
          *
118       * @param ir the IR to expand
119       * @return always 0; the value carries no meaning on IA32
120       */
121      public static int expand(IR ir) {
122        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
123    
124        for (Instruction next, p = ir.firstInstructionInCodeOrder(); p != null; p = next) {
             // Capture the successor before the switch: several cases remove,
             // relocate or mutate p, and one (IA32_SET__B) re-reads next itself.
125          next = p.nextInstructionInCodeOrder();
126          p.setmcOffset(-1);
127          p.scratchObject = null;
128    
129          switch (p.getOpcode()) {
130            case IA32_TEST_opcode:
131              // don't bother telling rest of compiler that memory operand
132              // must be first; we can just commute it here.
133              if (MIR_Test.getVal2(p).isMemory()) {
134                Operand tmp = MIR_Test.getClearVal1(p);
135                MIR_Test.setVal1(p, MIR_Test.getClearVal2(p));
136                MIR_Test.setVal2(p, tmp);
137              }
138              break;
139    
140            case NULL_CHECK_opcode: {
141              // mutate this into a TRAPIF (trap if ref == 0, trap code NullPtr),
142              // and then fall through to the IA32_TRAPIF case.
143              Operand ref = NullCheck.getRef(p);
144              MIR_TrapIf.mutate(p,
145                                IA32_TRAPIF,
146                                null,
147                                ref.copy(),
148                                IC(0),
149                                IA32ConditionOperand.EQ(),
150                                TrapCodeOperand.NullPtr());
151            }
152            // There is no break statement here on purpose!
153            case IA32_TRAPIF_opcode: {
154              // split the basic block right before the IA32_TRAPIF
155              BasicBlock thisBlock = p.getBasicBlock();
156              BasicBlock trap = thisBlock.createSubBlock(p.bcIndex, ir, 0f);
                 // NOTE(review): the out-edge to trap is inserted both before and
                 // after the split; presumably the split hands thisBlock's existing
                 // out-edges to nextBlock, so both blocks end up linked to trap.
                 // Confirm against BasicBlock.splitNodeWithLinksAt.
157              thisBlock.insertOut(trap);
158              BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(p, ir);
159              thisBlock.insertOut(trap);
160              TrapCodeOperand tc = MIR_TrapIf.getClearTrapCode(p);
161              p.remove();
                 // the first instruction of the new block gets the same offset
                 // reset that the loop applies to every visited instruction
162              nextBlock.firstInstruction().setmcOffset(-1);
163              // add code to thisBlock to conditionally jump to trap
                 // (p is unlinked at this point, but its operands are still read
                 // from it until it is mutated into the trap instruction below)
164              Instruction cmp = MIR_Compare.create(IA32_CMP, MIR_TrapIf.getVal1(p), MIR_TrapIf.getVal2(p));
165              if (p.isMarkedAsPEI()) {
166                // The trap if was explicitly marked, which means that it has
167                // a memory operand into which we've folded a null check.
168                // Actually need a GC map for both the compare and the INT.
169                cmp.markAsPEI();
170                cmp.copyPosition(p);
171                ir.MIRInfo.gcIRMap.insertTwin(p, cmp);
172              }
173              thisBlock.appendInstruction(cmp);
174              thisBlock.appendInstruction(MIR_CondBranch.create(IA32_JCC,
175                                                                MIR_TrapIf.getCond(p),
176                                                                trap.makeJumpTarget(),
177                                                                null));
178    
179              // add block at end to hold trap instruction, and
180              // insert trap sequence
181              ir.cfg.addLastInCodeOrder(trap);
182              if (tc.isArrayBounds()) {
183                // attempt to store index expression in processor object for
184                // C trap handler
185                Operand index = MIR_TrapIf.getVal2(p);
186                if (!(index instanceof RegisterOperand || index instanceof IntConstantOperand)) {
187                  index = IC(0xdeadbeef); // index was spilled, and
188                  // we can't get it back here.
189                }
190                MemoryOperand mo =
191                    MemoryOperand.BD(ir.regpool.makeTROp(),
192                                         ArchEntrypoints.arrayIndexTrapParamField.getOffset(),
193                                         (byte) 4,
194                                         null,
195                                         null);
196                trap.appendInstruction(MIR_Move.create(IA32_MOV, mo, index.copy()));
197              }
198              // NOTE: must make p the trap instruction: it is the GC point!
199              // IMPORTANT: must also inform the GCMap that the instruction has
200              // been moved!!!
201              trap.appendInstruction(MIR_Trap.mutate(p, IA32_INT, null, tc));
202              ir.MIRInfo.gcIRMap.moveToEnd(p);
203    
204              if (tc.isStackOverflow()) {
205                // only stackoverflow traps resume at next instruction.
206                trap.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));
207              }
208            }
209            break;
210    
211            case IA32_FMOV_ENDING_LIVE_RANGE_opcode: {
                 // A register-to-register move that involves FPR index 0 becomes a
                 // single FXCH; every other form goes through expandFmov.
212              Operand result = MIR_Move.getResult(p);
213              Operand value = MIR_Move.getValue(p);
214              if (result.isRegister() && value.isRegister()) {
215                if (result.similar(value)) {
216                  // eliminate useless move
217                  p.remove();
218                } else {
219                  int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister());
220                  int j = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister());
221                  if (i == 0) {
222                    MIR_XChng.mutate(p, IA32_FXCH, result, value);
223                  } else if (j == 0) {
224                    MIR_XChng.mutate(p, IA32_FXCH, value, result);
225                  } else {
226                    expandFmov(p, phys);
227                  }
228                }
229              } else {
230                expandFmov(p, phys);
231              }
232              break;
233            }
234    
                 // These opcodes are simply deleted here; no code is generated for them.
235            case DUMMY_DEF_opcode:
236            case DUMMY_USE_opcode:
237            case REQUIRE_ESP_opcode:
238            case ADVISE_ESP_opcode:
239              p.remove();
240              break;
241    
242            case IA32_FMOV_opcode:
243              expandFmov(p, phys);
244              break;
245    
246            case IA32_MOV_opcode:
247              // Replace result = IA32_MOV 0 with result = IA32_XOR result, result
248              if (MIR_Move.getResult(p).isRegister() &&
249                  MIR_Move.getValue(p).isIntConstant() &&
250                  MIR_Move.getValue(p).asIntConstant().value == 0) {
251                // Calculate what flags are defined in coming instructions before a use of a flag or BBend
252                Instruction x = next;
253                int futureDefs = 0;
254                while(!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator)) {
255                  futureDefs |= x.operator.implicitDefs;
256                  x = x.nextInstructionInCodeOrder();
257                }
258                // If the flags will be destroyed prior to use or we reached the end of the basic block
                   // (the scan stopped either at BBend or at the first reader of EFLAGS;
                   // in the latter case all six flags must already have been redefined)
259                if (BBend.conforms(x) ||
260                    (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
261                  Operand result = MIR_Move.getClearResult(p);
262                  MIR_BinaryAcc.mutate(p, IA32_XOR, result, result.copy());
263                }
264              }
265              break;
266    
267            case IA32_SET__B_opcode:
268              // Replace <cmp>, set__b, movzx__b with xor, <cmp>, set__b
                 // Only applies when the movzx__b zero-extends the set__b result
                 // register into itself.
269              if (MIR_Set.getResult(p).isRegister() &&
270                  MIR_Unary.conforms(next) &&
271                  (next.operator() == IA32_MOVZX__B) &&
272                  MIR_Unary.getResult(next).isRegister() &&
273                  MIR_Unary.getVal(next).similar(MIR_Unary.getResult(next)) &&
274                  MIR_Unary.getVal(next).similar(MIR_Set.getResult(p))) {
275                // Find instruction in this basic block that defines flags
                   // (walk backwards from the set__b until the block's Label)
276                Instruction x = p.prevInstructionInCodeOrder();
277                Operand result = MIR_Unary.getResult(next);
278                boolean foundCmp = false;
279                outer:
280                while(!Label.conforms(x)) {
281                  OperandEnumeration e = x.getUses();
282                  while(e.hasMoreElements()) {
283                    // We can't use an xor to clear the register if that register is
284                    // used by the <cmp> or intervening instruction
285                    if (e.next().similar(result)) {
286                      break outer;
287                    }
288                  }
289                  if (PhysicalDefUse.definesEFLAGS(x.operator) &&
290                      !PhysicalDefUse.usesEFLAGS(x.operator)) {
291                    // we found a <cmp> that doesn't use the result or the flags
292                    // that would be clobbered by the xor
293                    foundCmp = true;
294                    break outer;
295                  }
296                  x = x.prevInstructionInCodeOrder();
297                }
298                if (foundCmp) {
299                  // We found the <cmp>, mutate the movzx__b into an xor and insert it before the <cmp>
300                  next.remove();
301                  MIR_BinaryAcc.mutate(next, IA32_XOR, result, MIR_Unary.getVal(next));
302                  x.insertBefore(next);
303                  // get ready for the next instruction
                     // (the old successor now sits before the <cmp>, so re-read it)
304                  next = p.nextInstructionInCodeOrder();
305                }
306              }
307              break;
308    
309            case IA32_LEA_opcode: {
310              // Sometimes we're over eager in BURS in using LEAs and after register
311              // allocation we can simplify to the accumulate form
312              // replace reg1 = LEA [reg1 + reg2] with reg1 = reg1 + reg2
313              // replace reg1 = LEA [reg1 + c1] with reg1 = reg1 + c1
314              // replace reg1 = LEA [reg1 << c1] with reg1 = reg1 << c1
                 // The rewrite is only attempted when the result register is also
                 // the base or the index of the address.
315              MemoryOperand value = MIR_Lea.getValue(p);
316              RegisterOperand result = MIR_Lea.getResult(p);
317              if ((value.base != null && value.base.getRegister() == result.getRegister()) ||
318                  (value.index != null && value.index.getRegister() == result.getRegister())) {
319                // Calculate what flags are defined in coming instructions before a use of a flag or BBend
320                Instruction x = next;
321                int futureDefs = 0;
322                while(!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator)) {
323                  futureDefs |= x.operator.implicitDefs;
324                  x = x.nextInstructionInCodeOrder();
325                }
326                // If the flags will be destroyed prior to use or we reached the end of the basic block
327                if (BBend.conforms(x) ||
328                    (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
                     // Address shapes that match none of the branches below are
                     // left as an LEA.
329                  if (value.base != null &&
330                      value.index != null && value.index.getRegister() == result.getRegister() &&
331                      value.disp.isZero() &&
332                      value.scale == 0) {
333                    // reg1 = lea [base + reg1] -> add reg1, base
334                    MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.base);
335                  } else if (value.base != null && value.base.getRegister() == result.getRegister() &&
336                             value.index != null &&
337                             value.disp.isZero() &&
338                             value.scale == 0) {
339                    // reg1 = lea [reg1 + index] -> add reg1, index
340                    MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
341                  } else if (value.base != null && value.base.getRegister() == result.getRegister() &&
342                             value.index == null) {
343                    // reg1 = lea [reg1 + disp] -> add reg1, disp
344                    MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
345                  } else if (value.base == null &&
346                             value.index != null && value.index.getRegister() == result.getRegister() &&
347                             value.scale == 0) {
348                    // reg1 = lea [(reg1 << 0) + disp] -> add reg1, disp   (no base register)
349                    MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
350                  } else if (value.base == null &&
351                             value.index != null && value.index.getRegister() == result.getRegister() &&
352                             value.disp.isZero()) {
353                    // reg1 = lea [reg1 << scale] -> shl reg1, scale
                       // NOTE(review): the scale == 0 arm below cannot be reached; the
                       // preceding else-if already matches base == null, index == reg1,
                       // scale == 0 for any displacement.
354                    if (value.scale == 0) {
355                      p.remove();
356                    } else if (value.scale == 1) {
                         // reg1 << 1 is emitted as add reg1, reg1 (index is reg1 here)
357                      MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
358                    } else {
359                      MIR_BinaryAcc.mutate(p, IA32_SHL, result, IC(value.scale));
360                    }
361                  }
362                }
363              }
364            }
365            break;
366    
367            case IA32_FCLEAR_opcode:
368              expandFClear(p, ir);
369              break;
370    
371            case IA32_JCC2_opcode:
                 // Split the two-way conditional branch into two IA32_JCCs: a new
                 // one for (cond1, target1) placed before p, and p itself mutated to
                 // carry (cond2, target2).
372              p.insertBefore(MIR_CondBranch.create(IA32_JCC,
373                                                   MIR_CondBranch2.getCond1(p),
374                                                   MIR_CondBranch2.getTarget1(p),
375                                                   MIR_CondBranch2.getBranchProfile1(p)));
376              MIR_CondBranch.mutate(p,
377                                    IA32_JCC,
378                                    MIR_CondBranch2.getCond2(p),
379                                    MIR_CondBranch2.getTarget2(p),
380                                    MIR_CondBranch2.getBranchProfile2(p));
381              break;
382    
383            case CALL_SAVE_VOLATILE_opcode:
                 // Only the operator changes; the operands are kept as they are.
                 // NOTE(review): presumably this also catches the calls created by
                 // the yieldpoint expansions, whose blocks are appended at the end
                 // of the code order - confirm.
384              p.operator = IA32_CALL;
385              break;
386    
387            case IA32_LOCK_CMPXCHG_opcode:
                 // Emit the LOCK as its own instruction in front of a plain CMPXCHG.
388              p.insertBefore(MIR_Empty.create(IA32_LOCK));
389              p.operator = IA32_CMPXCHG;
390              break;
391    
392            case IA32_LOCK_CMPXCHG8B_opcode:
                 // Same treatment as IA32_LOCK_CMPXCHG, for the 8-byte variant.
393              p.insertBefore(MIR_Empty.create(IA32_LOCK));
394              p.operator = IA32_CMPXCHG8B;
395              break;
396    
                 // Conditional yieldpoints: the condition passed in is the one under
                 // which the takeYieldpoint field, compared against 0, leads to the
                 // thread switch call (NE for prologue/epilogue, GT for backedges).
397            case YIELDPOINT_PROLOGUE_opcode:
398              expandYieldpoint(p, ir, Entrypoints.optThreadSwitchFromPrologueMethod, IA32ConditionOperand.NE());
399              break;
400    
401            case YIELDPOINT_EPILOGUE_opcode:
402              expandYieldpoint(p, ir, Entrypoints.optThreadSwitchFromEpilogueMethod, IA32ConditionOperand.NE());
403              break;
404    
405            case YIELDPOINT_BACKEDGE_opcode:
406              expandYieldpoint(p, ir, Entrypoints.optThreadSwitchFromBackedgeMethod, IA32ConditionOperand.GT());
407              break;
408    
409            case YIELDPOINT_OSR_opcode:
410              // must yield, does not check threadSwitch request
411              expandUnconditionalYieldpoint(p, ir, Entrypoints.optThreadSwitchFromOsrOptMethod);
412              break;
413    
414          }
415        }
416        return 0;
417      }
418    
419      /**
420       * Expands an FCLEAR pseudo-instruction using FFREEs.
          * <p>
          * The integer constant operand of the FCLEAR gives the first FPR index to
          * free. One FFREE is inserted before the FCLEAR for each index from there
          * up to (but excluding) {@code ir.MIRInfo.fpStackHeight}, and the FCLEAR
          * itself is then removed. If the operand is not below the stack height,
          * the FCLEAR is removed without emitting anything.
421       *
422       * @param s the instruction to expand
423       * @param ir the containing IR
424       */
425      private static void expandFClear(Instruction s, IR ir) {
           // nSave: index of the first FPR to free (FPRs below it are left alone)
426        int nSave = MIR_UnaryNoRes.getVal(s).asIntConstant().value;
427        int fpStackHeight = ir.MIRInfo.fpStackHeight;
428        PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
429    
430        for (int i = nSave; i < fpStackHeight; i++) {
431          Register f = phys.getFPR(i);
432          s.insertBefore(MIR_Nullary.create(IA32_FFREE, D(f)));
433        }
434    
435        // Remove the FCLEAR.
436        s.remove();
437      }
438    
439      /**
440       * Expands an FMOV pseudo-instruction.
          * <p>
          * The expansion depends on whether the result and the value are registers
          * or memory operands. In the comments below, "FMOV A, B" means
          * result = A, value = B. A register move onto itself is deleted; every
          * other form becomes either a single FST or an FLD/FSTP pair.
441       *
442       * @param s the instruction to expand
443       * @param phys controlling physical register set
444       */
445      private static void expandFmov(Instruction s, PhysicalRegisterSet phys) {
446        Operand result = MIR_Move.getResult(s);
447        Operand value = MIR_Move.getValue(s);
448    
449        if (result.isRegister() && value.isRegister()) {
450          if (result.similar(value)) {
451            // eliminate useless move
452            s.remove();
453          } else {
             // i: FPR index of the destination, j: FPR index of the source
454            int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister());
455            int j = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister());
456            if (j == 0) {
457              // We have FMOV Fi, F0
458              // Expand as:
459              //        FST F(i)  (copy F0 to F(i))
460              MIR_Move.mutate(s, IA32_FST, D(phys.getFPR(i)), D(phys.getFPR(0)));
461            } else {
462              // We have FMOV Fi, Fj
463              // Expand as:
464              //        FLD Fj  (push Fj on FP stack).
465              //        FSTP F(i+1)  (copy F0 to F(i+1) and then pop register stack)
                 // The destination index is i + 1 because the FLD push has moved
                 // every existing stack slot down by one.
466              s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value));
467    
468              MIR_Move.mutate(s, IA32_FSTP, D(phys.getFPR(i + 1)), D(phys.getFPR(0)));
469            }
470    
471          }
472        } else if (value instanceof MemoryOperand) {
473          if (result instanceof MemoryOperand) {
474            // We have FMOV M1, M2  (M1 = result, M2 = value)
475            // Expand as:
476            //        FLD M2   (push M2 on FP stack).
477            //        FSTP M1  (copy F0 to M1 and pop register stack)
478            s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value));
479            MIR_Move.mutate(s, IA32_FSTP, result, D(phys.getFPR(0)));
480          } else {
481            // We have FMOV Fi, M
482            // Expand as:
483            //        FLD M    (push M on FP stack).
484            //        FSTP F(i+1)  (copy F0 to F(i+1) and pop register stack)
485            if (VM.VerifyAssertions) VM._assert(result.isRegister());
486            int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister());
487            s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value));
488            MIR_Move.mutate(s, IA32_FSTP, D(phys.getFPR(i + 1)), D(phys.getFPR(0)));
489          }
490        } else {
491          // We have FMOV M, Fi
           // (the value is neither memory nor part of a register pair, so it must
           // be a register being stored to memory; checked when assertions are on)
492          if (VM.VerifyAssertions) VM._assert(value.isRegister());
493          if (VM.VerifyAssertions) {
494            VM._assert(result instanceof MemoryOperand);
495          }
496          int i = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister());
497          if (i != 0) {
498            // Expand as:
499            //        FLD Fi    (push Fi on FP stack).
500            //        FSTP M    (store F0 in M and pop register stack);
501            s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value));
502            MIR_Move.mutate(s, IA32_FSTP, result, D(phys.getFPR(0)));
503          } else {
504            // Expand as:
505            //        FST M    (store F0 in M);
506            MIR_Move.mutate(s, IA32_FST, result, value);
507          }
508        }
509      }
510    
         /**
          * Expands a conditional yieldpoint pseudo-instruction.
          * <p>
          * The yieldpoint's block is split and a new block is appended at the end
          * of the code order. The yieldpoint instruction is mutated into a
          * CALL_SAVE_VOLATILE of {@code meth} and moved into that block, followed
          * by a jump back to the split-off remainder. The original block ends with
          * a compare of the takeYieldpoint field against 0 and a conditional
          * branch to the new block, profiled as never taken.
          *
          * @param s the yieldpoint instruction to expand
          * @param ir the containing IR
          * @param meth the thread switch method to call
          * @param ypCond the condition on the compare under which the call is made
          */
511      private static void expandYieldpoint(Instruction s, IR ir, RVMMethod meth, IA32ConditionOperand ypCond) {
512        // split the basic block after the yieldpoint, create a new
513        // block at the end of the IR to hold the yieldpoint,
514        // remove the yieldpoint (to prepare to put it in the new block at the end)
515        BasicBlock thisBlock = s.getBasicBlock();
516        BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(s, ir);
           // NOTE(review): the trailing 0 is presumably the new block's execution
           // frequency - confirm against BasicBlock.createSubBlock.
517        BasicBlock yieldpoint = thisBlock.createSubBlock(s.bcIndex, ir, 0);
518        thisBlock.insertOut(yieldpoint);
519        yieldpoint.insertOut(nextBlock);
520        ir.cfg.addLastInCodeOrder(yieldpoint);
521        s.remove();
522    
523        // change thread switch instruction into call to thread switch routine
524        // NOTE: must make s the call instruction: it is the GC point!
525        //       must also inform the GCMap that s has been moved!!!
           // The call target is a memory operand at (TOC pointer + offset of meth).
526        Offset offset = meth.getOffset();
527        LocationOperand loc = new LocationOperand(offset);
528        Operand guard = TG();
529        Operand target = MemoryOperand.D(Magic.getTocPointer().plus(offset), (byte) 4, loc, guard);
530        MIR_Call.mutate0(s, CALL_SAVE_VOLATILE, null, null, target, MethodOperand.STATIC(meth));
531        yieldpoint.appendInstruction(s);
532        ir.MIRInfo.gcIRMap.moveToEnd(s);
533    
           // after the call, resume in the remainder of the original block
534        yieldpoint.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));
535    
536        // Check to see if threadSwitch requested
           // (compare the takeYieldpoint field, addressed off the operand from
           // makeTROp(), against 0 and branch to the yieldpoint block on ypCond)
537        Offset tsr = Entrypoints.takeYieldpointField.getOffset();
538        MemoryOperand M =
539            MemoryOperand.BD(ir.regpool.makeTROp(), tsr, (byte) 4, null, null);
540        thisBlock.appendInstruction(MIR_Compare.create(IA32_CMP, M, IC(0)));
541        thisBlock.appendInstruction(MIR_CondBranch.create(IA32_JCC,
542                                                          ypCond,
543                                                          yieldpoint.makeJumpTarget(),
544                                                          BranchProfileOperand.never()));
545      }
546    
547      /**
          * Generates a yieldpoint without checking the threadSwitch request.
          * <p>
          * The structure matches the conditional expansion (split the block, move
          * the call into a new block appended at the end of the code order, jump
          * back afterwards), except that the original block ends with an
          * unconditional jump to the new block instead of a compare and branch.
          *
          * @param s the yieldpoint instruction to expand
          * @param ir the containing IR
          * @param meth the thread switch method to call
548       */
549      private static void expandUnconditionalYieldpoint(Instruction s, IR ir, RVMMethod meth) {
550        // split the basic block after the yieldpoint, create a new
551        // block at the end of the IR to hold the yieldpoint,
552        // remove the yieldpoint (to prepare to put it in the new block at the end)
553        BasicBlock thisBlock = s.getBasicBlock();
554        BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(s, ir);
555        BasicBlock yieldpoint = thisBlock.createSubBlock(s.bcIndex, ir);
556        thisBlock.insertOut(yieldpoint);
557        yieldpoint.insertOut(nextBlock);
558        ir.cfg.addLastInCodeOrder(yieldpoint);
559        s.remove();
560    
561        // change thread switch instruction into call to thread switch routine
562        // NOTE: must make s the call instruction: it is the GC point!
563        //       must also inform the GCMap that s has been moved!!!
           // The call target is a memory operand at (TOC pointer + offset of meth).
564        Offset offset = meth.getOffset();
565        LocationOperand loc = new LocationOperand(offset);
566        Operand guard = TG();
567        Operand target = MemoryOperand.D(Magic.getTocPointer().plus(offset), (byte) 4, loc, guard);
568        MIR_Call.mutate0(s, CALL_SAVE_VOLATILE, null, null, target, MethodOperand.STATIC(meth));
569        yieldpoint.appendInstruction(s);
570        ir.MIRInfo.gcIRMap.moveToEnd(s);
571    
           // after the call, resume in the remainder of the original block
572        yieldpoint.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));
573    
574        // make a jump to yield block
575        thisBlock.appendInstruction(MIR_Branch.create(IA32_JMP, yieldpoint.makeJumpTarget()));
576      }
577    }