1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Arm64 Code Generator XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
18 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
25 #include "gcinfoencoder.h"
28 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
29 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
33 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
34 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
37 //------------------------------------------------------------------------
38 // genInstrWithConstant: we will typically generate one instruction
40 // ins reg1, reg2, imm
42 // However, the imm might not fit as a directly encodable immediate.
43 // When it doesn't fit, we generate extra instruction(s) that set up
44 // 'tmpReg' with the proper immediate value, and then generate
47 // ins reg1, reg2, tmpReg
51 // attr - operation size and GC attribute
52 // reg1, reg2 - first and second register operands
53 // imm - immediate value (third operand when it fits)
54 // tmpReg - temp register to use when the 'imm' doesn't fit
55 // inUnwindRegion - true if we are in a prolog/epilog region with unwind codes
58 // returns true if the immediate was too large and tmpReg was used and modified.
60 bool CodeGen::genInstrWithConstant(instruction ins,
66 bool inUnwindRegion /* = false */)
// Set by the per-instruction checks below: true when 'imm' can be encoded directly in 'ins'.
68 bool immFitsInIns = false;
// Operand size taken from 'attr'; used for the immediate-range checks and for materializing the constant.
69 emitAttr size = EA_SIZE(attr);
71 // reg1 is usually a dest register
72 // reg2 is always a source register
73 assert(tmpReg != reg2); // tmpReg cannot match any source register
// Swap add <-> sub. NOTE(review): presumably paired with negating a negative 'imm' so that the immediate is
// encoded as a positive value; the guarding condition is not visible here -- confirm.
82 ins = (ins == INS_add) ? INS_sub : INS_add;
84 immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size);
90 // reg1 is a source register for store instructions
91 assert(tmpReg != reg1); // tmpReg cannot match any source register
92 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
// NOTE(review): same load/store-offset check as above but without the 'tmpReg != reg1' assert; presumably this
// is the load case, where reg1 is a destination -- confirm.
101 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
// Reached for an unexpected instruction (see the message): !"literal" is always false, so this assert always
// fires when executed.
105 assert(!"Unexpected instruction in genInstrWithConstant");
111 // generate a single instruction that encodes the immediate directly
112 getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm);
116 // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit
117 assert(tmpReg != REG_NA);
119 // generate two or more instructions
121 // first we load the immediate into tmpReg
122 instGen_Set_Reg_To_Imm(size, tmpReg, imm);
// tmpReg was just overwritten, so report it to the register tracker as trashed.
123 regTracker.rsTrackRegTrash(tmpReg);
125 // when we are in an unwind code region
126 // we record the extra instructions using unwindPadding()
129 compiler->unwindPadding();
132 // generate the instruction using a three register encoding with the immediate in tmpReg
133 getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg);
138 //------------------------------------------------------------------------
139 // genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
140 // or the epilog. The unwind codes for the generated instructions are produced. An available temporary
141 // register is required to be specified, in case the constant is too large to encode in an "add"
142 // instruction (or "sub" instruction if we choose to use one), such that we need to load the constant
143 // into a register first, before using it.
146 // spDelta - the value to add to SP (can be negative)
147 // tmpReg - an available temporary register
148 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
149 // Otherwise, we don't touch it.
154 void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
156 // Even though INS_add is specified here, the encoder will choose either
157 // an INS_add or an INS_sub and encode the immediate as a positive value
// SP is both destination and source. The trailing 'true' is the inUnwindRegion argument. A 'true' result means
// the constant did not fit and tmpReg was used and modified.
159 if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true))
// tmpReg now holds the constant, so it can no longer be assumed to be zero; report that to the caller if asked.
161 if (pTmpRegIsZero != nullptr)
163 *pTmpRegIsZero = false;
167 // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive value.
// NOTE(review): abs() is applied to an ssize_t here; confirm that the overload selected by the build does not
// narrow the value to int before taking the absolute value.
169 ssize_t spDeltaAbs = abs(spDelta);
170 unsigned unwindSpDelta = (unsigned)spDeltaAbs;
171 assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in an unsigned
// Record the stack allocation, as a positive size, in the unwind information.
173 compiler->unwindAllocStack(unwindSpDelta);
176 //------------------------------------------------------------------------
177 // genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
178 // prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
179 // The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that instruction.
183 // reg1 - First register of pair to save.
184 // reg2 - Second register of pair to save.
185 // spOffset - The offset from SP to store reg1 (must be positive or zero).
186 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
188 // lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
189 // allows us to emit the "save_next" unwind code.
190 // tmpReg - An available temporary register. Needed for the case of large frames.
191 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
192 // Otherwise, we don't touch it.
197 void CodeGen::genPrologSaveRegPair(regNumber reg1,
201 bool lastSavedWasPreviousPair,
// Preconditions: non-negative store offset and a non-positive (stack-growing) SP delta.
205 assert(spOffset >= 0);
206 assert(spDelta <= 0);
207 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
208 assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both FP/SIMD
// Cleared below once the pre-indexed STP has already stored the pair.
// NOTE(review): the check that consumes this flag is not visible here -- confirm.
211 bool needToSaveRegs = true;
214 if ((spOffset == 0) && (spDelta >= -512))
216 // We can use pre-indexed addressing.
217 // stp REG, REG + 1, [SP, #spDelta]!
218 // 64-bit STP offset range: -512 to 504, multiple of 8.
219 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
220 compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
222 needToSaveRegs = false;
224 else // (spOffset != 0) || (spDelta < -512)
226 // We need to do SP adjustment separately from the store; the SP change cannot be folded into a single
// pre-indexed store here (spOffset is non-zero, or spDelta is outside the STP immediate range).
229 // generate sub SP,SP,imm
230 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
236 // stp REG, REG + 1, [SP, #offset]
237 // 64-bit STP offset range: -512 to 504, multiple of 8.
238 assert(spOffset <= 504);
239 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
// Pick the unwind code: "save_next" when this pair directly follows the previously saved pair, otherwise an
// explicit register-pair save at spOffset.
241 if (lastSavedWasPreviousPair)
243 // This works as long as we've only been saving pairs, in order, and we've saved the previous one just before this one.
245 compiler->unwindSaveNext();
249 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
254 //------------------------------------------------------------------------
255 // genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
256 // floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
257 // then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
258 // below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
259 // (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
260 // STR instruction, and that spOffset will be in the legal range for that instruction.
263 // reg1 - Register to save.
264 // spOffset - The offset from SP to store reg1 (must be positive or zero).
265 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
267 // tmpReg - An available temporary register. Needed for the case of large frames.
268 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
269 // Otherwise, we don't touch it.
274 void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
// Preconditions: non-negative store offset and a non-positive (stack-growing) SP delta.
276 assert(spOffset >= 0);
277 assert(spDelta <= 0);
278 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
// The SP change is always a separate instruction here; no pre-indexed STR form is used by this function.
// NOTE(review): presumably guarded by a 'spDelta != 0' check that is not visible here -- confirm.
282 // generate sub SP,SP,imm
283 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
286 // str REG, [SP, #offset]
287 // 64-bit STR offset range: 0 to 32760, multiple of 8.
288 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
// Record the single-register save at spOffset in the unwind information.
289 compiler->unwindSaveReg(reg1, spOffset);
292 //------------------------------------------------------------------------
293 // genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
294 // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
295 // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that instruction.
299 // reg1 - First register of pair to restore.
300 // reg2 - Second register of pair to restore.
301 // spOffset - The offset from SP to load reg1 (must be positive or zero).
302 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
304 // tmpReg - An available temporary register. Needed for the case of large frames.
305 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
306 // Otherwise, we don't touch it.
311 void CodeGen::genEpilogRestoreRegPair(
312 regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
// Preconditions: non-negative load offset and a non-negative (stack-shrinking) SP delta.
314 assert(spOffset >= 0);
315 assert(spDelta >= 0);
316 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
320 if ((spOffset == 0) && (spDelta <= 504))
322 // Fold the SP change into this instruction.
323 // ldp reg1, reg2, [SP], #spDelta
324 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
// The unwind code names the prolog operation being undone (a pre-indexed store with a negative delta), which is
// why the delta is negated here.
325 compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
327 else // (spOffset != 0) || (spDelta > 504)
329 // Can't fold in the SP change; need to use a separate ADD instruction.
331 // ldp reg1, reg2, [SP, #offset]
332 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
333 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
335 // generate add SP,SP,imm
336 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
// NOTE(review): presumably the path taken when no SP change is requested (spDelta == 0); the enclosing condition
// is not visible here -- confirm.
341 // ldp reg1, reg2, [SP, #offset]
342 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
343 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
347 //------------------------------------------------------------------------
348 // genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
351 // reg1 - Register to restore.
352 // spOffset - The offset from SP to restore reg1 (must be positive or zero).
353 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
355 // tmpReg - An available temporary register. Needed for the case of large frames.
356 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
357 // Otherwise, we don't touch it.
362 void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
// Preconditions: non-negative load offset and a non-negative (stack-shrinking) SP delta.
364 assert(spOffset >= 0);
365 assert(spDelta >= 0);
366 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
368 // ldr reg1, [SP, #offset]
369 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
// The unwind code names the corresponding prolog save of reg1 at the same offset.
370 compiler->unwindSaveReg(reg1, spOffset);
// The load is emitted before SP is moved, so spOffset is relative to the SP value before the adjustment.
// NOTE(review): presumably guarded by a 'spDelta != 0' check that is not visible here -- confirm.
374 // generate add SP,SP,imm
375 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
379 //------------------------------------------------------------------------
380 // genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
381 // in the function or funclet prolog. The save set does not contain FP, since that is
382 // guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
383 // that are allowed by the unwind codes. Integer registers are stored at lower addresses,
384 // FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that
385 // there is enough space on the frame to store these registers, and that the store instructions
386 // we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
387 // use. Note that the save set can contain LR if this is a frame without a frame pointer, in
388 // which case LR is saved along with the other callee-saved registers. The caller can tell us
389 // to fold in a stack pointer adjustment, which we will do with the first instruction. Note that
390 // the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
391 // stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
392 // registers, though, we will have an empty alignment slot somewhere. It turns out we will put
393 // it below (at a lower address) the callee-saved registers, as that is currently how we
394 // do frame layout. This means that the first stack offset will be 8 and the stack pointer
395 // adjustment must be done by a SUB, and not folded in to a pre-indexed store.
398 // regsToSaveMask - The mask of callee-saved registers to save. If empty, no registers are saved (only the SP adjustment, if any, is done).
399 // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
400 // if spDelta is non-zero, then this is the offset of the first save *after* that SP adjustment.
402 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
408 void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
410 assert(spDelta <= 0);
411 unsigned regsToSaveCount = genCountBits(regsToSaveMask);
// Nothing to save: only the SP adjustment is emitted. REG_NA is passed as the temp register, so the adjustment
// must be encodable as an immediate. NOTE(review): the surrounding 'spDelta != 0' check and early return are not
// visible here -- confirm.
412 if (regsToSaveCount == 0)
416 // Currently this is the case for varargs only
417 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
418 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
423 assert((spDelta % 16) == 0);
424 assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
425 assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
// Split the mask: FP/SIMD registers in one set, everything else (the integer registers) in the other.
428 regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
429 regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
431 int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
433 unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt);
434 unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat);
435 bool isPairSave = false;
// Used only by the asserts below, to validate the offset of the first store when an SP change is requested.
437 bool isRegsToSaveCountOdd = ((intRegsToSaveCount + floatRegsToSaveCount) % 2 != 0);
440 // Save the integer registers
442 bool lastSavedWasPair = false;
// Each iteration removes the lowest remaining register(s) from the mask and stores them at increasing offsets.
444 while (maskSaveRegsInt != RBM_NONE)
446 // If this is the first store that needs to change SP (spDelta != 0),
447 // then the offset must be 8 to account for alignment for the odd count
448 // or it must be 0 for the even count.
449 assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
450 (!isRegsToSaveCountOdd && spOffset == 0));
// Pair up registers while at least two remain; a final leftover register is saved on its own.
452 isPairSave = (intRegsToSaveCount >= 2);
453 regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt);
454 regNumber reg1 = genRegNumFromMask(reg1Mask);
455 maskSaveRegsInt &= ~reg1Mask;
456 intRegsToSaveCount -= 1;
460 // We can use a STP instruction.
462 regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt);
463 regNumber reg2 = genRegNumFromMask(reg2Mask);
// The second register must be the next one in sequence, or LR.
464 assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR));
465 maskSaveRegsInt &= ~reg2Mask;
466 intRegsToSaveCount -= 1;
468 genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
470 // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
471 // this epilog, to get the codes to match. Turn this off until that is better understood.
472 // lastSavedWasPair = true;
474 spOffset += 2 * REGSIZE_BYTES;
478 // No register pair; we use a STR instruction.
480 genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
482 lastSavedWasPair = false;
483 spOffset += REGSIZE_BYTES;
486 spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
489 assert(intRegsToSaveCount == 0);
491 // Save the floating-point/SIMD registers
493 lastSavedWasPair = false;
// Same scheme as the integer loop above, continuing at the offset where the integer saves ended.
495 while (maskSaveRegsFloat != RBM_NONE)
497 // If this is the first store that needs to change SP (spDelta != 0),
498 // then the offset must be 8 to account for alignment for the odd count
499 // or it must be 0 for the even count.
500 assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
501 (!isRegsToSaveCountOdd && spOffset == 0));
503 isPairSave = (floatRegsToSaveCount >= 2);
504 regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat);
505 regNumber reg1 = genRegNumFromMask(reg1Mask);
506 maskSaveRegsFloat &= ~reg1Mask;
507 floatRegsToSaveCount -= 1;
511 // We can use a STP instruction.
513 regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat);
514 regNumber reg2 = genRegNumFromMask(reg2Mask);
// Unlike the integer loop, there is no LR exception here: the pair must be consecutive registers.
515 assert(reg2 == REG_NEXT(reg1));
516 maskSaveRegsFloat &= ~reg2Mask;
517 floatRegsToSaveCount -= 1;
519 genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
521 // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
522 // this epilog, to get the codes to match. Turn this off until that is better understood.
523 // lastSavedWasPair = true;
525 spOffset += 2 * FPSAVE_REGSIZE_BYTES;
529 // No register pair; we use a STR instruction.
531 genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
533 lastSavedWasPair = false;
534 spOffset += FPSAVE_REGSIZE_BYTES;
537 spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
540 assert(floatRegsToSaveCount == 0);
543 //------------------------------------------------------------------------
544 // genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
545 // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
548 // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, no registers are restored (only the SP adjustment, if any, is done).
549 // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
550 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
553 // Here's an example restore sequence:
554 // ldp x27, x28, [sp,#96]
555 // ldp x25, x26, [sp,#80]
556 // ldp x23, x24, [sp,#64]
557 // ldp x21, x22, [sp,#48]
558 // ldp x19, x20, [sp,#32]
560 // For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
561 // the last restore adjusts SP by the specified amount. For example:
562 // ldp x27, x28, [sp,#64]
563 // ldp x25, x26, [sp,#48]
564 // ldp x23, x24, [sp,#32]
565 // ldp x21, x22, [sp,#16]
566 // ldp x19, x20, [sp], #80
568 // Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
569 // generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
574 void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
576 assert(spDelta >= 0);
577 unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
// Nothing to restore: only the SP adjustment is emitted. REG_NA is passed as the temp register, so the adjustment
// must be encodable as an immediate. NOTE(review): the surrounding 'spDelta != 0' check and early return are not
// visible here -- confirm.
578 if (regsToRestoreCount == 0)
582 // Currently this is the case for varargs only
583 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
584 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
589 assert((spDelta % 16) == 0);
590 assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
591 assert(regsToRestoreCount <=
592 genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
// Split the mask: FP/SIMD registers in one set, everything else (the integer registers) in the other.
594 regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
595 regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
// Both register kinds occupy the same slot size, which lets the end offset below be computed from the total count.
597 assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
598 int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
599 // predecrement to find the offset to load from.
602 unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat);
603 unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt);
605 bool isPairRestore = false;
606 bool thisIsTheLastRestoreInstruction = false;
// Used only by the asserts below, to validate the offset of the final restore when an SP change is requested.
608 bool isRegsToRestoreCountOdd = ((floatRegsToRestoreCount + intRegsToRestoreCount) % 2 != 0);
// NOTE(review): 'stackDelta' is used below but its declaration is not visible here; the asserts imply it starts
// at 0 -- confirm.
611 // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
612 // callee-saved registers properly.
614 // Restore the floating-point/SIMD registers
616 while (maskRestoreRegsFloat != RBM_NONE)
// This is the final restore only if at most one instruction's worth of float registers remains and there are no
// integer registers still to restore.
618 thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount <= 2) && (maskRestoreRegsInt == RBM_NONE);
// With an odd count remaining, the highest register is restored on its own; the count is then even and the rest
// are restored as pairs.
619 isPairRestore = (floatRegsToRestoreCount % 2) == 0;
621 // Update stack delta only if it is the last restore (the first save).
622 if (thisIsTheLastRestoreInstruction)
624 assert(stackDelta == 0);
625 stackDelta = spDelta;
628 // Update stack offset.
631 spOffset -= 2 * FPSAVE_REGSIZE_BYTES;
635 spOffset -= FPSAVE_REGSIZE_BYTES;
638 // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
639 // then the offset must be 8 to account for alignment for the odd count
640 // or it must be 0 for the even count.
641 assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == FPSAVE_REGSIZE_BYTES) ||
642 (!isRegsToRestoreCountOdd && spOffset == 0));
// Registers are taken from the highest bit downward, the reverse of the save order.
644 regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat);
645 regNumber reg2 = genRegNumFromMask(reg2Mask);
646 maskRestoreRegsFloat &= ~reg2Mask;
647 floatRegsToRestoreCount -= 1;
651 regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
652 regNumber reg1 = genRegNumFromMask(reg1Mask);
653 maskRestoreRegsFloat &= ~reg1Mask;
654 floatRegsToRestoreCount -= 1;
656 genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP1, nullptr);
660 genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP1, nullptr);
664 assert(floatRegsToRestoreCount == 0);
666 // Restore the integer registers
668 while (maskRestoreRegsInt != RBM_NONE)
// The integer registers are restored last, so only the remaining integer count decides whether this is the final
// restore instruction.
670 thisIsTheLastRestoreInstruction = (intRegsToRestoreCount <= 2);
671 isPairRestore = (intRegsToRestoreCount % 2) == 0;
673 // Update stack delta only if it is the last restore (the first save).
674 if (thisIsTheLastRestoreInstruction)
676 assert(stackDelta == 0);
677 stackDelta = spDelta;
680 // Update stack offset.
// NOTE(review): two single-slot decrements are visible; presumably the second applies only to pair restores
// (its guard is not visible here) -- confirm.
681 spOffset -= REGSIZE_BYTES;
684 spOffset -= REGSIZE_BYTES;
687 // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
688 // then the offset must be 8 to account for alignment for the odd count
689 // or it must be 0 for the even count.
690 assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == REGSIZE_BYTES) ||
691 (!isRegsToRestoreCountOdd && spOffset == 0));
693 regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt);
694 regNumber reg2 = genRegNumFromMask(reg2Mask);
695 maskRestoreRegsInt &= ~reg2Mask;
696 intRegsToRestoreCount -= 1;
700 regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
701 regNumber reg1 = genRegNumFromMask(reg1Mask);
702 maskRestoreRegsInt &= ~reg1Mask;
703 intRegsToRestoreCount -= 1;
705 genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP1, nullptr);
709 genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP1, nullptr);
713 assert(intRegsToRestoreCount == 0);
717 /*****************************************************************************
719 * Generates code for an EH funclet prolog.
721 * Funclets have the following incoming arguments:
723 * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
724 * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
725 * finally/fault: none
727 * Funclets set the following registers on exit:
729 * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
730 * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
731 * finally/fault: none
733 * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
734 * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
737 * For #outsz == 0 and #framesz <= 512:
738 * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
739 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
741 * The funclet frame is thus:
744 * |-----------------------|
747 * +=======================+ <---- Caller's SP
748 * |Callee saved registers | // multiple of 8 bytes
749 * |-----------------------|
750 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
751 * |-----------------------|
752 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
753 * |-----------------------|
754 * | Saved FP, LR | // 16 bytes
755 * |-----------------------| <---- Ambient SP
762 * For #outsz != 0 and #framesz <= 512:
763 * sub sp,sp,#framesz ; establish the frame
764 * stp fp,lr,[sp,#outsz] ; save FP/LR.
765 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
767 * The funclet frame is thus:
770 * |-----------------------|
773 * +=======================+ <---- Caller's SP
774 * |Callee saved registers | // multiple of 8 bytes
775 * |-----------------------|
776 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
777 * |-----------------------|
778 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
779 * |-----------------------|
780 * | Saved FP, LR | // 16 bytes
781 * |-----------------------|
782 * | Outgoing arg space | // multiple of 8 bytes
783 * |-----------------------| <---- Ambient SP
790 * For #framesz > 512:
791 * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
792 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
793 * sub sp,sp,#outsz ; create space for outgoing argument space
795 * The funclet frame is thus:
798 * |-----------------------|
801 * +=======================+ <---- Caller's SP
802 * |Callee saved registers | // multiple of 8 bytes
803 * |-----------------------|
804 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
805 * |-----------------------|
806 * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
807 * |-----------------------|
808 * | Saved FP, LR | // 16 bytes
809 * |-----------------------|
810 * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
811 * |-----------------------|
812 * | Outgoing arg space | // multiple of 8 bytes
813 * |-----------------------| <---- Ambient SP
819 * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
820 * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
821 * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
822 * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
823 * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
824 * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
825 * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
826 * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
827 * outgoing argument space. Both changes to SP might need to add alignment padding.
829 * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
830 * as in the main function.
832 * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
833 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
835 * if (this is a filter funclet)
837 * // x1 on entry to a filter funclet is CallerSP of the containing function:
838 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
839 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
840 * // a funclet. Consider:
844 * // throw new Exception();
845 * // } catch(Exception) {
846 * // throw new Exception(); // The exception thrown here ...
848 * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
849 * // } filter-handler {
852 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
853 * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
854 * // create a main function PSP for any function with a filter.
856 * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
857 * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
858 * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
862 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
863 * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
865 * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
866 * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
869 * An example epilog sequence is then:
871 * add sp,sp,#outsz ; if any outgoing argument space
872 * ... ; restore callee-saved registers
873 * ldp x19,x20,[sp,#xxx]
874 * ldp fp,lr,[sp],#framesz
877 * The funclet frame is thus:
880 * |-----------------------|
883 * +=======================+ <---- Caller's SP
884 * |Callee saved registers | // multiple of 8 bytes
885 * |-----------------------|
886 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
887 * |-----------------------|
888 * | Saved FP, LR | // 16 bytes
889 * |-----------------------|
890 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
891 * |-----------------------|
892 * | Outgoing arg space | // multiple of 8 bytes
893 * |-----------------------| <---- Ambient SP
// genFuncletProlog: emits the prolog for the EH funclet that begins at the given block.
// The block must be non-null and carry BBF_FUNCLET_BEG. The frame shape (fiFrameType,
// fiSpDelta1, fiSpDelta2 and the save offsets) is read from genFuncletInfo, which is
// filled in by genCaptureFuncletPrologEpilogInfo().
901 void CodeGen::genFuncletProlog(BasicBlock* block)
905 printf("*************** In genFuncletProlog()\n");
908 assert(block != NULL);
909 assert(block->bbFlags & BBF_FUNCLET_BEG);
// Mark the compiler as generating a prolog for the lifetime of this scoped variable.
911 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
913 gcInfo.gcResetForBB();
915 compiler->unwindBegProlog();
// Split the saved-register set into its floating-point part and everything else (integer).
917 regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
918 regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
920 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
921 assert((maskSaveRegsInt & RBM_LR) != 0);
922 assert((maskSaveRegsInt & RBM_FP) != 0);
924 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
// Choose the argument registers considered live on entry, by handler kind:
// R0 and R1 in the first case, none for finally/fault, R0 otherwise.
// NOTE(review): the condition guarding the first assignment is not visible in this excerpt;
// presumably it tests isFilter - confirm.
926 regMaskTP maskArgRegsLiveIn;
929 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
931 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
933 maskArgRegsLiveIn = RBM_NONE;
937 maskArgRegsLiveIn = RBM_R0;
940 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
// Save FP/LR and establish the frame. The sequence depends on the frame type:
//   type 1: one stp that also moves SP by fiSpDelta1 (reported as a pre-indexed pair save)
//   type 2: explicit SP adjustment by fiSpDelta1, then stp at fiSP_to_FPLR_save_delta
//   type 3: stp moving SP by fiSpDelta1 now; SP is moved by fiSpDelta2 after the callee saves
942 if (genFuncletInfo.fiFrameType == 1)
944 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
946 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
948 assert(genFuncletInfo.fiSpDelta2 == 0);
949 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
951 else if (genFuncletInfo.fiFrameType == 2)
953 // fiFrameType==2 constraints:
954 assert(genFuncletInfo.fiSpDelta1 < 0);
955 assert(genFuncletInfo.fiSpDelta1 >= -512);
957 // generate sub SP,SP,imm
958 genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
960 assert(genFuncletInfo.fiSpDelta2 == 0);
962 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
963 genFuncletInfo.fiSP_to_FPLR_save_delta);
964 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
968 assert(genFuncletInfo.fiFrameType == 3);
969 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
971 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// fiSP_to_CalleeSave_delta is relative to the final SP; compensate because SP is still
// fiSpDelta2 bytes away from its final value at this point.
973 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
975 maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
// Store the remaining callee-saved registers, starting at lowestCalleeSavedOffset from SP.
977 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
979 if (genFuncletInfo.fiFrameType == 3)
981 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
982 assert(genFuncletInfo.fiSpDelta2 < 0);
984 // generate sub SP,SP,imm
985 genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
988 // This is the end of the OS-reported prolog for purposes of unwinding
989 compiler->unwindEndProlog();
// Everything below sets up the PSP slot (and, for filters, the frame pointer); it is emitted
// after unwindEndProlog() and all genInstrWithConstant calls pass inUnwindRegion == false.
991 // If there is no PSPSym (CoreRT ABI), we are done.
992 if (compiler->lvaPSPSym == BAD_VAR_NUM)
999 // This is the first block of a filter
1000 // Note that register x1 = CallerSP of the containing function
1001 // X1 is overwritten by the first Load (new callerSP)
1002 // X2 is scratch when we have a large constant offset
1004 // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
1005 genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
1006 genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
1007 regTracker.rsTrackRegTrash(REG_R1);
1009 // Store the PSP value (aka CallerSP)
1010 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
1011 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1013 // re-establish the frame pointer
1014 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1017 else // This is a non-filter funclet
1019 // X3 is scratch, X2 can also become scratch
1021 // compute the CallerSP, given the frame pointer. x3 is scratch.
// The delta is negated here because fiFunction_CallerSP_to_FP_delta goes from CallerSP to FP.
1022 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1024 regTracker.rsTrackRegTrash(REG_R3);
// Store the computed CallerSP into the PSP slot of this funclet frame.
1026 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
1027 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1031 /*****************************************************************************
1033 * Generates code for an EH funclet epilog.
// genFuncletEpilog: emits the epilog of an EH funclet, undoing the frame set up by
// genFuncletProlog() in reverse order: release the outgoing-arg area (frame type 3 only),
// restore callee-saved registers, restore FP/LR while releasing the frame, then return via LR.
1036 void CodeGen::genFuncletEpilog()
1040 printf("*************** In genFuncletEpilog()\n");
// Mark the compiler as generating an epilog for the lifetime of this scoped variable.
1043 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
1045 bool unwindStarted = false;
// NOTE(review): the condition guarding the next two statements is not visible in this excerpt.
1049 // We can delay this until we know we'll generate an unwindable instruction, if necessary.
1050 compiler->unwindBegEpilog();
1051 unwindStarted = true;
// Split the saved-register set into its floating-point part and everything else (integer).
1054 regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
1055 regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
1057 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
1058 assert((maskRestoreRegsInt & RBM_LR) != 0);
1059 assert((maskRestoreRegsInt & RBM_FP) != 0);
1061 maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
1063 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
1065 if (genFuncletInfo.fiFrameType == 3)
1067 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
1068 assert(genFuncletInfo.fiSpDelta2 < 0);
1070 // generate add SP,SP,imm
1071 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
// SP has just moved up by -fiSpDelta2, so the callee-save area is that much closer to SP.
1073 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
1076 regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
1077 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
// Restore FP/LR and release the rest of the frame, mirroring the prolog for each frame type.
// The unwind calls below are the same ones the prolog uses for the matching save.
1079 if (genFuncletInfo.fiFrameType == 1)
1081 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1082 INS_OPTS_POST_INDEX);
1083 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
1085 assert(genFuncletInfo.fiSpDelta2 == 0);
1086 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
1088 else if (genFuncletInfo.fiFrameType == 2)
1090 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
1091 genFuncletInfo.fiSP_to_FPLR_save_delta);
1092 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
1094 // fiFrameType==2 constraints:
1095 assert(genFuncletInfo.fiSpDelta1 < 0);
1096 assert(genFuncletInfo.fiSpDelta1 >= -512);
1098 // generate add SP,SP,imm
1099 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
1101 assert(genFuncletInfo.fiSpDelta2 == 0);
1105 assert(genFuncletInfo.fiFrameType == 3);
1107 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1108 INS_OPTS_POST_INDEX);
1109 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// Return through LR and record the return in the unwind data before closing the epilog.
1112 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
1113 compiler->unwindReturn(REG_LR);
1115 compiler->unwindEndEpilog();
1118 /*****************************************************************************
1120 * Capture the information used to generate the funclet prologs and epilogs.
1121 * Note that all funclet prologs are identical, and all funclet epilogs are
1122 * identical (per type: filters are identical, and non-filters are identical).
1123 * Thus, we compute the data used for these just once.
1125 * See genFuncletProlog() for more information about the prolog/epilog sequences.
// genCaptureFuncletPrologEpilogInfo: computes the funclet frame layout once and stores it in
// genFuncletInfo (frame type, the two SP deltas, the saved-register mask, and the offsets of
// the FP/LR pair, the PSP slot and the callee-save area). genFuncletProlog() and
// genFuncletEpilog() read these values.
1128 void CodeGen::genCaptureFuncletPrologEpilogInfo()
// NOTE(review): the body of this check is not visible in this excerpt; presumably an early return.
1130 if (!compiler->ehAnyFunclets())
1133 assert(isFramePointerUsed());
1134 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
1137 genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
1139 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
1140 assert((rsMaskSaveRegs & RBM_LR) != 0);
1141 assert((rsMaskSaveRegs & RBM_FP) != 0);
// The PSP slot occupies one register-sized slot when a PSPSym exists, and nothing otherwise.
1143 unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
1145 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
1146 unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
1147 if (compiler->info.compIsVarArgs)
1149 // For varargs we always save all of the integer register arguments
1150 // so that they are contiguous with the incoming stack arguments.
1151 saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
1153 unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
1155 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
1156 unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
// Upper bound on the frame size: both areas aligned separately.
1158 unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
1159 assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
1161 int SP_to_FPLR_save_delta;
1162 int SP_to_PSP_slot_delta;
1163 int CallerSP_to_PSP_slot_delta;
// Small frames (at most 512 bytes) are allocated with a single SP adjustment (types 1 and 2);
// larger frames use two adjustments (type 3).
// NOTE(review): 512 presumably matches the immediate range of the pre-indexed stp/ldp used by
// the prolog and epilog - confirm against the emitter.
1165 if (maxFuncletFrameSizeAligned <= 512)
// Single-adjustment case: align the sum of both areas once, so at most one pad slot is needed.
1167 unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
1168 unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN);
1169 assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
1171 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
1172 assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
// FP/LR sit directly above the outgoing arg space; any pad and then the PSP slot follow them.
1174 SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
1175 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
1176 CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
// Type 1 when there is no outgoing arg space, type 2 otherwise.
1178 if (compiler->lvaOutgoingArgSpaceSize == 0)
1180 genFuncletInfo.fiFrameType = 1;
1184 genFuncletInfo.fiFrameType = 2;
1186 genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
1187 genFuncletInfo.fiSpDelta2 = 0;
1189 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
// Two-adjustment case (type 3): the save area and the outgoing arg area are aligned separately,
// so the pad (if any) belongs to the save area.
1193 unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
1194 assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
1196 SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
1197 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
1198 CallerSP_to_PSP_slot_delta =
1199 -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
1201 genFuncletInfo.fiFrameType = 3;
1202 genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
1203 genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
1205 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
1208 /* Now save it for future use */
1210 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
1211 genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
1212 genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
// The callee-save area starts one slot above the PSP slot offset.
1213 genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
1214 genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
// Diagnostic dump of the captured layout.
1220 printf("Funclet prolog / epilog info\n");
1221 printf(" Save regs: ");
1222 dspRegMask(genFuncletInfo.fiSaveRegs);
1224 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
1225 printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
1226 printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
1227 printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
1228 printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
1229 printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
1230 printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
1231 printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
// Print the main-function PSPSym offset when it disagrees with the value computed here;
// the same condition is asserted further below.
1233 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1235 if (CallerSP_to_PSP_slot_delta !=
1236 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
1238 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
1239 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
// Sign checks: SP-relative offsets are non-negative, the CallerSP-relative offset is non-positive.
1244 assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
1245 assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
1246 assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
1247 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
1249 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1251 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
1252 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
1259 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1260 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1262 XX End Prolog / Epilog XX
1264 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1265 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// genCallFinally: emits the call to a finally funclet for the given call-finally block.
// x0 is loaded with the PSPSym value, or with SP when no PSPSym is used, and the funclet at
// block->bbJumpDest is called with a local bl. For a non-retless call the paired following
// block is consumed as well and `block` is advanced to it.
// NOTE(review): the return statement is not visible in this excerpt; presumably the (possibly
// advanced) block is returned - confirm.
1268 BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
1270 // Generate a call to the finally, like this:
1271 // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
1272 // bl finally-funclet
1273 // b finally-return // Only for non-retless finally calls
1274 // The 'b' can be a NOP if we're going to the next block.
1276 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1278 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
1282 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE);
1284 getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
1286 if (block->bbFlags & BBF_RETLESS_CALL)
1288 // We have a retless call, and the last instruction generated was a call.
1289 // If the next block is in a different EH region (or is the end of the code
1290 // block), then we need to generate a breakpoint here (since it will never
1291 // get executed) to get proper unwind behavior.
1293 if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
1295 instGen(INS_bkpt); // This should never get executed
1300 // Because of the way the flowgraph is connected, the liveness info for this one instruction
1301 // after the call is not (can not be) correct in cases where a variable has a last use in the
1302 // handler. So turn off GC reporting for this single instruction.
1303 getEmitter()->emitDisableGC();
1305 // Now go to where the finally funclet needs to return to.
// When the continuation is simply the block after the paired block, no real jump is needed.
// NOTE(review): the body of this branch is not visible here; per the comment at the top of the
// function it is presumably a NOP.
1306 if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
1309 // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
1310 // to the next instruction? This would depend on stack walking from within the finally
1311 // handler working without this instruction being in this special EH region.
1316 inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
1319 getEmitter()->emitEnableGC();
1322 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
1323 // jump target using bbJumpDest - that is already used to point
1324 // to the finally block. So just skip past the BBJ_ALWAYS unless the
1325 // block is RETLESS.
1326 if (!(block->bbFlags & BBF_RETLESS_CALL))
1328 assert(block->isBBCallAlwaysPair());
1329 block = block->bbNext;
// genEHCatchRet: loads the address of the block's jump destination (block->bbJumpDest)
// into REG_INTRET using an address-of-label instruction.
1334 void CodeGen::genEHCatchRet(BasicBlock* block)
1336 // For long address (default): `adrp + add` will be emitted.
1337 // For short address (proven later): `adr` will be emitted.
1338 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
1341 // move an immediate value into an integer register
//
// Parameters:
//    size  - operand size attribute; any reloc flag is stripped when relocs are not enabled
//    reg   - destination register, must not be a floating-point register
//    imm   - the immediate value to materialize
//    flags - INS_FLAGS_SET requests that the condition flags be set afterwards
//
// Strategy, in the order the visible code tries it: relocatable address (adrp/add pair),
// the set-to-zero helper, a single mov when the immediate is encodable, and otherwise a
// movz or movn followed by movk for each remaining 16-bit halfword that still needs a value.
// The register tracker is updated with the constant at the end.
1343 void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
1345 // reg cannot be a FP register
1346 assert(!genIsValidFloatReg(reg));
1347 if (!compiler->opts.compReloc)
1349 size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
1352 if (EA_IS_RELOC(size))
1354 // This emits a pair of adrp/add (two instructions) with fix-ups.
1355 getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
// NOTE(review): the condition guarding this call is not visible in this excerpt;
// presumably it is the imm == 0 case - confirm.
1359 instGen_Set_Reg_To_Zero(size, reg, flags);
1363 if (emitter::emitIns_valid_imm_for_mov(imm, size))
1365 getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
1369 // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
1370 // There are three forms
1371 // movk which loads into any halfword preserving the remaining halfwords
1372 // movz which loads into any halfword zeroing the remaining halfwords
1373 // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register
1374 // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords
1377 // Determine whether movn or movz will require the fewest instructions to populate the immediate
// Walk the halfwords from the most significant one down: each 0xffff halfword votes for movn,
// each 0x0000 halfword votes for movz. A positive total selects movn below.
1380 for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
1382 if (uint16_t(imm >> i) == 0xffff)
1383 ++preferMovn; // a single movk 0xffff could be skipped if movn was used
1384 else if (uint16_t(imm >> i) == 0x0000)
1385 --preferMovn; // a single movk 0 could be skipped if movz was used
1388 // Select the first instruction. Any additional instruction will use movk
1389 instruction ins = (preferMovn > 0) ? INS_movn : INS_movz;
1391 // Initial movz or movn will fill the remaining bytes with the skipVal
1392 // This can allow skipping filling a halfword
1393 uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0;
1395 unsigned bits = (size == EA_8BYTE) ? 64 : 32;
1397 // Iterate over imm examining 16 bits at a time
1398 for (unsigned i = 0; i < bits; i += 16)
1400 uint16_t imm16 = uint16_t(imm >> i);
// Halfwords equal to skipVal are already produced by the initial movz/movn and need no instruction.
1402 if (imm16 != skipVal)
1404 if (ins == INS_movn)
1406 // For the movn case, we need to bitwise invert the immediate. This is because
1407 // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000)
// The loop variable i doubles as the left-shift amount that places imm16 in its halfword.
1411 getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL);
1413 // Once the initial movz/movn is emitted the remaining instructions will all use movk
1418 // We must emit a movn or movz or we have not done anything
1419 // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and
1420 // should not be in this else condition
1421 assert(ins == INS_movk);
1423 // The caller may have requested that the flags be set on this mov (rarely/never)
1424 if (flags == INS_FLAGS_SET)
1426 getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
// Record that reg now holds the constant imm.
1430 regTracker.rsTrackRegIntCns(reg, imm);
1433 /***********************************************************************************
1435 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
1436 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
1437 * genProduceReg() on the target register.
// genSetRegToConst: materializes the constant held by `tree` into targetReg, dispatching on
// tree->gtOper. Integer constants go through instGen_Set_Reg_To_Imm (when a relocation is
// needed) or genSetRegToIcon; double constants use movi for +0.0, fmov when the value is a
// valid fmov immediate, and otherwise a load from an emitted data-section constant.
// NOTE(review): the case labels of the switch are not visible in this excerpt.
1439 void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
1441 switch (tree->gtOper)
1445 // relocatable values tend to come down as a CNS_INT of native int type
1446 // so the line between these two opcodes is kind of blurry
1447 GenTreeIntConCommon* con = tree->AsIntConCommon();
1448 ssize_t cnsVal = con->IconValue();
1450 if (con->ImmedValNeedsReloc(compiler))
1452 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
// Mark the register contents as unknown to the register tracker after the relocatable load.
1453 regTracker.rsTrackRegTrash(targetReg);
1457 genSetRegToIcon(targetReg, cnsVal, targetType);
1464 emitter* emit = getEmitter();
1465 emitAttr size = emitActualTypeSize(tree);
1466 double constValue = tree->AsDblCon()->gtDconVal;
1468 // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
// The comparison is done on the raw 64-bit pattern, so -0.0 (sign bit set) does not match.
1469 if (*(__int64*)&constValue == 0)
1471 // A faster/smaller way to generate 0.0
1472 // We will just zero out the entire vector register for both float and double
1473 emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1475 else if (emitter::emitIns_valid_imm_for_fmov(constValue))
1477 // We can load the FP constant using the fmov FP-immediate for this constValue
1478 emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
1482 // Get a temp integer register to compute long address.
1483 regNumber addrReg = tree->GetSingleTempReg();
1485 // We must load the FP constant from the constant pool
1486 // Emit a data section constant for the float or double constant.
1487 CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size);
1488 // For long address (default): `adrp + ldr + fmov` will be emitted.
1489 // For short address (proven later), `ldr` will be emitted.
1490 emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0);
1500 // Generate code to get the high N bits of a N*N=2N bit multiplication result
//
// For 8-byte operands a single umulh/smulh produces the high half directly.
// For 4-byte operands a widening umull/smull is emitted and the result is then shifted
// right by 32 (lsr for unsigned, asr for signed) so the high half ends up in targetReg.
// The node must not be an overflow-checking multiply and must not be a floating-point type.
1501 void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
1503 assert(!treeNode->gtOverflowEx());
1505 genConsumeOperands(treeNode);
1507 regNumber targetReg = treeNode->gtRegNum;
1508 var_types targetType = treeNode->TypeGet();
1509 emitter* emit = getEmitter();
1510 emitAttr attr = emitActualTypeSize(treeNode);
// Non-zero when the node carries GTF_UNSIGNED; used below only as a boolean.
1511 unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED);
1513 GenTree* op1 = treeNode->gtGetOp1();
1514 GenTree* op2 = treeNode->gtGetOp2();
1516 assert(!varTypeIsFloating(targetType));
1518 // The arithmetic node must be sitting in a register (since it's not contained)
1519 assert(targetReg != REG_NA);
1521 if (EA_SIZE(attr) == EA_8BYTE)
1523 instruction ins = isUnsigned ? INS_umulh : INS_smulh;
1525 regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2);
1527 assert(r == targetReg);
1531 assert(EA_SIZE(attr) == EA_4BYTE);
1533 instruction ins = isUnsigned ? INS_umull : INS_smull;
1535 regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2);
// Move the upper 32 bits of the widened product down; the shift itself is an 8-byte operation.
1537 emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32);
1540 genProduceReg(treeNode);
1543 // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
1544 // This method is expected to have called genConsumeOperands() before calling it.
//
// The instruction is chosen by genGetInsForOper() from the node operator and type, emitted
// as a three-operand instruction via emitInsTernary(), and the result register is then
// published with genProduceReg().
1545 void CodeGen::genCodeForBinary(GenTree* treeNode)
1547 const genTreeOps oper = treeNode->OperGet();
1548 regNumber targetReg = treeNode->gtRegNum;
1549 var_types targetType = treeNode->TypeGet();
1550 emitter* emit = getEmitter();
1552 assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
1553 oper == GT_OR || oper == GT_XOR);
1555 GenTree* op1 = treeNode->gtGetOp1();
1556 GenTree* op2 = treeNode->gtGetOp2();
1557 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
// NOTE(review): most of the GTF_SET_FLAGS handling is not visible in this excerpt; only the
// failure path for an unexpected operator can be seen.
1559 if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0)
1573 noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
1577 // The arithmetic node must be sitting in a register (since it's not contained)
1578 assert(targetReg != REG_NA);
1580 regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
1581 assert(r == targetReg);
1583 genProduceReg(treeNode);
1586 //------------------------------------------------------------------------
1587 // genCodeForLclVar: Produce code for a GT_LCL_VAR node.
1590 // tree - the GT_LCL_VAR node
//
// A load from the local's stack home into tree->gtRegNum is emitted only when the local is
// not a register candidate and the node is not marked GTF_SPILLED; in the other cases this
// function emits nothing.
1592 void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
1594 var_types targetType = tree->TypeGet();
1595 emitter* emit = getEmitter();
1597 unsigned varNum = tree->gtLclNum;
1598 assert(varNum < compiler->lvaCount);
1599 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1600 bool isRegCandidate = varDsc->lvIsRegCandidate();
1602 // lcl_vars are not defs
1603 assert((tree->gtFlags & GTF_VAR_DEF) == 0);
1605 // If this is a register candidate that has been spilled, genConsumeReg() will
1606 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
1608 if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED))
1610 // targetType must be a normal scalar type and not a TYP_STRUCT
1611 assert(targetType != TYP_STRUCT);
1613 instruction ins = ins_Load(targetType);
1614 emitAttr attr = emitTypeSize(targetType);
// Floating-point loads keep the type size as-is; other loads let the emitter adjust the attribute.
1616 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1618 emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0);
1619 genProduceReg(tree);
1623 //------------------------------------------------------------------------
1624 // genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
1627 // tree - the GT_STORE_LCL_FLD node
//
// Stores the value of tree->gtOp1 to the stack location of local tree->gtLclNum at byte
// offset tree->gtLclOffs. The node itself must not have a target register. TYP_SIMD12
// stores are delegated to genStoreLclTypeSIMD12().
1629 void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
1631 var_types targetType = tree->TypeGet();
1632 regNumber targetReg = tree->gtRegNum;
1633 emitter* emit = getEmitter();
1634 noway_assert(targetType != TYP_STRUCT);
1637 // storing of TYP_SIMD12 (i.e. Vector3) field
1638 if (tree->TypeGet() == TYP_SIMD12)
1640 genStoreLclTypeSIMD12(tree);
1643 #endif // FEATURE_SIMD
1645 // record the offset
1646 unsigned offset = tree->gtLclOffs;
1648 // We must have a stack store with GT_STORE_LCL_FLD
1649 noway_assert(targetReg == REG_NA);
1651 unsigned varNum = tree->gtLclNum;
1652 assert(varNum < compiler->lvaCount);
1653 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1655 // Ensure that lclVar nodes are typed correctly.
1656 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1658 GenTree* data = tree->gtOp1;
1659 genConsumeRegs(data);
// Pick the source register: a contained integer constant must be zero, otherwise the data
// node supplies its own register.
// NOTE(review): the assignment of dataReg for the contained-zero case is not visible in this
// excerpt; presumably the zero register - confirm.
1661 regNumber dataReg = REG_NA;
1662 if (data->isContainedIntOrIImmed())
1664 assert(data->IsIntegralConst(0));
1669 assert(!data->isContained());
1670 dataReg = data->gtRegNum;
1672 assert(dataReg != REG_NA);
1674 instruction ins = ins_Store(targetType);
1676 emitAttr attr = emitTypeSize(targetType);
// Floating-point stores keep the type size as-is; other stores let the emitter adjust the attribute.
1678 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1680 emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
1682 genUpdateLife(tree);
// Record that the variable now lives on the stack.
1684 varDsc->lvRegNum = REG_STK;
1687 //------------------------------------------------------------------------
1688 // genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
1691 // tree - the GT_STORE_LCL_VAR node
//
// Stores the value of tree->gtOp1 into the local tree->gtLclNum. Multi-reg call sources and
// TYP_SIMD12 stores are delegated to their dedicated helpers. Otherwise the value is either
// written to the local's stack home (when the node has no target register) or copied into
// the target register.
1693 void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
1695 var_types targetType = tree->TypeGet();
1696 regNumber targetReg = tree->gtRegNum;
1697 emitter* emit = getEmitter();
1699 unsigned varNum = tree->gtLclNum;
1700 assert(varNum < compiler->lvaCount);
1701 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1703 // Ensure that lclVar nodes are typed correctly.
1704 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1706 GenTree* data = tree->gtOp1;
1708 // var = call, where call returns a multi-reg return value
1709 // case is handled separately.
1710 if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
1712 genMultiRegCallStoreToLocal(tree);
1717 // storing of TYP_SIMD12 (i.e. Vector3) field
1718 if (tree->TypeGet() == TYP_SIMD12)
1720 genStoreLclTypeSIMD12(tree);
1723 #endif // FEATURE_SIMD
1725 genConsumeRegs(data);
1727 regNumber dataReg = REG_NA;
1728 if (data->isContainedIntOrIImmed())
1730 // This is only possible for a zero-init.
1731 assert(data->IsIntegralConst(0));
// A SIMD zero-init is done directly in the target register with movi.
1733 if (varTypeIsSIMD(targetType))
1735 assert(targetReg != REG_NA);
1736 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1737 genProduceReg(tree);
1745 assert(!data->isContained());
1746 dataReg = data->gtRegNum;
1748 assert(dataReg != REG_NA);
1750 if (targetReg == REG_NA) // store into stack based LclVar
1752 inst_set_SV_var(tree);
1754 instruction ins = ins_Store(targetType);
1755 emitAttr attr = emitTypeSize(targetType);
// Floating-point stores keep the type size as-is; other stores let the emitter adjust the attribute.
1757 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1759 emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
1761 genUpdateLife(tree);
// Record that the variable now lives on the stack.
1763 varDsc->lvRegNum = REG_STK;
1765 else // store into register (i.e move into register)
1767 if (dataReg != targetReg)
1769 // Assign into targetReg when dataReg (from op1) is not the same register
1770 inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
1772 genProduceReg(tree);
1777 //------------------------------------------------------------------------
1778 // genSimpleReturn: Generates code for simple return statement for arm64.
1780 // Note: treeNode's and op1's registers are already consumed.
1783 // treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type
//
// The return register is REG_FLOATRET for floating-point nodes and REG_INTRET otherwise.
// A mov from op1's register into the return register is emitted with the actual-type size
// of the node's type.
// NOTE(review): the condition guarding the final mov is not visible in this excerpt;
// presumably it is movRequired - confirm.
1788 void CodeGen::genSimpleReturn(GenTree* treeNode)
1790 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
1791 GenTree* op1 = treeNode->gtGetOp1();
1792 var_types targetType = treeNode->TypeGet();
1794 assert(!isStructReturn(treeNode));
1795 assert(targetType != TYP_VOID);
1797 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
// Start by assuming a move is needed only when op1 is not already in the return register.
1799 bool movRequired = (op1->gtRegNum != retReg);
1803 if (op1->OperGet() == GT_LCL_VAR)
1805 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
1806 bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
1807 if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
1809 // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
1811 unsigned lclNum = lcl->gtLclNum;
1812 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1813 var_types op1Type = genActualType(op1->TypeGet());
1814 var_types lclType = genActualType(varDsc->TypeGet());
// The node reads the local at a narrower type than the local itself has.
// NOTE(review): the body of this branch is not visible in this excerpt.
1816 if (genTypeSize(op1Type) < genTypeSize(lclType))
1825 emitAttr attr = emitActualTypeSize(targetType);
1826 getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum);
1830 /***********************************************************************************************
1831 * Generate code for localloc
1833 void CodeGen::genLclHeap(GenTree* tree)
1835 assert(tree->OperGet() == GT_LCLHEAP);
1837 GenTree* size = tree->gtOp.gtOp1;
1838 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
1840 regNumber targetReg = tree->gtRegNum;
1841 regNumber regCnt = REG_NA;
1842 regNumber pspSymReg = REG_NA;
1843 var_types type = genActualType(size->gtType);
1844 emitAttr easz = emitTypeSize(type);
1845 BasicBlock* endLabel = nullptr;
1846 BasicBlock* loop = nullptr;
1847 unsigned stackAdjustment = 0;
1851 if (compiler->opts.compStackCheckOnRet)
1853 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
1854 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
1855 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
1856 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
1858 BasicBlock* esp_check = genCreateTempLabel();
1859 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1860 inst_JMP(jmpEqual, esp_check);
1861 getEmitter()->emitIns(INS_bkpt);
1862 genDefineTempLabel(esp_check);
1866 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
1867 noway_assert(genStackLevel == 0); // Can't have anything on the stack
1869 // Whether method has PSPSym.
1871 #if FEATURE_EH_FUNCLETS
1872 hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
1877 // compute the amount of memory to allocate to properly STACK_ALIGN.
1879 if (size->IsCnsIntOrI())
1881 // If size is a constant, then it must be contained.
1882 assert(size->isContained());
1884 // If amount is zero then return null in targetReg
1885 amount = size->gtIntCon.gtIconVal;
1888 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
1892 // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
1893 amount = AlignUp(amount, STACK_ALIGN);
1897 // If 0 bail out by returning null in targetReg
1898 genConsumeRegAndCopy(size, targetReg);
1899 endLabel = genCreateTempLabel();
1900 getEmitter()->emitIns_R_R(INS_tst, easz, targetReg, targetReg);
1901 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1902 inst_JMP(jmpEqual, endLabel);
1904 // Compute the size of the block to allocate and perform alignment.
1905 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt,
1906 // since we don't need any internal registers.
1907 if (!hasPspSym && compiler->info.compInitMem)
1909 assert(tree->AvailableTempRegCount() == 0);
1914 regCnt = tree->ExtractTempReg();
1915 if (regCnt != targetReg)
1917 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
1921 // Align to STACK_ALIGN
1922 // regCnt will be the total number of bytes to localloc
1923 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
1924 inst_RV_IV(INS_and, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
1927 stackAdjustment = 0;
1928 #if FEATURE_EH_FUNCLETS
1929 // If we have PSPsym, then need to re-locate it after localloc.
1932 stackAdjustment += STACK_ALIGN;
1934 // Save a copy of PSPSym
1935 pspSymReg = tree->ExtractTempReg();
1936 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
1940 #if FEATURE_FIXED_OUT_ARGS
1941 // If we have an outgoing arg area then we must adjust the SP by popping off the
1942 // outgoing arg area. We will restore it right before we return from this method.
1944 // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
1945 // are the cases that needs to be handled:
1946 // i) Method has PSPSym + out-going arg area.
1947 // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
1948 // Therefore, we will pop-off RSP upto out-going arg area before locallocating.
1949 // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
1950 // ii) Method has no PSPSym but out-going arg area.
1951 // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed.
1952 // iii) Method has PSPSym but no out-going arg area.
1953 // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded.
1954 // iv) Method has neither PSPSym nor out-going arg area.
1955 // Nothing needs to be popped off from stack nor relocated.
1956 if (compiler->lvaOutgoingArgSpaceSize > 0)
1958 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
1960 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
1961 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
1965 if (size->IsCnsIntOrI())
1967 // We should reach here only for non-zero, constant size allocations.
1970 // For small allocations we will generate up to four stp instructions
1971 size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
1972 if (cntStackAlignedWidthItems <= 4)
1974 while (cntStackAlignedWidthItems != 0)
1976 // We can use pre-indexed addressing.
1977 // stp ZR, ZR, [SP, #-16]!
1978 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
1979 cntStackAlignedWidthItems -= 1;
1984 else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
1986 // Since the size is a page or less, simply adjust the SP value
1987 // The SP might already be in the guard page, must touch it BEFORE
1988 // the alloc, not after.
1990 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0);
1992 inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
1997 // else, "mov regCnt, amount"
1998 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt.
1999 // Since size is a constant, regCnt is not yet initialized.
2000 assert(regCnt == REG_NA);
2001 if (!hasPspSym && compiler->info.compInitMem)
2003 assert(tree->AvailableTempRegCount() == 0);
2008 regCnt = tree->ExtractTempReg();
2010 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
2013 if (compiler->info.compInitMem)
2015 BasicBlock* loop = genCreateTempLabel();
2017 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
2018 // Since we have to zero out the allocated memory AND ensure that RSP is always valid
2019 // by tickling the pages, we will just push 0's on the stack.
2021 // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
2022 // and localloc size is a multiple of STACK_ALIGN.
2025 genDefineTempLabel(loop);
2027 // We can use pre-indexed addressing.
2028 // stp ZR, ZR, [SP, #-16]!
2029 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
2031 // If not done, loop
2032 // Note that regCnt is the number of bytes to stack allocate.
2033 // Therefore we need to subtract 16 from regcnt here.
2034 assert(genIsValidIntReg(regCnt));
2035 inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
2036 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2037 inst_JMP(jmpNotEqual, loop);
2041 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
2043 // We don't need to zero out the allocated memory. However, we do have
2044 // to tickle the pages to ensure that SP is always valid and is
2045 // in sync with the "stack guard page". Note that in the worst
2046 // case SP is on the last byte of the guard page. Thus you must
2047 // touch SP-0 first not SP-0x1000.
2049 // Another subtlety is that you don't want SP to be exactly on the
2050 // boundary of the guard page because PUSH is predecrement, thus
2051 // call setup would not touch the guard page but just beyond it
2053 // Note that we go through a few hoops so that SP never points to
2054 // illegal pages at any time during the tickling process
2056 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2057 // bvc Loop // result is smaller than original SP (no wrap around)
2058 // mov regCnt, #0 // Overflow, pick lowest possible value
2061 // ldr wzr, [SP + 0] // tickle the page - read from the page
2062 // sub regTmp, SP, PAGE_SIZE // decrement SP by eeGetPageSize()
2063 // cmp regTmp, regCnt
2073 regNumber regTmp = tree->GetSingleTempReg();
2075 BasicBlock* loop = genCreateTempLabel();
2076 BasicBlock* done = genCreateTempLabel();
2078 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2079 getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
2081 inst_JMP(EJ_vc, loop); // branch if the V flag is not set
2083 // Overflow, set regCnt to lowest possible value
2084 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
2086 genDefineTempLabel(loop);
2088 // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
2089 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
2091 // decrement SP by eeGetPageSize()
2092 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
2094 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
2095 emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
2096 inst_JMP(jmpLTU, done);
2098 // Update SP to be at the next page of stack that we will tickle
2099 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp);
2101 // Jump to loop and tickle new stack address
2102 inst_JMP(EJ_jmp, loop);
2104 // Done with stack tickle loop
2105 genDefineTempLabel(done);
2107 // Now just move the final value to SP
2108 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
2112 // Re-adjust SP to allocate PSPSym and out-going arg area
2113 if (stackAdjustment != 0)
2115 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2116 assert(stackAdjustment > 0);
2117 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
2119 #if FEATURE_EH_FUNCLETS
2120 // Write PSPSym to its new location.
2123 assert(genIsValidIntReg(pspSymReg));
2124 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
2127 // Return the stackalloc'ed address in result register.
2128 // TargetReg = RSP + stackAdjustment.
2130 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment);
2132 else // stackAdjustment == 0
2134 // Move the final value of SP to targetReg
2135 inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
2139 if (endLabel != nullptr)
2140 genDefineTempLabel(endLabel);
2142 // Write the lvaLocAllocSPvar stack frame slot
2143 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
2145 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
2149 if (compiler->opts.compNeedStackProbes)
2151 genGenerateStackProbe();
2157 if (compiler->opts.compStackCheckOnRet)
2159 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2160 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2161 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2162 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0);
2166 genProduceReg(tree);
2169 //------------------------------------------------------------------------
2170 // genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
//
// Arguments:
//    tree - the GT_NEG or GT_NOT node (asserted below). GT_NOT must not have a
//           floating-point type.
//
// Notes:
//    The node must not be contained and must have a target register, and its operand
//    must not be contained either. One two-register instruction, selected by
//    genGetInsForOper from the operator and the node's type, is emitted.
2175 void CodeGen::genCodeForNegNot(GenTree* tree)
2177 assert(tree->OperIs(GT_NEG, GT_NOT));
2179 var_types targetType = tree->TypeGet();
// GT_NOT is only accepted for non-floating-point types.
2181 assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
2183 regNumber targetReg = tree->gtRegNum;
// Pick the instruction from the node's operator and its result type.
2184 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
2186 // The arithmetic node must be sitting in a register (since it's not contained)
2187 assert(!tree->isContained());
2188 // The dst can only be a register.
2189 assert(targetReg != REG_NA);
2191 GenTree* operand = tree->gtGetOp1();
2192 assert(!operand->isContained());
2193 // The src must be a register.
2194 regNumber operandReg = genConsumeReg(operand);
// Emit "ins targetReg, operandReg", sized by the node's actual type.
2196 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg);
2198 genProduceReg(tree);
2201 //------------------------------------------------------------------------
2202 // genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
2203 // (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
2204 // (2) float/double MOD is morphed into a helper call by front-end.
//
// Arguments:
//    tree - the GT_DIV or GT_UDIV node (asserted below).
//
// Notes:
//    Floating-point divides are handed straight to genCodeForBinary. For integer
//    divides, a divisor that is the constant zero becomes an unconditional jump to the
//    SCK_DIV_BY_ZERO throw helper; otherwise the runtime checks below are emitted ahead
//    of the divide instruction produced by genCodeForBinary.
2209 void CodeGen::genCodeForDivMod(GenTreeOp* tree)
2211 assert(tree->OperIs(GT_DIV, GT_UDIV));
2213 var_types targetType = tree->TypeGet();
2214 emitter* emit = getEmitter();
2216 genConsumeOperands(tree);
2218 if (varTypeIsFloating(targetType))
2220 // Floating point divide never raises an exception
2221 genCodeForBinary(tree);
2223 else // an integer divide operation
2225 GenTree* divisorOp = tree->gtGetOp2();
// Operand size used by the compare/adds checks below, derived from the node's actual type.
2226 emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet())));
2228 if (divisorOp->IsIntegralConst(0))
2230 // We unconditionally throw a divide by zero exception
2231 genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
2233 // We still need to call genProduceReg
2234 genProduceReg(tree);
2236 else // the divisor is not the constant zero
2238 regNumber divisorReg = divisorOp->gtRegNum;
2240 // Generate the required runtime checks for GT_DIV or GT_UDIV
2241 if (tree->gtOper == GT_DIV)
// Label placed immediately before the divide; the checks below branch here when no
// exception can occur.
2243 BasicBlock* sdivLabel = genCreateTempLabel();
2245 // Two possible exceptions:
2246 // (AnyVal / 0) => DivideByZeroException
2247 // (MinInt / -1) => ArithmeticException
// Cleared below when the divisor is a constant other than -1.
// NOTE(review): presumably this flag guards the (MinInt / -1) check that follows - confirm.
2249 bool checkDividend = true;
2251 // Do we have an immediate for the 'divisorOp'?
2253 if (divisorOp->IsCnsIntOrI())
2255 GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
2256 ssize_t intConstValue = intConstTree->IconValue();
2257 assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
2258 if (intConstValue != -1)
2260 checkDividend = false; // We statically know that the divisor is not -1
2263 else // insert check for division by zero
2265 // Check if the divisor is zero throw a DivideByZeroException
2266 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2267 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2268 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
2273 // Check if the divisor is not -1 branch to 'sdivLabel'
2274 emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
2276 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2277 inst_JMP(jmpNotEqual, sdivLabel);
2278 // If control flow continues past here the 'divisorReg' is known to be -1
2280 regNumber dividendReg = tree->gtGetOp1()->gtRegNum;
2281 // At this point the divisor is known to be -1
2283 // Issue the 'adds zr, dividendReg, dividendReg' instruction
2284 // this will set both the Z and V flags only when dividendReg is MinInt
2286 emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
2287 inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
2288 genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set, throw
2289 // ArithmeticException
2291 genDefineTempLabel(sdivLabel);
2293 genCodeForBinary(tree); // Generate the sdiv instruction
2295 else // (tree->gtOper == GT_UDIV)
2297 // Only one possible exception
2298 // (AnyVal / 0) => DivideByZeroException
2300 // Note that division by the constant 0 was already checked for above by the
2301 // op2->IsIntegralConst(0) check
2303 if (!divisorOp->IsCnsIntOrI())
2305 // divisorOp is not a constant, so it could be zero
2307 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2308 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2309 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
// Generate the unsigned divide instruction.
2311 genCodeForBinary(tree);
2317 // Generate code for InitBlk by performing a loop unroll
// Preconditions:
2319 // a) Both the size and fill byte value are integer constants.
2320 // b) The size of the struct to initialize is no larger than INITBLK_UNROLL_LIMIT bytes
//    (asserted below as size <= INITBLK_UNROLL_LIMIT).
//
// Arguments:
//    initBlkNode - the block-init node; supplies the size, the destination address
//                  and the fill value.
//
// Notes:
//    The fill value is stored from REG_ZR when it is the constant zero, otherwise from
//    the register holding the init value. The bulk is written with stp (16 bytes at a
//    time) and any remainder with str/strh/strb, selected by the low bits of 'size'.
2321 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
2323 // Make sure we got the arguments of the initblk/initobj operation in the right registers
2324 unsigned size = initBlkNode->Size();
2325 GenTree* dstAddr = initBlkNode->Addr();
2326 GenTree* initVal = initBlkNode->Data();
// Look through an init-value wrapper node to reach the actual value operand.
2327 if (initVal->OperIsInitVal())
2329 initVal = initVal->gtGetOp1();
2332 assert(dstAddr->isUsedFromReg());
// Either the value is in a register, or it is the constant zero.
2333 assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0));
2335 assert(size <= INITBLK_UNROLL_LIMIT);
2337 emitter* emit = getEmitter();
2339 genConsumeOperands(initBlkNode);
2341 if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
2343 // issue a full memory barrier before a volatile initBlockUnroll operation
2344 instGen_MemoryBarrier();
// A constant-zero fill is stored directly from the zero register.
2347 regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum;
2349 assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR));
// Running byte offset from the destination address for the next store.
2351 unsigned offset = 0;
2353 // Perform an unroll using stp.
2354 if (size >= 2 * REGSIZE_BYTES)
2356 // Determine how many 16 byte slots
2357 size_t slots = size / (2 * REGSIZE_BYTES);
// NOTE(review): presumably the stp/offset pair below is repeated once per slot; the loop
// construct is not visible here - confirm.
2361 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset);
2362 offset += (2 * REGSIZE_BYTES);
2366 // Fill the remainder (15 bytes or less) if there's any.
2367 if ((size & 0xf) != 0)
// Each set bit of the remainder selects one store of that width (8, 4, 2, then 1 byte).
2369 if ((size & 8) != 0)
2371 emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
2374 if ((size & 4) != 0)
2376 emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2379 if ((size & 2) != 0)
2381 emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
2384 if ((size & 1) != 0)
2386 emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
2391 // Generate code for a load pair from some address + offset
// dst, dst2: the two destination registers of the ldp
2392 // base: tree node which can be either a local address or arbitrary node
2393 // offset: distance from the base from which to load
2394 void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset)
2396 emitter* emit = getEmitter();
2398 if (base->OperIsLocalAddr())
// A local-field address contributes its own field offset on top of 'offset'.
2400 if (base->gtOper == GT_LCL_FLD_ADDR)
2401 offset += base->gtLclFld.gtLclOffs;
// Local base: load the pair from the local variable's stack slot.
2403 emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset);
// Form that addresses through the register holding 'base'.
2407 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset);
2411 // Generate code for a store pair to some address + offset
// src, src2: the two source registers of the stp
2412 // base: tree node which can be either a local address or arbitrary node
2413 // offset: distance from the base at which to store
2414 void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset)
2416 emitter* emit = getEmitter();
2418 if (base->OperIsLocalAddr())
// A local-field address contributes its own field offset on top of 'offset'.
2420 if (base->gtOper == GT_LCL_FLD_ADDR)
2421 offset += base->gtLclFld.gtLclOffs;
// Local base: store the pair into the local variable's stack slot.
2423 emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset);
// Form that addresses through the register holding 'base'.
2427 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset);
2431 // Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
2433 // For this case we'll generate a sequence of loads/stores in the case of struct
2434 // slots that don't contain GC pointers. The generated code will look like:
2435 // ldr tempReg, [R13, #8]
2436 // str tempReg, [R14, #8]
2438 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
2439 // who happens to use the same registers as the previous call to maintain
2440 // the same register requirements and register killsets:
2441 // bl CORINFO_HELP_ASSIGN_BYREF
2443 // So finally an example would look like this:
2444 // ldr tempReg, [R13, #8]
2445 // str tempReg, [R14, #8]
2446 // bl CORINFO_HELP_ASSIGN_BYREF
2447 // ldr tempReg, [R13, #8]
2448 // str tempReg, [R14, #8]
2449 // bl CORINFO_HELP_ASSIGN_BYREF
2450 // ldr tempReg, [R13, #8]
2451 // str tempReg, [R14, #8]
//
// Arguments:
//    cpObjNode - the CpObj node; supplies the destination address, the (contained)
//                source, the slot count and the per-slot GC layout.
2452 void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
2454 GenTree* dstAddr = cpObjNode->Addr();
2455 GenTree* source = cpObjNode->Data();
// Type used when marking the source address register for GC; replaced by the actual
// address type when the source is an indirection.
2456 var_types srcAddrType = TYP_BYREF;
2457 bool sourceIsLocal = false;
2459 assert(source->isContained());
2460 if (source->gtOper == GT_IND)
2462 GenTree* srcAddr = source->gtGetOp1();
2463 assert(!srcAddr->isContained());
2464 srcAddrType = srcAddr->TypeGet();
// A source that is not an indirection must be a local.
2468 noway_assert(source->IsLocal());
2469 sourceIsLocal = true;
// True when the destination is a local address.
2472 bool dstOnStack = dstAddr->OperIsLocalAddr();
2475 assert(!dstAddr->isContained());
2477 // This GenTree node has data about GC pointers, this means we're dealing
// with a copy that includes at least one GC pointer slot (asserted next).
2479 assert(cpObjNode->gtGcPtrCount > 0);
2482 // Consume the operands and get them into the right registers.
2483 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
2484 genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
2485 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
2486 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
2488 unsigned slots = cpObjNode->gtSlots;
2490 // Temp register(s) used to perform the sequence of loads and stores.
2491 regNumber tmpReg = cpObjNode->ExtractTempReg();
2492 regNumber tmpReg2 = REG_NA;
// The temp must be an integer register distinct from both write-barrier byref registers.
2494 assert(genIsValidIntReg(tmpReg));
2495 assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
2496 assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);
// Second temp, needed by the ldp/stp sequences below.
// NOTE(review): presumably only requested when more than one slot is copied - confirm.
2500 tmpReg2 = cpObjNode->GetSingleTempReg();
2501 assert(tmpReg2 != tmpReg);
2502 assert(genIsValidIntReg(tmpReg2));
2503 assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
2504 assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
2507 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2509 // issue a full memory barrier before a volatile CpObj operation
2510 instGen_MemoryBarrier();
2513 emitter* emit = getEmitter();
// Per-slot GC layout of the struct being copied.
2515 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
2517 // If we can prove it's on the stack we don't need to use the write barrier.
// In this path each load/store is sized by the GC type of its slot and uses
// post-indexed addressing, so both byref registers advance as slots are copied.
2521 // Check if two or more remaining slots and use a ldp/stp sequence
2522 while (i < slots - 1)
2524 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2525 emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1]));
2527 emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE,
2528 INS_OPTS_POST_INDEX, attr1);
2529 emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE,
2530 INS_OPTS_POST_INDEX, attr1);
2534 // Use a ldr/str sequence for the last remainder
2537 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2539 emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2540 INS_OPTS_POST_INDEX);
2541 emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2542 INS_OPTS_POST_INDEX);
// Path that uses the write barrier helper: non-GC slots are copied with ldr/str (or
// ldp/stp when the next slot is also non-GC); GC-pointer slots go through
// CORINFO_HELP_ASSIGN_BYREF.
// NOTE(review): presumably gcPtrCount is decremented once per helper call so that the
// final assert holds; the decrement is not visible here - confirm.
2547 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
2555 // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
2556 if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE))
2558 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2559 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2560 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
2561 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2562 ++i; // extra increment of i, since we are copying two items
2566 emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2567 INS_OPTS_POST_INDEX);
2568 emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2569 INS_OPTS_POST_INDEX);
2574 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
2575 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
// Every GC-pointer slot must have been accounted for.
2582 assert(gcPtrCount == 0);
2585 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2587 // issue a INS_BARRIER_ISHLD after a volatile CpObj operation
2588 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2591 // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
2592 // While we normally update GC info prior to the last instruction that uses them,
2593 // these actually live into the helper call.
2594 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
2597 // generate code do a switch statement based on a table of ip-relative offsets
// treeNode: the switch node; op1 supplies the index register and op2 the register
//           holding the table base. One internal temp register is also required.
2598 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
2600 genConsumeOperands(treeNode->AsOp());
2601 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
2602 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
2604 regNumber tmpReg = treeNode->GetSingleTempReg();
2606 // load the ip-relative offset (which is relative to start of fgFirstBB)
// The 4-byte table entry is loaded into baseReg itself, overwriting the table base.
2607 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
2609 // add it to the absolute address of fgFirstBB
// fgFirstBB is flagged as a jump target because its address is taken by the adr below.
2610 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
2611 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
2612 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg);
// Branch to the computed absolute target address.
2615 getEmitter()->emitIns_R(INS_br, emitActualTypeSize(TYP_I_IMPL), baseReg);
2618 // emits the table and an instruction to get the address of the first element
// treeNode: the GT_JMPTABLE node (asserted below); the current block must end in a
//           switch. The table's address is produced in the node's register.
2619 void CodeGen::genJumpTable(GenTree* treeNode)
2621 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
2622 assert(treeNode->OperGet() == GT_JMPTABLE);
// Number of switch targets and the array of target blocks, taken from the current block.
2624 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
2625 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
2626 unsigned jmpTabOffs;
2627 unsigned jmpTabBase;
// Begin a data section sized for jumpCount table entries.
2629 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
2633 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
// Emit one table entry per switch target, in order.
2635 for (unsigned i = 0; i < jumpCount; i++)
2637 BasicBlock* target = *jumpTable++;
// Every switch target is expected to already be flagged as a jump target.
2638 noway_assert(target->bbFlags & BBF_JMP_TARGET);
2640 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
2642 getEmitter()->emitDataGenData(i, target);
2645 getEmitter()->emitDataGenEnd();
2647 // Access to inline data is 'abstracted' by a special type of static member
2648 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
2649 // to constant data, not a real static field.
2650 getEmitter()->emitIns_R_C(INS_adr, emitActualTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA,
2651 compiler->eeFindJitDataOffs(jmpTabBase), 0);
2652 genProduceReg(treeNode);
2655 // generate code for the locked operations:
2656 // GT_LOCKADD, GT_XCHG, GT_XADD
//
// treeNode: the atomic node; op1 is the address and op2 is the data operand.
//
// The operation is implemented as a load-exclusive / store-exclusive retry loop
// followed by a memory barrier. One internal integer register receives the
// store-exclusive status; a second is extracted for the value to store unless the
// operation is GT_XCHG, which stores the data register directly.
2657 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
2659 GenTree* data = treeNode->gtOp.gtOp2;
2660 GenTree* addr = treeNode->gtOp.gtOp1;
2661 regNumber targetReg = treeNode->gtRegNum;
2662 regNumber dataReg = data->gtRegNum;
2663 regNumber addrReg = addr->gtRegNum;
// Receives the status result of the store-exclusive.
2665 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2666 regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
// When the node produces no value, the loaded value goes into storeDataReg instead.
2667 regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
2669 // Check allocator assumptions
2671 // The register allocator should have extended the lifetimes of all input and internal registers so that
2672 // none interfere with the target.
2673 noway_assert(addrReg != targetReg);
2675 noway_assert(addrReg != loadReg);
2676 noway_assert(dataReg != loadReg);
2678 noway_assert(addrReg != storeDataReg);
2679 noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg));
2681 assert(addr->isUsedFromReg());
2682 noway_assert(exResultReg != REG_NA);
2683 noway_assert(exResultReg != targetReg);
// GT_XCHG must have a target register.
2684 noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG));
2686 // Store exclusive unpredictable cases must be avoided
2687 noway_assert(exResultReg != storeDataReg);
2688 noway_assert(exResultReg != addrReg);
2690 genConsumeAddress(addr);
2691 genConsumeRegs(data);
2693 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
2694 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2695 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2696 // we are finished generating the code for this node.
2698 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2700 // TODO-ARM64-CQ Use ARMv8.1 atomics if available
2701 // https://github.com/dotnet/coreclr/issues/11881
2703 // Emit code like this:
2705 // ldaxr loadReg, [addrReg]
2706 // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD
2707 // # GT_XCHG storeDataReg === dataReg
2708 // stlxr exResult, storeDataReg, [addrReg]
2709 // cbnz exResult, retry
// The retry label is defined at the load-exclusive so a failed store restarts the sequence.
2712 BasicBlock* labelRetry = genCreateTempLabel();
2713 genDefineTempLabel(labelRetry);
2715 emitAttr dataSize = emitActualTypeSize(data);
2717 // The following instruction includes a acquire half barrier
2718 getEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg);
// Compute the value to store, depending on the operator.
2720 switch (treeNode->OperGet())
2724 if (data->isContainedIntOrIImmed())
2726 // Even though INS_add is specified here, the encoder will choose either
2727 // an INS_add or an INS_sub and encode the immediate as a positive value
2728 genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(),
2733 getEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg);
// Exchange stores the data register as-is.
2737 assert(!data->isContained());
2738 storeDataReg = dataReg;
2744 // The following instruction includes a release half barrier
2745 getEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg);
// A non-zero status means the exclusive store did not succeed: retry.
2747 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
2749 instGen_MemoryBarrier(INS_BARRIER_ISH);
// The address register no longer needs to be reported as a GC pointer.
2751 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2753 if (treeNode->gtRegNum != REG_NA)
2755 genProduceReg(treeNode);
2759 //------------------------------------------------------------------------
2760 // genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
//
// Arguments:
2763 // tree - the GT_CMPXCHG node
//
// Notes:
//    Implemented as a load-exclusive / compare / store-exclusive retry loop followed by
//    a memory barrier. The value loaded from the address is left in the target register.
2765 void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
2767 assert(treeNode->OperIs(GT_CMPXCHG));
2769 GenTree* addr = treeNode->gtOpLocation; // arg1
2770 GenTree* data = treeNode->gtOpValue; // arg2
2771 GenTree* comparand = treeNode->gtOpComparand; // arg3
2773 regNumber targetReg = treeNode->gtRegNum;
2774 regNumber dataReg = data->gtRegNum;
2775 regNumber addrReg = addr->gtRegNum;
2776 regNumber comparandReg = comparand->gtRegNum;
// Receives the status result of the store-exclusive.
2777 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2779 // Check allocator assumptions
2781 // The register allocator should have extended the lifetimes of all input and internal registers so that
2782 // none interfere with the target.
2783 noway_assert(addrReg != targetReg);
2784 noway_assert(dataReg != targetReg);
2785 noway_assert(comparandReg != targetReg);
2786 noway_assert(addrReg != dataReg);
2787 noway_assert(targetReg != REG_NA);
2788 noway_assert(exResultReg != REG_NA);
2789 noway_assert(exResultReg != targetReg);
2791 assert(addr->isUsedFromReg());
2792 assert(data->isUsedFromReg());
// The comparand is either in a register or a contained immediate (handled below).
2793 assert(!comparand->isUsedFromMemory());
2795 // Store exclusive unpredictable cases must be avoided
2796 noway_assert(exResultReg != dataReg);
2797 noway_assert(exResultReg != addrReg);
2799 genConsumeAddress(addr);
2800 genConsumeRegs(data);
2801 genConsumeRegs(comparand);
2803 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
2804 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2805 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2806 // we are finished generating the code for this node.
2808 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2810 // TODO-ARM64-CQ Use ARMv8.1 atomics if available
2811 // https://github.com/dotnet/coreclr/issues/11881
2813 // Emit code like this:
2815 // ldaxr targetReg, [addrReg]
2816 // cmp targetReg, comparandReg
2818 // stlxr exResult, dataReg, [addrReg]
2819 // cbnz exResult, retry
// labelRetry marks the load-exclusive; labelCompareFail is the exit taken when the
// loaded value does not match the comparand (the store is skipped).
2823 BasicBlock* labelRetry = genCreateTempLabel();
2824 BasicBlock* labelCompareFail = genCreateTempLabel();
2825 genDefineTempLabel(labelRetry);
2827 // The following instruction includes a acquire half barrier
2828 getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg);
2830 if (comparand->isContainedIntOrIImmed())
// A zero comparand needs no cmp: cbnz exits directly when the loaded value is non-zero.
2832 if (comparand->IsIntegralConst(0))
2834 getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg);
2838 getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg,
2839 comparand->AsIntConCommon()->IconValue());
2840 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
// Register comparand: compare against comparandReg.
2845 getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg);
2846 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
2849 // The following instruction includes a release half barrier
2850 getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg);
// A non-zero status means the exclusive store did not succeed: retry.
2852 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
2854 genDefineTempLabel(labelCompareFail);
2856 instGen_MemoryBarrier(INS_BARRIER_ISH);
// The address register no longer needs to be reported as a GC pointer.
2858 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2860 genProduceReg(treeNode);
//------------------------------------------------------------------------
// genGetInsForOper: Select the instruction used for a given operator and type.
//
// Arguments:
//    oper - the tree operator to generate code for
//    type - the type involved; floating-point types are handled separately from
//           all other types
//
// Return Value:
//    NOTE(review): presumably the selected instruction ('ins'); the return statement and
//    the operator-to-instruction mapping are not visible here - confirm.
//
2863 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
// Starts out as INS_brk.
2865 instruction ins = INS_brk;
2867 if (varTypeIsFloating(type))
// Reached for a floating-point operator with no handling.
2888 NYI("Unhandled oper in genGetInsForOper() - float");
// Reached for a non-floating-point operator with no handling.
2941 NYI("Unhandled oper in genGetInsForOper() - integer");
2949 //------------------------------------------------------------------------
2950 // genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
//
// Arguments:
2953 // tree - the GT_RETURNTRAP node
//
// Notes:
//    Emits a 4-byte compare of the operand against zero, a branch around the helper
//    call when it is equal, and the call to CORINFO_HELP_STOP_FOR_GC.
2955 void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
2957 assert(tree->OperGet() == GT_RETURNTRAP);
2959 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
2960 // based on the contents of 'data'
2962 GenTree* data = tree->gtOp1;
2963 genConsumeRegs(data);
2964 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
// Skip the helper call when 'data' compares equal to zero.
2966 BasicBlock* skipLabel = genCreateTempLabel();
2968 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2969 inst_JMP(jmpEqual, skipLabel);
2970 // emit the call to the EE-helper that stops for GC (or other reasons)
2972 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
2973 genDefineTempLabel(skipLabel);
2976 //------------------------------------------------------------------------
2977 // genCodeForStoreInd: Produce code for a GT_STOREIND node.
//
// Arguments:
2980 // tree - the GT_STOREIND node
//
// Notes:
//    When gcIsWriteBarrierCandidate reports that a barrier is needed, the address
//    and data are moved into the write barrier registers and genGCWriteBarrier is
//    called. Otherwise a normal store is emitted, with extra handling for
//    volatile stores.
//
2982 void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
2984 GenTree* data = tree->Data();
2985 GenTree* addr = tree->Addr();
2986 var_types targetType = tree->TypeGet();
2987 emitter* emit = getEmitter();
2988 emitAttr attr = emitTypeSize(tree);
2989 instruction ins = ins_Store(targetType);
2992 // Storing Vector3 of size 12 bytes through indirection
2993 if (tree->TypeGet() == TYP_SIMD12)
2995 genStoreIndTypeSIMD12(tree);
2998 #endif // FEATURE_SIMD
3000 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
3001 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
3003 // data and addr must be in registers.
3004 // Consume both registers so that any copies of interfering
3005 // registers are taken care of.
3006 genConsumeOperands(tree);
3008 // At this point, we should not have any interference.
3009 // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
3010 // as that is where 'addr' must go.
3011 noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
3013 // 'addr' goes into x14 (REG_WRITE_BARRIER_DST)
3014 genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST);
3016 // 'data' goes into x15 (REG_WRITE_BARRIER_SRC)
3017 genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC);
3019 genGCWriteBarrier(tree, writeBarrierForm);
3021 else // A normal store, not a WriteBarrier store
// NOTE(review): dataIsUnary and nonRMWsrc are not referenced again in the
// code shown here - confirm whether they are still needed.
3023 bool dataIsUnary = false;
3024 GenTree* nonRMWsrc = nullptr;
3025 // We must consume the operands in the proper execution order,
3026 // so that liveness is updated appropriately.
3027 genConsumeAddress(addr);
3029 if (!data->isContained())
3031 genConsumeRegs(data);
3034 regNumber dataReg = REG_NA;
3035 if (data->isContainedIntOrIImmed())
// The only contained constant expected here is an integral zero.
3037 assert(data->IsIntegralConst(0));
3040 else // data is not contained, so evaluate it into a register
3042 assert(!data->isContained());
3043 dataReg = data->gtRegNum;
// A one-byte store must not carry the unaligned flag.
3046 assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED));
3048 if (tree->gtFlags & GTF_IND_VOLATILE)
// The store-release path is taken only when the data is in a valid integer
// register, the address is not contained, and the store is not flagged unaligned.
3050 bool useStoreRelease =
3051 genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED);
3053 if (useStoreRelease)
// Each size case first checks that 'ins' is the plain store expected for that size.
3055 switch (EA_SIZE(attr))
3058 assert(ins == INS_strb);
3062 assert(ins == INS_strh);
3067 assert(ins == INS_str);
3071 assert(false); // We should not get here
3076 // issue a full memory barrier before a volatile StInd
3077 instGen_MemoryBarrier();
3081 emit->emitInsLoadStoreOp(ins, attr, dataReg, tree);
3085 //------------------------------------------------------------------------
3086 // genCodeForSwap: Produce code for a GT_SWAP node.
//
// Arguments:
3089 // tree - the GT_SWAP node
//
// Notes:
//    The register assignments recorded in the two LclVarDsc entries are exchanged
//    and the GC register sets are updated to match. No swap instruction is
//    emitted: the instruction itself is reported as NYI("register swap").
//
3091 void CodeGen::genCodeForSwap(GenTreeOp* tree)
3093 assert(tree->OperIs(GT_SWAP));
3095 // Swap is only supported for lclVar operands that are enregistered
3096 // We do not consume or produce any registers. Both operands remain enregistered.
3097 // However, the gc-ness may change.
3098 assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
3100 GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
3101 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
3102 var_types type1 = varDsc1->TypeGet();
3103 GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
3104 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
3105 var_types type2 = varDsc2->TypeGet();
3107 // We must have both int or both fp regs
// NOTE(review): this assert only checks "type1 floating implies type2 floating".
// It does not reject an integer type1 paired with a floating type2, so it is
// weaker than the comment above states - confirm whether equality was intended.
3108 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
3110 // FP swap is not yet implemented (and should have NYI'd in LSRA)
3111 assert(!varTypeIsFloating(type1));
3113 regNumber oldOp1Reg = lcl1->gtRegNum;
3114 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
3115 regNumber oldOp2Reg = lcl2->gtRegNum;
3116 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
3118 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
3119 varDsc1->lvRegNum = oldOp2Reg;
3120 varDsc2->lvRegNum = oldOp1Reg;
3123 emitAttr size = EA_PTRSIZE;
3124 if (varTypeGCtype(type1) != varTypeGCtype(type2))
3126 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
3127 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
3131 NYI("register swap");
3132 // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
3134 // Update the gcInfo.
3135 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
3136 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3137 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3139 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
3140 // It will also dump the updates.
// Each register is marked with the type of the variable that now lives in it.
3141 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
3142 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
3145 //-------------------------------------------------------------------------------------------
3146 // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
3147 // corresponding to a binary Relational operator result.
//
// Arguments:
3150 // dstReg - The target register to set to 1 or 0
3151 // tree - The GenTree Relop node that was used to set the Condition codes
//
3153 // Return Value: none
//
// Notes:
3156 // A full 64-bit value of either 1 or 0 is set up in the 'dstReg'
3157 //-------------------------------------------------------------------------------------------
3159 void CodeGen::genSetRegToCond(regNumber dstReg, GenTree* tree)
// genJumpKindsForTree fills in up to two jump kinds; the first one is always present.
3161 emitJumpKind jumpKind[2];
3162 bool branchToTrueLabel[2];
3163 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
3164 assert(jumpKind[0] != EJ_NONE);
3166 // Set the reg according to the flags
3167 inst_SET(jumpKind[0], dstReg);
3169 // Do we need to use two operations to set the flags?
3171 if (jumpKind[1] != EJ_NONE)
3173 emitter* emit = getEmitter();
// 'ordered' is true when the relop does not carry GTF_RELOP_NAN_UN.
3174 bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
3177 // The only ones that require two operations are the
3178 // floating point compare operations of BEQ or BNE.UN
3180 if (tree->gtOper == GT_EQ)
3182 // This must be an ordered comparison.
3184 assert(jumpKind[1] == EJ_vs); // We complement this value
3185 secondCond = INS_COND_VC; // for the secondCond
3187 else // gtOper == GT_NE
3189 // This must be BNE.UN (unordered comparison)
3190 assert((tree->gtOper == GT_NE) && !ordered);
3191 assert(jumpKind[1] == EJ_lo); // We complement this value
3192 secondCond = INS_COND_HS; // for the secondCond
3195 // The second instruction is a 'csinc' instruction that either selects the previous dstReg
3196 // or increments the ZR register, which produces a 1 result.
3198 emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond);
3202 //------------------------------------------------------------------------
3203 // genIntToFloatCast: Generate code to cast an int/long to float/double
//
// Arguments:
3206 // treeNode - The GT_CAST node
//
// Assumptions:
3212 // Cast is a non-overflow conversion.
3213 // The treeNode must have an assigned register.
3214 // SrcType= int32/uint32/int64/uint64 and DstType=float/double.
//
// Notes:
//    Emits a single scvtf or ucvtf. The instruction is chosen from the signedness
//    of the source type and the conversion option from the source size and
//    destination type.
//
3216 void CodeGen::genIntToFloatCast(GenTree* treeNode)
3218 // int type --> float/double conversions are always non-overflow ones
3219 assert(treeNode->OperGet() == GT_CAST);
3220 assert(!treeNode->gtOverflow());
3222 regNumber targetReg = treeNode->gtRegNum;
3223 assert(genIsValidFloatReg(targetReg));
3225 GenTree* op1 = treeNode->gtOp.gtOp1;
3226 assert(!op1->isContained()); // Cannot be contained
3227 assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
3229 var_types dstType = treeNode->CastToType();
3230 var_types srcType = genActualType(op1->TypeGet());
3231 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
3233 // force the srcType to unsigned if GT_UNSIGNED flag is set
3234 if (treeNode->gtFlags & GTF_UNSIGNED)
3236 srcType = genUnsignedType(srcType);
3239 // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE
3240 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
3241 noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
// Unsigned sources use ucvtf, signed sources use scvtf.
3243 instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf;
3244 insOpts cvtOption = INS_OPTS_NONE; // invalid value
// Pick the conversion option: {4BYTE, 8BYTE} source crossed with {D, S} destination.
3246 if (dstType == TYP_DOUBLE)
3248 if (srcSize == EA_4BYTE)
3250 cvtOption = INS_OPTS_4BYTE_TO_D;
3254 assert(srcSize == EA_8BYTE);
3255 cvtOption = INS_OPTS_8BYTE_TO_D;
3260 assert(dstType == TYP_FLOAT);
3261 if (srcSize == EA_4BYTE)
3263 cvtOption = INS_OPTS_4BYTE_TO_S;
3267 assert(srcSize == EA_8BYTE);
3268 cvtOption = INS_OPTS_8BYTE_TO_S;
3272 genConsumeOperands(treeNode->AsOp());
// The emit size is taken from the destination (floating-point) type.
3274 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3276 genProduceReg(treeNode);
3279 //------------------------------------------------------------------------
3280 // genFloatToIntCast: Generate code to cast float/double to int/long
//
// Arguments:
3283 // treeNode - The GT_CAST node
//
// Assumptions:
3289 // Cast is a non-overflow conversion.
3290 // The treeNode must have an assigned register.
3291 // SrcType=float/double and DstType= int32/uint32/int64/uint64
//
// Notes:
//    Emits a single fcvtzs or fcvtzu. The instruction is chosen from the signedness
//    of the destination type and the conversion option from the source type and
//    destination size.
//
3293 void CodeGen::genFloatToIntCast(GenTree* treeNode)
3295 // we don't expect to see overflow detecting float/double --> int type conversions here
3296 // as they should have been converted into helper calls by front-end.
3297 assert(treeNode->OperGet() == GT_CAST);
3298 assert(!treeNode->gtOverflow());
3300 regNumber targetReg = treeNode->gtRegNum;
3301 assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
3303 GenTree* op1 = treeNode->gtOp.gtOp1;
3304 assert(!op1->isContained()); // Cannot be contained
3305 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
3307 var_types dstType = treeNode->CastToType();
3308 var_types srcType = op1->TypeGet();
3309 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
3311 // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE
3312 // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
3313 // we expect the front-end or lowering phase to have generated two levels of cast.
3315 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
3316 noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
3318 instruction ins = INS_fcvtzs; // default to sign converts
3319 insOpts cvtOption = INS_OPTS_NONE; // invalid value
3321 if (varTypeIsUnsigned(dstType))
3323 ins = INS_fcvtzu; // use unsigned converts
// Pick the conversion option: {D, S} source crossed with {4BYTE, 8BYTE} destination.
3326 if (srcType == TYP_DOUBLE)
3328 if (dstSize == EA_4BYTE)
3330 cvtOption = INS_OPTS_D_TO_4BYTE;
3334 assert(dstSize == EA_8BYTE);
3335 cvtOption = INS_OPTS_D_TO_8BYTE;
3340 assert(srcType == TYP_FLOAT);
3341 if (dstSize == EA_4BYTE)
3343 cvtOption = INS_OPTS_S_TO_4BYTE;
3347 assert(dstSize == EA_8BYTE);
3348 cvtOption = INS_OPTS_S_TO_8BYTE;
3352 genConsumeOperands(treeNode->AsOp());
// The emit size is the destination (integer) size.
3354 getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3356 genProduceReg(treeNode);
3359 //------------------------------------------------------------------------
3360 // genCkfinite: Generate code for ckfinite opcode.
//
// Arguments:
3363 // treeNode - The GT_CKFINITE node
//
// Assumptions:
3369 // GT_CKFINITE node has reserved an internal register.
//
// Notes:
//    The floating-point bits are copied to the internal integer register, shifted
//    and masked to isolate the exponent, and compared against the all-ones exponent.
//    On a match, control goes to the SCK_ARITH_EXCPN throw helper block; otherwise
//    the value is copied to the target register if it is not already there.
//
3371 void CodeGen::genCkfinite(GenTree* treeNode)
3373 assert(treeNode->OperGet() == GT_CKFINITE);
3375 GenTree* op1 = treeNode->gtOp.gtOp1;
3376 var_types targetType = treeNode->TypeGet();
// TYP_FLOAT: after a right shift by 20, the 8 exponent bits (bits 30..23 of the
// value) sit at bits 10..3, hence the mask 0x7F8. Any other type (expected to be
// TYP_DOUBLE): after a right shift by 52, the 11 exponent bits sit at bits 10..0,
// hence 0x7FF. Both masks exclude the sign bit, which ends up at bit 11.
3377 int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent.
3378 int shiftAmount = targetType == TYP_FLOAT ? 20 : 52;
3380 emitter* emit = getEmitter();
3382 // Extract exponent into a register.
3383 regNumber intReg = treeNode->GetSingleTempReg();
3384 regNumber fpReg = genConsumeReg(op1);
3386 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg);
3387 emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount);
3389 // Mask off the exponent bits and check if the exponent is all 1's
3390 emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask);
3391 emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask);
3393 // If exponent is all 1's, throw ArithmeticException
3394 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3395 genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
3397 // if it is a finite value copy it to targetReg
3398 if (treeNode->gtRegNum != fpReg)
3400 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->gtRegNum, fpReg);
3402 genProduceReg(treeNode);
3405 //------------------------------------------------------------------------
3406 // genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node.
//
// Arguments:
//    tree - the compare node
//
// Notes:
//    Emits the flag-setting instruction (fcmp, cmp or tst). If the node has a
//    target register, the result is also materialized there via genSetRegToCond.
//
3411 void CodeGen::genCodeForCompare(GenTreeOp* tree)
3413 regNumber targetReg = tree->gtRegNum;
3414 emitter* emit = getEmitter();
3416 GenTree* op1 = tree->gtOp1;
3417 GenTree* op2 = tree->gtOp2;
3418 var_types op1Type = genActualType(op1->TypeGet());
3419 var_types op2Type = genActualType(op2->TypeGet());
3421 assert(!op1->isUsedFromMemory());
3422 assert(!op2->isUsedFromMemory());
3424 genConsumeOperands(tree);
// The compare size comes from op1; both operands must have the same size.
3426 emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
3428 assert(genTypeSize(op1Type) == genTypeSize(op2Type));
3430 if (varTypeIsFloating(op1Type))
3432 assert(varTypeIsFloating(op2Type));
3433 assert(!op1->isContained());
3434 assert(op1Type == op2Type);
// A contained integral-zero op2 is emitted as an fcmp against the immediate 0.0.
3436 if (op2->IsIntegralConst(0))
3438 assert(op2->isContained());
3439 emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
3443 assert(!op2->isContained());
3444 emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
3449 assert(!varTypeIsFloating(op2Type));
3450 // We don't support swapping op1 and op2 to generate cmp reg, imm
3451 assert(!op1->isContainedIntOrIImmed());
// GT_TEST_EQ / GT_TEST_NE use tst; every other relop uses cmp.
3453 instruction ins = tree->OperIs(GT_TEST_EQ, GT_TEST_NE) ? INS_tst : INS_cmp;
3455 if (op2->isContainedIntOrIImmed())
3457 GenTreeIntConCommon* intConst = op2->AsIntConCommon();
3458 emit->emitIns_R_I(ins, cmpSize, op1->gtRegNum, intConst->IconValue());
3462 emit->emitIns_R_R(ins, cmpSize, op1->gtRegNum, op2->gtRegNum);
3466 // Are we evaluating this into a register?
3467 if (targetReg != REG_NA)
3469 genSetRegToCond(targetReg, tree);
3470 genProduceReg(tree);
3474 //------------------------------------------------------------------------
3475 // genCodeForJumpCompare: Generates code for jmpCompare statement.
3477 // A GT_JCMP node is created when a comparison and conditional branch
3478 // can be executed in a single instruction.
3480 // Arm64 has a few instructions with this behavior.
3481 // - cbz/cbnz -- Compare and branch register zero/not zero
3482 // - tbz/tbnz -- Test and branch register bit zero/not zero
3484 // The cbz/cbnz supports the normal +/- 1MB branch range for conditional branches
3485 // The tbz/tbnz supports a smaller +/- 32KB branch range
3487 // A GT_JCMP cbz/cbnz node is created when there is a GT_EQ or GT_NE
3488 // integer/unsigned comparison against #0 which is used by a GT_JTRUE
3489 // condition jump node.
3491 // A GT_JCMP tbz/tbnz node is created when there is a GT_TEST_EQ or GT_TEST_NE
3492 // integer/unsigned comparison against a mask with a single bit set
3493 // which is used by a GT_JTRUE condition jump node.
3495 // This node is responsible for consuming the register, and emitting the
3496 // appropriate fused compare/test and branch instruction
3498 // Two flags guide code generation
3499 // GTF_JCMP_TST -- Set if this is a tbz/tbnz rather than cbz/cbnz
3500 // GTF_JCMP_EQ -- Set if this is cbz/tbz rather than cbnz/tbnz
//
// Arguments:
3503 // tree - The GT_JCMP tree node.
//
3508 void CodeGen::genCodeForJumpCompare(GenTreeOp* tree)
3510 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3512 GenTree* op1 = tree->gtGetOp1();
3513 GenTree* op2 = tree->gtGetOp2();
3515 assert(tree->OperIs(GT_JCMP));
3516 assert(!varTypeIsFloating(tree));
3517 assert(!op1->isUsedFromMemory());
3518 assert(!op2->isUsedFromMemory());
3519 assert(op2->IsCnsIntOrI());
3520 assert(op2->isContained());
3522 genConsumeOperands(tree);
3524 regNumber reg = op1->gtRegNum;
3525 emitAttr attr = emitActualTypeSize(op1->TypeGet());
3527 if (tree->gtFlags & GTF_JCMP_TST)
// tbz/tbnz: the constant must be a power of two; its log2 is the bit number tested.
3529 ssize_t compareImm = op2->gtIntCon.IconValue();
3531 assert(isPow2(compareImm));
3533 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz;
3534 int imm = genLog2((size_t)compareImm);
3536 getEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm);
// cbz/cbnz: the constant must be zero.
3540 assert(op2->IsIntegralConst(0));
3542 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_cbz : INS_cbnz;
3544 getEmitter()->emitIns_J_R(ins, attr, compiler->compCurBB->bbJumpDest, reg);
// genSPtoFPdelta: Compute the delta between the stack pointer and the frame pointer.
// The delta is set to the size of the outgoing argument space.
3548 int CodeGenInterface::genSPtoFPdelta()
3552 // We place the saved frame pointer immediately above the outgoing argument space.
3553 delta = (int)compiler->lvaOutgoingArgSpaceSize;
3559 //---------------------------------------------------------------------
3560 // genTotalFrameSize - return the total size of the stack frame, including local size,
3561 // callee-saved register size, etc.
//
// Return Value:
//    The frame size in bytes; asserted to be non-negative.
//
3567 int CodeGenInterface::genTotalFrameSize()
3569 // For varargs functions, we home all the incoming register arguments. They are not
3570 // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
3571 // since we don't use "push" instructions to save them, we don't have to do the
3572 // save of these varargs register arguments as the first thing in the prolog.
3574 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
// Sum of: the varargs home area (MAX_REG_ARG slots, varargs methods only),
// the pushed callee-saved registers, and the local frame.
3576 int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
3577 compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
3579 assert(totalFrameSize >= 0);
3580 return totalFrameSize;
3583 //---------------------------------------------------------------------
3584 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
3585 // This number is going to be negative, since the Caller-SP is at a higher
3586 // address than the frame pointer.
3588 // There must be a frame pointer to call this function!
//
// Return Value:
//    genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); asserted to be <= 0.
//
3590 int CodeGenInterface::genCallerSPtoFPdelta()
3592 assert(isFramePointerUsed());
3593 int callerSPtoFPdelta;
// Caller-SP to Initial-SP, plus Initial-SP to FP.
3595 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
3597 assert(callerSPtoFPdelta <= 0);
3598 return callerSPtoFPdelta;
3601 //---------------------------------------------------------------------
3602 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
3604 // This number will be negative.
//
// Return Value:
//    The negated total frame size (see genTotalFrameSize); asserted to be <= 0.
//
3606 int CodeGenInterface::genCallerSPtoInitialSPdelta()
3608 int callerSPtoSPdelta = 0;
3610 callerSPtoSPdelta -= genTotalFrameSize();
3612 assert(callerSPtoSPdelta <= 0);
3613 return callerSPtoSPdelta;
3616 /*****************************************************************************
3617 * Emit a call to a helper function.
// genEmitHelperCall: Emit a call to the JIT helper identified by 'helper'.
//
// Arguments:
//    helper        - helper id; cast to CorInfoHelpFunc for the lookups below
//    argSize       - passed through to emitIns_Call
//    retSize       - passed through to emitIns_Call
//    callTargetReg - register used to hold the call target when the call has to be
//                    indirect; REG_NA selects REG_DEFAULT_HELPER_CALL_TARGET
//
// Notes:
//    If compGetHelperFtn returns a direct address, the call is emitted as
//    EC_FUNC_TOKEN. Otherwise the target is loaded into a register and the call is
//    emitted as EC_INDIR_R. Afterwards the helper's kill set is marked as trashed.
//
3621 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
3623 void* addr = nullptr;
3624 void* pAddr = nullptr;
3626 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
3627 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
3628 regNumber callTarget = REG_NA;
// A null 'addr' means the direct address is not available and 'pAddr' is used instead.
3630 if (addr == nullptr)
3632 // This is call to a runtime helper.
3633 // adrp x, [reloc:rel page addr]
3634 // add x, x, [reloc:page offset]
3638 if (callTargetReg == REG_NA)
3640 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
3641 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
3642 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
3645 regMaskTP callTargetMask = genRegMask(callTargetReg);
3646 regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3648 // assert that all registers in callTargetMask are in the callKillSet
3649 noway_assert((callTargetMask & callKillSet) == callTargetMask);
3651 callTarget = callTargetReg;
3653 // adrp + add with relocations will be emitted
3654 getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
// Then load a pointer-sized value through callTarget back into callTarget.
3655 getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
3656 callType = emitter::EC_INDIR_R;
// callTarget is still REG_NA here for the direct (EC_FUNC_TOKEN) form.
3659 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
3660 retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
3661 gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
3662 callTarget, /* ireg */
3663 REG_NA, 0, 0, /* xreg, xmul, disp */
3665 emitter::emitNoGChelper(helper));
// Record every register the helper may kill as trashed.
3667 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3668 regTracker.rsTrashRegSet(killMask);
3673 //------------------------------------------------------------------------
3674 // genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main
3675 // routine which in turn calls appropriate genSIMDIntrinsicXXX() routine.
//
// Arguments:
3678 // simdNode - The GT_SIMD node
//
// Notes:
3684 // Base types other than byte/ubyte/short/ushort/int/uint/long/ulong/float/double
3685 // are rejected with a noway_assert, as is any intrinsic not listed in the switch.
3687 // TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp.
3688 void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
3690 // noway_assert for unsupported base types
3691 if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
3692 simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
3693 simdNode->gtSIMDBaseType != TYP_USHORT && simdNode->gtSIMDBaseType != TYP_UBYTE &&
3694 simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
3695 simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
3697 noway_assert(!"SIMD intrinsic with unsupported base type.");
// Dispatch on the intrinsic id; related intrinsics share one generator.
3700 switch (simdNode->gtSIMDIntrinsicID)
3702 case SIMDIntrinsicInit:
3703 genSIMDIntrinsicInit(simdNode);
3706 case SIMDIntrinsicInitN:
3707 genSIMDIntrinsicInitN(simdNode);
// Unary operations and conversions.
3710 case SIMDIntrinsicSqrt:
3711 case SIMDIntrinsicAbs:
3712 case SIMDIntrinsicCast:
3713 case SIMDIntrinsicConvertToSingle:
3714 case SIMDIntrinsicConvertToInt32:
3715 case SIMDIntrinsicConvertToDouble:
3716 case SIMDIntrinsicConvertToInt64:
3717 genSIMDIntrinsicUnOp(simdNode);
3720 case SIMDIntrinsicWidenLo:
3721 case SIMDIntrinsicWidenHi:
3722 genSIMDIntrinsicWiden(simdNode);
3725 case SIMDIntrinsicNarrow:
3726 genSIMDIntrinsicNarrow(simdNode);
// Binary arithmetic, bitwise, min/max and element-wise compare operations.
3729 case SIMDIntrinsicAdd:
3730 case SIMDIntrinsicSub:
3731 case SIMDIntrinsicMul:
3732 case SIMDIntrinsicDiv:
3733 case SIMDIntrinsicBitwiseAnd:
3734 case SIMDIntrinsicBitwiseAndNot:
3735 case SIMDIntrinsicBitwiseOr:
3736 case SIMDIntrinsicBitwiseXor:
3737 case SIMDIntrinsicMin:
3738 case SIMDIntrinsicMax:
3739 case SIMDIntrinsicEqual:
3740 case SIMDIntrinsicLessThan:
3741 case SIMDIntrinsicGreaterThan:
3742 case SIMDIntrinsicLessThanOrEqual:
3743 case SIMDIntrinsicGreaterThanOrEqual:
3744 genSIMDIntrinsicBinOp(simdNode);
3747 case SIMDIntrinsicOpEquality:
3748 case SIMDIntrinsicOpInEquality:
3749 genSIMDIntrinsicRelOp(simdNode);
3752 case SIMDIntrinsicDotProduct:
3753 genSIMDIntrinsicDotProduct(simdNode);
3756 case SIMDIntrinsicGetItem:
3757 genSIMDIntrinsicGetItem(simdNode);
3760 case SIMDIntrinsicSetX:
3761 case SIMDIntrinsicSetY:
3762 case SIMDIntrinsicSetZ:
3763 case SIMDIntrinsicSetW:
3764 genSIMDIntrinsicSetItem(simdNode);
3767 case SIMDIntrinsicUpperSave:
3768 genSIMDIntrinsicUpperSave(simdNode);
3771 case SIMDIntrinsicUpperRestore:
3772 genSIMDIntrinsicUpperRestore(simdNode);
// Select is not expected to reach codegen (see the NYI message).
3775 case SIMDIntrinsicSelect:
3776 NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)");
3780 noway_assert(!"Unimplemented SIMD intrinsic.");
// genGetSimdInsOpt: Choose the vector arrangement option for a SIMD instruction
// from the vector size and the element type.
//
// Arguments:
//    size        - vector size; must be EA_16BYTE or EA_8BYTE
//    elementType - element type of the vector
//
// Notes:
//    Each element-type group picks between a 16-byte and an 8-byte arrangement:
//    2D/1D, 4S/2S, 8H/4H or 16B/8B. An unsupported element type asserts.
//
3785 insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType)
3787 assert((size == EA_16BYTE) || (size == EA_8BYTE));
3788 insOpts result = INS_OPTS_NONE;
3790 switch (elementType)
3795 result = (size == EA_16BYTE) ? INS_OPTS_2D : INS_OPTS_1D;
3800 result = (size == EA_16BYTE) ? INS_OPTS_4S : INS_OPTS_2S;
3804 result = (size == EA_16BYTE) ? INS_OPTS_8H : INS_OPTS_4H;
3808 result = (size == EA_16BYTE) ? INS_OPTS_16B : INS_OPTS_8B;
3811 assert(!"Unsupported element type");
3818 // getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
//
// Arguments:
3821 // intrinsicId - SIMD intrinsic Id
3822 // baseType - Base type of the SIMD vector
3823 // ival - Out param (optional). Any immediate byte operand that needs to be passed to the opcode
//
// Return Value:
3827 // Instruction (op) to be used, and ival is set if instruction requires an immediate operand.
// NOTE(review): none of the cases shown here writes through 'ival' - confirm
// whether it is used on ARM64.
3829 instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
3831 instruction result = INS_invalid;
// Floating-point base types.
3832 if (varTypeIsFloating(baseType))
3834 switch (intrinsicId)
3836 case SIMDIntrinsicAbs:
3839 case SIMDIntrinsicAdd:
3842 case SIMDIntrinsicBitwiseAnd:
3845 case SIMDIntrinsicBitwiseAndNot:
3848 case SIMDIntrinsicBitwiseOr:
3851 case SIMDIntrinsicBitwiseXor:
3854 case SIMDIntrinsicCast:
3857 case SIMDIntrinsicConvertToInt32:
3858 case SIMDIntrinsicConvertToInt64:
3859 result = INS_fcvtns;
3861 case SIMDIntrinsicDiv:
3864 case SIMDIntrinsicEqual:
3867 case SIMDIntrinsicGreaterThan:
3870 case SIMDIntrinsicGreaterThanOrEqual:
3873 case SIMDIntrinsicLessThan:
3876 case SIMDIntrinsicLessThanOrEqual:
3879 case SIMDIntrinsicMax:
3882 case SIMDIntrinsicMin:
3885 case SIMDIntrinsicMul:
3888 case SIMDIntrinsicNarrow:
3889 // Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
3890 // Return lower bytes instruction here
3893 case SIMDIntrinsicSelect:
3896 case SIMDIntrinsicSqrt:
3899 case SIMDIntrinsicSub:
3902 case SIMDIntrinsicWidenLo:
3905 case SIMDIntrinsicWidenHi:
3906 result = INS_fcvtl2;
3909 assert(!"Unsupported SIMD intrinsic");
// Non-floating base types: several intrinsics choose between an unsigned and a
// signed instruction, and some assert that the base type is not unsigned.
3915 bool isUnsigned = varTypeIsUnsigned(baseType);
3917 switch (intrinsicId)
3919 case SIMDIntrinsicAbs:
3920 assert(!isUnsigned);
3923 case SIMDIntrinsicAdd:
3926 case SIMDIntrinsicBitwiseAnd:
3929 case SIMDIntrinsicBitwiseAndNot:
3932 case SIMDIntrinsicBitwiseOr:
3935 case SIMDIntrinsicBitwiseXor:
3938 case SIMDIntrinsicCast:
3941 case SIMDIntrinsicConvertToDouble:
3942 case SIMDIntrinsicConvertToSingle:
3943 result = isUnsigned ? INS_ucvtf : INS_scvtf;
3945 case SIMDIntrinsicEqual:
3948 case SIMDIntrinsicGreaterThan:
3949 result = isUnsigned ? INS_cmhi : INS_cmgt;
3951 case SIMDIntrinsicGreaterThanOrEqual:
3952 result = isUnsigned ? INS_cmhs : INS_cmge;
3954 case SIMDIntrinsicLessThan:
3955 assert(!isUnsigned);
3958 case SIMDIntrinsicLessThanOrEqual:
3959 assert(!isUnsigned);
3962 case SIMDIntrinsicMax:
3963 result = isUnsigned ? INS_umax : INS_smax;
3965 case SIMDIntrinsicMin:
3966 result = isUnsigned ? INS_umin : INS_smin;
3968 case SIMDIntrinsicMul:
3971 case SIMDIntrinsicNarrow:
3972 // Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
3973 // Return lower bytes instruction here
3976 case SIMDIntrinsicSelect:
3979 case SIMDIntrinsicSub:
3982 case SIMDIntrinsicWidenLo:
3983 result = isUnsigned ? INS_uxtl : INS_sxtl;
3985 case SIMDIntrinsicWidenHi:
3986 result = isUnsigned ? INS_uxtl2 : INS_sxtl2;
3989 assert(!"Unsupported SIMD intrinsic");
// Every supported (intrinsic, base type) pair must have produced an instruction.
3994 noway_assert(result != INS_invalid);
3998 //------------------------------------------------------------------------
3999 // genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize.
//
// Arguments:
4002 // simdNode - The GT_SIMD node
//
// Notes:
//    Emits a 'dup' of the scalar source into the target vector register. The
//    source is either a general register (REG_ZR for a contained integral zero)
//    or element 0 of a floating-point/SIMD register.
//
4007 void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
4009 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
4011 GenTree* op1 = simdNode->gtGetOp1();
4012 var_types baseType = simdNode->gtSIMDBaseType;
4013 regNumber targetReg = simdNode->gtRegNum;
4014 assert(targetReg != REG_NA);
4015 var_types targetType = simdNode->TypeGet();
4017 genConsumeOperands(simdNode);
// An integral zero constant is sourced from the zero register.
4018 regNumber op1Reg = op1->IsIntegralConst(0) ? REG_ZR : op1->gtRegNum;
4020 // TODO-ARM64-CQ Add LD1R to allow SIMDIntrinsicInit from contained memory
4021 // TODO-ARM64-CQ Add MOVI to allow SIMDIntrinsicInit from contained immediate small constants
// op1 is contained exactly when it is an integral zero constant.
4023 assert(op1->isContained() == op1->IsIntegralConst(0));
4024 assert(!op1->isUsedFromMemory());
4026 assert(genIsValidFloatReg(targetReg));
4027 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
// SIMD sizes above 8 bytes use the 16-byte form.
4029 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4030 insOpts opt = genGetSimdInsOpt(attr, baseType);
4032 if (genIsValidIntReg(op1Reg))
4034 getEmitter()->emitIns_R_R(INS_dup, attr, targetReg, op1Reg, opt);
// Otherwise the source is a float/SIMD register: duplicate its element 0.
4038 getEmitter()->emitIns_R_R_I(INS_dup, attr, targetReg, op1Reg, 0, opt);
4041 genProduceReg(simdNode);
4044 //-------------------------------------------------------------------------------------------
4045 // genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes
4046 // a number of arguments equal to the length of the Vector.
//
// Arguments:
4049 // simdNode - The GT_SIMD node
//
// Notes:
//    All list operands are consumed first and their registers recorded. The vector
//    is then built by inserting each operand into its lane. For floating-point base
//    types it is built in an internal temp register and moved to the target afterwards.
//
4054 void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
4056 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
4058 regNumber targetReg = simdNode->gtRegNum;
4059 assert(targetReg != REG_NA);
4061 var_types targetType = simdNode->TypeGet();
4063 var_types baseType = simdNode->gtSIMDBaseType;
4065 regNumber vectorReg = targetReg;
4067 if (varTypeIsFloating(baseType))
4069 // Note that we cannot use targetReg before consuming all float source operands.
4070 // Therefore use an internal temp register
4071 vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4074 emitAttr baseTypeSize = emitTypeSize(baseType);
4076 // We will first consume the list items in execution (left to right) order,
4077 // and record the registers.
4078 regNumber operandRegs[FP_REGSIZE_BYTES];
4079 unsigned initCount = 0;
4080 for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
4082 assert(list->OperGet() == GT_LIST);
4083 GenTree* listItem = list->gtGetOp1();
4084 assert(listItem->TypeGet() == baseType);
4085 assert(!listItem->isContained());
4086 regNumber operandReg = genConsumeReg(listItem);
4087 operandRegs[initCount] = operandReg;
// The initializers must fit within the SIMD size of the node.
4091 assert((initCount * baseTypeSize) <= simdNode->gtSIMDSize);
// If the initializers do not cover all 16 bytes, zero the whole register first.
4093 if (initCount * baseTypeSize < EA_16BYTE)
4095 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B);
// Integral operands are inserted into lane i of the vector.
4098 if (varTypeIsIntegral(baseType))
4100 for (unsigned i = 0; i < initCount; i++)
4102 getEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i);
// Otherwise use the two-index form: destination lane i, and presumably source
// element 0 (the trailing argument) - confirm against the emitter.
4107 for (unsigned i = 0; i < initCount; i++)
4109 getEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0);
4113 // Load the initialized value.
4114 if (targetReg != vectorReg)
4116 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, vectorReg);
4119 genProduceReg(simdNode);
4122 //----------------------------------------------------------------------------------
4123 // genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt.
//
// Arguments:
4126 // simdNode - The GT_SIMD node
//
// Notes:
//    Handles Sqrt, Cast, Abs and the ConvertTo* intrinsics with one two-register
//    instruction obtained from getOpForSIMDIntrinsic.
//
4131 void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
4133 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
4134 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs ||
4135 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle ||
4136 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 ||
4137 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble ||
4138 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64);
4140 GenTree* op1 = simdNode->gtGetOp1();
4141 var_types baseType = simdNode->gtSIMDBaseType;
4142 regNumber targetReg = simdNode->gtRegNum;
4143 assert(targetReg != REG_NA);
4144 var_types targetType = simdNode->TypeGet();
4146 genConsumeOperands(simdNode);
4147 regNumber op1Reg = op1->gtRegNum;
4149 assert(genIsValidFloatReg(op1Reg));
4150 assert(genIsValidFloatReg(targetReg));
4152 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// SIMD sizes above 8 bytes use the 16-byte form.
4153 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
// A plain mov takes no arrangement option; every other instruction gets one.
4154 insOpts opt = (ins == INS_mov) ? INS_OPTS_NONE : genGetSimdInsOpt(attr, baseType);
4156 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
4158 genProduceReg(simdNode);
4161 //--------------------------------------------------------------------------------
4162 // genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations
//
// Arguments:
4165 // simdNode - The GT_SIMD node
//
// Notes:
4168 // The Widen intrinsics are broken into separate intrinsics for the two results.
// Each invocation therefore handles exactly one of SIMDIntrinsicWidenLo or
// SIMDIntrinsicWidenHi and emits a single instruction for it.
4170 void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
4172 assert((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo) ||
4173 (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi));
4175 GenTree* op1 = simdNode->gtGetOp1();
4176 var_types baseType = simdNode->gtSIMDBaseType;
4177 regNumber targetReg = simdNode->gtRegNum;
4178 assert(targetReg != REG_NA);
4179 var_types simdType = simdNode->TypeGet();
4181 genConsumeOperands(simdNode);
4182 regNumber op1Reg = op1->gtRegNum;
// NOTE(review): srcReg and emitSize are not referenced by any statement visible in this
// function - confirm whether they can be removed.
4183 regNumber srcReg = op1Reg;
4184 emitAttr emitSize = emitActualTypeSize(simdType);
4186 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// Floating-point base types: emitted as an 8-byte operation without an arrangement option.
4188 if (varTypeIsFloating(baseType))
4190 getEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
// Other base types: WidenHi is emitted with the 16-byte attribute and WidenLo with the
// 8-byte attribute; the arrangement is derived from that attribute and the base type.
4194 emitAttr attr = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi) ? EA_16BYTE : EA_8BYTE;
4195 insOpts opt = genGetSimdInsOpt(attr, baseType);
4197 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
4200 genProduceReg(simdNode);
4203 //--------------------------------------------------------------------------------
4204 // genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations
//
// Arguments:
4207 // simdNode - The GT_SIMD node
//
// Notes:
4210 // This intrinsic takes two arguments. The first operand is narrowed to produce the
4211 // lower elements of the results, and the second operand produces the high elements.
// Two instructions are emitted: fcvtn/fcvtn2 or xtn/xtn2, depending on the instruction
// selected for the base type.
4213 void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
4215 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow);
4217 GenTree* op1 = simdNode->gtGetOp1();
4218 GenTree* op2 = simdNode->gtGetOp2();
4219 var_types baseType = simdNode->gtSIMDBaseType;
4220 regNumber targetReg = simdNode->gtRegNum;
4221 assert(targetReg != REG_NA);
4222 var_types simdType = simdNode->TypeGet();
4223 emitAttr emitSize = emitTypeSize(simdType);
4225 genConsumeOperands(simdNode);
4226 regNumber op1Reg = op1->gtRegNum;
4227 regNumber op2Reg = op2->gtRegNum;
4229 assert(genIsValidFloatReg(op1Reg));
4230 assert(genIsValidFloatReg(op2Reg));
4231 assert(genIsValidFloatReg(targetReg));
// The first emitted instruction writes targetReg before op2Reg is read by the second one,
// so op2Reg must not be the same register as targetReg.
4232 assert(op2Reg != targetReg);
// Only 16-byte vectors are handled here.
4233 assert(simdNode->gtSIMDSize == 16);
4235 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4236 assert((ins == INS_fcvtn) || (ins == INS_xtn));
// fcvtn case: narrow op1 into targetReg, then fcvtn2 narrows op2 into the same register.
4238 if (ins == INS_fcvtn)
4240 getEmitter()->emitIns_R_R(INS_fcvtn, EA_8BYTE, targetReg, op1Reg);
4241 getEmitter()->emitIns_R_R(INS_fcvtn2, EA_8BYTE, targetReg, op2Reg);
// xtn case: opt is the arrangement passed to xtn and opt2 the one passed to xtn2.
4245 insOpts opt = INS_OPTS_NONE;
4246 insOpts opt2 = INS_OPTS_NONE;
4248 // This is not the same as genGetSimdInsOpt()
4249 // Basetype is the source operand type
4250 // However encoding is based on the destination operand type which is 1/2 the basetype.
4266 opt2 = INS_OPTS_16B;
4269 assert(!"Unsupported narrowing element type");
// xtn is emitted with the 8-byte attribute from op1, xtn2 with the 16-byte attribute from op2.
4272 getEmitter()->emitIns_R_R(INS_xtn, EA_8BYTE, targetReg, op1Reg, opt);
4273 getEmitter()->emitIns_R_R(INS_xtn2, EA_16BYTE, targetReg, op2Reg, opt2);
4276 genProduceReg(simdNode);
4279 //--------------------------------------------------------------------------------
4280 // genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
4281 // add, sub, mul, bit-wise And, AndNot and Or.
// The assert below additionally accepts Div, BitwiseXor, Min, Max, Equal, LessThan,
// GreaterThan, LessThanOrEqual and GreaterThanOrEqual.
//
// Arguments:
4284 // simdNode - The GT_SIMD node
4289 void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
4291 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
4292 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
4293 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
4294 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
4295 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
4296 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
4297 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual ||
4298 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan ||
4299 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan ||
4300 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual ||
4301 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual);
4303 GenTree* op1 = simdNode->gtGetOp1();
4304 GenTree* op2 = simdNode->gtGetOp2();
4305 var_types baseType = simdNode->gtSIMDBaseType;
4306 regNumber targetReg = simdNode->gtRegNum;
4307 assert(targetReg != REG_NA);
4308 var_types targetType = simdNode->TypeGet();
4310 genConsumeOperands(simdNode);
4311 regNumber op1Reg = op1->gtRegNum;
4312 regNumber op2Reg = op2->gtRegNum;
// All three registers must be floating-point/SIMD registers.
4314 assert(genIsValidFloatReg(op1Reg));
4315 assert(genIsValidFloatReg(op2Reg));
4316 assert(genIsValidFloatReg(targetReg));
4318 // TODO-ARM64-CQ Contain integer constants where possible
4320 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// Vectors larger than 8 bytes use the 16-byte form, everything else the 8-byte form.
4321 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4322 insOpts opt = genGetSimdInsOpt(attr, baseType);
// A single three-register instruction performs the whole operation.
4324 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
4326 genProduceReg(simdNode);
4329 //--------------------------------------------------------------------------------
4330 // genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
// (SIMDIntrinsicOpEquality / SIMDIntrinsicOpInEquality).
//
// Arguments:
4334 // simdNode - The GT_SIMD node
//
// Notes:
// The sequence is: element-wise equality compare into a temporary SIMD register,
// a uminv reduction over that temporary, a move of byte 0 into targetReg, an optional
// flip of bit 0 for inequality, and finally a mask down to bit 0.
4339 void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
4341 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ||
4342 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
4344 GenTree* op1 = simdNode->gtGetOp1();
4345 GenTree* op2 = simdNode->gtGetOp2();
4346 var_types baseType = simdNode->gtSIMDBaseType;
4347 regNumber targetReg = simdNode->gtRegNum;
4348 var_types targetType = simdNode->TypeGet();
4350 genConsumeOperands(simdNode);
4351 regNumber op1Reg = op1->gtRegNum;
4352 regNumber op2Reg = op2->gtRegNum;
4353 regNumber otherReg = op2Reg;
// Both intrinsics start from the element-wise Equal instruction; inequality is derived
// from it further down.
4355 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType);
4356 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4357 insOpts opt = genGetSimdInsOpt(attr, baseType);
4359 // TODO-ARM64-CQ Contain integer constants where possible
4361 regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4363 getEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt);
4365 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4367 // For 12Byte vectors we must set upper bits to get correct comparison
4368 // We do not assume upper bits are zero.
// targetReg is used as scratch here: it is loaded with -1 and inserted into 4-byte
// element 3 of the compare result.
4369 instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1);
4370 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3);
// Reduce the compare result with uminv using a byte arrangement (16B or 8B by vector size).
4373 getEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg,
4374 (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B);
// Move byte element 0 of the reduced value into the integer target register.
4376 getEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0);
// Inequality is the equality result with bit 0 flipped.
4378 if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)
4380 getEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1);
// Keep only bit 0 of the result.
4383 getEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1);
4385 genProduceReg(simdNode);
4388 //--------------------------------------------------------------------------------
4389 // genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
//
// Arguments:
4392 // simdNode - The GT_SIMD node
//
// Notes:
// The operands are multiplied element-wise and the products are then summed
// horizontally: with faddp for floating-point base types, and with uaddlv/saddlv
// followed by a move to the integer target register for other base types.
4397 void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
4399 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
4401 GenTree* op1 = simdNode->gtGetOp1();
4402 GenTree* op2 = simdNode->gtGetOp2();
4403 var_types baseType = simdNode->gtSIMDBaseType;
4404 var_types simdType = op1->TypeGet();
4406 regNumber targetReg = simdNode->gtRegNum;
4407 assert(targetReg != REG_NA);
// The node produces a scalar of the vector's base type.
4409 var_types targetType = simdNode->TypeGet();
4410 assert(targetType == baseType);
4412 genConsumeOperands(simdNode);
4413 regNumber op1Reg = op1->gtRegNum;
4414 regNumber op2Reg = op2->gtRegNum;
// Floating-point results are accumulated directly in targetReg; other base types use
// a separate temporary SIMD register for the intermediate vector.
4415 regNumber tmpReg = targetReg;
4417 if (!varTypeIsFloating(baseType))
4419 tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4422 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicMul, baseType);
4423 emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
4424 insOpts opt = genGetSimdInsOpt(attr, baseType);
// Element-wise multiply of the two operands.
4427 getEmitter()->emitIns_R_R_R(ins, attr, tmpReg, op1Reg, op2Reg, opt);
4429 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4431 // For 12Byte vectors we must zero upper bits to get correct dot product
4432 // We do not assume upper bits are zero.
// Insert the zero register into 4-byte element 3 of the product vector.
4433 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpReg, REG_ZR, 3);
4436 // Vector add horizontal
4437 if (varTypeIsFloating(baseType))
4439 if (baseType == TYP_FLOAT)
// With the 4S arrangement a vector faddp is emitted first; the scalar 4-byte faddp
// below then produces the final sum in targetReg.
4441 if (opt == INS_OPTS_4S)
4443 getEmitter()->emitIns_R_R_R(INS_faddp, attr, tmpReg, tmpReg, tmpReg, INS_OPTS_4S);
4445 getEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg);
// Remaining floating-point case: a single 8-byte scalar faddp.
4449 getEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg);
// Non-floating base types: pick the unsigned or signed across-vector add.
4454 ins = varTypeIsUnsigned(baseType) ? INS_uaddlv : INS_saddlv;
4456 getEmitter()->emitIns_R_R(ins, attr, tmpReg, tmpReg, opt);
4458 // Mov to integer register
// Unsigned base types and base types smaller than 4 bytes use mov; the rest use smov.
4459 if (varTypeIsUnsigned(baseType) || (genTypeSize(baseType) < 4))
4461 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(baseType), targetReg, tmpReg, 0);
4465 getEmitter()->emitIns_R_R_I(INS_smov, emitActualTypeSize(baseType), targetReg, tmpReg, 0);
4469 genProduceReg(simdNode);
4472 //------------------------------------------------------------------------------------
4473 // genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
//
// Arguments:
4476 // simdNode - The GT_SIMD node
//
// Notes:
// Two main cases are handled:
//  - constant index: load the element from memory when op1 is contained, otherwise
//    move it out of the vector register;
//  - variable index: obtain a base address (the local's address, the indirection's
//    address register, or a temporary stack slot the vector is stored to) and load
//    the element at baseReg[index].
4481 void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
4483 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
4485 GenTree* op1 = simdNode->gtGetOp1();
4486 GenTree* op2 = simdNode->gtGetOp2();
4487 var_types simdType = op1->TypeGet();
4488 assert(varTypeIsSIMD(simdType));
4490 // op1 of TYP_SIMD12 should be considered as TYP_SIMD16
4491 if (simdType == TYP_SIMD12)
4493 simdType = TYP_SIMD16;
4496 var_types baseType = simdNode->gtSIMDBaseType;
4497 regNumber targetReg = simdNode->gtRegNum;
4498 assert(targetReg != REG_NA);
4499 var_types targetType = simdNode->TypeGet();
4500 assert(targetType == genActualType(baseType));
4502 // GetItem has 2 operands:
4503 // - the source of SIMD type (op1)
4504 // - the index of the value to be returned.
4505 genConsumeOperands(simdNode);
4507 emitAttr baseTypeSize = emitTypeSize(baseType);
// log2 of the element size in bytes.
4508 unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
// Case 1: the index is a constant.
4510 if (op2->IsCnsIntOrI())
4512 assert(op2->isContained());
4514 ssize_t index = op2->gtIntCon.gtIconVal;
4516 // We only need to generate code for the get if the index is valid
4517 // If the index is invalid, the previously generated range check will throw
4518 if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
// op1 contained: the vector is in memory, so load just the requested element.
4520 if (op1->isContained())
// Byte offset of the element within the vector.
4522 int offset = (int)index * genTypeSize(baseType);
4523 instruction ins = ins_Load(baseType);
4524 baseTypeSize = varTypeIsFloating(baseType)
4526 : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
4528 assert(!op1->isUsedFromReg());
// Local variable: load from the local's stack location plus the element offset.
4530 if (op1->OperIsLocal())
4532 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4534 getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
// Otherwise op1 must be an indirection whose address is in a register.
4538 assert(op1->OperGet() == GT_IND);
4540 GenTree* addr = op1->AsIndir()->Addr();
4541 assert(!addr->isContained());
4542 regNumber baseReg = addr->gtRegNum;
4544 // ldr targetReg, [baseReg, #offset]
4545 getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
// op1 in a register: move the element straight out of the vector register.
4550 assert(op1->isUsedFromReg());
4551 regNumber srcReg = op1->gtRegNum;
4553 // mov targetReg, srcReg[#index]
4554 getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
// Case 2: the index is in a register.
4560 assert(!op2->isContained());
4562 regNumber baseReg = REG_NA;
4563 regNumber indexReg = op2->gtRegNum;
4565 if (op1->isContained())
4567 // Optimize the case of op1 is in memory and trying to access ith element.
4568 assert(!op1->isUsedFromReg());
4569 if (op1->OperIsLocal())
4571 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4573 baseReg = simdNode->ExtractTempReg();
4575 // Load the address of varNum
4576 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
4580 // Require GT_IND addr to be not contained.
4581 assert(op1->OperGet() == GT_IND);
4583 GenTree* addr = op1->AsIndir()->Addr();
4584 assert(!addr->isContained());
4586 baseReg = addr->gtRegNum;
// op1 in a register: store the vector to the SIMD init temp local so that the element
// can be loaded from memory by index.
4591 assert(op1->isUsedFromReg());
4592 regNumber srcReg = op1->gtRegNum;
4594 unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
4595 noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
4597 baseReg = simdNode->ExtractTempReg();
4599 // Load the address of simdInitTempVarNum
4600 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
4602 // Store the vector to simdInitTempVarNum
4603 getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
// Base and index must be distinct integer registers.
4606 assert(genIsValidIntReg(indexReg));
4607 assert(genIsValidIntReg(baseReg));
4608 assert(baseReg != indexReg);
4610 // Load item at baseReg[index]
4611 getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
4615 genProduceReg(simdNode);
4618 //------------------------------------------------------------------------------------
4619 // genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i.
//
// Arguments:
4622 // simdNode - The GT_SIMD node
//
// Notes:
// The element index is implied by the intrinsic ID (SetX/SetY/SetZ/SetW). The source
// vector is copied to the target register and the new value is then inserted into
// the selected element.
4627 void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
4629 // Determine index based on intrinsic ID
4631 switch (simdNode->gtSIMDIntrinsicID)
4633 case SIMDIntrinsicSetX:
4636 case SIMDIntrinsicSetY:
4639 case SIMDIntrinsicSetZ:
4642 case SIMDIntrinsicSetW:
// Any other intrinsic ID must not reach this point.
4649 assert(index != -1);
4651 // op1 is the SIMD vector
4652 // op2 is the value to be set
4653 GenTree* op1 = simdNode->gtGetOp1();
4654 GenTree* op2 = simdNode->gtGetOp2();
4656 var_types baseType = simdNode->gtSIMDBaseType;
4657 regNumber targetReg = simdNode->gtRegNum;
4658 assert(targetReg != REG_NA);
4659 var_types targetType = simdNode->TypeGet();
4660 assert(varTypeIsSIMD(targetType));
4662 assert(op2->TypeGet() == baseType);
// The selected element must lie within the vector.
4663 assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
4665 genConsumeOperands(simdNode);
4666 regNumber op1Reg = op1->gtRegNum;
4667 regNumber op2Reg = op2->gtRegNum;
4669 assert(genIsValidFloatReg(targetReg));
4670 assert(genIsValidFloatReg(op1Reg));
4671 assert(genIsValidIntReg(op2Reg) || genIsValidFloatReg(op2Reg));
// targetReg is overwritten with op1 before op2 is read, so op2 must be in another register.
4672 assert(targetReg != op2Reg);
4674 emitAttr attr = emitTypeSize(baseType);
4676 // Insert mov if register assignment requires it
// NOTE(review): the mov is requested unconditionally here; presumably the emitter drops
// it when targetReg == op1Reg - confirm.
4677 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, op1Reg);
// Value in an integer register: insert from the general-purpose register.
4679 if (genIsValidIntReg(op2Reg))
4681 getEmitter()->emitIns_R_R_I(INS_ins, attr, targetReg, op2Reg, index);
// Value in a floating-point register: insert element 0 of op2Reg into element 'index'.
4685 getEmitter()->emitIns_R_R_I_I(INS_ins, attr, targetReg, op2Reg, index, 0);
4688 genProduceReg(simdNode);
4691 //-----------------------------------------------------------------------------
4692 // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to
4693 // the given register, if any, or to memory.
//
// Arguments:
4696 // simdNode - The GT_SIMD node
//
// Notes:
4702 // The upper half of all SIMD registers are volatile, even the callee-save registers.
4703 // When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic
4704 // to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
4705 // register. If such a register cannot be found, it will save it to an available caller-save register.
4706 // In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 8 byte
4707 // value to the stack. (Note that if there are no caller-save registers available, the entire 16 byte
4708 // value will be spilled to the stack.)
4710 void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
4712 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
// The operand must be a 16-byte local.
4714 GenTree* op1 = simdNode->gtGetOp1();
4715 assert(op1->IsLocal());
4716 assert(emitTypeSize(op1->TypeGet()) == 16);
4717 regNumber targetReg = simdNode->gtRegNum;
4718 regNumber op1Reg = genConsumeReg(op1);
4719 assert(op1Reg != REG_NA);
4720 assert(targetReg != REG_NA);
// mov targetReg[0], op1Reg[1] (8-byte elements): copy the upper half of op1Reg into
// the lower half of targetReg.
4721 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, targetReg, op1Reg, 0, 1);
4723 genProduceReg(simdNode);
4726 //-----------------------------------------------------------------------------
4727 // genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to
4728 // the given register, if any, or to memory.
//
// Arguments:
4731 // simdNode - The GT_SIMD node
//
// Notes:
4737 // For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
4738 // have their home register, this node has its targetReg on the lclVar child, and its source
4740 // Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
4741 // an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
4742 // most spill scenarios, the saved tree will be different from the restored tree, but the spill
4743 // restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
4744 // spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
4745 // as it is in the spill descriptor for the register from which it was saved.
4747 void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
4749 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
// The operand must be a 16-byte local.
4751 GenTree* op1 = simdNode->gtGetOp1();
4752 assert(op1->IsLocal());
4753 assert(emitTypeSize(op1->TypeGet()) == 16);
// The node's own register is the source (saved upper half); the local's register is
// the destination.
4754 regNumber srcReg = simdNode->gtRegNum;
4755 regNumber lclVarReg = genConsumeReg(op1);
4756 unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
4757 assert(lclVarReg != REG_NA);
4758 assert(srcReg != REG_NA);
// If the saved value was spilled, look up the save node in the spill descriptor for
// srcReg and consume it to trigger the reload.
4759 if (simdNode->gtFlags & GTF_SPILLED)
4761 GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
4762 noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
4763 genConsumeReg(saveNode);
// mov lclVarReg[1], srcReg[0] (8-byte elements): put the saved value back into the
// upper half of the local's register.
4765 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, lclVarReg, srcReg, 1, 0);
4768 //-----------------------------------------------------------------------------
4769 // genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
4770 // Since Vector3 is not a hardware supported write size, it is performed
4771 // as two writes: 8 byte followed by 4-byte.
//
// Arguments:
4774 // treeNode - tree node that is attempting to store indirect
4780 void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
4782 assert(treeNode->OperGet() == GT_STOREIND);
4784 GenTree* addr = treeNode->gtOp.gtOp1;
4785 GenTree* data = treeNode->gtOp.gtOp2;
4787 // addr and data should not be contained.
4788 assert(!data->isContained());
4789 assert(!addr->isContained());
4792 // Should not require a write barrier
4793 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
4794 assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
4797 genConsumeOperands(treeNode->AsOp());
4799 // Need an additional integer register to extract upper 4 bytes from data.
4800 regNumber tmpReg = treeNode->GetSingleTempReg();
// The address register is still needed for the second store, so the temp must differ from it.
4801 assert(tmpReg != addr->gtRegNum);
// 8-byte write of the lower two elements at [addr].
4804 getEmitter()->emitIns_R_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum);
4806 // Extract upper 4-bytes from data
// (4-byte element 2 of the data register is moved into tmpReg.)
4807 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->gtRegNum, 2);
// 4-byte write of the third element at [addr + 8].
4810 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4813 //-----------------------------------------------------------------------------
4814 // genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
4815 // Since Vector3 is not a hardware supported access size, it is performed
4816 // as two loads: 8 byte followed by 4-byte.
//
// Arguments:
4819 // treeNode - tree node of GT_IND
4825 void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
4827 assert(treeNode->OperGet() == GT_IND);
4829 GenTree* addr = treeNode->gtOp.gtOp1;
4830 regNumber targetReg = treeNode->gtRegNum;
4832 assert(!addr->isContained());
// NOTE(review): operandReg is not used below; the emits read addr->gtRegNum directly.
4834 regNumber operandReg = genConsumeReg(addr);
4836 // Need an additional int register to read upper 4 bytes, which is different from targetReg
4837 regNumber tmpReg = treeNode->GetSingleTempReg();
// 8-byte read of the lower two elements from [addr] into targetReg.
4840 getEmitter()->emitIns_R_R(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, addr->gtRegNum);
// 4-byte read of the third element from [addr + 8] into tmpReg.
4843 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4845 // Insert upper 4-bytes into data
// (tmpReg is moved into 4-byte element 2 of targetReg.)
4846 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, targetReg, tmpReg, 2);
4848 genProduceReg(treeNode);
4851 //-----------------------------------------------------------------------------
4852 // genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
4853 // Since Vector3 is not a hardware supported write size, it is performed
4854 // as two stores: 8 byte followed by 4-byte.
4857 // treeNode - tree node that is attempting to store TYP_SIMD12 field
4862 void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
4864 assert((treeNode->OperGet() == GT_STORE_LCL_FLD) || (treeNode->OperGet() == GT_STORE_LCL_VAR));
4867 unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
4868 assert(varNum < compiler->lvaCount);
4870 if (treeNode->OperGet() == GT_LCL_FLD)
4872 offs = treeNode->gtLclFld.gtLclOffs;
4875 GenTree* op1 = treeNode->gtOp.gtOp1;
4876 assert(!op1->isContained());
4877 regNumber operandReg = genConsumeReg(op1);
4879 // Need an addtional integer register to extract upper 4 bytes from data.
4880 regNumber tmpReg = treeNode->GetSingleTempReg();
4882 // store lower 8 bytes
4883 getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
4885 // Extract upper 4-bytes from data
4886 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2);
4889 getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offs + 8);
4892 #endif // FEATURE_SIMD
4894 #ifdef FEATURE_HW_INTRINSICS
4895 #include "hwintrinsicArm64.h"
// getOpForHWIntrinsic: look up the instruction for a hardware intrinsic node.
//
// Arguments:
//    node      - the GT_HWIntrinsic node; its gtHWIntrinsicId selects the table entry
//    instrType - type used to pick which of the entry's instructions to use
//
// Notes:
//    The intrinsic info holds one instruction per type class. The index used is
//    0 for floating-point types, 2 for unsigned types and 1 for everything else.
//    The selected instruction is asserted to be valid.
4897 instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType)
4899 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
4901 unsigned int instrTypeIndex = varTypeIsFloating(instrType) ? 0 : varTypeIsUnsigned(instrType) ? 2 : 1;
4903 instruction ins = compiler->getHWIntrinsicInfo(intrinsicID).instrs[instrTypeIndex];
4904 assert(ins != INS_invalid);
4909 //------------------------------------------------------------------------
4910 // genHWIntrinsic: Produce code for a GT_HWIntrinsic node.
4912 // This is the main routine which in turn calls the genHWIntrinsicXXX() routines.
//
// Arguments:
4915 // node - the GT_HWIntrinsic node
//
// Notes:
//    Dispatch is on the 'form' recorded in the intrinsic info for the node's
//    intrinsic ID; forms without a handler hit the NYI at the end of the switch.
4920 void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
4922 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
4924 switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
4926 case HWIntrinsicInfo::UnaryOp:
4927 genHWIntrinsicUnaryOp(node);
4929 case HWIntrinsicInfo::CrcOp:
4930 genHWIntrinsicCrcOp(node);
4932 case HWIntrinsicInfo::SimdBinaryOp:
4933 genHWIntrinsicSimdBinaryOp(node);
4935 case HWIntrinsicInfo::SimdExtractOp:
4936 genHWIntrinsicSimdExtractOp(node);
4938 case HWIntrinsicInfo::SimdInsertOp:
4939 genHWIntrinsicSimdInsertOp(node);
4941 case HWIntrinsicInfo::SimdSelectOp:
4942 genHWIntrinsicSimdSelectOp(node);
4944 case HWIntrinsicInfo::SimdSetAllOp:
4945 genHWIntrinsicSimdSetAllOp(node);
4947 case HWIntrinsicInfo::SimdUnaryOp:
4948 genHWIntrinsicSimdUnaryOp(node);
4950 case HWIntrinsicInfo::SimdBinaryRMWOp:
4951 genHWIntrinsicSimdBinaryRMWOp(node);
4953 case HWIntrinsicInfo::SimdTernaryRMWOp:
4954 genHWIntrinsicSimdTernaryRMWOp(node);
// The Sha1 forms are routed to the generic Sha hash/rotate generators.
4956 case HWIntrinsicInfo::Sha1HashOp:
4957 genHWIntrinsicShaHashOp(node);
4959 case HWIntrinsicInfo::Sha1RotateOp:
4960 genHWIntrinsicShaRotateOp(node);
4964 NYI("HWIntrinsic form not implemented");
4968 //------------------------------------------------------------------------
4969 // genHWIntrinsicUnaryOp:
4971 // Produce code for a GT_HWIntrinsic node with form UnaryOp.
4973 // Consumes one scalar operand produces a scalar
//
// Arguments:
4976 // node - the GT_HWIntrinsic node
4981 void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node)
4983 GenTree* op1 = node->gtGetOp1();
4984 regNumber targetReg = node->gtRegNum;
// The operation size comes from the node's own (actual) type.
4985 emitAttr attr = emitActualTypeSize(node);
4987 assert(targetReg != REG_NA);
4988 var_types targetType = node->TypeGet();
4990 genConsumeOperands(node);
4992 regNumber op1Reg = op1->gtRegNum;
// The instruction is selected using the node's result type.
4994 instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
4996 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg);
4998 genProduceReg(node);
5001 //------------------------------------------------------------------------
5002 // genHWIntrinsicCrcOp:
5004 // Produce code for a GT_HWIntrinsic node with form CrcOp.
5006 // Consumes two scalar operands and produces a scalar result
5008 // This form differs from BinaryOp because the attr depends on the size of op2
//
// Arguments:
5011 // node - the GT_HWIntrinsic node
//
// Notes:
//    Not implemented yet: the body only raises NYI.
5016 void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node)
5018 NYI("genHWIntrinsicCrcOp not implemented");
5021 //------------------------------------------------------------------------
5022 // genHWIntrinsicSimdBinaryOp:
5024 // Produce code for a GT_HWIntrinsic node with form SimdBinaryOp.
5026 // Consumes two SIMD operands and produces a SIMD result
//
// Arguments:
5029 // node - the GT_HWIntrinsic node
5034 void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node)
5036 GenTree* op1 = node->gtGetOp1();
5037 GenTree* op2 = node->gtGetOp2();
5038 var_types baseType = node->gtSIMDBaseType;
5039 regNumber targetReg = node->gtRegNum;
5041 assert(targetReg != REG_NA);
5042 var_types targetType = node->TypeGet();
5044 genConsumeOperands(node);
5046 regNumber op1Reg = op1->gtRegNum;
5047 regNumber op2Reg = op2->gtRegNum;
// All three registers must be floating-point/SIMD registers.
5049 assert(genIsValidFloatReg(op1Reg));
5050 assert(genIsValidFloatReg(op2Reg));
5051 assert(genIsValidFloatReg(targetReg));
// The instruction is selected by the vector's base (element) type.
5053 instruction ins = getOpForHWIntrinsic(node, baseType);
// Vectors larger than 8 bytes use the 16-byte form, everything else the 8-byte form.
5054 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5055 insOpts opt = genGetSimdInsOpt(attr, baseType);
5057 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
5059 genProduceReg(node);
5062 //------------------------------------------------------------------------
5063 // genHWIntrinsicSwitchTable:
5065 // Generate code for an immediate switch table
5067 // In cases where an instruction only supports const immediate operands, we
5068 // need to generate functionally correct code when the operand is not constant
5070 // This is required by the HW Intrinsic design to handle indirect calls, such as:
5075 // Generated code implements a switch of this form
5080 // ins0; // emitSwCase(0)
5083 // ins1; // emitSwCase(1)
5089 // insLast; // emitSwCase(swMax - 1)
5092 // throw ArgumentOutOfRangeException
5095 // Generated code looks like:
5097 // cmp swReg, #swMax
5098 // b.hs ThrowArgumentOutOfRangeExceptionHelper
5099 // adr tmpReg, labelFirst
5100 // add tmpReg, tmpReg, swReg, LSL #3
5104 // b labelBreakTarget
5106 // b labelBreakTarget
5111 // b labelBreakTarget
5112 // labelBreakTarget:
//
// Arguments:
5116 // swReg - register containing the switch case to execute
5117 // tmpReg - temporary integer register for calculating the switch indirect branch target
5118 // swMax - the number of switch cases. If swReg >= swMax throw SCK_ARG_RNG_EXCPN
5119 // emitSwCase - function like argument taking an immediate value and emitting one instruction
//
// Notes:
//    Every case occupies exactly two instructions (the case instruction plus the branch
//    to labelBreakTarget); the asserts in the loop enforce this. The branch target is
//    computed as labelFirst + swReg * 8 on that basis.
5124 template <typename HWIntrinsicSwitchCaseBody>
5125 void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg,
5128 HWIntrinsicSwitchCaseBody emitSwCase)
5131 assert(swMax <= 256);
5133 assert(genIsValidIntReg(tmpReg));
5134 assert(genIsValidIntReg(swReg));
5136 BasicBlock* labelFirst = genCreateTempLabel();
5137 BasicBlock* labelBreakTarget = genCreateTempLabel();
5139 // Detect and throw out of range exception
5140 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax);
// Unsigned >= comparison against swMax.
5142 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
5143 genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN);
5145 // Calculate switch target
5146 labelFirst->bbFlags |= BBF_JMP_TARGET;
5148 // tmpReg = labelFirst
5149 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg);
5151 // tmpReg = labelFirst + swReg * 8
5152 getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL);
// Indirect branch to the selected case.
5155 getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg);
5157 genDefineTempLabel(labelFirst);
5158 for (int i = 0; i < swMax; ++i)
5160 unsigned prevInsCount = getEmitter()->emitInsCount;
// The case body must have emitted exactly one instruction.
5164 assert(getEmitter()->emitInsCount == prevInsCount + 1);
5166 inst_JMP(EJ_jmp, labelBreakTarget);
// Case body plus branch: exactly two instructions per case.
5168 assert(getEmitter()->emitInsCount == prevInsCount + 2);
5170 genDefineTempLabel(labelBreakTarget);
5173 //------------------------------------------------------------------------
5174 // genHWIntrinsicSimdExtractOp:
5176 // Produce code for a GT_HWIntrinsic node with form SimdExtractOp.
5178 // Consumes one SIMD operand and one scalar
5180 // The element index operand is typically a const immediate
5181 // When it is not, a switch table is generated
5183 // See genHWIntrinsicSwitchTable comments
//
// Arguments:
5186 // node - the GT_HWIntrinsic node
5191 void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
5193 GenTree* op1 = node->gtGetOp1();
5194 GenTree* op2 = node->gtGetOp2();
5195 var_types simdType = op1->TypeGet();
5196 var_types targetType = node->TypeGet();
5197 regNumber targetReg = node->gtRegNum;
5199 assert(targetReg != REG_NA);
5201 genConsumeOperands(node);
5203 regNumber op1Reg = op1->gtRegNum;
5205 assert(genIsValidFloatReg(op1Reg));
// The element size is taken from the node's result type.
5207 emitAttr baseTypeSize = emitTypeSize(targetType);
// Number of elements in the source vector.
5209 int elements = emitTypeSize(simdType) / baseTypeSize;
// Emits the extract for one specific element index. It is used directly for a constant
// index and as the per-case body of the switch table otherwise.
5211 auto emitSwCase = [&](int element) {
5212 assert(element >= 0);
5213 assert(element < elements);
// Floating-point result: move the element into element 0 of the (SIMD) target register.
5215 if (varTypeIsFloating(targetType))
5217 assert(genIsValidFloatReg(targetReg));
5218 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element);
// Unsigned or 8-byte result: umov into the integer target register.
5220 else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE))
5222 assert(genIsValidIntReg(targetReg));
5223 getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element);
// Remaining (signed, narrower than 8 bytes) results: smov into the integer target register.
5227 assert(genIsValidIntReg(targetReg));
5228 getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element);
// Constant index: emit the single extract instruction.
5232 if (op2->isContainedIntOrIImmed())
5234 int element = (int)op2->AsIntConCommon()->IconValue();
5236 emitSwCase(element);
// Index in a register: generate a switch table with one case per element.
5240 regNumber elementReg = op2->gtRegNum;
5241 regNumber tmpReg = node->GetSingleTempReg();
5243 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5246 genProduceReg(node);
5249 //------------------------------------------------------------------------
5250 // genHWIntrinsicSimdInsertOp:
5252 // Produce code for a GT_HWIntrinsic node with form SimdInsertOp.
5254 // Consumes one SIMD operand and two scalars
5256 // The element index operand is typically a const immediate
5257 // When it is not, a switch table is generated
5259 // See genHWIntrinsicSwitchTable comments
5262 // node - the GT_HWIntrinsic node
5267 void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
5269 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5270 GenTree* op1 = argList->Current();
5271 GenTree* op2 = argList->Rest()->Current();
5272 GenTree* op3 = argList->Rest()->Rest()->Current();
5273 var_types simdType = op1->TypeGet();
5274 var_types baseType = node->gtSIMDBaseType;
5275 regNumber targetReg = node->gtRegNum;
5277 assert(targetReg != REG_NA);
5279 genConsumeRegs(op1);
5280 genConsumeRegs(op2);
5281 genConsumeRegs(op3);
5283 regNumber op1Reg = op1->gtRegNum;
5285 assert(genIsValidFloatReg(targetReg));
5286 assert(genIsValidFloatReg(op1Reg));
5288 emitAttr baseTypeSize = emitTypeSize(baseType);
5290 int elements = emitTypeSize(simdType) / baseTypeSize;
5292 if (targetReg != op1Reg)
5294 getEmitter()->emitIns_R_R(INS_mov, baseTypeSize, targetReg, op1Reg);
5297 if (op3->isContained())
5299 // Handle vector element to vector element case
5301 // If op3 is contained this is because lowering found an opportunity to contain a Simd.Extract in a Simd.Insert
5303 regNumber op3Reg = op3->gtGetOp1()->gtRegNum;
5305 assert(genIsValidFloatReg(op3Reg));
5307 // op3 containment currently only occurs when
5308 // + op3 is a Simd.Extract() (gtHWIntrinsicId == NI_ARM64_SIMD_GetItem)
5309 // + element & srcLane are immediate constants
5310 assert(op2->isContainedIntOrIImmed());
5311 assert(op3->OperIs(GT_HWIntrinsic));
5312 assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem);
5313 assert(op3->gtGetOp2()->isContainedIntOrIImmed());
5315 int element = (int)op2->AsIntConCommon()->IconValue();
5316 int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue();
5318 // Emit mov targetReg[element], op3Reg[srcLane]
5319 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane);
5323 // Handle scalar to vector element case
5324 // TODO-ARM64-CQ handle containing op3 scalar const where possible
5325 regNumber op3Reg = op3->gtRegNum;
5327 auto emitSwCase = [&](int element) {
5328 assert(element >= 0);
5329 assert(element < elements);
5331 if (varTypeIsFloating(baseType))
5333 assert(genIsValidFloatReg(op3Reg));
5334 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0);
5338 assert(genIsValidIntReg(op3Reg));
5339 getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element);
5343 if (op2->isContainedIntOrIImmed())
5345 int element = (int)op2->AsIntConCommon()->IconValue();
5347 emitSwCase(element);
5351 regNumber elementReg = op2->gtRegNum;
5352 regNumber tmpReg = node->GetSingleTempReg();
5354 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5358 genProduceReg(node);
5361 //------------------------------------------------------------------------
5362 // genHWIntrinsicSimdSelectOp:
5364 // Produce code for a GT_HWIntrinsic node with form SimdSelectOp.
5366 // Consumes three SIMD operands and produces a SIMD result
5368 // This intrinsic form requires one of the source registers to be the
5369 // destination register. Inserts a INS_mov if this requirement is not met.
5372 // node - the GT_HWIntrinsic node
5377 void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node)
5379 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5380 GenTree* op1 = argList->Current();
5381 GenTree* op2 = argList->Rest()->Current();
5382 GenTree* op3 = argList->Rest()->Rest()->Current();
5383 var_types baseType = node->gtSIMDBaseType;
5384 regNumber targetReg = node->gtRegNum;
5386 assert(targetReg != REG_NA);
5387 var_types targetType = node->TypeGet();
5389 genConsumeRegs(op1);
5390 genConsumeRegs(op2);
5391 genConsumeRegs(op3);
5393 regNumber op1Reg = op1->gtRegNum;
5394 regNumber op2Reg = op2->gtRegNum;
5395 regNumber op3Reg = op3->gtRegNum;
5397 assert(genIsValidFloatReg(op1Reg));
5398 assert(genIsValidFloatReg(op2Reg));
5399 assert(genIsValidFloatReg(op3Reg));
5400 assert(genIsValidFloatReg(targetReg));
5402 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5404 // Arm64 has three bit select forms; each uses three source registers
5405 // One of the sources is also the destination
5406 if (targetReg == op3Reg)
5408 // op3 is target use bit insert if true
5409 // op3 = op3 ^ (op1 & (op2 ^ op3))
5410 getEmitter()->emitIns_R_R_R(INS_bit, attr, op3Reg, op2Reg, op1Reg);
5412 else if (targetReg == op2Reg)
5414 // op2 is target use bit insert if false
5415 // op2 = op2 ^ (~op1 & (op2 ^ op3))
5416 getEmitter()->emitIns_R_R_R(INS_bif, attr, op2Reg, op3Reg, op1Reg);
5420 if (targetReg != op1Reg)
5422 // target is not one of the sources, copy op1 to use bit select form
5423 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5426 // targetReg = op3 ^ (targetReg & (op2 ^ op3))
5427 getEmitter()->emitIns_R_R_R(INS_bsl, attr, targetReg, op2Reg, op3Reg);
5430 genProduceReg(node);
5433 //------------------------------------------------------------------------
5434 // genHWIntrinsicSimdSetAllOp:
5436 // Produce code for a GT_HWIntrinsic node with form SimdSetAllOp.
5438 // Consumes single scalar operand and produces a SIMD result
5441 // node - the GT_HWIntrinsic node
5446 void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node)
5448 GenTree* op1 = node->gtGetOp1();
5449 var_types baseType = node->gtSIMDBaseType;
5450 regNumber targetReg = node->gtRegNum;
5452 assert(targetReg != REG_NA);
5453 var_types targetType = node->TypeGet();
5455 genConsumeOperands(node);
5457 regNumber op1Reg = op1->gtRegNum;
5459 assert(genIsValidFloatReg(targetReg));
5460 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
5462 instruction ins = getOpForHWIntrinsic(node, baseType);
5463 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5464 insOpts opt = genGetSimdInsOpt(attr, baseType);
5466 // TODO-ARM64-CQ Support contained immediate cases
5468 if (genIsValidIntReg(op1Reg))
5470 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
5474 getEmitter()->emitIns_R_R_I(ins, attr, targetReg, op1Reg, 0, opt);
5477 genProduceReg(node);
5480 //------------------------------------------------------------------------
5481 // genHWIntrinsicSimdUnaryOp:
5483 // Produce code for a GT_HWIntrinsic node with form SimdUnaryOp.
5485 // Consumes single SIMD operand and produces a SIMD result
5488 // node - the GT_HWIntrinsic node
5493 void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node)
5495 GenTree* op1 = node->gtGetOp1();
5496 var_types baseType = node->gtSIMDBaseType;
5497 regNumber targetReg = node->gtRegNum;
5499 assert(targetReg != REG_NA);
5500 var_types targetType = node->TypeGet();
5502 genConsumeOperands(node);
5504 regNumber op1Reg = op1->gtRegNum;
5506 assert(genIsValidFloatReg(op1Reg));
5507 assert(genIsValidFloatReg(targetReg));
5509 instruction ins = getOpForHWIntrinsic(node, baseType);
5510 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5511 insOpts opt = genGetSimdInsOpt(attr, baseType);
5513 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
5515 genProduceReg(node);
5518 //------------------------------------------------------------------------
5519 // genHWIntrinsicSimdBinaryRMWOp:
5521 // Produce code for a GT_HWIntrinsic node with form SimdBinaryRMWOp.
5523 // Consumes two SIMD operands and produces a SIMD result.
5524 // First operand is both source and destination.
5527 // node - the GT_HWIntrinsic node
5532 void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node)
5534 GenTree* op1 = node->gtGetOp1();
5535 GenTree* op2 = node->gtGetOp2();
5536 var_types baseType = node->gtSIMDBaseType;
5537 regNumber targetReg = node->gtRegNum;
5539 assert(targetReg != REG_NA);
5541 genConsumeOperands(node);
5543 regNumber op1Reg = op1->gtRegNum;
5544 regNumber op2Reg = op2->gtRegNum;
5546 assert(genIsValidFloatReg(op1Reg));
5547 assert(genIsValidFloatReg(op2Reg));
5548 assert(genIsValidFloatReg(targetReg));
5550 instruction ins = getOpForHWIntrinsic(node, baseType);
5551 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5552 insOpts opt = genGetSimdInsOpt(attr, baseType);
5554 if (targetReg != op1Reg)
5556 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5558 getEmitter()->emitIns_R_R(ins, attr, targetReg, op2Reg, opt);
5560 genProduceReg(node);
5563 //------------------------------------------------------------------------
5564 // genHWIntrinsicSimdTernaryRMWOp:
5566 // Produce code for a GT_HWIntrinsic node with form SimdTernaryRMWOp
5568 // Consumes three SIMD operands and produces a SIMD result.
5569 // First operand is both source and destination.
5572 // node - the GT_HWIntrinsic node
5577 void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node)
5579 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5580 GenTree* op1 = argList->Current();
5581 GenTree* op2 = argList->Rest()->Current();
5582 GenTree* op3 = argList->Rest()->Rest()->Current();
5583 var_types baseType = node->gtSIMDBaseType;
5584 regNumber targetReg = node->gtRegNum;
5586 assert(targetReg != REG_NA);
5587 var_types targetType = node->TypeGet();
5589 genConsumeRegs(op1);
5590 genConsumeRegs(op2);
5591 genConsumeRegs(op3);
5593 regNumber op1Reg = op1->gtRegNum;
5594 regNumber op2Reg = op2->gtRegNum;
5595 regNumber op3Reg = op3->gtRegNum;
5597 assert(genIsValidFloatReg(op1Reg));
5598 assert(genIsValidFloatReg(op2Reg));
5599 assert(genIsValidFloatReg(op3Reg));
5600 assert(genIsValidFloatReg(targetReg));
5601 assert(targetReg != op2Reg);
5602 assert(targetReg != op3Reg);
5604 instruction ins = getOpForHWIntrinsic(node, baseType);
5605 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5607 if (targetReg != op1Reg)
5609 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5612 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
5614 genProduceReg(node);
5617 //------------------------------------------------------------------------
5618 // genHWIntrinsicShaHashOp:
5620 // Produce code for a GT_HWIntrinsic node with form Sha1HashOp.
5621 // Used in Arm64 SHA1 Hash operations.
5623 // Consumes three operands and returns a Simd result.
5624 // First Simd operand is both source and destination.
5625 // Second Operand is an unsigned int.
5626 // Third operand is a simd operand.
5629 // node - the GT_HWIntrinsic node
5634 void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node)
5636 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5637 GenTree* op1 = argList->Current();
5638 GenTree* op2 = argList->Rest()->Current();
5639 GenTree* op3 = argList->Rest()->Rest()->Current();
5640 var_types baseType = node->gtSIMDBaseType;
5641 regNumber targetReg = node->gtRegNum;
5643 assert(targetReg != REG_NA);
5644 var_types targetType = node->TypeGet();
5646 genConsumeRegs(op1);
5647 genConsumeRegs(op2);
5648 genConsumeRegs(op3);
5650 regNumber op1Reg = op1->gtRegNum;
5651 regNumber op2Reg = op2->gtRegNum;
5652 regNumber op3Reg = op3->gtRegNum;
5654 assert(genIsValidFloatReg(op1Reg));
5655 assert(genIsValidFloatReg(op3Reg));
5656 assert(targetReg != op2Reg);
5657 assert(targetReg != op3Reg);
5659 instruction ins = getOpForHWIntrinsic(node, baseType);
5660 emitAttr attr = (node->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE;
5662 assert(genIsValidIntReg(op2Reg));
5663 regNumber elementReg = op2->gtRegNum;
5664 regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT);
5666 getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg);
5668 if (targetReg != op1Reg)
5670 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5673 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, tmpReg, op3Reg);
5675 genProduceReg(node);
5678 //------------------------------------------------------------------------
5679 // genHWIntrinsicShaRotateOp:
5681 // Produce code for a GT_HWIntrinsic node with form Sha1RotateOp.
5682 // Used in Arm64 SHA1 Rotate operations.
5684 // Consumes one integer operand and returns unsigned int result.
5687 // node - the GT_HWIntrinsic node
5692 void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node)
5694 GenTree* op1 = node->gtGetOp1();
5695 regNumber targetReg = node->gtRegNum;
5696 emitAttr attr = emitActualTypeSize(node);
5698 assert(targetReg != REG_NA);
5699 var_types targetType = node->TypeGet();
5701 genConsumeOperands(node);
5703 instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
5704 regNumber elementReg = op1->gtRegNum;
5705 regNumber tmpReg = node->GetSingleTempReg(RBM_ALLFLOAT);
5707 getEmitter()->emitIns_R_R(INS_fmov, EA_4BYTE, tmpReg, elementReg);
5708 getEmitter()->emitIns_R_R(ins, EA_4BYTE, tmpReg, tmpReg);
5709 getEmitter()->emitIns_R_R(INS_fmov, attr, targetReg, tmpReg);
5711 genProduceReg(node);
5714 #endif // FEATURE_HW_INTRINSICS
5716 /*****************************************************************************
5717 * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
5718 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
5719 * disassembler thinks the instructions are the same as we do.
5722 // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
5723 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
5724 //#define ALL_ARM64_EMITTER_UNIT_TESTS
5727 void CodeGen::genArm64EmitterUnitTests()
5734 if (!compiler->opts.altJit)
5736 // No point doing this in a "real" JIT.
5740 // Mark the "fake" instructions in the output.
5741 printf("*************** In genArm64EmitterUnitTests()\n");
5743 emitter* theEmitter = getEmitter();
5745 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5747 // genDefineTempLabel(genCreateTempLabel());
5748 // to create artificial labels to help separate groups of tests.
5751 // Loads/Stores basic general register
5754 genDefineTempLabel(genCreateTempLabel());
5756 // ldr/str Xt, [reg]
5757 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
5758 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5759 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5760 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
5761 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5762 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5764 // ldr/str Wt, [reg]
5765 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
5766 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5767 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5768 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
5769 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5770 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5772 theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
5773 theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
5774 theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
5775 theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
5776 theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
5778 theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
5779 theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
5780 theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
5781 theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
5782 theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
5783 theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
5784 theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
5785 theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
5786 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
5787 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
5788 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
5789 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
5790 theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
5793 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
5794 theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
5795 theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
5798 theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
5799 theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
5800 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
5801 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
5803 // pre-/post-indexed (unscaled)
5804 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5805 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5806 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5807 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5809 // ldar/stlr Rt, [reg]
5810 theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8);
5811 theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10);
5812 theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11);
5813 theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12);
5815 theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8);
5816 theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13);
5817 theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
5818 theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);
5821 theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
5822 theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);
5823 theEmitter->emitIns_R_R(INS_ldaxrb, EA_4BYTE, REG_R5, REG_R11);
5824 theEmitter->emitIns_R_R(INS_ldaxrh, EA_4BYTE, REG_R5, REG_R12);
5827 theEmitter->emitIns_R_R(INS_ldxr, EA_8BYTE, REG_R9, REG_R8);
5828 theEmitter->emitIns_R_R(INS_ldxr, EA_4BYTE, REG_R7, REG_R10);
5829 theEmitter->emitIns_R_R(INS_ldxrb, EA_4BYTE, REG_R5, REG_R11);
5830 theEmitter->emitIns_R_R(INS_ldxrh, EA_4BYTE, REG_R5, REG_R12);
5832 // stxr Ws, Rt, [reg]
5833 theEmitter->emitIns_R_R_R(INS_stxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5834 theEmitter->emitIns_R_R_R(INS_stxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5835 theEmitter->emitIns_R_R_R(INS_stxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5836 theEmitter->emitIns_R_R_R(INS_stxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5838 // stlxr Ws, Rt, [reg]
5839 theEmitter->emitIns_R_R_R(INS_stlxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5840 theEmitter->emitIns_R_R_R(INS_stlxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5841 theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5842 theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5844 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5846 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5851 genDefineTempLabel(genCreateTempLabel());
5854 theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
5855 theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
5858 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
5859 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
5860 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
5861 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
5863 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
5864 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
5865 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
5866 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
5868 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
5869 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
5870 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5871 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5873 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
5874 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
5875 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5876 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5878 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5880 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5884 genDefineTempLabel(genCreateTempLabel());
5886 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
5887 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
5888 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
5889 theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
5890 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
5891 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
5893 theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
5894 theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
5895 theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
5896 theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
5897 theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
5899 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5901 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5906 genDefineTempLabel(genCreateTempLabel());
5908 // mov reg, imm(i16,hw)
5909 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
5910 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
5911 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
5912 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
5913 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
5914 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
5915 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
5916 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
5918 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
5919 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
5920 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
5921 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
5923 // mov reg, imm(N,r,s)
5924 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5925 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
5926 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
5927 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
5928 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5929 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
5931 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
5932 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
5933 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
5934 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
5935 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
5936 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
5938 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5939 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5940 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
5941 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
5942 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
5943 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
5945 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
5946 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
5947 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
5948 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
5949 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
5950 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
5952 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5954 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5959 genDefineTempLabel(genCreateTempLabel());
5962 theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
5965 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
5966 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
5967 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
5969 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
5970 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
5971 theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
5973 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
5974 theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
5975 theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
5976 theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
5978 theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
5979 theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
5980 theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
5981 theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
5982 theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
5984 theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
5985 theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
5986 theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
5987 theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
5989 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5991 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5996 genDefineTempLabel(genCreateTempLabel());
5998 // mov reg, imm(i16,hw)
5999 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
6000 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6002 theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6003 theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
6004 theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
6006 theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
6007 theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
6008 theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
6010 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6012 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6017 genDefineTempLabel(genCreateTempLabel());
6019 theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
6020 theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
6021 theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
6022 theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
6023 theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
6024 theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
6026 theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
6027 theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
6028 theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
6029 theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
6030 theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
6032 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
6033 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
6034 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
6035 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
6037 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
6038 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
6040 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6041 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
6042 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
6043 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6044 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6045 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6046 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6047 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6048 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6050 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6051 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
6052 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
6053 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6054 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6055 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6056 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6057 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6058 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6060 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6061 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
6062 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
6063 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6064 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6065 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6066 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6067 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6068 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6070 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6071 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
6072 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
6073 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6074 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6075 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6076 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6077 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6078 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6080 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6081 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
6082 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
6083 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6084 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6085 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6086 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6087 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6088 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6090 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6091 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
6092 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
6093 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6094 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6095 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6096 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6097 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6098 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6100 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6101 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
6102 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
6103 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6104 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6105 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6106 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6107 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6108 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6110 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6111 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
6112 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
6113 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6114 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6115 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6116 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6117 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6118 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6120 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6122 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6128 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
6129 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
6131 // CMP (shifted register)
6132 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6133 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6134 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6136 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6137 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6138 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6140 // TST (shifted register)
6141 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6142 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6143 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6144 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
6146 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6147 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6148 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6149 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
6151 // CMP (extended register)
6152 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6153 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
6154 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis
6155 // disassembles this as "cmp x8,x9",
6156 // which looks like an msdis issue.
6157 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
6159 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6160 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
6161 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
6162 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
6164 // CMP 64-bit (extended register) and left shift
6165 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
6166 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6167 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
6168 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
6170 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
6171 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6172 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
6173 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
6175 // CMP 32-bit (extended register) and left shift
6176 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6177 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6178 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
6180 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6181 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6182 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
6184 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6186 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6191 genDefineTempLabel(genCreateTempLabel());
6193 theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6194 theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6195 theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6196 theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6197 theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6198 theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6199 theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6200 theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6201 theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6202 theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6203 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6204 theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6205 theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6206 theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6207 theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6208 theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6209 theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6210 theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6211 theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6212 theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6213 theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6214 theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6216 theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6217 theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6218 theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6219 theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6220 theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6221 theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6222 theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6223 theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6224 theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6225 theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6226 theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6227 theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6228 theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6229 theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6230 theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6231 theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6232 theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6233 theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6234 theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6235 theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6236 theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6237 theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6239 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6241 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6246 genDefineTempLabel(genCreateTempLabel());
6248 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
6249 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
6250 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
6252 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
6253 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
6254 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
6256 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
6257 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
6258 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
6260 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
6261 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
6262 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
6264 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
6265 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
6266 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
6268 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
6269 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
6270 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
6272 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6274 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6279 genDefineTempLabel(genCreateTempLabel());
6281 // ADD (extended register)
6282 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6283 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6284 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6285 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6286 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6287 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6288 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6289 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6291 // ADD (extended register) and left shift
6292 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6293 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6294 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6295 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6296 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6297 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6298 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6299 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6301 // ADD (shifted register)
6302 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6303 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
6304 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
6305 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
6307 // EXTR (extract field from register pair)
6308 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
6309 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
6310 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
6311 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
6313 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
6314 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
6316 // SUB (extended register)
6317 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6318 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6319 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6320 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6321 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6322 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6323 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6324 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6326 // SUB (extended register) and left shift
6327 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6328 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6329 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6330 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6331 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6332 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6333 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6334 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6336 // SUB (shifted register)
6337 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6338 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
6339 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
6340 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
6343 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6344 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6345 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6346 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6347 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6348 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6349 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6350 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6352 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6353 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6354 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6355 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6356 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6357 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6358 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6359 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6361 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6362 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6363 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6364 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6365 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6366 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6367 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6368 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6370 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6371 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6372 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6373 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6374 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6375 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6376 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6377 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6379 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6381 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6383 // R_R_R_I -- load/store pair
6386 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6387 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6388 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6389 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6391 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6392 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6393 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6394 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6396 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6397 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6398 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6399 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6400 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6401 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6402 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6403 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6405 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6406 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6407 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6408 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6409 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6410 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6411 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6412 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6414 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6415 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
6416 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6417 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6420 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6421 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6422 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6423 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6424 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
6425 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
6427 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6429 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6431 // R_R_R_Ext -- load/store shifted/extend
6434 genDefineTempLabel(genCreateTempLabel());
6437 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6438 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6439 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6440 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6441 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6442 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6443 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6444 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6445 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6446 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6447 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6449 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6450 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6451 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6452 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6453 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6454 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6455 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6456 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6457 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6458 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6459 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6461 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6462 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6463 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6464 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6465 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6466 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6467 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6468 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6469 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6470 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6471 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6473 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6474 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6475 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6476 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6477 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6479 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6480 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6481 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6482 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6483 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6484 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6485 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6486 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6487 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6488 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6489 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6491 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6492 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6493 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6494 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6495 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6496 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6497 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6498 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6499 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6500 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6501 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6502 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6504 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6505 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6506 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6507 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6508 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6509 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6512 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6513 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6514 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6515 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6516 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6517 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6518 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6519 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6520 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6521 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6522 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6524 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6525 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6526 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6527 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6528 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6529 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6530 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6531 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6532 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6533 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6534 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6536 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6537 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6538 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6539 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6540 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6541 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6542 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6543 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6544 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6545 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6546 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6548 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6549 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6550 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6551 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6552 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6554 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6556 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6561 genDefineTempLabel(genCreateTempLabel());
6563 theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
6564 theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
6565 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
6566 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
6567 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
6568 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
6570 theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
6571 theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
6572 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
6573 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
6574 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
6575 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
6577 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6579 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6584 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
6585 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
6586 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
6587 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
6588 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
6589 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
6590 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
6591 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
6592 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
6593 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
6594 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
6595 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
6596 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
6597 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
6600 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
6601 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
6602 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
6603 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
6604 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
6605 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
6606 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
6607 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
6608 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
6609 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
6610 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
6611 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
6612 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
6613 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
6615 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6617 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6621 // cinc reg, reg, cond
6622 // cinv reg, reg, cond
6623 // cneg reg, reg, cond
6624 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
6625 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
6626 theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
6627 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
6628 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
6629 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
6630 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
6631 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
6632 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
6633 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
6634 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
6635 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
6636 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
6637 theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
6639 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6641 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6645 // csel reg, reg, reg, cond
6646 // csinc reg, reg, reg, cond
6647 // csinv reg, reg, reg, cond
6648 // csneg reg, reg, reg, cond
6649 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
6650 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
6651 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
6652 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
6653 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
6654 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
6655 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
6656 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
6657 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
6658 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
6659 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
6660 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
6661 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
6662 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
6664 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6666 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6670 // ccmp reg1, reg2, nzcv, cond
6671 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6672 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6673 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6674 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6675 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6676 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6677 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6678 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6679 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6680 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6681 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6682 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6683 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6684 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6686 // ccmp reg1, imm, nzcv, cond
6687 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6688 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6689 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6690 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6691 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6692 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6693 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6694 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6695 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6696 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6697 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6698 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6699 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6700 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6702 // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
6703 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
6704 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
6705 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
6706 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
6707 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
6708 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
6709 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
6710 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
6711 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
6712 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
6713 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
6714 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
6715 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
6716 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
6718 // ccmn reg1, reg2, nzcv, cond
6719 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6720 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6721 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6722 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6723 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6724 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6725 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6726 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6727 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6728 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6729 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6730 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6731 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6732 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6734 // ccmn reg1, imm, nzcv, cond
6735 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6736 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6737 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6738 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6739 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6740 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6741 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6742 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6743 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6744 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6745 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6746 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6747 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6748 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6750 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6752 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6754 // Branch to register
6757 genDefineTempLabel(genCreateTempLabel());
6759 theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
6760 theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
6761 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
6762 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
6764 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6766 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6771 genDefineTempLabel(genCreateTempLabel());
6773 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
6774 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
6776 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
6777 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
6778 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
6780 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
6781 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
6782 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
6784 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
6785 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
6786 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
6788 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
6789 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
6790 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
6792 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6794 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6795 ////////////////////////////////////////////////////////////////////////////////
6797 // SIMD and Floating point
6799 ////////////////////////////////////////////////////////////////////////////////
6802 // Load/Stores vector register
6805 genDefineTempLabel(genCreateTempLabel());
6807 // ldr/str Vt, [reg]
6808 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
6809 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
6810 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
6811 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
6812 theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
6813 theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
6814 theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
6815 theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
6816 theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
6817 theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
6819 // ldr/str Vt, [reg+cns] -- scaled
6820 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
6821 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
6822 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
6823 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
6824 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
6826 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
6827 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
6828 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
6829 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
6830 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
6832 // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
6833 // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
6834 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6835 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6836 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6837 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6838 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6840 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6841 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6842 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6843 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6844 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6846 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6847 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6848 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6849 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6850 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6852 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6853 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6854 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6855 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6856 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6858 theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
6859 theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
6860 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
6861 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
6862 theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
6864 theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
6865 theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
6866 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
6867 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
6868 theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
6871 theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
6872 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
6873 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
6874 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
6876 theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
6877 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
6878 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
6879 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
6881 theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
6882 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
6883 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
6884 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
6886 theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
6887 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
6888 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
6889 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
6890 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
6891 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
6892 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
6893 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
6895 theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
6896 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
6897 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
6898 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
6899 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
6900 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
6901 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
6902 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
6904 theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
6905 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
6906 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
6907 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
6908 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
6909 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
6910 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
6911 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
6914 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
6915 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6916 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
6917 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6918 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
6919 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6920 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
6921 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6922 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
6923 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6924 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6926 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
6927 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6928 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
6929 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6930 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
6931 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6932 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
6933 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6934 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
6935 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6936 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6938 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
6939 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6940 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
6941 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6942 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
6943 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6944 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
6945 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6946 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
6947 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6948 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
6950 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
6951 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6952 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
6953 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6954 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
6955 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6956 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
6957 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6958 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
6959 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6960 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6962 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
6963 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
6964 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
6965 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
6966 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
6968 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6970 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6972 // R_R mov and aliases for mov
6975 // mov vector to vector
6976 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
6977 theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
6979 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
6980 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
6981 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
6983 // mov vector to general
6984 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
6985 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
6986 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
6987 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
6989 // mov general to vector
6990 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
6991 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
6992 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
6993 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
6995 // mov vector[index] to vector
6996 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
6997 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
6998 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
6999 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
7001 // mov to general from vector[index]
7002 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
7003 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
7004 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
7005 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
7007 // mov to vector[index] from general
7008 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
7009 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
7010 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
7011 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
7013 // mov vector[index] to vector[index2]
7014 theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
7015 theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
7016 theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
7017 theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
7019 //////////////////////////////////////////////////////////////////////////////////
7022 theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
7023 theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
7024 theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
7025 theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
7027 // mov/ins vector element
7028 theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
7029 theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
7030 theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
7031 theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
7033 // umov to general from vector element
7034 theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
7035 theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
7036 theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
7037 theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
7039 // ins to vector element from general
7040 theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
7041 theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
7042 theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
7043 theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
7045 // smov to general from vector element
7046 theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
7047 theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
7048 theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
7050 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7052 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7054 // R_I movi and mvni
7057 // movi imm8 (vector)
7058 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
7059 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
7060 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
7061 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
7063 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
7064 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
7065 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
7066 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
7068 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
7069 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
7070 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
7071 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
7073 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
7074 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
7076 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
7077 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
7078 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
7079 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
7081 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
7082 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
7084 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
7085 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
7087 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
7088 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
7089 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
7090 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
7092 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
7093 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
7095 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
7096 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
7097 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
7098 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
7100 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7101 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7102 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7103 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7105 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7106 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7107 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7108 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7110 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
7111 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
7113 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7114 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7115 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7116 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7118 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
7119 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
7121 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7123 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7125 // R_I orr/bic vector immediate
7128 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7129 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7130 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7131 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7133 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7134 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7135 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7136 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7138 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7139 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7140 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7141 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7143 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7144 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7145 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7146 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7148 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7149 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7150 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7151 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7153 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7154 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7155 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7156 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7158 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7160 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7162 // R_F cmp/fmov immediate
7165 // fmov imm8 (scalar)
7166 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
7167 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
7168 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
7169 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
7170 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
7171 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value
7172 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31);
7173 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
7174 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
7175 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
7176 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
7178 // fmov imm8 (vector)
7179 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
7180 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
7181 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
7182 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
7183 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
7184 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
7185 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
7186 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
7187 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
7188 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
7189 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
7190 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
7191 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
7194 theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
7195 theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
7196 theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
7197 theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
7199 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7201 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7203 // R_R fmov/fcmp/fcvt
7206 // fmov vector to vector
7207 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
7208 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
7210 // fmov vector to general
7211 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
7212 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
7213 // using the optional conversion specifier
7214 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
7215 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
7217 // fmov general to vector
7218 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
7219 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
7220 // using the optional conversion specifier
7221 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
7222 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
7225 theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
7226 theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
7227 theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
7228 theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
7231 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
7232 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
7234 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
7235 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
7237 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
7238 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
7240 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7242 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7244 // R_R floating point conversions
7248 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
7249 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
7251 // fcvtas scalar to general
7252 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7253 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7254 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7255 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7258 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7259 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7260 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7263 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
7264 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
7266 // fcvtau scalar to general
7267 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7268 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7269 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7270 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7273 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7274 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7275 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7277 ////////////////////////////////////////////////////////////////////////////////
7280 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
7281 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
7283 // fcvtms scalar to general
7284 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7285 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7286 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7287 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7290 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7291 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7292 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7295 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
7296 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
7298 // fcvtmu scalar to general
7299 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7300 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7301 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7302 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7305 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7306 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7307 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7309 ////////////////////////////////////////////////////////////////////////////////
7312 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
7313 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
7315 // fcvtns scalar to general
7316 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7317 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7318 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7319 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7322 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7323 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7324 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7327 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
7328 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
7330 // fcvtnu scalar to general
7331 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7332 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7333 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7334 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7337 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7338 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7339 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7341 ////////////////////////////////////////////////////////////////////////////////
7344 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
7345 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
7347 // fcvtps scalar to general
7348 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7349 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7350 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7351 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7354 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7355 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7356 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7359 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
7360 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
7362 // fcvtpu scalar to general
7363 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7364 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7365 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7366 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7369 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7370 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7371 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7373 ////////////////////////////////////////////////////////////////////////////////
7376 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
7377 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
7379 // fcvtzs scalar to general
7380 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7381 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7382 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7383 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7386 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7387 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7388 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7391 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
7392 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
7394 // fcvtzu scalar to general
7395 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7396 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7397 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7398 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7401 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7402 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7403 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7405 ////////////////////////////////////////////////////////////////////////////////
7408 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
7409 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
7411 // scvtf scalar from general
7412 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7413 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7414 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7415 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7418 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7419 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7420 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7423 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
7424 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
7426 // ucvtf scalar from general
7427 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7428 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7429 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7430 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7433 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7434 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7435 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7437 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7439 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7441 // R_R floating point operations, one dest, one source
7445 theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
7446 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
7449 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7450 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7451 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7454 theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
7455 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
7458 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7459 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7460 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7463 theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
7464 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
7467 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7468 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7469 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7471 genDefineTempLabel(genCreateTempLabel());
7474 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
7477 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7478 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7479 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7480 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7481 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7482 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7483 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7486 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
7489 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7490 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7491 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7492 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7493 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7494 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7495 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7498 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
7499 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
7500 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
7501 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
7504 theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
7505 theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
7507 // not vector (the same encoding as mvn)
7508 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
7509 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
7510 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
7511 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
7514 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7515 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7516 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7517 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7518 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7519 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7522 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7523 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7524 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7525 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7526 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7527 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7530 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7531 theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7534 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7535 theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7538 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7539 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7540 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7541 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7544 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7545 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7546 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7547 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7548 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7549 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7552 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7553 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7554 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7555 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7556 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7557 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7560 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7561 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7562 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7563 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7564 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7565 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7568 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7569 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7570 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7571 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7572 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7573 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7576 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7577 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7578 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7579 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7580 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7581 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7584 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7585 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7586 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7587 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7588 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7589 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7592 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7593 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7594 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7595 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7596 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7597 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7600 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7601 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7602 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7603 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7604 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7605 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7608 theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1);
7609 theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3);
7612 theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1);
7615 theEmitter->emitIns_R_R(INS_fcvtl2, EA_4BYTE, REG_V0, REG_V1);
7618 theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1);
7621 theEmitter->emitIns_R_R(INS_fcvtn2, EA_8BYTE, REG_V0, REG_V1);
7624 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7626 // R_R floating point round to int, one dest, one source
7630 theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
7631 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
7634 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7635 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7636 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7639 theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
7640 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
7643 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7644 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7645 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7648 theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
7649 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
7652 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7653 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7654 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7657 theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
7658 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
7661 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7662 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7663 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7666 theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
7667 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
7670 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7671 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7672 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7675 theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
7676 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
7679 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7680 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7681 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7684 theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
7685 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
7688 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7689 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7690 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7692 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7694 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7696 // R_R_R floating point operations, one dest, two source
7699 genDefineTempLabel(genCreateTempLabel());
7701 theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7702 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7703 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7704 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7705 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7707 theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7708 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7709 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7710 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7711 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7713 theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7714 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7715 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7716 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7717 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7719 theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7720 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7721 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7722 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7723 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7725 theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7726 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7727 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7728 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7729 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7732 theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7733 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7734 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7735 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7736 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7738 genDefineTempLabel(genCreateTempLabel());
7740 theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7741 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7742 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7743 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7744 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7746 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7747 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7748 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7749 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7750 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7752 theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7753 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7754 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7755 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7756 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7758 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7759 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7760 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7761 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7762 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7764 theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7765 theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7767 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7769 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7771 // R_R_I vector operations, one dest, one source reg, one immed
7774 genDefineTempLabel(genCreateTempLabel());
7777 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
7778 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
7779 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
7780 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
7781 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
7784 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7785 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7786 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7787 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7788 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7789 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7790 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7791 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7794 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
7795 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
7796 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
7797 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
7798 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
7801 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7802 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7803 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7804 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7805 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7806 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7807 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7808 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7811 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
7812 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
7813 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
7814 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
7815 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
7818 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7819 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7820 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7821 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7822 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7823 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7824 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7825 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7828 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
7829 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
7830 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
7831 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
7832 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
7835 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7836 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7837 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7838 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7839 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7840 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7841 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7842 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7845 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
7846 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
7847 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
7848 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
7849 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
7852 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7853 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7854 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7855 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7856 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7857 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7858 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7859 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7862 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
7863 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
7864 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
7865 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
7866 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
7869 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7870 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7871 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7872 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7873 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7874 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7875 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7876 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7879 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
7880 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
7881 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
7882 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
7883 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
7886 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7887 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7888 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7889 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7890 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7891 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7892 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7893 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7896 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
7897 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
7898 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
7899 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
7900 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
7903 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7904 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7905 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7906 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7907 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7908 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7909 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7910 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7913 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
7914 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
7915 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
7916 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
7917 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
7920 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7921 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7922 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7923 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7924 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7925 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7926 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7927 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7930 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
7931 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
7932 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
7933 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
7934 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
7937 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7938 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7939 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7940 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7941 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7942 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7943 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7944 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7947 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
7948 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
7949 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
7950 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
7951 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
7954 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7955 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7956 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7957 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7958 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7959 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7960 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7961 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7964 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7965 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7966 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7967 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7968 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7969 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7972 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7973 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7974 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7975 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7976 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7977 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7980 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7981 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7982 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7983 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7984 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7985 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7988 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7989 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7990 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7991 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7992 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7993 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7996 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7997 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7998 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
7999 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
8000 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
8001 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
8004 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
8005 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
8006 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
8007 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
8008 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
8009 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
8011 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8013 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8015 // R_R_R vector operations, one dest, two source
8018 genDefineTempLabel(genCreateTempLabel());
8020 // Specifying an Arrangement is optional
8022 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
8023 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
8024 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
8025 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
8026 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
8027 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
8028 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
8029 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
8030 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
8031 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
8033 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
8034 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
8035 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
8036 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
8037 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
8038 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
8040 // Default Arrangement as per the ARM64 manual
8042 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
8043 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
8044 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
8045 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
8046 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
8047 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
8048 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
8049 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
8050 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
8051 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
8053 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
8054 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
8055 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
8056 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
8057 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
8058 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
8060 genDefineTempLabel(genCreateTempLabel());
8062 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
8063 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
8064 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8065 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
8066 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
8067 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
8068 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
8069 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
8071 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
8072 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
8073 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
8074 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
8075 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
8076 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
8077 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
8078 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
8080 genDefineTempLabel(genCreateTempLabel());
8083 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8084 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8085 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8086 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8087 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8088 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8091 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8092 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8093 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8094 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8095 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8096 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8099 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8100 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8101 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8102 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8103 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8104 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8107 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8108 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8109 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8110 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8111 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8112 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8113 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8115 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8117 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8118 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8119 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8120 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8121 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8122 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8125 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8126 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8127 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8128 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8129 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8130 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8133 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8134 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8135 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8136 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8137 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8138 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8141 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8142 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8143 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8144 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8145 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8146 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8149 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8150 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8151 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8152 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8153 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8154 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8155 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8156 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8159 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8160 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8161 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8162 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8163 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8164 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8165 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8166 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8169 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8170 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8171 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8172 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8173 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8174 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8175 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8176 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8179 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8180 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8181 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8182 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8183 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8184 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8185 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8186 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8189 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8190 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8191 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8192 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8193 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8194 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8195 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8196 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8199 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8200 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8201 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8202 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8203 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8204 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8205 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8206 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8209 theEmitter->emitIns_R_R_R(INS_faddp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8210 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8211 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8214 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8215 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8216 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8219 theEmitter->emitIns_R_R_R(INS_fcmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8220 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8221 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8224 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8225 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8226 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8227 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8229 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8231 // R_R_R vector multiply
8234 genDefineTempLabel(genCreateTempLabel());
8236 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8237 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
8238 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8239 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
8240 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
8241 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8243 theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
8244 theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
8246 // 'mul' vector by elem
8247 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8248 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8249 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8250 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8251 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8252 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8253 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8254 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8255 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8256 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8257 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8258 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8260 // 'mla' vector by elem
8261 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8262 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8263 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8264 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8265 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8266 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8267 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8268 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8269 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8270 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8271 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8272 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8274 // 'mls' vector by elem
8275 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8276 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8277 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8278 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8279 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8280 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8281 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8282 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8283 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8284 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8285 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8286 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8288 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8290 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8292 // R_R_R floating point operations, one source/dest, and two source
8295 genDefineTempLabel(genCreateTempLabel());
8297 theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8298 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8299 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8301 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8302 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8303 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8304 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8305 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8307 theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8308 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8309 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8311 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8312 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8313 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8314 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8315 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8317 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8319 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8321 // R_R_R_R floating point operations, one dest, and three source
8324 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
8325 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
8326 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
8327 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
8329 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
8330 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
8331 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
8332 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
8336 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8338 BasicBlock* label = genCreateTempLabel();
8339 genDefineTempLabel(label);
8344 theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
8346 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8348 printf("*************** End of genArm64EmitterUnitTests()\n");
8350 #endif // defined(DEBUG)
8352 #endif // _TARGET_ARM64_
8354 #endif // !LEGACY_BACKEND