1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Arm64 Code Generator XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
18 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
25 #include "gcinfoencoder.h"
28 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
29 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
33 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
34 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
37 //------------------------------------------------------------------------
38 // genInstrWithConstant: we will typically generate one instruction
40 // ins reg1, reg2, imm
42 // However the imm might not fit as a directly encodable immediate,
43 // when it doesn't fit we generate extra instruction(s) that sets up
44 // the 'tmpReg' with the proper immediate value.
47 // ins reg1, reg2, tmpReg
51 // attr - operation size and GC attribute
52 // reg1, reg2 - first and second register operands
53 // imm - immediate value (third operand when it fits)
54 // tmpReg - temp register to use when the 'imm' doesn't fit
55 // inUnwindRegion - true if we are in a prolog/epilog region with unwind codes
58 // returns true if the immediate was too large and tmpReg was used and modified.
60 bool CodeGen::genInstrWithConstant(instruction ins,
66 bool inUnwindRegion /* = false */)
// Decided per instruction kind below: true when 'imm' can be encoded directly in 'ins'.
68 bool immFitsInIns = false;
69 emitAttr size = EA_SIZE(attr);
71 // reg1 is usually a dest register
72 // reg2 is always a source register
73 assert(tmpReg != reg2); // tmpReg can not match any source register
// Swap add <-> sub.
// NOTE(review): the guarding condition is not visible in this excerpt; presumably this is paired
// with negating a negative 'imm' so the encoded immediate is positive -- confirm.
82 ins = (ins == INS_add) ? INS_sub : INS_add;
84 immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size);
90 // reg1 is a source register for store instructions
91 assert(tmpReg != reg1); // tmpReg can not match any source register
92 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
101 immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
// Any instruction kind not handled above is a caller error.
105 assert(!"Unexpected instruction in genInstrWithConstant");
111 // generate a single instruction that encodes the immediate directly
112 getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm);
116 // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit
117 assert(tmpReg != REG_NA);
119 // generate two or more instructions
121 // first we load the immediate into tmpReg
122 instGen_Set_Reg_To_Imm(size, tmpReg, imm);
123 regTracker.rsTrackRegTrash(tmpReg);
125 // when we are in an unwind code region
126 // we record the extra instructions using unwindPadding()
129 compiler->unwindPadding();
132 // generate the instruction using a three register encoding with the immediate in tmpReg
133 getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg);
138 //------------------------------------------------------------------------
139 // genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
140 // or the epilog. The unwind codes for the generated instructions are produced. An available temporary
141 // register is required to be specified, in case the constant is too large to encode in an "add"
142 // instruction (or "sub" instruction if we choose to use one), such that we need to load the constant
143 // into a register first, before using it.
146 // spDelta - the value to add to SP (can be negative)
147 // tmpReg - an available temporary register
148 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
149 // Otherwise, we don't touch it.
154 void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
156 // Even though INS_add is specified here, the encoder will choose either
157 // an INS_add or an INS_sub and encode the immediate as a positive value
// The trailing 'true' is inUnwindRegion: if the constant has to be materialized in tmpReg,
// genInstrWithConstant records the extra instruction(s) with unwindPadding().
159 if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true))
// A true return means tmpReg was used and modified, so the caller can no longer treat it as zero.
161 if (pTmpRegIsZero != nullptr)
163 *pTmpRegIsZero = false;
167 // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive
// NOTE(review): spDelta is ssize_t; confirm the abs() overload selected here is pointer-sized and not
// int abs(int), which would truncate the value before the range check below.
169 ssize_t spDeltaAbs = abs(spDelta);
170 unsigned unwindSpDelta = (unsigned)spDeltaAbs;
171 assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in an unsigned
173 compiler->unwindAllocStack(unwindSpDelta);
176 //------------------------------------------------------------------------
177 // genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
178 // prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
179 // The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
183 // reg1 - First register of pair to save.
184 // reg2 - Second register of pair to save.
185 // spOffset - The offset from SP to store reg1 (must be positive or zero).
186 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
188 // lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
189 // allows us to emit the "save_next" unwind code.
190 // tmpReg - An available temporary register. Needed for the case of large frames.
191 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
192 // Otherwise, we don't touch it.
197 void CodeGen::genPrologSaveRegPair(regNumber reg1,
201 bool lastSavedWasPreviousPair,
205 assert(spOffset >= 0);
206 assert(spDelta <= 0);
207 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
208 assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
// Cleared when the pre-indexed STP below has already stored the pair.
211 bool needToSaveRegs = true;
214 if ((spOffset == 0) && (spDelta >= -512))
216 // We can use pre-indexed addressing.
217 // stp REG, REG + 1, [SP, #spDelta]!
218 // 64-bit STP offset range: -512 to 504, multiple of 8.
219 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
220 compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
// That single instruction both moved SP and stored the pair; no further store is needed.
222 needToSaveRegs = false;
224 else // (spOffset != 0) || (spDelta < -512)
226 // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the
229 // generate sub SP,SP,imm
230 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
236 // stp REG, REG + 1, [SP, #offset]
237 // 64-bit STP offset range: -512 to 504, multiple of 8.
238 assert(spOffset <= 504);
239 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
241 if (lastSavedWasPreviousPair)
243 // This works as long as we've only been saving pairs, in order, and we've saved the previous one just
245 compiler->unwindSaveNext();
249 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
254 //------------------------------------------------------------------------
255 // genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
256 // floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
257 // then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
258 // below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
259 // (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
260 // STR instruction, and that spOffset will be in the legal range for that instruction.
263 // reg1 - Register to save.
264 // spOffset - The offset from SP to store reg1 (must be positive or zero).
265 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
267 // tmpReg - An available temporary register. Needed for the case of large frames.
268 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
269 // Otherwise, we don't touch it.
274 void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
276 assert(spOffset >= 0);
277 assert(spDelta <= 0);
278 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
// Unlike the register-pair variant, no pre-indexed form is used here: any SP change is emitted as a
// separate adjustment ahead of the store.
282 // generate sub SP,SP,imm
283 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
286 // str REG, [SP, #offset]
287 // 64-bit STR offset range: 0 to 32760, multiple of 8.
288 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
289 compiler->unwindSaveReg(reg1, spOffset);
292 //------------------------------------------------------------------------
293 // genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
294 // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
295 // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that
299 // reg1 - First register of pair to restore.
300 // reg2 - Second register of pair to restore.
301 // spOffset - The offset from SP to load reg1 (must be positive or zero).
302 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
304 // tmpReg - An available temporary register. Needed for the case of large frames.
305 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
306 // Otherwise, we don't touch it.
311 void CodeGen::genEpilogRestoreRegPair(
312 regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
314 assert(spOffset >= 0);
315 assert(spDelta >= 0);
316 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
320 if ((spOffset == 0) && (spDelta <= 504))
322 // Fold the SP change into this instruction.
323 // ldp reg1, reg2, [SP], #spDelta
324 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
// The unwind code names the prolog operation being undone: a post-indexed load by +spDelta
// corresponds to a pre-indexed store by -spDelta.
325 compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
327 else // (spOffset != 0) || (spDelta > 504)
329 // Can't fold in the SP change; need to use a separate ADD instruction.
331 // ldp reg1, reg2, [SP, #offset]
332 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
333 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
335 // generate add SP,SP,imm
336 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
// Presumably the spDelta == 0 path (its branch header is outside this excerpt -- confirm):
// restore the pair without touching SP.
341 // ldp reg1, reg2, [SP, #offset]
342 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
343 compiler->unwindSaveRegPair(reg1, reg2, spOffset);
347 //------------------------------------------------------------------------
348 // genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
351 // reg1 - Register to restore.
352 // spOffset - The offset from SP to restore reg1 (must be positive or zero).
353 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
355 // tmpReg - An available temporary register. Needed for the case of large frames.
356 // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
357 // Otherwise, we don't touch it.
362 void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
364 assert(spOffset >= 0);
365 assert(spDelta >= 0);
366 assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
// The load comes first, so spOffset is relative to SP as it is before the adjustment below.
368 // ldr reg1, [SP, #offset]
369 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
370 compiler->unwindSaveReg(reg1, spOffset);
374 // generate add SP,SP,imm
375 genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
379 //------------------------------------------------------------------------
380 // genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
381 // in the function or funclet prolog. The save set does not contain FP, since that is
382 // guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
383 // that are allowed by the unwind codes. Integer registers are stored at lower addresses,
384 // FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that
385 // there is enough space on the frame to store these registers, and that the store instructions
386 // we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
387 // use. Note that the save set can contain LR if this is a frame without a frame pointer, in
388 // which case LR is saved along with the other callee-saved registers. The caller can tell us
389 // to fold in a stack pointer adjustment, which we will do with the first instruction. Note that
390 // the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
391 // stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
392 // registers, though, we will have an empty alignment slot somewhere. It turns out we will put
393 // it below (at a lower address) the callee-saved registers, as that is currently how we
394 // do frame layout. This means that the first stack offset will be 8 and the stack pointer
395 // adjustment must be done by a SUB, and not folded in to a pre-indexed store.
398 // regsToSaveMask - The mask of callee-saved registers to save. If empty, no registers are saved; only the spDelta stack-pointer adjustment is performed.
399 // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
400 // if non-zero spDelta, then this is the offset of the first save *after* that
402 // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
408 void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
410 assert(spDelta <= 0);
411 unsigned regsToSaveCount = genCountBits(regsToSaveMask);
// An empty mask is not a pure no-op: this path still applies the spDelta adjustment to SP.
412 if (regsToSaveCount == 0)
416 // Currently this is the case for varargs only
417 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
// REG_NA: no temporary register is offered, so spDelta has to be encodable as an immediate here.
418 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
423 assert((spDelta % 16) == 0);
424 assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
425 assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in
428 regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
429 regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
431 int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
433 unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt);
434 unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat);
435 bool isPairSave = false;
437 bool isRegsToSaveCountOdd = ((intRegsToSaveCount + floatRegsToSaveCount) % 2 != 0);
440 // Save the integer registers
442 bool lastSavedWasPair = false;
444 while (maskSaveRegsInt != RBM_NONE)
446 // If this is the first store that needs to change SP (spDelta != 0),
447 // then the offset must be 8 to account for alignment for the odd count
448 // or it must be 0 for the even count.
449 assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
450 (!isRegsToSaveCountOdd && spOffset == 0));
// Pair registers up, lowest-numbered first, while at least two remain; an odd leftover is stored
// singly on the final iteration.
452 isPairSave = (intRegsToSaveCount >= 2);
453 regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt);
454 regNumber reg1 = genRegNumFromMask(reg1Mask);
455 maskSaveRegsInt &= ~reg1Mask;
456 intRegsToSaveCount -= 1;
460 // We can use a STP instruction.
462 regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt);
463 regNumber reg2 = genRegNumFromMask(reg2Mask);
464 assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR));
465 maskSaveRegsInt &= ~reg2Mask;
466 intRegsToSaveCount -= 1;
468 genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
470 // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
471 // the epilog, to get the codes to match. Turn this off until that is better understood.
472 // lastSavedWasPair = true;
474 spOffset += 2 * REGSIZE_BYTES;
478 // No register pair; we use a STR instruction.
480 genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
482 lastSavedWasPair = false;
483 spOffset += REGSIZE_BYTES;
486 spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
489 assert(intRegsToSaveCount == 0);
491 // Save the floating-point/SIMD registers
493 lastSavedWasPair = false;
495 while (maskSaveRegsFloat != RBM_NONE)
497 // If this is the first store that needs to change SP (spDelta != 0),
498 // then the offset must be 8 to account for alignment for the odd count
499 // or it must be 0 for the even count.
500 assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
501 (!isRegsToSaveCountOdd && spOffset == 0));
// Same pairing rule as the integer loop above.
503 isPairSave = (floatRegsToSaveCount >= 2);
504 regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat);
505 regNumber reg1 = genRegNumFromMask(reg1Mask);
506 maskSaveRegsFloat &= ~reg1Mask;
507 floatRegsToSaveCount -= 1;
511 // We can use a STP instruction.
513 regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat);
514 regNumber reg2 = genRegNumFromMask(reg2Mask);
515 assert(reg2 == REG_NEXT(reg1));
516 maskSaveRegsFloat &= ~reg2Mask;
517 floatRegsToSaveCount -= 1;
519 genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
521 // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
522 // the epilog, to get the codes to match. Turn this off until that is better understood.
523 // lastSavedWasPair = true;
525 spOffset += 2 * FPSAVE_REGSIZE_BYTES;
529 // No register pair; we use a STR instruction.
531 genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
533 lastSavedWasPair = false;
534 spOffset += FPSAVE_REGSIZE_BYTES;
537 spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
540 assert(floatRegsToSaveCount == 0);
543 //------------------------------------------------------------------------
544 // genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
545 // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
548 // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, no registers are restored; only the spDelta stack-pointer adjustment is performed.
549 // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
550 // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
553 // Here's an example restore sequence:
554 // ldp x27, x28, [sp,#96]
555 // ldp x25, x26, [sp,#80]
556 // ldp x23, x24, [sp,#64]
557 // ldp x21, x22, [sp,#48]
558 // ldp x19, x20, [sp,#32]
560 // For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
561 // the last restore adjusts SP by the specified amount. For example:
562 // ldp x27, x28, [sp,#64]
563 // ldp x25, x26, [sp,#48]
564 // ldp x23, x24, [sp,#32]
565 // ldp x21, x22, [sp,#16]
566 // ldp x19, x20, [sp], #80
568 // Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
569 // generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
574 void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
576 assert(spDelta >= 0);
577 unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
// An empty mask is not a pure no-op: this path still applies the spDelta adjustment to SP.
578 if (regsToRestoreCount == 0)
582 // Currently this is the case for varargs only
583 // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
// REG_NA: no temporary register is offered, so spDelta has to be encodable as an immediate here.
584 genStackPointerAdjustment(spDelta, REG_NA, nullptr);
589 assert((spDelta % 16) == 0);
590 assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
591 assert(regsToRestoreCount <=
592 genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
594 regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
595 regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
// The single multiplication by REGSIZE_BYTES below spans both integer and float slots, which is
// only valid because the two slot sizes are equal.
597 assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
598 int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
599 // predecrement to find the offset to
602 unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat);
603 unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt);
605 bool isPairRestore = false;
606 bool thisIsTheLastRestoreInstruction = false;
608 bool isRegsToRestoreCountOdd = ((floatRegsToRestoreCount + intRegsToRestoreCount) % 2 != 0);
611 // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
612 // callee-saved registers properly.
614 // Restore the floating-point/SIMD registers
616 while (maskRestoreRegsFloat != RBM_NONE)
// A float restore can only be the final instruction when no integer registers remain to restore.
618 thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount <= 2) && (maskRestoreRegsInt == RBM_NONE);
// While the remaining count is odd, restore one register singly; this makes the unpaired
// register the first one restored.
619 isPairRestore = (floatRegsToRestoreCount % 2) == 0;
621 // Update stack delta only if it is the last restore (the first save).
622 if (thisIsTheLastRestoreInstruction)
// NOTE(review): stackDelta's declaration is outside this excerpt; the assert implies it starts at 0
// and is assigned at most once.
624 assert(stackDelta == 0);
625 stackDelta = spDelta;
628 // Update stack offset.
631 spOffset -= 2 * FPSAVE_REGSIZE_BYTES;
635 spOffset -= FPSAVE_REGSIZE_BYTES;
638 // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
639 // then the offset must be 8 to account for alignment for the odd count
640 // or it must be 0 for the even count.
641 assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == FPSAVE_REGSIZE_BYTES) ||
642 (!isRegsToRestoreCountOdd && spOffset == 0));
// Registers are taken from the highest bit downwards, so reg2 is picked before reg1.
644 regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat);
645 regNumber reg2 = genRegNumFromMask(reg2Mask);
646 maskRestoreRegsFloat &= ~reg2Mask;
647 floatRegsToRestoreCount -= 1;
651 regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
652 regNumber reg1 = genRegNumFromMask(reg1Mask);
653 maskRestoreRegsFloat &= ~reg1Mask;
654 floatRegsToRestoreCount -= 1;
656 genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP1, nullptr);
660 genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP1, nullptr);
664 assert(floatRegsToRestoreCount == 0);
666 // Restore the integer registers
668 while (maskRestoreRegsInt != RBM_NONE)
670 thisIsTheLastRestoreInstruction = (intRegsToRestoreCount <= 2);
// Same odd-count rule as the float loop above.
671 isPairRestore = (intRegsToRestoreCount % 2) == 0;
673 // Update stack delta only if it is the last restore (the first save).
674 if (thisIsTheLastRestoreInstruction)
676 assert(stackDelta == 0);
677 stackDelta = spDelta;
680 // Update stack offset.
681 spOffset -= REGSIZE_BYTES;
684 spOffset -= REGSIZE_BYTES;
687 // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
688 // then the offset must be 8 to account for alignment for the odd count
689 // or it must be 0 for the even count.
690 assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == REGSIZE_BYTES) ||
691 (!isRegsToRestoreCountOdd && spOffset == 0));
693 regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt);
694 regNumber reg2 = genRegNumFromMask(reg2Mask);
695 maskRestoreRegsInt &= ~reg2Mask;
696 intRegsToRestoreCount -= 1;
700 regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
701 regNumber reg1 = genRegNumFromMask(reg1Mask);
702 maskRestoreRegsInt &= ~reg1Mask;
703 intRegsToRestoreCount -= 1;
705 genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP1, nullptr);
709 genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP1, nullptr);
713 assert(intRegsToRestoreCount == 0);
717 /*****************************************************************************
719 * Generates code for an EH funclet prolog.
721 * Funclets have the following incoming arguments:
723 * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
724 * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
725 * finally/fault: none
727 * Funclets set the following registers on exit:
729 * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
730 * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
731 * finally/fault: none
733 * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
734 * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
737 * For #outsz == 0 and #framesz <= 512:
738 * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
739 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
741 * The funclet frame is thus:
744 * |-----------------------|
747 * +=======================+ <---- Caller's SP
748 * |Callee saved registers | // multiple of 8 bytes
749 * |-----------------------|
750 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
751 * |-----------------------|
752 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
753 * |-----------------------|
754 * | Saved FP, LR | // 16 bytes
755 * |-----------------------| <---- Ambient SP
762 * For #outsz != 0 and #framesz <= 512:
763 * sub sp,sp,#framesz ; establish the frame
764 * stp fp,lr,[sp,#outsz] ; save FP/LR.
765 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
767 * The funclet frame is thus:
770 * |-----------------------|
773 * +=======================+ <---- Caller's SP
774 * |Callee saved registers | // multiple of 8 bytes
775 * |-----------------------|
776 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
777 * |-----------------------|
778 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
779 * |-----------------------|
780 * | Saved FP, LR | // 16 bytes
781 * |-----------------------|
782 * | Outgoing arg space | // multiple of 8 bytes
783 * |-----------------------| <---- Ambient SP
790 * For #framesz > 512:
791 * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
792 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
793 * sub sp,sp,#outsz ; create space for outgoing argument space
795 * The funclet frame is thus:
798 * |-----------------------|
801 * +=======================+ <---- Caller's SP
802 * |Callee saved registers | // multiple of 8 bytes
803 * |-----------------------|
804 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
805 * |-----------------------|
806 * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
807 * |-----------------------|
808 * | Saved FP, LR | // 16 bytes
809 * |-----------------------|
810 * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
811 * |-----------------------|
812 * | Outgoing arg space | // multiple of 8 bytes
813 * |-----------------------| <---- Ambient SP
819 * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
820 * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
821 * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
822 * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
823 * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
824 * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
825 * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
826 * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
827 * outgoing argument space. Both changes to SP might need to add alignment padding.
829 * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
830 * as in the main function.
832 * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
833 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
835 * if (this is a filter funclet)
837 * // x1 on entry to a filter funclet is CallerSP of the containing function:
838 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
839 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
840 * // a funclet. Consider:
844 * // throw new Exception();
845 * // } catch(Exception) {
846 * // throw new Exception(); // The exception thrown here ...
848 * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
849 * // } filter-handler {
852 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
853 * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
854 * // create a main function PSP for any function with a filter.
856 * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
857 * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
858 * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
862 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
863 * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
865 * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
866 * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
869 * An example epilog sequence is then:
871 * add sp,sp,#outsz ; if any outgoing argument space
872 * ... ; restore callee-saved registers
873 * ldp x19,x20,[sp,#xxx]
874 * ldp fp,lr,[sp],#framesz
877 * The funclet frame is thus:
880 * |-----------------------|
883 * +=======================+ <---- Caller's SP
884 * |Callee saved registers | // multiple of 8 bytes
885 * |-----------------------|
886 * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
887 * |-----------------------|
888 * | Saved FP, LR | // 16 bytes
889 * |-----------------------|
890 * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
891 * |-----------------------|
892 * | Outgoing arg space | // multiple of 8 bytes
893 * |-----------------------| <---- Ambient SP
901 void CodeGen::genFuncletProlog(BasicBlock* block)
905 printf("*************** In genFuncletProlog()\n");
// The incoming block must exist and must be flagged as the beginning of a funclet.
908 assert(block != NULL);
909 assert(block->bbFlags & BBF_FUNCLET_BEG);
// Set compiler->compGeneratingProlog to true through a scoped setter object.
911 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
913 gcInfo.gcResetForBB();
915 compiler->unwindBegProlog();
// Partition the funclet's saved-register set into a floating-point mask and an integer mask.
917 regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
918 regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
920 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
921 assert((maskSaveRegsInt & RBM_LR) != 0);
922 assert((maskSaveRegsInt & RBM_FP) != 0);
924 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
// Argument registers considered live on entry. Depending on the handler kind this is
// RBM_R0 | RBM_R1, RBM_NONE (finally/fault handlers), or RBM_R0.
926 regMaskTP maskArgRegsLiveIn;
929 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
931 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
933 maskArgRegsLiveIn = RBM_NONE;
937 maskArgRegsLiveIn = RBM_R0;
// SP-relative offset of the callee-saved register area, precomputed in genFuncletInfo.
940 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
// Frame type 1: FP/LR are stored with an stp that uses fiSpDelta1, reported to the unwinder as a
// pre-indexed register-pair save. There is no second SP adjustment and FP/LR sit at offset 0 from SP.
942 if (genFuncletInfo.fiFrameType == 1)
944 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
946 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
948 assert(genFuncletInfo.fiSpDelta2 == 0);
949 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
// Frame type 2: SP is adjusted by fiSpDelta1 first, then FP/LR are stored at
// fiSP_to_FPLR_save_delta from the new SP.
951 else if (genFuncletInfo.fiFrameType == 2)
953 // fiFrameType==2 constraints:
954 assert(genFuncletInfo.fiSpDelta1 < 0);
955 assert(genFuncletInfo.fiSpDelta1 >= -512);
957 // generate sub SP,SP,imm
958 genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
960 assert(genFuncletInfo.fiSpDelta2 == 0);
962 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
963 genFuncletInfo.fiSP_to_FPLR_save_delta);
964 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
// Frame type 3: FP/LR are stored as in frame type 1; a second SP adjustment (fiSpDelta2) is applied
// further below, after the callee-saved registers have been stored.
968 assert(genFuncletInfo.fiFrameType == 3);
969 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
971 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
973 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
975 maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
// Store the remaining integer and floating-point callee-saved registers at lowestCalleeSavedOffset.
977 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
979 if (genFuncletInfo.fiFrameType == 3)
981 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
982 assert(genFuncletInfo.fiSpDelta2 < 0);
984 // generate sub SP,SP,imm
985 genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
988 // This is the end of the OS-reported prolog for purposes of unwinding
989 compiler->unwindEndProlog();
991 // If there is no PSPSym (CoreRT ABI), we are done.
992 if (compiler->lvaPSPSym == BAD_VAR_NUM)
999 // This is the first block of a filter
1000 // Note that register x1 = CallerSP of the containing function
1001 // X1 is overwritten by the first Load (new callerSP)
1002 // X2 is scratch when we have a large constant offset
1004 // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
1005 genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
1006 genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
1007 regTracker.rsTrackRegTrash(REG_R1);
1009 // Store the PSP value (aka CallerSP)
1010 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
1011 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1013 // re-establish the frame pointer
1014 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1017 else // This is a non-filter funclet
1019 // X3 is scratch, X2 can also become scratch
1021 // compute the CallerSP, given the frame pointer. x3 is scratch.
1022 genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
1024 regTracker.rsTrackRegTrash(REG_R3);
// Store the computed CallerSP (held in x3) into this funclet's PSP slot.
1026 genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
1027 genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
1031 /*****************************************************************************
1033 * Generates code for an EH funclet epilog.
1036 void CodeGen::genFuncletEpilog()
1040 printf("*************** In genFuncletEpilog()\n");
// Set compiler->compGeneratingEpilog to true through a scoped setter object.
1043 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
1045 bool unwindStarted = false;
1049 // We can delay this until we know we'll generate an unwindable instruction, if necessary.
1050 compiler->unwindBegEpilog();
1051 unwindStarted = true;
// Partition the funclet's saved-register set into a floating-point mask and an integer mask.
1054 regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
1055 regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
1057 // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
1058 assert((maskRestoreRegsInt & RBM_LR) != 0);
1059 assert((maskRestoreRegsInt & RBM_FP) != 0);
1061 maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
// SP-relative offset of the callee-saved register area, precomputed in genFuncletInfo.
1063 int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
// Frame type 3: undo the second SP adjustment first, then rebase the callee-save offset by the
// same (negative) fiSpDelta2.
1065 if (genFuncletInfo.fiFrameType == 3)
1067 // Note that genFuncletInfo.fiSpDelta2 is always a negative value
1068 assert(genFuncletInfo.fiSpDelta2 < 0);
1070 // generate add SP,SP,imm
1071 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
1073 lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
// Reload every callee-saved register except FP/LR, which are handled per frame type below.
1076 regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
1077 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
// Frame type 1: a post-indexed ldp reloads FP/LR with an SP update of -fiSpDelta1. The unwind code
// recorded for it is the pre-indexed pair save with fiSpDelta1, the same call the prolog makes.
1079 if (genFuncletInfo.fiFrameType == 1)
1081 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1082 INS_OPTS_POST_INDEX);
1083 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
1085 assert(genFuncletInfo.fiSpDelta2 == 0);
1086 assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
// Frame type 2: reload FP/LR from fiSP_to_FPLR_save_delta, then release the frame with a separate
// SP adjustment of -fiSpDelta1.
1088 else if (genFuncletInfo.fiFrameType == 2)
1090 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
1091 genFuncletInfo.fiSP_to_FPLR_save_delta);
1092 compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
1094 // fiFrameType==2 constraints:
1095 assert(genFuncletInfo.fiSpDelta1 < 0);
1096 assert(genFuncletInfo.fiSpDelta1 >= -512);
1098 // generate add SP,SP,imm
1099 genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
1101 assert(genFuncletInfo.fiSpDelta2 == 0);
// Frame type 3: same post-indexed FP/LR reload as frame type 1 (the fiSpDelta2 part was undone above).
1105 assert(genFuncletInfo.fiFrameType == 3);
1107 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
1108 INS_OPTS_POST_INDEX);
1109 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// Emit the return through LR, record it for the unwinder, and close the epilog unwind region.
1112 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
1113 compiler->unwindReturn(REG_LR);
1115 compiler->unwindEndEpilog();
1118 /*****************************************************************************
1120 * Capture the information used to generate the funclet prologs and epilogs.
1121 * Note that all funclet prologs are identical, and all funclet epilogs are
1122 * identical (per type: filters are identical, and non-filters are identical).
1123 * Thus, we compute the data used for these just once.
1125 * See genFuncletProlog() for more information about the prolog/epilog sequences.
1128 void CodeGen::genCaptureFuncletPrologEpilogInfo()
1130 if (!compiler->ehAnyFunclets())
1133 assert(isFramePointerUsed());
1134 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
1137 genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
// Funclets save the same callee-saved set as recorded in regSet.rsMaskCalleeSaved; it must include FP and LR.
1139 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
1140 assert((rsMaskSaveRegs & RBM_LR) != 0);
1141 assert((rsMaskSaveRegs & RBM_FP) != 0);
// One register-sized slot is reserved for the PSP only when a PSPSym exists.
1143 unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
1145 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
1146 unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
1147 if (compiler->info.compIsVarArgs)
1149 // For varargs we always save all of the integer register arguments
1150 // so that they are contiguous with the incoming stack arguments.
1151 saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
1153 unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
1155 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
1156 unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
// Upper bound on the frame size: both parts rounded up to STACK_ALIGN separately.
1158 unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
1159 assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
1161 int SP_to_FPLR_save_delta;
1162 int SP_to_PSP_slot_delta;
1163 int CallerSP_to_PSP_slot_delta;
// Frames of at most 512 bytes use a single SP adjustment (fiSpDelta2 == 0); the frame size is
// aligned as a whole, so any padding sits between the FP/LR pair and the PSP slot.
1165 if (maxFuncletFrameSizeAligned <= 512)
1167 unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
1168 unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN);
1169 assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
1171 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
1172 assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
1174 SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
1175 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
1176 CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
// Frame type 1 is chosen when there is no outgoing argument space.
1178 if (compiler->lvaOutgoingArgSpaceSize == 0)
1180 genFuncletInfo.fiFrameType = 1;
1184 genFuncletInfo.fiFrameType = 2;
1186 genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
1187 genFuncletInfo.fiSpDelta2 = 0;
1189 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
// Frame type 3: two SP adjustments. The first covers the aligned save-register/PSP area,
// the second covers the aligned outgoing argument space.
1193 unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
1194 assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
1196 SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
1197 SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
1198 CallerSP_to_PSP_slot_delta =
1199 -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
1201 genFuncletInfo.fiFrameType = 3;
1202 genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
1203 genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
1205 assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
1208 /* Now save it for future use */
1210 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
1211 genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
1212 genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
// The callee-save area offset is recorded as one register slot above the PSP slot offset.
1213 genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
1214 genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
1220 printf("Funclet prolog / epilog info\n");
1221 printf(" Save regs: ");
1222 dspRegMask(genFuncletInfo.fiSaveRegs);
1224 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
1225 printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
1226 printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
1227 printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
1228 printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
1229 printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
1230 printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
1231 printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
// When a PSPSym exists, print the main function's offset for it if it differs from the one computed here.
1233 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1235 if (CallerSP_to_PSP_slot_delta !=
1236 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
1238 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
1239 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
// Sanity checks: SP-relative deltas are non-negative, the CallerSP-relative delta is non-positive.
1244 assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
1245 assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
1246 assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
1247 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
1249 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1251 assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
1252 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
1259 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1260 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1262 XX End Prolog / Epilog XX
1264 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1265 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
//------------------------------------------------------------------------
// genCallFinally: Generate the call to the finally funclet that 'block' targets.
//
// Arguments:
//    block - the calling block; block->bbJumpDest is the target of the emitted 'bl'
//
// Notes:
//    x0 is loaded from the PSPSym when one exists, otherwise it is set to SP.
//    For a non-retless call, 'block' is advanced to block->bbNext (the paired
//    BBJ_ALWAYS block) at the end of this function.
//
1268 BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
1270 // Generate a call to the finally, like this:
1271 // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
1272 // bl finally-funclet
1273 // b finally-return // Only for non-retless finally calls
1274 // The 'b' can be a NOP if we're going to the next block.
1276 if (compiler->lvaPSPSym != BAD_VAR_NUM)
1278 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
1282 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE);
// Emit the local call to the finally funclet.
1284 getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
1286 if (block->bbFlags & BBF_RETLESS_CALL)
1288 // We have a retless call, and the last instruction generated was a call.
1289 // If the next block is in a different EH region (or is the end of the code
1290 // block), then we need to generate a breakpoint here (since it will never
1291 // get executed) to get proper unwind behavior.
1293 if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
1295 instGen(INS_bkpt); // This should never get executed
1300 // Because of the way the flowgraph is connected, the liveness info for this one instruction
1301 // after the call is not (can not be) correct in cases where a variable has a last use in the
1302 // handler. So turn off GC reporting for this single instruction.
1303 getEmitter()->emitDisableGC();
1305 // Now go to where the finally funclet needs to return to.
1306 if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
1309 // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
1310 // to the next instruction? This would depend on stack walking from within the finally
1311 // handler working without this instruction being in this special EH region.
1316 inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
1319 getEmitter()->emitEnableGC();
1322 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
1323 // jump target using bbJumpDest - that is already used to point
1324 // to the finally block. So just skip past the BBJ_ALWAYS unless the
1325 // block is RETLESS.
1326 if (!(block->bbFlags & BBF_RETLESS_CALL))
1328 assert(block->isBBCallAlwaysPair());
1329 block = block->bbNext;
//------------------------------------------------------------------------
// genEHCatchRet: Load the address of the label block->bbJumpDest into REG_INTRET.
//
// Arguments:
//    block - the block whose bbJumpDest supplies the label whose address is loaded
//
1334 void CodeGen::genEHCatchRet(BasicBlock* block)
1336 // For long address (default): `adrp + add` will be emitted.
1337 // For short address (proven later): `adr` will be emitted.
1338 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
1341 // move an immediate value into an integer register
//
// Arguments:
//    size  - operand size attribute; any reloc flag is stripped when opts.compReloc is false
//    reg   - destination register; must not be a floating-point register
//    imm   - the immediate value to materialize
//    flags - INS_FLAGS_SET requests an additional 'tst' on the register after the value is loaded
//
1343 void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
1345 // reg cannot be a FP register
1346 assert(!genIsValidFloatReg(reg));
1347 if (!compiler->opts.compReloc)
1349 size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
1352 if (EA_IS_RELOC(size))
1354 // This emits a pair of adrp/add (two instructions) with fix-ups.
1355 getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
1359 instGen_Set_Reg_To_Zero(size, reg, flags);
// Prefer a single mov when the emitter reports the immediate as directly encodable.
1363 if (emitter::emitIns_valid_imm_for_mov(imm, size))
1365 getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
1369 // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
1370 // There are three forms
1371 // movk which loads into any halfword preserving the remaining halfwords
1372 // movz which loads into any halfword zeroing the remaining halfwords
1373 // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register
1374 // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords
1377 // Determine whether movn or movz will require the fewest instructions to populate the immediate
// Walk the halfwords from the top one down (bit 48 for 8-byte operands, bit 16 otherwise),
// scoring 0xffff halfwords in favor of movn and 0x0000 halfwords in favor of movz.
1380 for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
1382 if (uint16_t(imm >> i) == 0xffff)
1383 ++preferMovn; // a single movk 0xffff could be skipped if movn was used
1384 else if (uint16_t(imm >> i) == 0x0000)
1385 --preferMovn; // a single movk 0 could be skipped if movz was used
1388 // Select the first instruction. Any additional instruction will use movk
1389 instruction ins = (preferMovn > 0) ? INS_movn : INS_movz;
1391 // Initial movz or movn will fill the remaining bytes with the skipVal
1392 // This can allow skipping filling a halfword
1393 uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0;
// Width of the destination in bits: 64 for EA_8BYTE, otherwise 32.
1395 unsigned bits = (size == EA_8BYTE) ? 64 : 32;
1397 // Iterate over imm examining 16 bits at a time
1398 for (unsigned i = 0; i < bits; i += 16)
1400 uint16_t imm16 = uint16_t(imm >> i);
1402 if (imm16 != skipVal)
1404 if (ins == INS_movn)
1406 // For the movn case, we need to bitwise invert the immediate. This is because
1407 // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000)
// Emit the instruction for this halfword: imm16 shifted left by i bits.
1411 getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL);
1413 // Once the initial movz/movn is emitted the remaining instructions will all use movk
1418 // We must emit a movn or movz or we have not done anything
1419 // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and
1420 // should not be in this else condition
1421 assert(ins == INS_movk);
1423 // The caller may have requested that the flags be set on this mov (rarely/never)
1424 if (flags == INS_FLAGS_SET)
1426 getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
// Inform the register tracker of the integer constant now associated with 'reg'.
1430 regTracker.rsTrackRegIntCns(reg, imm);
1433 /***********************************************************************************
1435 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
1436 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
1437 * genProduceReg() on the target register.
1439 void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
// Dispatch on the constant node's operator.
1441 switch (tree->gtOper)
1445 // relocatable values tend to come down as a CNS_INT of native int type
1446 // so the line between these two opcodes is kind of blurry
1447 GenTreeIntConCommon* con = tree->AsIntConCommon();
1448 ssize_t cnsVal = con->IconValue();
// A constant that needs a relocation is materialized with the reloc attribute and the register
// is then marked as trashed in the register tracker; otherwise genSetRegToIcon is used.
1450 if (con->ImmedValNeedsReloc(compiler))
1452 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
1453 regTracker.rsTrackRegTrash(targetReg);
1457 genSetRegToIcon(targetReg, cnsVal, targetType);
// Floating-point constant: three strategies follow, tried in order
// (movi zero, fmov immediate, load from the data section).
1464 emitter* emit = getEmitter();
1465 emitAttr size = emitActualTypeSize(tree);
1466 double constValue = tree->AsDblCon()->gtDconVal;
1468 // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
// The test compares the raw 64-bit pattern of the double, so -0.0 (sign bit set) does not match.
1469 if (*(__int64*)&constValue == 0)
1471 // A faster/smaller way to generate 0.0
1472 // We will just zero out the entire vector register for both float and double
1473 emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1475 else if (emitter::emitIns_valid_imm_for_fmov(constValue))
1477 // We can load the FP constant using the fmov FP-immediate for this constValue
1478 emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
1482 // Get a temp integer register to compute long address.
1483 regNumber addrReg = tree->GetSingleTempReg();
1485 // We must load the FP constant from the constant pool
1486 // Emit a data section constant for the float or double constant.
1487 CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size);
1488 // For long address (default): `adrp + ldr + fmov` will be emitted.
1489 // For short address (proven later), `ldr` will be emitted.
1490 emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0);
1500 // Generate code to get the high N bits of a N*N=2N bit multiplication result
//
// Arguments:
//    treeNode - the multiply-high node; must not be an overflow-checking operation
//               and must not have a floating-point type
//
1501 void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
1503 assert(!treeNode->gtOverflowEx());
1505 genConsumeOperands(treeNode);
1507 regNumber targetReg = treeNode->gtRegNum;
1508 var_types targetType = treeNode->TypeGet();
1509 emitter* emit = getEmitter();
1510 emitAttr attr = emitActualTypeSize(treeNode);
// Non-zero when the node carries GTF_UNSIGNED; selects the unsigned instruction forms below.
1511 unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED);
1513 GenTree* op1 = treeNode->gtGetOp1();
1514 GenTree* op2 = treeNode->gtGetOp2();
1516 assert(!varTypeIsFloating(targetType));
1518 // The arithmetic node must be sitting in a register (since it's not contained)
1519 assert(targetReg != REG_NA);
// 8-byte operands: a single umulh/smulh is emitted for the node.
1521 if (EA_SIZE(attr) == EA_8BYTE)
1523 instruction ins = isUnsigned ? INS_umulh : INS_smulh;
1525 regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2);
1527 assert(r == targetReg);
// 4-byte operands: emit umull/smull, then shift the target register right by 32
// (logical for unsigned, arithmetic for signed) as an 8-byte operation.
1531 assert(EA_SIZE(attr) == EA_4BYTE);
1533 instruction ins = isUnsigned ? INS_umull : INS_smull;
1535 regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2);
1537 emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32);
1540 genProduceReg(treeNode);
1543 // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
1544 // This method is expected to have called genConsumeOperands() before calling it.
//
// Arguments:
//    treeNode - the binary operator node; its operator must be one of those listed above
//
1545 void CodeGen::genCodeForBinary(GenTree* treeNode)
1547 const genTreeOps oper = treeNode->OperGet();
1548 regNumber targetReg = treeNode->gtRegNum;
1549 var_types targetType = treeNode->TypeGet();
1550 emitter* emit = getEmitter();
1552 assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
1553 oper == GT_OR || oper == GT_XOR);
1555 GenTree* op1 = treeNode->gtGetOp1();
1556 GenTree* op2 = treeNode->gtGetOp2();
// Pick the instruction from the node's operator and result type.
1557 instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
// Special handling applies when the node carries GTF_SET_FLAGS.
1559 if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0)
1573 noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
1577 // The arithmetic node must be sitting in a register (since it's not contained)
1578 assert(targetReg != REG_NA);
// Emit the three-operand instruction; the emitter must report the node's target register as the destination.
1580 regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
1581 assert(r == targetReg);
1583 genProduceReg(treeNode);
1586 //------------------------------------------------------------------------
1587 // genCodeForLclVar: Produce code for a GT_LCL_VAR node.
1590 // tree - the GT_LCL_VAR node
1592 void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
1594 var_types targetType = tree->TypeGet();
1595 emitter* emit = getEmitter();
// Look up the local's descriptor in the local variable table.
1597 unsigned varNum = tree->gtLclNum;
1598 assert(varNum < compiler->lvaCount);
1599 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1600 bool isRegCandidate = varDsc->lvIsRegCandidate();
1602 // lcl_vars are not defs
1603 assert((tree->gtFlags & GTF_VAR_DEF) == 0);
1605 // If this is a register candidate that has been spilled, genConsumeReg() will
1606 // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
1608 if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED))
1610 // targetType must be a normal scalar type and not a TYP_STRUCT
1611 assert(targetType != TYP_STRUCT);
1613 instruction ins = ins_Load(targetType);
1614 emitAttr attr = emitTypeSize(targetType);
// Non-floating types get their size attribute adjusted for the chosen load instruction.
1616 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
// Load the local from its stack location (offset 0) into the node's register.
1618 emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0);
1619 genProduceReg(tree);
1623 //------------------------------------------------------------------------
1624 // genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
1627 // tree - the GT_STORE_LCL_FLD node
1629 void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
1631 var_types targetType = tree->TypeGet();
1632 regNumber targetReg = tree->gtRegNum;
1633 emitter* emit = getEmitter();
1634 noway_assert(targetType != TYP_STRUCT);
1637 // storing of TYP_SIMD12 (i.e. Vector3) field
1638 if (tree->TypeGet() == TYP_SIMD12)
1640 genStoreLclTypeSIMD12(tree);
1643 #endif // FEATURE_SIMD
1645 // record the offset
1646 unsigned offset = tree->gtLclOffs;
1648 // We must have a stack store with GT_STORE_LCL_FLD
1649 noway_assert(targetReg == REG_NA);
// Look up the local's descriptor in the local variable table.
1651 unsigned varNum = tree->gtLclNum;
1652 assert(varNum < compiler->lvaCount);
1653 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1655 // Ensure that lclVar nodes are typed correctly.
1656 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1658 GenTree* data = tree->gtOp1;
1659 genConsumeRegs(data);
// Determine the register holding the value to store. A contained integer constant is
// only expected to be zero; any other operand must not be contained.
1661 regNumber dataReg = REG_NA;
1662 if (data->isContainedIntOrIImmed())
1664 assert(data->IsIntegralConst(0));
1669 assert(!data->isContained());
1670 dataReg = data->gtRegNum;
1672 assert(dataReg != REG_NA);
1674 instruction ins = ins_Store(targetType);
1676 emitAttr attr = emitTypeSize(targetType);
// Non-floating types get their size attribute adjusted for the chosen store instruction.
1678 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
// Store dataReg into the local's stack slot at the field offset.
1680 emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
1682 genUpdateLife(tree);
// Record that the local's current location is the stack.
1684 varDsc->lvRegNum = REG_STK;
1687 //------------------------------------------------------------------------
1688 // genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
1691 // tree - the GT_STORE_LCL_VAR node
1693 void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
1695 var_types targetType = tree->TypeGet();
1696 regNumber targetReg = tree->gtRegNum;
1697 emitter* emit = getEmitter();
// Look up the local's descriptor in the local variable table.
1699 unsigned varNum = tree->gtLclNum;
1700 assert(varNum < compiler->lvaCount);
1701 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1703 // Ensure that lclVar nodes are typed correctly.
1704 assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
1706 GenTree* data = tree->gtOp1;
1708 // var = call, where call returns a multi-reg return value
1709 // case is handled separately.
1710 if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
1712 genMultiRegCallStoreToLocal(tree);
1717 // storing of TYP_SIMD12 (i.e. Vector3) field
1718 if (tree->TypeGet() == TYP_SIMD12)
1720 genStoreLclTypeSIMD12(tree);
1723 #endif // FEATURE_SIMD
1725 genConsumeRegs(data);
// Determine the register holding the value to store.
1727 regNumber dataReg = REG_NA;
1728 if (data->isContainedIntOrIImmed())
1730 // This is only possible for a zero-init.
1731 assert(data->IsIntegralConst(0));
// A SIMD zero-init is done by zeroing the 16-byte target vector register with movi.
1733 if (varTypeIsSIMD(targetType))
1735 assert(targetReg != REG_NA);
1736 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
1737 genProduceReg(tree);
1745 assert(!data->isContained());
1746 dataReg = data->gtRegNum;
1748 assert(dataReg != REG_NA);
1750 if (targetReg == REG_NA) // store into stack based LclVar
1752 inst_set_SV_var(tree);
1754 instruction ins = ins_Store(targetType);
1755 emitAttr attr = emitTypeSize(targetType);
// Non-floating types get their size attribute adjusted for the chosen store instruction.
1757 attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr);
1759 emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
1761 genUpdateLife(tree);
// Record that the local's current location is the stack.
1763 varDsc->lvRegNum = REG_STK;
1765 else // store into register (i.e move into register)
1767 if (dataReg != targetReg)
1769 // Assign into targetReg when dataReg (from op1) is not the same register
1770 inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
1772 genProduceReg(tree);
1777 //------------------------------------------------------------------------
1778 // genReturn: Generates code for return statement.
1779 // In case of struct return, delegates to the genStructReturn method.
1782 // treeNode - The GT_RETURN or GT_RETFILT tree node.
1787 void CodeGen::genReturn(GenTree* treeNode)
1789 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
1790 GenTree* op1 = treeNode->gtGetOp1();
1791 var_types targetType = treeNode->TypeGet();
1793 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
1794 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
1795 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
1796 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
// A void return must not have an operand.
1799 if (targetType == TYP_VOID)
1801 assert(op1 == nullptr);
// Struct returns are delegated to genStructReturn; other non-void returns are handled below.
1805 if (isStructReturn(treeNode))
1807 genStructReturn(treeNode);
1809 else if (targetType != TYP_VOID)
1811 assert(op1 != nullptr);
1812 noway_assert(op1->gtRegNum != REG_NA);
// Select the return register by type: REG_FLOATRET for floating-point values, otherwise REG_INTRET.
1816 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
// A move is needed at least when the operand is not already in the return register.
1818 bool movRequired = (op1->gtRegNum != retReg);
1822 if (op1->OperGet() == GT_LCL_VAR)
1824 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
1825 bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
1826 if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
1828 // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
1830 unsigned lclNum = lcl->gtLclNum;
1831 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1832 var_types op1Type = genActualType(op1->TypeGet());
1833 var_types lclType = genActualType(varDsc->TypeGet());
// Compare the actual type size of the node against that of the local it reads.
1835 if (genTypeSize(op1Type) < genTypeSize(lclType))
// Move the operand's register into the return register using the return type's actual size.
1845 emitAttr attr = emitActualTypeSize(targetType);
1846 getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum);
1850 #ifdef PROFILING_SUPPORTED
1851 // There will be a single return block while generating profiler ELT callbacks.
1853 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
1854 // In flowgraph and other places assert that the last node of a block marked as
1855 // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
1856 // maintain such an invariant irrespective of whether profiler hook needed or not.
1857 // Also, there is not much to be gained by materializing it as an explicit node.
1858 if (compiler->compCurBB == compiler->genReturnBB)
1860 genProfilingLeaveCallback();
1865 /***********************************************************************************************
1866 * Generate code for localloc
1868 void CodeGen::genLclHeap(GenTree* tree)
// Emits the code for a GT_LCLHEAP (localloc) node.
//
// Arguments:
//    tree - the GT_LCLHEAP node; its first operand is the requested size in bytes.
//
// Notes:
//    The allocated address is produced in tree->gtRegNum. A zero-sized request yields null in that
//    register. The outgoing argument area and (when present) the PSPSym are popped before the
//    allocation and re-established below the new block afterwards.
1870 assert(tree->OperGet() == GT_LCLHEAP);
1872 GenTree* size = tree->gtOp.gtOp1;
1873 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
1875 regNumber targetReg = tree->gtRegNum;
1876 regNumber regCnt = REG_NA;
1877 regNumber pspSymReg = REG_NA;
1878 var_types type = genActualType(size->gtType);
1879 emitAttr easz = emitTypeSize(type);
1880 BasicBlock* endLabel = nullptr;
// NOTE(review): the 'loop' labels declared further down appear to shadow this variable, which then
// looks unused - confirm before removing.
1881 BasicBlock* loop = nullptr;
1882 unsigned stackAdjustment = 0;
// Compare SP with the value saved in the lvaReturnEspCheck slot and hit a bkpt if they differ.
1886 if (compiler->opts.compStackCheckOnRet)
1888 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
1889 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
1890 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
1891 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
1893 BasicBlock* esp_check = genCreateTempLabel();
1894 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1895 inst_JMP(jmpEqual, esp_check);
1896 getEmitter()->emitIns(INS_bkpt);
1897 genDefineTempLabel(esp_check);
1901 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
1902 noway_assert(genStackLevel == 0); // Can't have anything on the stack
1904 // Whether the method has a PSPSym.
1906 #if FEATURE_EH_FUNCLETS
1907 hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
1912 // Compute the amount of memory to allocate, rounded up to STACK_ALIGN.
1914 if (size->IsCnsIntOrI())
1916 // If size is a constant, then it must be contained.
1917 assert(size->isContained());
1919 // If amount is zero then return null in targetReg
1920 amount = size->gtIntCon.gtIconVal;
1923 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
1927 // 'amount' is the total number of bytes to localloc, rounded up to STACK_ALIGN
1928 amount = AlignUp(amount, STACK_ALIGN);
1932 // If 0 bail out by returning null in targetReg
1933 genConsumeRegAndCopy(size, targetReg);
1934 endLabel = genCreateTempLabel();
1935 getEmitter()->emitIns_R_R(INS_tst, easz, targetReg, targetReg);
1936 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1937 inst_JMP(jmpEqual, endLabel);
1939 // Compute the size of the block to allocate and perform alignment.
1940 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regCnt,
1941 // since we don't need any internal registers.
1942 if (!hasPspSym && compiler->info.compInitMem)
1944 assert(tree->AvailableTempRegCount() == 0);
1949 regCnt = tree->ExtractTempReg();
1950 if (regCnt != targetReg)
1952 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
1956 // Align to STACK_ALIGN
1957 // regCnt will be the total number of bytes to localloc
1958 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
1959 inst_RV_IV(INS_and, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
1962 stackAdjustment = 0;
1963 #if FEATURE_EH_FUNCLETS
1964 // If we have a PSPSym, then we need to re-locate it after the localloc.
1967 stackAdjustment += STACK_ALIGN;
1969 // Save a copy of PSPSym
1970 pspSymReg = tree->ExtractTempReg();
1971 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
1975 #if FEATURE_FIXED_OUT_ARGS
1976 // If we have an outgoing arg area then we must adjust the SP by popping off the
1977 // outgoing arg area. We will restore it right before we return from this method.
1979 // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
1980 // are the cases that need to be handled:
1981 // i) Method has PSPSym + out-going arg area.
1982 // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
1983 // Therefore, we will pop SP up to the out-going arg area before locallocating.
1984 // We need to add padding to ensure SP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
1985 // ii) Method has no PSPSym but out-going arg area.
1986 // Almost same case as above without the requirement to pad for the final SP to be STACK_ALIGN'ed.
1987 // iii) Method has PSPSym but no out-going arg area.
1988 // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded.
1989 // iv) Method has neither PSPSym nor out-going arg area.
1990 // Nothing needs to be popped off from the stack nor relocated.
1991 if (compiler->lvaOutgoingArgSpaceSize > 0)
1993 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
1995 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
1996 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
2000 if (size->IsCnsIntOrI())
2002 // We should reach here only for non-zero, constant size allocations.
2005 // For small allocations we will generate up to four stp instructions
2006 size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
2007 if (cntStackAlignedWidthItems <= 4)
2009 while (cntStackAlignedWidthItems != 0)
2011 // We can use pre-indexed addressing.
2012 // stp ZR, ZR, [SP, #-16]!
2013 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
2014 cntStackAlignedWidthItems -= 1;
2019 else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
2021 // Since the size is less than a page, simply adjust the SP value.
2022 // The SP might already be in the guard page, must touch it BEFORE
2023 // the alloc, not after.
2025 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0);
2027 inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
2032 // else, "mov regCnt, amount"
2033 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regCnt.
2034 // Since size is a constant, regCnt is not yet initialized.
2035 assert(regCnt == REG_NA);
2036 if (!hasPspSym && compiler->info.compInitMem)
2038 assert(tree->AvailableTempRegCount() == 0);
2043 regCnt = tree->ExtractTempReg();
2045 genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
2048 if (compiler->info.compInitMem)
2050 BasicBlock* loop = genCreateTempLabel();
2052 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
2053 // Since we have to zero out the allocated memory AND ensure that SP is always valid
2054 // by tickling the pages, we will just push 0's on the stack.
2056 // Note: regCnt is a multiple of STACK_ALIGN at this point (it was aligned above), and each iteration
2057 // of the loop below stores and subtracts 16 bytes - presumably STACK_ALIGN is 16 here; TODO confirm.
2060 genDefineTempLabel(loop);
2062 // We can use pre-indexed addressing.
2063 // stp ZR, ZR, [SP, #-16]!
2064 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
2066 // If not done, loop
2067 // Note that regCnt is the number of bytes to stack allocate.
2068 // Therefore we need to subtract 16 from regCnt here.
2069 assert(genIsValidIntReg(regCnt));
2070 inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
2071 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2072 inst_JMP(jmpNotEqual, loop);
2076 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
2078 // We don't need to zero out the allocated memory. However, we do have
2079 // to tickle the pages to ensure that SP is always valid and is
2080 // in sync with the "stack guard page". Note that in the worst
2081 // case SP is on the last byte of the guard page. Thus you must
2082 // touch SP-0 first not SP-0x1000.
2084 // Another subtlety is that you don't want SP to be exactly on the
2085 // boundary of the guard page because PUSH is predecrement, thus
2086 // call setup would not touch the guard page but just beyond it
2088 // Note that we go through a few hoops so that SP never points to
2089 // illegal pages at any time during the tickling process
2091 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2092 // bvc Loop // result is smaller than original SP (no wrap around)
2093 // mov regCnt, #0 // Overflow, pick lowest possible value
2096 // ldr wzr, [SP + 0] // tickle the page - read from the page
2097 // sub regTmp, SP, PAGE_SIZE // regTmp = SP - eeGetPageSize()
2098 // cmp regTmp, regCnt
2108 regNumber regTmp = tree->GetSingleTempReg();
2110 BasicBlock* loop = genCreateTempLabel();
2111 BasicBlock* done = genCreateTempLabel();
2113 // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
2114 getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
2116 inst_JMP(EJ_vc, loop); // branch if the V flag is not set
2118 // Overflow, set regCnt to lowest possible value
2119 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
2121 genDefineTempLabel(loop);
2123 // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
2124 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
2126 // regTmp = SP - eeGetPageSize(); SP itself is only updated further down
2127 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
// Leave the loop once the next page boundary would fall below the final SP value held in regCnt.
2129 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
2130 emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
2131 inst_JMP(jmpLTU, done);
2133 // Update SP to be at the next page of stack that we will tickle
2134 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regTmp);
2136 // Jump to loop and tickle new stack address
2137 inst_JMP(EJ_jmp, loop);
2139 // Done with stack tickle loop
2140 genDefineTempLabel(done);
2142 // Now just move the final value to SP
2143 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
2147 // Re-adjust SP to allocate PSPSym and out-going arg area
2148 if (stackAdjustment != 0)
2150 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2151 assert(stackAdjustment > 0);
2152 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
2154 #if FEATURE_EH_FUNCLETS
2155 // Write PSPSym to its new location.
2158 assert(genIsValidIntReg(pspSymReg));
2159 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
2162 // Return the stackalloc'ed address in result register.
2163 // targetReg = SP + stackAdjustment.
2165 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment);
2167 else // stackAdjustment == 0
2169 // Move the final value of SP to targetReg
2170 inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
// Target of the early exits taken when the requested size is zero (only created on the non-constant path).
2174 if (endLabel != nullptr)
2175 genDefineTempLabel(endLabel);
2177 // Write the lvaLocAllocSPvar stack frame slot
2178 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
2180 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
2184 if (compiler->opts.compNeedStackProbes)
2186 genGenerateStackProbe();
// Refresh the lvaReturnEspCheck slot (the one compared against SP at the top of this function).
2192 if (compiler->opts.compStackCheckOnRet)
2194 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
2195 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
2196 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2197 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0);
2201 genProduceReg(tree);
2204 //------------------------------------------------------------------------
2205 // genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
//
// Arguments:
//    tree - the GT_NEG or GT_NOT node; GT_NOT is not allowed on floating-point types.
//
// Notes:
//    Both the operand and the result must be in registers. A single two-register instruction,
//    chosen by genGetInsForOper, is emitted.
//
2210 void CodeGen::genCodeForNegNot(GenTree* tree)
2212 assert(tree->OperIs(GT_NEG, GT_NOT));
2214 var_types targetType = tree->TypeGet();
2216 assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
2218 regNumber targetReg = tree->gtRegNum;
2219 instruction ins = genGetInsForOper(tree->OperGet(), targetType);
2221 // The arithmetic node must be sitting in a register (since it's not contained)
2222 assert(!tree->isContained());
2223 // The dst can only be a register.
2224 assert(targetReg != REG_NA);
2226 GenTree* operand = tree->gtGetOp1();
2227 assert(!operand->isContained());
2228 // The src must be a register.
2229 regNumber operandReg = genConsumeReg(operand);
2231 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg);
2233 genProduceReg(tree);
2236 //------------------------------------------------------------------------
2237 // genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
2238 // (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
2239 // (2) float/double MOD is morphed into a helper call by front-end.
//
// Arguments:
//    tree - the GT_DIV or GT_UDIV node
//
// Notes:
//    For integer division the required runtime checks (divide by zero, and MinInt / -1 for
//    signed division) are emitted ahead of the divide instruction; checks that a constant
//    divisor makes unnecessary are skipped.
//
2244 void CodeGen::genCodeForDivMod(GenTreeOp* tree)
2246 assert(tree->OperIs(GT_DIV, GT_UDIV));
2248 var_types targetType = tree->TypeGet();
2249 emitter* emit = getEmitter();
2251 genConsumeOperands(tree);
2253 if (varTypeIsFloating(targetType))
2255 // Floating point divide never raises an exception
2256 genCodeForBinary(tree);
2258 else // an integer divide operation
2260 GenTree* divisorOp = tree->gtGetOp2();
2261 emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet())));
2263 if (divisorOp->IsIntegralConst(0))
2265 // We unconditionally throw a divide by zero exception
2266 genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
2268 // We still need to call genProduceReg
2269 genProduceReg(tree);
2271 else // the divisor is not the constant zero
2273 regNumber divisorReg = divisorOp->gtRegNum;
2275 // Generate the required runtime checks for GT_DIV or GT_UDIV
2276 if (tree->gtOper == GT_DIV)
2278 BasicBlock* sdivLabel = genCreateTempLabel();
2280 // Two possible exceptions:
2281 // (AnyVal / 0) => DivideByZeroException
2282 // (MinInt / -1) => ArithmeticException
2284 bool checkDividend = true;
2286 // Do we have an immediate for the 'divisorOp'?
2288 if (divisorOp->IsCnsIntOrI())
2290 GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
2291 ssize_t intConstValue = intConstTree->IconValue();
2292 assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
2293 if (intConstValue != -1)
2295 checkDividend = false; // We statically know that the divisor is not -1
2298 else // insert check for division by zero
2300 // If the divisor is zero, throw a DivideByZeroException
2301 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2302 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2303 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
2308 // If the divisor is not -1, branch to 'sdivLabel'
2309 emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
2311 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2312 inst_JMP(jmpNotEqual, sdivLabel);
2313 // If control flow continues past here the 'divisorReg' is known to be -1
2315 regNumber dividendReg = tree->gtGetOp1()->gtRegNum;
2316 // At this point the divisor is known to be -1
2318 // Issue the 'adds zr, dividendReg, dividendReg' instruction
2319 // this will set both the Z and V flags only when dividendReg is MinInt
2321 emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
2322 inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
2323 genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set throw
2324 // ArithmeticException
2326 genDefineTempLabel(sdivLabel);
2328 genCodeForBinary(tree); // Generate the sdiv instruction
2330 else // (tree->gtOper == GT_UDIV)
2332 // Only one possible exception
2333 // (AnyVal / 0) => DivideByZeroException
2335 // Note that division by the constant 0 was already checked for above by the
2336 // divisorOp->IsIntegralConst(0) check
2338 if (!divisorOp->IsCnsIntOrI())
2340 // divisorOp is not a constant, so it could be zero
2342 emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
2343 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2344 genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
2346 genCodeForBinary(tree);
2352 // Generate code for InitBlk by performing a loop unroll
// Preconditions:
2354 // a) Both the size and fill byte value are integer constants.
2355 // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
// Arguments:
//    initBlkNode - the block node; Addr() is the destination address and Data() the fill value
//                  (possibly wrapped in an init-value node, which is unwrapped below).
2356 void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
2358 // Make sure we got the arguments of the initblk/initobj operation in the right registers
2359 unsigned size = initBlkNode->Size();
2360 GenTree* dstAddr = initBlkNode->Addr();
2361 GenTree* initVal = initBlkNode->Data();
2362 if (initVal->OperIsInitVal())
2364 initVal = initVal->gtGetOp1();
2367 assert(dstAddr->isUsedFromReg());
// The fill value is either in a register or is the constant zero (which is stored from REG_ZR below).
2368 assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0));
2370 assert(size <= INITBLK_UNROLL_LIMIT);
2372 emitter* emit = getEmitter();
2374 genConsumeOperands(initBlkNode);
2376 if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
2378 // issue a full memory barrier before a volatile initBlockUnroll operation
2379 instGen_MemoryBarrier();
2382 regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum;
2384 assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR));
2386 unsigned offset = 0;
2388 // Perform an unroll using stp.
2389 if (size >= 2 * REGSIZE_BYTES)
2391 // Determine how many 16 byte slots
2392 size_t slots = size / (2 * REGSIZE_BYTES);
2396 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset);
2397 offset += (2 * REGSIZE_BYTES);
2401 // Fill the remainder (15 bytes or less) if there's any.
// Each set bit of the remaining size selects one store of that width: 8, 4, 2, then 1 byte.
2402 if ((size & 0xf) != 0)
2404 if ((size & 8) != 0)
2406 emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
2409 if ((size & 4) != 0)
2411 emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
2414 if ((size & 2) != 0)
2416 emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
2419 if ((size & 1) != 0)
2421 emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
2426 // Generate code for a load pair (ldp) of two 8-byte values from some address + offset
// dst, dst2: the two destination registers
2427 // base: tree node which can be either a local address or arbitrary node
2428 // offset: distance from the base from which to load
2429 void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset)
2431 emitter* emit = getEmitter();
// A local address is loaded from the local's stack slot; anything else is addressed via base's register.
2433 if (base->OperIsLocalAddr())
// A local field address contributes its own field offset.
2435 if (base->gtOper == GT_LCL_FLD_ADDR)
2436 offset += base->gtLclFld.gtLclOffs;
2438 emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset);
2442 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset);
2446 // Generate code for a store pair (stp) of two 8-byte values to some address + offset
// src, src2: the two source registers
2447 // base: tree node which can be either a local address or arbitrary node
2448 // offset: distance from the base at which to store
2449 void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset)
2451 emitter* emit = getEmitter();
// A local address is stored to the local's stack slot; anything else is addressed via base's register.
2453 if (base->OperIsLocalAddr())
// A local field address contributes its own field offset.
2455 if (base->gtOper == GT_LCL_FLD_ADDR)
2456 offset += base->gtLclFld.gtLclOffs;
2458 emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset);
2462 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset);
2466 // Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
2468 // For this case we'll generate a sequence of loads/stores in the case of struct
2469 // slots that don't contain GC pointers. The generated code will look like:
2470 // ldr tempReg, [R13, #8]
2471 // str tempReg, [R14, #8]
2473 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
2474 // which happens to use the same registers as the previous call to maintain
2475 // the same register requirements and register killsets:
2476 // bl CORINFO_HELP_ASSIGN_BYREF
2478 // So finally an example would look like this:
2479 // ldr tempReg, [R13, #8]
2480 // str tempReg, [R14, #8]
2481 // bl CORINFO_HELP_ASSIGN_BYREF
2482 // ldr tempReg, [R13, #8]
2483 // str tempReg, [R14, #8]
2484 // bl CORINFO_HELP_ASSIGN_BYREF
2485 // ldr tempReg, [R13, #8]
2486 // str tempReg, [R14, #8]
// NOTE(review): R13/R14 in the sketch above are illustrative only. The code below addresses memory
// through REG_WRITE_BARRIER_SRC_BYREF / REG_WRITE_BARRIER_DST_BYREF using post-indexed loads and stores.
2487 void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
2489 GenTree* dstAddr = cpObjNode->Addr();
2490 GenTree* source = cpObjNode->Data();
2491 var_types srcAddrType = TYP_BYREF;
2492 bool sourceIsLocal = false;
2494 assert(source->isContained());
// The source is either an indirection (whose address type is recorded) or a local.
2495 if (source->gtOper == GT_IND)
2497 GenTree* srcAddr = source->gtGetOp1();
2498 assert(!srcAddr->isContained());
2499 srcAddrType = srcAddr->TypeGet();
2503 noway_assert(source->IsLocal());
2504 sourceIsLocal = true;
2507 bool dstOnStack = dstAddr->OperIsLocalAddr();
2510 assert(!dstAddr->isContained());
2512 // This GenTree node has data about GC pointers, this means we're dealing
// with a CpObj.
2514 assert(cpObjNode->gtGcPtrCount > 0);
2517 // Consume the operands and get them into the right registers.
2518 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
2519 genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
2520 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
2521 gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
2523 unsigned slots = cpObjNode->gtSlots;
2525 // Temp register(s) used to perform the sequence of loads and stores.
2526 regNumber tmpReg = cpObjNode->ExtractTempReg();
2527 regNumber tmpReg2 = REG_NA;
2529 assert(genIsValidIntReg(tmpReg));
2530 assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
2531 assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);
// The second temp is needed by the ldp/stp sequences below.
2535 tmpReg2 = cpObjNode->GetSingleTempReg();
2536 assert(tmpReg2 != tmpReg);
2537 assert(genIsValidIntReg(tmpReg2));
2538 assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
2539 assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
2542 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2544 // issue a full memory barrier before a volatile CpObj operation
2545 instGen_MemoryBarrier();
2548 emitter* emit = getEmitter();
2550 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
2552 // If we can prove it's on the stack we don't need to use the write barrier.
2556 // Check if two or more remaining slots and use a ldp/stp sequence
2557 while (i < slots - 1)
// Each slot is copied with the emit size derived from its GC layout entry.
2559 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2560 emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1]));
2562 emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE,
2563 INS_OPTS_POST_INDEX, attr1);
2564 emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE,
2565 INS_OPTS_POST_INDEX, attr1);
2569 // Use a ldr/str sequence for the last remainder
2572 emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
2574 emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2575 INS_OPTS_POST_INDEX);
2576 emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2577 INS_OPTS_POST_INDEX);
// Write-barrier path: non-GC slots are copied directly, GC slots go through the helper call.
2582 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
2590 // Check if the next slot's type is also TYPE_GC_NONE and use ldp/stp
2591 if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE))
2593 emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2594 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2595 emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
2596 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
2597 ++i; // extra increment of i, since we are copying two items
2601 emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
2602 INS_OPTS_POST_INDEX);
2603 emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
2604 INS_OPTS_POST_INDEX);
2609 // In the case of a GC-Pointer we'll call the ByRef write barrier helper
2610 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
// Every GC pointer slot must have been accounted for by the loop above.
2617 assert(gcPtrCount == 0);
2620 if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
2622 // issue a INS_BARRIER_ISHLD after a volatile CpObj operation
2623 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2626 // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
2627 // While we normally update GC info prior to the last instruction that uses them,
2628 // these actually live into the helper call.
2629 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
2632 // Generate code to do a switch statement based on a table of ip-relative offsets.
// treeNode: the switch node; op1 is the index register and op2 the register holding the table base.
// Note that the base register is overwritten: first with the loaded offset, then with the branch target.
2633 void CodeGen::genTableBasedSwitch(GenTree* treeNode)
2635 genConsumeOperands(treeNode->AsOp());
2636 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
2637 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
2639 regNumber tmpReg = treeNode->GetSingleTempReg();
2641 // load the ip-relative offset (which is relative to start of fgFirstBB)
2642 getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
2644 // add it to the absolute address of fgFirstBB
2645 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
2646 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
2647 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg);
// branch to the computed target address
2650 getEmitter()->emitIns_R(INS_br, emitActualTypeSize(TYP_I_IMPL), baseReg);
2653 // Emits the jump table for the current BBJ_SWITCH block and an instruction that loads the
// address of its first element into treeNode's register.
// treeNode: the GT_JMPTABLE node
2654 void CodeGen::genJumpTable(GenTree* treeNode)
2656 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
2657 assert(treeNode->OperGet() == GT_JMPTABLE);
2659 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
2660 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
2661 unsigned jmpTabOffs;
2662 unsigned jmpTabBase;
// Open a data section with one entry per switch target.
2664 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
2668 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
2670 for (unsigned i = 0; i < jumpCount; i++)
2672 BasicBlock* target = *jumpTable++;
// Every switch target must already be flagged as a jump target.
2673 noway_assert(target->bbFlags & BBF_JMP_TARGET);
2675 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
2677 getEmitter()->emitDataGenData(i, target);
2680 getEmitter()->emitDataGenEnd();
2682 // Access to inline data is 'abstracted' by a special type of static member
2683 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
2684 // to constant data, not a real static field.
2685 getEmitter()->emitIns_R_C(INS_adr, emitActualTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA,
2686 compiler->eeFindJitDataOffs(jmpTabBase), 0);
2687 genProduceReg(treeNode);
2690 // Generate code for the locked operations:
2691 // GT_LOCKADD, GT_XCHG, GT_XADD
// treeNode: the atomic node; op1 is the address and op2 the data operand.
// The operation is emitted as a load-exclusive / store-exclusive retry loop.
2692 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
2694 GenTree* data = treeNode->gtOp.gtOp2;
2695 GenTree* addr = treeNode->gtOp.gtOp1;
2696 regNumber targetReg = treeNode->gtRegNum;
2697 regNumber dataReg = data->gtRegNum;
2698 regNumber addrReg = addr->gtRegNum;
// exResultReg receives the status of the store-exclusive. For GT_XCHG the data register is stored
// directly; otherwise a temp holds the value to store. When the node produces no value, the loaded
// value goes into the store-data temp.
2700 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2701 regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
2702 regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
2704 // Check allocator assumptions
2706 // The register allocator should have extended the lifetimes of all input and internal registers so that
2707 // none interfere with the target.
2708 noway_assert(addrReg != targetReg);
2710 noway_assert(addrReg != loadReg);
2711 noway_assert(dataReg != loadReg);
2713 noway_assert(addrReg != storeDataReg);
2714 noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg));
2716 assert(addr->isUsedFromReg());
2717 noway_assert(exResultReg != REG_NA);
2718 noway_assert(exResultReg != targetReg);
2719 noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG));
2721 // Store exclusive unpredictable cases must be avoided
2722 noway_assert(exResultReg != storeDataReg);
2723 noway_assert(exResultReg != addrReg);
2725 genConsumeAddress(addr);
2726 genConsumeRegs(data);
2728 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
2729 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2730 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2731 // we are finished generating the code for this node.
2733 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2735 // TODO-ARM64-CQ Use ARMv8.1 atomics if available
2736 // https://github.com/dotnet/coreclr/issues/11881
2738 // Emit code like this:
2740 // ldaxr loadReg, [addrReg]
2741 // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD
2742 // # GT_XCHG storeDataReg === dataReg
2743 // stlxr exResult, storeDataReg, [addrReg]
2744 // cbnz exResult, retry
2746 BasicBlock* labelRetry = genCreateTempLabel();
2747 genDefineTempLabel(labelRetry);
2749 emitAttr dataSize = emitActualTypeSize(data);
2750 // The following instruction includes an acquire half barrier
2751 // TODO-ARM64-CQ Evaluate whether this is necessary
2752 // https://github.com/dotnet/coreclr/issues/14346
2753 getEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg);
2755 switch (treeNode->OperGet())
2759 if (data->isContainedIntOrIImmed())
2761 // Even though INS_add is specified here, the encoder will choose either
2762 // an INS_add or an INS_sub and encode the immediate as a positive value
2763 genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(),
2768 getEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg);
2772 assert(!data->isContained());
2773 storeDataReg = dataReg;
2779 // The following instruction includes a release half barrier
2780 // TODO-ARM64-CQ Evaluate whether this is necessary
2781 // https://github.com/dotnet/coreclr/issues/14346
2782 getEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg);
// A non-zero status means the exclusive store did not succeed: retry.
2784 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
// The address register no longer needs to be reported as a GC pointer.
2786 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2788 if (treeNode->gtRegNum != REG_NA)
2790 genProduceReg(treeNode);
2794 //------------------------------------------------------------------------
2795 // genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
//
// Arguments:
2798 // tree - the GT_CMPXCHG node
//
// Notes:
//    The value loaded from the location is produced in the target register. The store is
//    attempted only when the loaded value equals the comparand, and is retried while the
//    exclusive store reports a non-zero status.
//
2800 void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
2802 assert(treeNode->OperIs(GT_CMPXCHG));
2804 GenTree* addr = treeNode->gtOpLocation; // arg1
2805 GenTree* data = treeNode->gtOpValue; // arg2
2806 GenTree* comparand = treeNode->gtOpComparand; // arg3
2808 regNumber targetReg = treeNode->gtRegNum;
2809 regNumber dataReg = data->gtRegNum;
2810 regNumber addrReg = addr->gtRegNum;
2811 regNumber comparandReg = comparand->gtRegNum;
2812 regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
2814 // Check allocator assumptions
2816 // The register allocator should have extended the lifetimes of all input and internal registers so that
2817 // none interfere with the target.
2818 noway_assert(addrReg != targetReg);
2819 noway_assert(dataReg != targetReg);
2820 noway_assert(comparandReg != targetReg);
2821 noway_assert(addrReg != dataReg);
2822 noway_assert(targetReg != REG_NA);
2823 noway_assert(exResultReg != REG_NA);
2824 noway_assert(exResultReg != targetReg);
2826 assert(addr->isUsedFromReg());
2827 assert(data->isUsedFromReg());
2828 assert(!comparand->isUsedFromMemory());
2830 // Store exclusive unpredictable cases must be avoided
2831 noway_assert(exResultReg != dataReg);
2832 noway_assert(exResultReg != addrReg);
2834 genConsumeAddress(addr);
2835 genConsumeRegs(data);
2836 genConsumeRegs(comparand);
2838 // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
2839 // die at the first instruction generated by the node. This is not the case for these atomics as the input
2840 // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
2841 // we are finished generating the code for this node.
2843 gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
2845 // TODO-ARM64-CQ Use ARMv8.1 atomics if available
2846 // https://github.com/dotnet/coreclr/issues/11881
2848 // Emit code like this:
2850 // ldaxr targetReg, [addrReg]
2851 // cmp targetReg, comparandReg
// bne compareFail
2853 // stlxr exResult, dataReg, [addrReg]
2854 // cbnz exResult, retry
2857 BasicBlock* labelRetry = genCreateTempLabel();
2858 BasicBlock* labelCompareFail = genCreateTempLabel();
2859 genDefineTempLabel(labelRetry);
2861 // The following instruction includes an acquire half barrier
2862 // TODO-ARM64-CQ Evaluate whether this is necessary
2863 // https://github.com/dotnet/coreclr/issues/14346
2864 getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg);
// Compare the loaded value with the comparand: a contained zero uses cbnz directly, any other
// contained immediate uses cmp with the immediate, and a register comparand uses cmp reg, reg.
2866 if (comparand->isContainedIntOrIImmed())
2868 if (comparand->IsIntegralConst(0))
2870 getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg);
2874 getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg,
2875 comparand->AsIntConCommon()->IconValue());
2876 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
2881 getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg);
2882 getEmitter()->emitIns_J(INS_bne, labelCompareFail);
2885 // The following instruction includes a release half barrier
2886 // TODO-ARM64-CQ Evaluate whether this is necessary
2887 // https://github.com/dotnet/coreclr/issues/14346
2888 getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg);
// A non-zero status means the exclusive store did not succeed: retry.
2890 getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
2892 genDefineTempLabel(labelCompareFail);
// The address register no longer needs to be reported as a GC pointer.
2894 gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
2896 genProduceReg(treeNode);
// genGetInsForOper: Select the instruction to use for a GenTree operator 'oper' applied to
// operands of type 'type'. NOTE(review): only a few lines of this function are visible in
// this listing; the per-operator selection itself is not shown.
2899 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
// 'ins' starts out as INS_brk until a specific instruction is chosen.
2901 instruction ins = INS_brk;
// Floating-point types are handled on their own path; the integer path follows.
2903 if (varTypeIsFloating(type))
// Reported for an operator the floating-point path does not handle.
2924 NYI("Unhandled oper in genGetInsForOper() - float");
// Reported for an operator the integer path does not handle.
2977 NYI("Unhandled oper in genGetInsForOper() - integer");
2985 //------------------------------------------------------------------------
2986 // genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
// The generated sequence is: compare the operand register with zero, branch over
// the helper call when it is zero, otherwise call CORINFO_HELP_STOP_FOR_GC.
2989 // tree - the GT_RETURNTRAP node
2991 void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
2993 assert(tree->OperGet() == GT_RETURNTRAP);
2995 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
2996 // based on the contents of 'data'
2998 GenTree* data = tree->gtOp1;
2999 genConsumeRegs(data);
// The compare is emitted with EA_4BYTE, so only a 32-bit compare against 0 is done.
3000 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
3002 BasicBlock* skipLabel = genCreateTempLabel();
// An "equal" result (data == 0) jumps to skipLabel, bypassing the helper call.
3004 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3005 inst_JMP(jmpEqual, skipLabel);
3006 // emit the call to the EE-helper that stops for GC (or other reasons)
3008 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
// skipLabel is bound immediately after the helper call.
3009 genDefineTempLabel(skipLabel);
3012 //------------------------------------------------------------------------
3013 // genCodeForStoreInd: Produce code for a GT_STOREIND node.
// Stores that gcIsWriteBarrierCandidate classifies as needing a barrier move 'addr' and
// 'data' into the barrier helper's fixed registers and go through genGCWriteBarrier.
// All other stores are emitted with emitInsLoadStoreOp, with extra handling when the
// indirection is marked GTF_IND_VOLATILE.
3016 // tree - the GT_STOREIND node
3018 void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
3020 GenTree* data = tree->Data();
3021 GenTree* addr = tree->Addr();
3022 var_types targetType = tree->TypeGet();
3023 emitter* emit = getEmitter();
3024 emitAttr attr = emitTypeSize(tree);
3025 instruction ins = ins_Store(targetType);
// TYP_SIMD12 stores are delegated to genStoreIndTypeSIMD12 (FEATURE_SIMD builds only;
// the opening #ifdef is not visible in this listing).
3028 // Storing Vector3 of size 12 bytes through indirection
3029 if (tree->TypeGet() == TYP_SIMD12)
3031 genStoreIndTypeSIMD12(tree);
3034 #endif // FEATURE_SIMD
3036 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
3037 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
3039 // data and addr must be in registers.
3040 // Consume both registers so that any copies of interfering
3041 // registers are taken care of.
3042 genConsumeOperands(tree);
// Two register conventions follow: REG_WRITE_BARRIER_DST_BYREF/REG_WRITE_BARRIER when
// NOGC_WRITE_BARRIERS is set, and REG_ARG_0/REG_ARG_1 otherwise. NOTE(review): the #else
// line separating them is not visible in this listing.
3044 #if NOGC_WRITE_BARRIERS
3045 // At this point, we should not have any interference.
3046 // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
3047 // as that is where 'addr' must go.
3048 noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
3050 // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
3051 if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
3053 inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
3056 // 'data' goes into x15 (REG_WRITE_BARRIER)
3057 if (data->gtRegNum != REG_WRITE_BARRIER)
3059 inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
3062 // At this point, we should not have any interference.
3063 // That is, 'data' must not be in REG_ARG_0,
3064 // as that is where 'addr' must go.
3065 noway_assert(data->gtRegNum != REG_ARG_0);
3067 // addr goes in REG_ARG_0
3068 if (addr->gtRegNum != REG_ARG_0)
3070 inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
3073 // data goes in REG_ARG_1
3074 if (data->gtRegNum != REG_ARG_1)
3076 inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
3078 #endif // NOGC_WRITE_BARRIERS
3080 genGCWriteBarrier(tree, writeBarrierForm);
3082 else // A normal store, not a WriteBarrier store
3084 bool dataIsUnary = false;
3085 GenTree* nonRMWsrc = nullptr;
3086 // We must consume the operands in the proper execution order,
3087 // so that liveness is updated appropriately.
3088 genConsumeAddress(addr);
3090 if (!data->isContained())
3092 genConsumeRegs(data);
3095 regNumber dataReg = REG_NA;
// The only contained data accepted here is the integral constant zero.
// NOTE(review): the line that assigns dataReg for this case is not visible here.
3096 if (data->isContainedIntOrIImmed())
3098 assert(data->IsIntegralConst(0));
3101 else // data is not contained, so evaluate it into a register
3103 assert(!data->isContained());
3104 dataReg = data->gtRegNum;
// A 1-byte store must not be flagged GTF_IND_UNALIGNED.
3107 assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED));
3109 if (tree->gtFlags & GTF_IND_VOLATILE)
// The store-release path needs the data in a valid integer register, a non-contained
// address, and an indirection that is not marked unaligned.
3111 bool useStoreRelease =
3112 genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED);
3114 if (useStoreRelease)
// Each size case asserts the plain store opcode that ins_Store picked.
// NOTE(review): the lines that replace 'ins' are not visible in this listing.
3116 switch (EA_SIZE(attr))
3119 assert(ins == INS_strb);
3123 assert(ins == INS_strh);
3128 assert(ins == INS_str);
3132 assert(false); // We should not get here
3137 // issue a full memory barrier before a volatile StInd
3138 instGen_MemoryBarrier();
// Emit the store itself using the selected instruction, size and data register.
3142 emit->emitInsLoadStoreOp(ins, attr, dataReg, tree);
3146 //------------------------------------------------------------------------
3147 // genCodeForSwap: Produce code for a GT_SWAP node.
// Exchanges the register assignments of two enregistered locals: the LclVarDsc
// register numbers are swapped and the GC register sets are updated to match.
// The actual exchange instruction is not implemented on this target (NYI).
3150 // tree - the GT_SWAP node
3152 void CodeGen::genCodeForSwap(GenTreeOp* tree)
3154 // Swap is only supported for lclVar operands that are enregistered
3155 // We do not consume or produce any registers. Both operands remain enregistered.
3156 // However, the gc-ness may change.
3157 assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
3159 GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
3160 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
3161 var_types type1 = varDsc1->TypeGet();
3162 GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
3163 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
3164 var_types type2 = varDsc2->TypeGet();
3166 // We must have both int or both fp regs
// The previous form (!fp1 || fp2) only checked "type1 fp implies type2 fp" and so let an
// int/fp pair through. Compare the two classifications for equality instead; the '!'
// on each side normalizes both results to bool before comparing.
3167 assert(!varTypeIsFloating(type1) == !varTypeIsFloating(type2));
3169 // FP swap is not yet implemented (and should have NYI'd in LSRA)
3170 assert(!varTypeIsFloating(type1));
3172 regNumber oldOp1Reg = lcl1->gtRegNum;
3173 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
3174 regNumber oldOp2Reg = lcl2->gtRegNum;
3175 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
3177 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
3178 varDsc1->lvRegNum = oldOp2Reg;
3179 varDsc2->lvRegNum = oldOp1Reg;
// 'size' defaults to pointer size; it is adjusted below when the GC-ness of the two
// locals differs (the adjusting assignment is not visible in this listing).
3182 emitAttr size = EA_PTRSIZE;
3183 if (varTypeGCtype(type1) != varTypeGCtype(type2))
3185 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
3186 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
3190 NYI("register swap");
3191 // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
3193 // Update the gcInfo.
3194 // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
3195 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3196 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
3198 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
3199 // It will also dump the updates.
// After the swap, op2's old register holds a value of type1 and vice versa.
3200 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
3201 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
3204 //-------------------------------------------------------------------------------------------
3205 // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
3206 // corresponding to a binary Relational operator result.
3209 // dstReg - The target register to set to 1 or 0
3210 // tree - The GenTree Relop node that was used to set the Condition codes
3212 // Return Value: none
3215 // A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
3216 //-------------------------------------------------------------------------------------------
3218 void CodeGen::genSetRegToCond(regNumber dstReg, GenTree* tree)
// genJumpKindsForTree fills in up to two jump kinds for the relop; the first is always
// present, and the second is EJ_NONE unless a second condition has to be checked.
3220 emitJumpKind jumpKind[2];
3221 bool branchToTrueLabel[2];
3222 genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
3223 assert(jumpKind[0] != EJ_NONE);
3225 // Set the reg according to the flags
3226 inst_SET(jumpKind[0], dstReg);
3228 // Do we need to use two operations to set the flags?
3230 if (jumpKind[1] != EJ_NONE)
3232 emitter* emit = getEmitter();
// 'ordered' is true when the relop does not carry GTF_RELOP_NAN_UN.
3233 bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
3236 // The only ones that require two operations are the
3237 // floating point compare operations of BEQ or BNE.UN
3239 if (tree->gtOper == GT_EQ)
3241 // This must be an ordered comparison.
3243 assert(jumpKind[1] == EJ_vs); // We complement this value
3244 secondCond = INS_COND_VC; // for the secondCond
3246 else // gtOper == GT_NE
3248 // This must be BNE.UN (unordered comparison)
3249 assert((tree->gtOper == GT_NE) && !ordered);
3250 assert(jumpKind[1] == EJ_lo); // We complement this value
3251 secondCond = INS_COND_HS; // for the secondCond
3254 // The second instruction is a 'csinc' instruction that either selects the previous dstReg
3255 // or increments the ZR register, which produces a 1 result.
// That is: dstReg = secondCond ? dstReg : (ZR + 1).
3257 emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond);
3261 //------------------------------------------------------------------------
3262 // genIntToFloatCast: Generate code to cast an int/long to float/double
// Emits a single scvtf/ucvtf from the source integer register to the node's
// floating-point target register.
3265 // treeNode - The GT_CAST node
3271 // Cast is a non-overflow conversion.
3272 // The treeNode must have an assigned register.
3273 // SrcType= int32/uint32/int64/uint64 and DstType=float/double.
3275 void CodeGen::genIntToFloatCast(GenTree* treeNode)
3277 // int type --> float/double conversions are always non-overflow ones
3278 assert(treeNode->OperGet() == GT_CAST);
3279 assert(!treeNode->gtOverflow());
3281 regNumber targetReg = treeNode->gtRegNum;
3282 assert(genIsValidFloatReg(targetReg));
3284 GenTree* op1 = treeNode->gtOp.gtOp1;
3285 assert(!op1->isContained()); // Cannot be contained
3286 assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
3288 var_types dstType = treeNode->CastToType();
// The source type is normalized with genActualType before its size is examined.
3289 var_types srcType = genActualType(op1->TypeGet());
3290 assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
3292 // force the srcType to unsigned if GT_UNSIGNED flag is set
3293 if (treeNode->gtFlags & GTF_UNSIGNED)
3295 srcType = genUnsignedType(srcType);
3298 // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE
3299 emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
3300 noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
// Signedness of the (possibly forced-unsigned) source picks ucvtf vs. scvtf.
3302 instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf;
3303 insOpts cvtOption = INS_OPTS_NONE; // invalid value
// cvtOption encodes the (source size, destination precision) pair; every one of the
// four combinations below overwrites the INS_OPTS_NONE placeholder.
3305 if (dstType == TYP_DOUBLE)
3307 if (srcSize == EA_4BYTE)
3309 cvtOption = INS_OPTS_4BYTE_TO_D;
3313 assert(srcSize == EA_8BYTE);
3314 cvtOption = INS_OPTS_8BYTE_TO_D;
3319 assert(dstType == TYP_FLOAT);
3320 if (srcSize == EA_4BYTE)
3322 cvtOption = INS_OPTS_4BYTE_TO_S;
3326 assert(srcSize == EA_8BYTE);
3327 cvtOption = INS_OPTS_8BYTE_TO_S;
3331 genConsumeOperands(treeNode->AsOp());
// The emitted size attribute is taken from the destination type.
3333 getEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3335 genProduceReg(treeNode);
3338 //------------------------------------------------------------------------
3339 // genFloatToIntCast: Generate code to cast float/double to int/long
// Emits a single fcvtzs/fcvtzu from the source floating-point register to the
// node's integer target register.
3342 // treeNode - The GT_CAST node
3348 // Cast is a non-overflow conversion.
3349 // The treeNode must have an assigned register.
3350 // SrcType=float/double and DstType= int32/uint32/int64/uint64
3352 void CodeGen::genFloatToIntCast(GenTree* treeNode)
3354 // we don't expect to see overflow detecting float/double --> int type conversions here
3355 // as they should have been converted into helper calls by front-end.
3356 assert(treeNode->OperGet() == GT_CAST);
3357 assert(!treeNode->gtOverflow());
3359 regNumber targetReg = treeNode->gtRegNum;
3360 assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
3362 GenTree* op1 = treeNode->gtOp.gtOp1;
3363 assert(!op1->isContained()); // Cannot be contained
3364 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
3366 var_types dstType = treeNode->CastToType();
3367 var_types srcType = op1->TypeGet();
3368 assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
3370 // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE
3371 // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
3372 // we expect the front-end or lowering phase to have generated two levels of cast.
3374 emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
3375 noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
3377 instruction ins = INS_fcvtzs; // default to sign converts
3378 insOpts cvtOption = INS_OPTS_NONE; // invalid value
// Signedness of the destination type picks fcvtzu over the fcvtzs default.
3380 if (varTypeIsUnsigned(dstType))
3382 ins = INS_fcvtzu; // use unsigned converts
// cvtOption encodes the (source precision, destination size) pair; every one of the
// four combinations below overwrites the INS_OPTS_NONE placeholder.
3385 if (srcType == TYP_DOUBLE)
3387 if (dstSize == EA_4BYTE)
3389 cvtOption = INS_OPTS_D_TO_4BYTE;
3393 assert(dstSize == EA_8BYTE);
3394 cvtOption = INS_OPTS_D_TO_8BYTE;
3399 assert(srcType == TYP_FLOAT);
3400 if (dstSize == EA_4BYTE)
3402 cvtOption = INS_OPTS_S_TO_4BYTE;
3406 assert(dstSize == EA_8BYTE);
3407 cvtOption = INS_OPTS_S_TO_8BYTE;
3411 genConsumeOperands(treeNode->AsOp());
// The emitted size attribute is the destination integer size.
3413 getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3415 genProduceReg(treeNode);
3418 //------------------------------------------------------------------------
3419 // genCkfinite: Generate code for ckfinite opcode.
// Copies the operand's bits into an integer temp, isolates the exponent field, and
// jumps to the arithmetic-exception throw block when the exponent is all ones.
// Otherwise the operand is copied to the node's register if it is not already there.
3422 // treeNode - The GT_CKFINITE node
3428 // GT_CKFINITE node has reserved an internal register.
3430 void CodeGen::genCkfinite(GenTree* treeNode)
3432 assert(treeNode->OperGet() == GT_CKFINITE);
3434 GenTree* op1 = treeNode->gtOp.gtOp1;
3435 var_types targetType = treeNode->TypeGet();
// Assuming IEEE-754 layouts: a float shifted right by 20 leaves its 8 exponent bits at
// bit positions 3..10 (mask 0x7F8); a double shifted right by 52 leaves its 11 exponent
// bits at positions 0..10 (mask 0x7FF).
3436 int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent.
3437 int shiftAmount = targetType == TYP_FLOAT ? 20 : 52;
3439 emitter* emit = getEmitter();
3441 // Extract exponent into a register.
3442 regNumber intReg = treeNode->GetSingleTempReg();
3443 regNumber fpReg = genConsumeReg(op1);
// Move the raw bits from the fp register into the integer temp, then shift the
// exponent field down.
3445 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg);
3446 emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount);
3448 // Mask off everything but the exponent bits and check if the exponent is all 1's
3449 emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask);
3450 emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask);
3452 // If exponent is all 1's, throw ArithmeticException
3453 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
3454 genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
3456 // if it is a finite value copy it to targetReg
3457 if (treeNode->gtRegNum != fpReg)
3459 emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->gtRegNum, fpReg);
3461 genProduceReg(treeNode);
3464 //------------------------------------------------------------------------
3465 // genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node.
// Emits the flag-setting compare (fcmp, cmp or tst) and, when the node has a target
// register, materializes the result there via genSetRegToCond.
3470 void CodeGen::genCodeForCompare(GenTreeOp* tree)
3472 regNumber targetReg = tree->gtRegNum;
3473 emitter* emit = getEmitter();
3475 GenTree* op1 = tree->gtOp1;
3476 GenTree* op2 = tree->gtOp2;
3477 var_types op1Type = genActualType(op1->TypeGet());
3478 var_types op2Type = genActualType(op2->TypeGet());
3480 assert(!op1->isUsedFromMemory());
3481 assert(!op2->isUsedFromMemory());
3483 genConsumeOperands(tree);
// The compare size comes from op1's actual type; op2 must have the same size.
3485 emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
3487 assert(genTypeSize(op1Type) == genTypeSize(op2Type));
3489 if (varTypeIsFloating(op1Type))
3491 assert(varTypeIsFloating(op2Type));
3492 assert(!op1->isContained());
3493 assert(op1Type == op2Type);
// A contained zero operand uses the compare-with-0.0 form of fcmp; otherwise both
// operands are compared from registers.
3495 if (op2->IsIntegralConst(0))
3497 assert(op2->isContained());
3498 emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
3502 assert(!op2->isContained());
3503 emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
3508 assert(!varTypeIsFloating(op2Type));
3509 // We don't support swapping op1 and op2 to generate cmp reg, imm
3510 assert(!op1->isContainedIntOrIImmed());
// GT_TEST_EQ/GT_TEST_NE use tst; every other relop uses cmp.
3512 instruction ins = tree->OperIs(GT_TEST_EQ, GT_TEST_NE) ? INS_tst : INS_cmp;
3514 if (op2->isContainedIntOrIImmed())
3516 GenTreeIntConCommon* intConst = op2->AsIntConCommon();
3517 emit->emitIns_R_I(ins, cmpSize, op1->gtRegNum, intConst->IconValue());
3521 emit->emitIns_R_R(ins, cmpSize, op1->gtRegNum, op2->gtRegNum);
3525 // Are we evaluating this into a register?
3526 if (targetReg != REG_NA)
3528 genSetRegToCond(targetReg, tree);
3529 genProduceReg(tree);
3533 //------------------------------------------------------------------------
3534 // genCodeForJumpCompare: Generates code for jmpCompare statement.
3536 // A GT_JCMP node is created when a comparison and conditional branch
3537 // can be executed in a single instruction.
3539 // Arm64 has a few instructions with this behavior.
3540 // - cbz/cbnz -- Compare and branch register zero/not zero
3541 // - tbz/tbnz -- Test and branch register bit zero/not zero
3543 // The cbz/cbnz supports the normal +/- 1MB branch range for conditional branches
3544 // The tbz/tbnz supports a smaller +/- 32KB branch range
3546 // A GT_JCMP cbz/cbnz node is created when there is a GT_EQ or GT_NE
3547 // integer/unsigned comparison against #0 which is used by a GT_JTRUE
3548 // condition jump node.
3550 // A GT_JCMP tbz/tbnz node is created when there is a GT_TEST_EQ or GT_TEST_NE
3551 // integer/unsigned comparison against a mask with a single bit set
3552 // which is used by a GT_JTRUE condition jump node.
3554 // This node is responsible for consuming the register, and emitting the
3555 // appropriate fused compare/test and branch instruction
3557 // Two flags guide code generation
3558 // GTF_JCMP_TST -- Set if this is a tbz/tbnz rather than cbz/cbnz
3559 // GTF_JCMP_EQ -- Set if this is cbz/tbz rather than cbnz/tbnz
3562 // tree - The GT_JCMP tree node.
3567 void CodeGen::genCodeForJumpCompare(GenTreeOp* tree)
// The branch target is the current block's bbJumpDest, so the block must be BBJ_COND.
3569 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3571 GenTree* op1 = tree->gtGetOp1();
3572 GenTree* op2 = tree->gtGetOp2();
3574 assert(tree->OperIs(GT_JCMP));
3575 assert(!varTypeIsFloating(tree));
3576 assert(!op1->isUsedFromMemory());
3577 assert(!op2->isUsedFromMemory());
// op2 is always a contained integer constant: the bit mask (tbz/tbnz) or zero (cbz/cbnz).
3578 assert(op2->IsCnsIntOrI());
3579 assert(op2->isContained());
3581 genConsumeOperands(tree);
3583 regNumber reg = op1->gtRegNum;
3584 emitAttr attr = emitActualTypeSize(op1->TypeGet());
3586 if (tree->gtFlags & GTF_JCMP_TST)
3588 ssize_t compareImm = op2->gtIntCon.IconValue();
3590 assert(isPow2(compareImm));
// The single-bit mask is converted to the bit index that tbz/tbnz takes as an immediate.
3592 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz;
3593 int imm = genLog2((size_t)compareImm);
3595 getEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm);
3599 assert(op2->IsIntegralConst(0));
3601 instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_cbz : INS_cbnz;
3603 getEmitter()->emitIns_J_R(ins, attr, compiler->compCurBB->bbJumpDest, reg);
// genSPtoFPdelta: compute the offset between the stack pointer and the frame pointer.
// NOTE(review): the declaration and return of 'delta' are not visible in this listing.
3607 int CodeGenInterface::genSPtoFPdelta()
3611 // We place the saved frame pointer immediately above the outgoing argument space.
// Hence the delta is exactly the size of the outgoing argument area.
3612 delta = (int)compiler->lvaOutgoingArgSpaceSize;
3618 //---------------------------------------------------------------------
3619 // genTotalFrameSize - return the total size of the stack frame, including local size,
3620 // callee-saved register size, etc.
// The result is the sum of the varargs home area (when the method is varargs), the
// pushed callee-saved registers, and the local frame size; it is asserted non-negative.
3626 int CodeGenInterface::genTotalFrameSize()
3628 // For varargs functions, we home all the incoming register arguments. They are not
3629 // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
3630 // since we don't use "push" instructions to save them, we don't have to do the
3631 // save of these varargs register arguments as the first thing in the prolog.
// compCalleeRegsPushed must already have been computed before this is called.
3633 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
3635 int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
3636 compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
3638 assert(totalFrameSize >= 0);
3639 return totalFrameSize;
3642 //---------------------------------------------------------------------
3643 // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
3644 // This number is going to be negative, since the Caller-SP is at a higher
3645 // address than the frame pointer.
3647 // There must be a frame pointer to call this function!
3649 int CodeGenInterface::genCallerSPtoFPdelta()
3651 assert(isFramePointerUsed());
3652 int callerSPtoFPdelta;
// Caller-SP -> Initial-SP (non-positive) plus SP -> FP gives Caller-SP -> FP.
3654 callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
// The assert permits zero as well as negative values.
3656 assert(callerSPtoFPdelta <= 0);
3657 return callerSPtoFPdelta;
3660 //---------------------------------------------------------------------
3661 // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
3663 // This number will be negative.
// Computed as the negation of genTotalFrameSize(); the assert also permits zero.
3665 int CodeGenInterface::genCallerSPtoInitialSPdelta()
3667 int callerSPtoSPdelta = 0;
3669 callerSPtoSPdelta -= genTotalFrameSize();
3671 assert(callerSPtoSPdelta <= 0);
3672 return callerSPtoSPdelta;
3675 /*****************************************************************************
3676 * Emit a call to a helper function.
// genEmitHelperCall: emit a call to the runtime helper identified by 'helper'.
//   helper        - helper id (cast to CorInfoHelpFunc for the lookups below)
//   argSize       - passed through to emitIns_Call
//   retSize       - passed through to emitIns_Call
//   callTargetReg - register to hold the call target for an indirect call; when REG_NA,
//                   REG_DEFAULT_HELPER_CALL_TARGET is used
// After the call, the helper's kill set is reported to the register tracker.
3680 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
3682 void* addr = nullptr;
3683 void* pAddr = nullptr;
// Default to a direct call; this is switched to a register-indirect call below when
// compGetHelperFtn returns no direct address.
3685 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
3686 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
3687 regNumber callTarget = REG_NA;
// NOTE(review): a null 'addr' presumably means the helper is reachable only through
// the indirection cell 'pAddr' - confirm against compGetHelperFtn.
3689 if (addr == nullptr)
3691 // This is call to a runtime helper.
3692 // adrp x, [reloc:rel page addr]
3693 // add x, x, [reloc:page offset]
3697 if (callTargetReg == REG_NA)
3699 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
3700 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
3701 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
3704 regMaskTP callTargetMask = genRegMask(callTargetReg);
3705 regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3707 // assert that all registers in callTargetMask are in the callKillSet
3708 noway_assert((callTargetMask & callKillSet) == callTargetMask);
3710 callTarget = callTargetReg;
3712 // adrp + add with relocations will be emitted
// The target register is first loaded with the address of pAddr, then the helper's
// address is loaded through it before the indirect call.
3713 getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
3714 getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
3715 callType = emitter::EC_INDIR_R;
// The current GC variable/register sets are passed so the emitter can record them
// for the call site.
3718 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
3719 retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
3720 gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
3721 callTarget, /* ireg */
3722 REG_NA, 0, 0, /* xreg, xmul, disp */
3724 emitter::emitNoGChelper(helper));
// Every register in the helper's kill set is marked as trashed.
3726 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
3727 regTracker.rsTrashRegSet(killMask);
3732 //------------------------------------------------------------------------
3733 // genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main
3734 // routine which in turn calls the appropriate genSIMDIntrinsicXXX() routine.
3737 // simdNode - The GT_SIMD node
3743 // Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and
3744 // a limited set of methods.
// NOTE(review): the note above looks stale - the base-type check below accepts ten
// integer and floating-point base types, not just float and int.
3746 // TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp.
3747 void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
3749 // NYI for unsupported base types
3750 if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
3751 simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
3752 simdNode->gtSIMDBaseType != TYP_USHORT && simdNode->gtSIMDBaseType != TYP_UBYTE &&
3753 simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
3754 simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
3756 noway_assert(!"SIMD intrinsic with unsupported base type.");
// Dispatch on the intrinsic id; intrinsics with the same operand shape share a generator.
3759 switch (simdNode->gtSIMDIntrinsicID)
3761 case SIMDIntrinsicInit:
3762 genSIMDIntrinsicInit(simdNode);
3765 case SIMDIntrinsicInitN:
3766 genSIMDIntrinsicInitN(simdNode);
// Unary operations and conversions.
3769 case SIMDIntrinsicSqrt:
3770 case SIMDIntrinsicAbs:
3771 case SIMDIntrinsicCast:
3772 case SIMDIntrinsicConvertToSingle:
3773 case SIMDIntrinsicConvertToInt32:
3774 case SIMDIntrinsicConvertToDouble:
3775 case SIMDIntrinsicConvertToInt64:
3776 genSIMDIntrinsicUnOp(simdNode);
3779 case SIMDIntrinsicWidenLo:
3780 case SIMDIntrinsicWidenHi:
3781 genSIMDIntrinsicWiden(simdNode);
3784 case SIMDIntrinsicNarrow:
3785 genSIMDIntrinsicNarrow(simdNode);
// Binary arithmetic, bitwise, min/max and element-wise comparison operations.
3788 case SIMDIntrinsicAdd:
3789 case SIMDIntrinsicSub:
3790 case SIMDIntrinsicMul:
3791 case SIMDIntrinsicDiv:
3792 case SIMDIntrinsicBitwiseAnd:
3793 case SIMDIntrinsicBitwiseAndNot:
3794 case SIMDIntrinsicBitwiseOr:
3795 case SIMDIntrinsicBitwiseXor:
3796 case SIMDIntrinsicMin:
3797 case SIMDIntrinsicMax:
3798 case SIMDIntrinsicEqual:
3799 case SIMDIntrinsicLessThan:
3800 case SIMDIntrinsicGreaterThan:
3801 case SIMDIntrinsicLessThanOrEqual:
3802 case SIMDIntrinsicGreaterThanOrEqual:
3803 genSIMDIntrinsicBinOp(simdNode);
3806 case SIMDIntrinsicOpEquality:
3807 case SIMDIntrinsicOpInEquality:
3808 genSIMDIntrinsicRelOp(simdNode);
3811 case SIMDIntrinsicDotProduct:
3812 genSIMDIntrinsicDotProduct(simdNode);
3815 case SIMDIntrinsicGetItem:
3816 genSIMDIntrinsicGetItem(simdNode);
3819 case SIMDIntrinsicSetX:
3820 case SIMDIntrinsicSetY:
3821 case SIMDIntrinsicSetZ:
3822 case SIMDIntrinsicSetW:
3823 genSIMDIntrinsicSetItem(simdNode);
3826 case SIMDIntrinsicUpperSave:
3827 genSIMDIntrinsicUpperSave(simdNode);
3830 case SIMDIntrinsicUpperRestore:
3831 genSIMDIntrinsicUpperRestore(simdNode);
// Select is not expected to reach codegen; per the message it is lowered at import.
3834 case SIMDIntrinsicSelect:
3835 NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)");
3839 noway_assert(!"Unimplemented SIMD intrinsic.");
// genGetSimdInsOpt: return the vector arrangement option for a SIMD instruction.
//   is16Byte    - true selects the arrangement with more lanes of each pair below
//   elementType - vector element type; selects which pair applies
// NOTE(review): the case labels tying element types to each pair are not visible in
// this listing.
3844 insOpts CodeGen::genGetSimdInsOpt(bool is16Byte, var_types elementType)
3846 insOpts result = INS_OPTS_NONE;
3848 switch (elementType)
3853 result = is16Byte ? INS_OPTS_2D : INS_OPTS_1D;
3858 result = is16Byte ? INS_OPTS_4S : INS_OPTS_2S;
3862 result = is16Byte ? INS_OPTS_8H : INS_OPTS_4H;
3866 result = is16Byte ? INS_OPTS_16B : INS_OPTS_8B;
// Any element type not matched above trips this assert.
3869 assert(!"Unsupported element type");
3876 // getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
3879 // intrinsicId - SIMD intrinsic Id
3880 // baseType - Base type of the SIMD vector
3881 // ival - Out param (documented as 'immed' previously). Any immediate operand that needs to be passed to the opcode
// NOTE(review): no line visible in this listing writes through 'ival'; confirm whether
// it is used at all on this target.
3885 // Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
3887 instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
// 'result' must be replaced by a real instruction before returning (see the
// noway_assert at the end).
3889 instruction result = INS_invalid;
// Floating-point base types are mapped by the first switch, all others by the second.
3890 if (varTypeIsFloating(baseType))
3892 switch (intrinsicId)
3894 case SIMDIntrinsicAbs:
3897 case SIMDIntrinsicAdd:
3900 case SIMDIntrinsicBitwiseAnd:
3903 case SIMDIntrinsicBitwiseAndNot:
3906 case SIMDIntrinsicBitwiseOr:
3909 case SIMDIntrinsicBitwiseXor:
3912 case SIMDIntrinsicCast:
3915 case SIMDIntrinsicConvertToInt32:
3916 case SIMDIntrinsicConvertToInt64:
3917 result = INS_fcvtns;
3919 case SIMDIntrinsicDiv:
3922 case SIMDIntrinsicEqual:
3925 case SIMDIntrinsicGreaterThan:
3928 case SIMDIntrinsicGreaterThanOrEqual:
3931 case SIMDIntrinsicLessThan:
3934 case SIMDIntrinsicLessThanOrEqual:
3937 case SIMDIntrinsicMax:
3940 case SIMDIntrinsicMin:
3943 case SIMDIntrinsicMul:
3946 case SIMDIntrinsicNarrow:
3947 // Use INS_fcvtn lower bytes of result followed by INS_fcvtn2 for upper bytes
3948 // Return lower bytes instruction here
3951 case SIMDIntrinsicSelect:
3954 case SIMDIntrinsicSqrt:
3957 case SIMDIntrinsicSub:
3960 case SIMDIntrinsicWidenLo:
3963 case SIMDIntrinsicWidenHi:
3964 result = INS_fcvtl2;
3967 assert(!"Unsupported SIMD intrinsic");
// Integer base types: several intrinsics have distinct signed/unsigned instructions.
3973 bool isUnsigned = varTypeIsUnsigned(baseType);
3975 switch (intrinsicId)
3977 case SIMDIntrinsicAbs:
3978 assert(!isUnsigned);
3981 case SIMDIntrinsicAdd:
3984 case SIMDIntrinsicBitwiseAnd:
3987 case SIMDIntrinsicBitwiseAndNot:
3990 case SIMDIntrinsicBitwiseOr:
3993 case SIMDIntrinsicBitwiseXor:
3996 case SIMDIntrinsicCast:
3999 case SIMDIntrinsicConvertToDouble:
4000 case SIMDIntrinsicConvertToSingle:
4001 result = isUnsigned ? INS_ucvtf : INS_scvtf;
4003 case SIMDIntrinsicEqual:
4006 case SIMDIntrinsicGreaterThan:
4007 result = isUnsigned ? INS_cmhi : INS_cmgt;
4009 case SIMDIntrinsicGreaterThanOrEqual:
4010 result = isUnsigned ? INS_cmhs : INS_cmge;
// LessThan/LessThanOrEqual are only expected for signed base types here.
4012 case SIMDIntrinsicLessThan:
4013 assert(!isUnsigned);
4016 case SIMDIntrinsicLessThanOrEqual:
4017 assert(!isUnsigned);
4020 case SIMDIntrinsicMax:
4021 result = isUnsigned ? INS_umax : INS_smax;
4023 case SIMDIntrinsicMin:
4024 result = isUnsigned ? INS_umin : INS_smin;
4026 case SIMDIntrinsicMul:
4029 case SIMDIntrinsicNarrow:
4030 // Use INS_xtn lower bytes of result followed by INS_xtn2 for upper bytes
4031 // Return lower bytes instruction here
4034 case SIMDIntrinsicSelect:
4037 case SIMDIntrinsicSub:
4040 case SIMDIntrinsicWidenLo:
4041 result = isUnsigned ? INS_uxtl : INS_sxtl;
4043 case SIMDIntrinsicWidenHi:
4044 result = isUnsigned ? INS_uxtl2 : INS_sxtl2;
4047 assert(!"Unsupported SIMD intrinsic");
// Checked in all build flavors: an intrinsic left unmapped must not fall through.
4052 noway_assert(result != INS_invalid);
4056 //------------------------------------------------------------------------
4057 // genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize.
// Emits a single 'dup' that writes the operand into the target vector register, from
// either an integer register or a floating-point/vector register.
4060 // simdNode - The GT_SIMD node
4065 void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
4067 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
4069 GenTree* op1 = simdNode->gtGetOp1();
4070 var_types baseType = simdNode->gtSIMDBaseType;
4071 regNumber targetReg = simdNode->gtRegNum;
4072 assert(targetReg != REG_NA);
4073 var_types targetType = simdNode->TypeGet();
4075 genConsumeOperands(simdNode);
// An integral constant zero has no register of its own; the zero register is used instead.
4076 regNumber op1Reg = op1->IsIntegralConst(0) ? REG_ZR : op1->gtRegNum;
4078 // TODO-ARM64-CQ Add LD1R to allow SIMDIntrinsicInit from contained memory
4079 // TODO-ARM64-CQ Add MOVI to allow SIMDIntrinsicInit from contained immediate small constants
// The operand is contained exactly when it is the integral constant zero.
4081 assert(op1->isContained() == op1->IsIntegralConst(0));
4082 assert(!op1->isUsedFromMemory());
4084 assert(genIsValidFloatReg(targetReg));
4085 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
// Vectors larger than 8 bytes use the 16-byte form; the arrangement follows the base type.
4087 bool is16Byte = (simdNode->gtSIMDSize > 8);
4088 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
4089 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
// Integer source: dup from the general register. Otherwise: dup with immediate 0
// (presumably the element index of the source vector register).
4091 if (genIsValidIntReg(op1Reg))
4093 getEmitter()->emitIns_R_R(INS_dup, attr, targetReg, op1Reg, opt);
4097 getEmitter()->emitIns_R_R_I(INS_dup, attr, targetReg, op1Reg, 0, opt);
4100 genProduceReg(simdNode);
4103 //-------------------------------------------------------------------------------------------
4104 // genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes
4105 // a number of arguments equal to the length of the Vector.
// The list operands are consumed first and their registers recorded; each one is then
// inserted into successive elements of the vector register with 'ins'.
4108 // simdNode - The GT_SIMD node
4113 void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
4115 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
4117 regNumber targetReg = simdNode->gtRegNum;
4118 assert(targetReg != REG_NA);
4120 var_types targetType = simdNode->TypeGet();
4122 var_types baseType = simdNode->gtSIMDBaseType;
// By default the vector is built directly in the target register.
4124 regNumber vectorReg = targetReg;
4126 if (varTypeIsFloating(baseType))
4128 // Note that we cannot use targetReg before consuming all float source operands.
4129 // Therefore use an internal temp register
4130 vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
4133 emitAttr baseTypeSize = emitTypeSize(baseType);
4135 // We will first consume the list items in execution (left to right) order,
4136 // and record the registers.
// The array has FP_REGSIZE_BYTES slots. NOTE(review): nothing in the loop bounds
// initCount; the size is only asserted after the loop.
4137 regNumber operandRegs[FP_REGSIZE_BYTES];
4138 unsigned initCount = 0;
4139 for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
4141 assert(list->OperGet() == GT_LIST);
4142 GenTree* listItem = list->gtGetOp1();
4143 assert(listItem->TypeGet() == baseType);
4144 assert(!listItem->isContained());
4145 regNumber operandReg = genConsumeReg(listItem);
4146 operandRegs[initCount] = operandReg;
4150 assert((initCount * baseTypeSize) <= simdNode->gtSIMDSize);
// When the operands cover fewer than 16 bytes, zero the whole vector register first.
4152 if (initCount * baseTypeSize < EA_16BYTE)
4154 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, vectorReg, 0x00, INS_OPTS_16B);
// Integral elements use the register-to-element form of 'ins'; the other path uses
// the form that takes an extra trailing immediate (0).
4157 if (varTypeIsIntegral(baseType))
4159 for (unsigned i = 0; i < initCount; i++)
4161 getEmitter()->emitIns_R_R_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i);
4166 for (unsigned i = 0; i < initCount; i++)
4168 getEmitter()->emitIns_R_R_I_I(INS_ins, baseTypeSize, vectorReg, operandRegs[i], i, 0);
4172 // Load the initialized value.
4173 if (targetReg != vectorReg)
4175 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, vectorReg);
4178 genProduceReg(simdNode);
4181 //----------------------------------------------------------------------------------
4182 // genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt.
//
// Arguments:
4185 // simdNode - The GT_SIMD node
//
// Notes:
//    Handles Sqrt, Cast, Abs and the ConvertTo{Single,Int32,Double,Int64} intrinsics with a
//    single register-to-register instruction chosen by getOpForSIMDIntrinsic.
4190 void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
4192 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
4193 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs ||
4194 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle ||
4195 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 ||
4196 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble ||
4197 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64);
4199 GenTree* op1 = simdNode->gtGetOp1();
4200 var_types baseType = simdNode->gtSIMDBaseType;
4201 regNumber targetReg = simdNode->gtRegNum;
4202 assert(targetReg != REG_NA);
4203 var_types targetType = simdNode->TypeGet();
4205 genConsumeOperands(simdNode);
4206 regNumber op1Reg = op1->gtRegNum;
// Both source and destination must be floating-point/SIMD registers.
4208 assert(genIsValidFloatReg(op1Reg));
4209 assert(genIsValidFloatReg(targetReg));
4211 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// Vectors larger than 8 bytes use the full 16-byte register form.
4213 bool is16Byte = (simdNode->gtSIMDSize > 8);
4214 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
// A plain register move takes no arrangement option; every other instruction gets the
// arrangement derived from the vector width and element type.
4215 insOpts opt = (ins == INS_mov) ? INS_OPTS_NONE : genGetSimdInsOpt(is16Byte, baseType);
4217 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
4219 genProduceReg(simdNode);
4222 //--------------------------------------------------------------------------------
4223 // genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations
//
// Arguments:
4226 // simdNode - The GT_SIMD node
//
// Notes:
4229 // The Widen intrinsics are broken into separate intrinsics for the two results.
//    This function therefore emits a single instruction, for either WidenLo or WidenHi.
4231 void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
4233 assert((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenLo) ||
4234 (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi));
4236 GenTree* op1 = simdNode->gtGetOp1();
4237 var_types baseType = simdNode->gtSIMDBaseType;
4238 regNumber targetReg = simdNode->gtRegNum;
4239 assert(targetReg != REG_NA);
4240 var_types simdType = simdNode->TypeGet();
4242 genConsumeOperands(simdNode);
4243 regNumber op1Reg = op1->gtRegNum;
// NOTE(review): srcReg and emitSize do not appear to be read by the code below - confirm.
4244 regNumber srcReg = op1Reg;
4245 emitAttr emitSize = emitActualTypeSize(simdType);
// The instruction depends on both the intrinsic (Lo/Hi) and the element type.
4247 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// Floating-point elements: emitted with an 8-byte attribute and no arrangement option.
4249 if (varTypeIsFloating(baseType))
4251 getEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, op1Reg);
// Other element types: WidenHi uses the 16-byte form and arrangement, WidenLo the 8-byte one.
4255 bool is16Byte = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicWidenHi);
4256 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
4258 getEmitter()->emitIns_R_R(ins, is16Byte ? EA_16BYTE : EA_8BYTE, targetReg, op1Reg, opt);
4261 genProduceReg(simdNode);
4264 //--------------------------------------------------------------------------------
4265 // genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations
//
// Arguments:
4268 // simdNode - The GT_SIMD node
//
// Notes:
4271 // This intrinsic takes two arguments. The first operand is narrowed to produce the
4272 // lower elements of the results, and the second operand produces the high elements.
4274 void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
4276 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicNarrow);
4278 GenTree* op1 = simdNode->gtGetOp1();
4279 GenTree* op2 = simdNode->gtGetOp2();
4280 var_types baseType = simdNode->gtSIMDBaseType;
4281 regNumber targetReg = simdNode->gtRegNum;
4282 assert(targetReg != REG_NA);
4283 var_types simdType = simdNode->TypeGet();
4284 emitAttr emitSize = emitTypeSize(simdType);
4286 genConsumeOperands(simdNode);
4287 regNumber op1Reg = op1->gtRegNum;
4288 regNumber op2Reg = op2->gtRegNum;
4290 assert(genIsValidFloatReg(op1Reg));
4291 assert(genIsValidFloatReg(op2Reg));
4292 assert(genIsValidFloatReg(targetReg));
// targetReg is written by the first narrowing instruction before op2Reg is read by the
// second one, so the two must be different registers.
4293 assert(op2Reg != targetReg);
4294 assert(simdNode->gtSIMDSize == 16);
4296 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
4297 assert((ins == INS_fcvtn) || (ins == INS_xtn));
// Floating-point narrowing: fcvtn on op1, then fcvtn2 on op2 into the same target register.
4299 if (ins == INS_fcvtn)
4301 getEmitter()->emitIns_R_R(INS_fcvtn, EA_8BYTE, targetReg, op1Reg);
4302 getEmitter()->emitIns_R_R(INS_fcvtn2, EA_8BYTE, targetReg, op2Reg);
// Integer narrowing: `opt` is the arrangement for the xtn instruction and `opt2` the
// arrangement for the xtn2 instruction.
4306 insOpts opt = INS_OPTS_NONE;
4307 insOpts opt2 = INS_OPTS_NONE;
4309 // This is not the same as genGetSimdInsOpt()
4310 // Basetype is the source operand type
4311 // However encoding is based on the destination operand type which is 1/2 the basetype.
4327 opt2 = INS_OPTS_16B;
4330 assert(!"Unsupported narrowing element type");
// xtn on op1, then xtn2 on op2 into the same target register.
4333 getEmitter()->emitIns_R_R(INS_xtn, EA_8BYTE, targetReg, op1Reg, opt);
4334 getEmitter()->emitIns_R_R(INS_xtn2, EA_16BYTE, targetReg, op2Reg, opt2);
4337 genProduceReg(simdNode);
4340 //--------------------------------------------------------------------------------
4341 // genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
4342 // add, sub, mul, bit-wise And, AndNot and Or.
//
// Arguments:
4345 // simdNode - The GT_SIMD node
//
// Notes:
//    Besides the operations named above, the assert below also admits Div, Xor, Min, Max and
//    the element-wise comparisons (Equal, LessThan, GreaterThan and their OrEqual variants).
//    Every one of them is emitted as a single three-register vector instruction.
4350 void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
4352 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
4353 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
4354 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
4355 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
4356 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
4357 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
4358 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual ||
4359 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan ||
4360 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan ||
4361 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual ||
4362 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual);
4364 GenTree* op1 = simdNode->gtGetOp1();
4365 GenTree* op2 = simdNode->gtGetOp2();
4366 var_types baseType = simdNode->gtSIMDBaseType;
4367 regNumber targetReg = simdNode->gtRegNum;
4368 assert(targetReg != REG_NA);
4369 var_types targetType = simdNode->TypeGet();
4371 genConsumeOperands(simdNode);
4372 regNumber op1Reg = op1->gtRegNum;
4373 regNumber op2Reg = op2->gtRegNum;
// All three registers must be floating-point/SIMD registers.
4375 assert(genIsValidFloatReg(op1Reg));
4376 assert(genIsValidFloatReg(op2Reg));
4377 assert(genIsValidFloatReg(targetReg));
4379 // TODO-ARM64-CQ Contain integer constants where possible
4381 instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
// Vectors larger than 8 bytes use the full 16-byte register form.
4383 bool is16Byte = (simdNode->gtSIMDSize > 8);
4384 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
4385 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
4387 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
4389 genProduceReg(simdNode);
4392 //--------------------------------------------------------------------------------
4393 // genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
//                        (op_Equality / op_Inequality).
//
// Arguments:
4397 // simdNode - The GT_SIMD node
//
// Notes:
//    The two vectors are compared element-wise into a temp vector register. That result is
//    reduced with uminv, its low byte is moved into targetReg, and targetReg is masked down
//    to its lowest bit (inverted first for op_Inequality).
4402 void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
4404 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ||
4405 simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
4407 GenTree* op1 = simdNode->gtGetOp1();
4408 GenTree* op2 = simdNode->gtGetOp2();
4409 var_types baseType = simdNode->gtSIMDBaseType;
4410 regNumber targetReg = simdNode->gtRegNum;
4411 var_types targetType = simdNode->TypeGet();
4413 genConsumeOperands(simdNode);
4414 regNumber op1Reg = op1->gtRegNum;
4415 regNumber op2Reg = op2->gtRegNum;
4416 regNumber otherReg = op2Reg;
// Both operators start from the element-wise Equal instruction; inequality is derived from
// its result further down.
4418 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType);
4420 bool is16Byte = (simdNode->gtSIMDSize > 8);
4421 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
4422 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
4424 // TODO-ARM64-CQ Contain integer constants where possible
4426 regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
// Element-wise compare into the temp vector register.
4428 getEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt);
4430 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4432 // For 12Byte vectors we must set upper bits to get correct comparison
4433 // We do not assume upper bits are zero.
// targetReg is used as scratch here: it is loaded with -1 and inserted as 4-byte element 3
// of the compare result, so the unused fourth element cannot pull the reduction down.
4434 instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1);
4435 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3);
// Byte-wise unsigned minimum across the compare result (ARM64 UMINV).
4438 getEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg,
4439 (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B);
// Move byte element 0 of the reduced result into targetReg.
4441 getEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0);
// For op_Inequality flip the low bit before the masking below.
4443 if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)
4445 getEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1);
// Keep only the lowest bit of targetReg.
4448 getEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1);
4450 genProduceReg(simdNode);
4453 //--------------------------------------------------------------------------------
4454 // genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
//
// Arguments:
4457 // simdNode - The GT_SIMD node
//
// Notes:
//    The operands are multiplied element-wise, then the products are summed horizontally:
//    with faddp for floating-point element types, or with uaddlv/saddlv followed by a move
//    for the other element types.
4462 void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
4464 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
4466 GenTree* op1 = simdNode->gtGetOp1();
4467 GenTree* op2 = simdNode->gtGetOp2();
4468 var_types baseType = simdNode->gtSIMDBaseType;
4469 var_types simdType = op1->TypeGet();
4471 regNumber targetReg = simdNode->gtRegNum;
4472 assert(targetReg != REG_NA);
// The result of a dot product is a scalar of the element type.
4474 var_types targetType = simdNode->TypeGet();
4475 assert(targetType == baseType);
4477 genConsumeOperands(simdNode);
4478 regNumber op1Reg = op1->gtRegNum;
4479 regNumber op2Reg = op2->gtRegNum;
// Floating-point element types compute directly in targetReg; every other element type
// does the vector work in a separate float temp register.
4480 regNumber tmpReg = targetReg;
4482 if (!varTypeIsFloating(baseType))
4484 tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
// Element-wise multiply, using the same instruction the Mul intrinsic uses.
4487 instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicMul, baseType);
4489 bool is16Byte = (simdNode->gtSIMDSize > 8);
4490 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
4491 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
4494 getEmitter()->emitIns_R_R_R(ins, attr, tmpReg, op1Reg, op2Reg, opt);
4496 if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
4498 // For 12Byte vectors we must zero upper bits to get correct dot product
4499 // We do not assume upper bits are zero.
// Insert the zero register into 4-byte element 3 of the product.
4500 getEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpReg, REG_ZR, 3);
4503 // Vector add horizontal
4504 if (varTypeIsFloating(baseType))
4506 if (baseType == TYP_FLOAT)
// A four-element float vector needs an extra vector-form pairwise add before the final
// faddp that writes targetReg.
4508 if (opt == INS_OPTS_4S)
4510 getEmitter()->emitIns_R_R_R(INS_faddp, attr, tmpReg, tmpReg, tmpReg, INS_OPTS_4S);
4512 getEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg);
// 8-byte faddp into targetReg.
4516 getEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg);
// Non-floating element types: add across the vector, unsigned or signed to match the
// element type.
4521 ins = varTypeIsUnsigned(baseType) ? INS_uaddlv : INS_saddlv;
4523 getEmitter()->emitIns_R_R(ins, attr, tmpReg, tmpReg, opt);
4525 // Mov to integer register
// Unsigned or sub-4-byte element types use mov; the remaining ones use smov.
4526 if (varTypeIsUnsigned(baseType) || (genTypeSize(baseType) < 4))
4528 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(baseType), targetReg, tmpReg, 0);
4532 getEmitter()->emitIns_R_R_I(INS_smov, emitActualTypeSize(baseType), targetReg, tmpReg, 0);
4536 genProduceReg(simdNode);
4539 //------------------------------------------------------------------------------------
4540 // genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
//
// Arguments:
4543 // simdNode - The GT_SIMD node
//
// Notes:
//    Two shapes are handled:
//      - constant index: a scalar load from memory when the vector operand is contained,
//        otherwise a move of the selected element out of the vector register;
//      - variable index: the vector is addressed in memory (a local's stack slot, an
//        indirection's address, or a copy stored to the lvaSIMDInitTempVarNum local) and
//        the element is read with an indexed load.
4548 void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
4550 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
4552 GenTree* op1 = simdNode->gtGetOp1();
4553 GenTree* op2 = simdNode->gtGetOp2();
4554 var_types simdType = op1->TypeGet();
4555 assert(varTypeIsSIMD(simdType));
4557 // op1 of TYP_SIMD12 should be considered as TYP_SIMD16
4558 if (simdType == TYP_SIMD12)
4560 simdType = TYP_SIMD16;
4563 var_types baseType = simdNode->gtSIMDBaseType;
4564 regNumber targetReg = simdNode->gtRegNum;
4565 assert(targetReg != REG_NA);
4566 var_types targetType = simdNode->TypeGet();
4567 assert(targetType == genActualType(baseType));
4569 // GetItem has 2 operands:
4570 // - the source of SIMD type (op1)
4571 // - the index of the value to be returned.
4572 genConsumeOperands(simdNode);
4574 emitAttr baseTypeSize = emitTypeSize(baseType);
// log2 of the element size in bytes.
// NOTE(review): presumably the shift amount for the indexed load at the end - confirm.
4575 unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
// ---- Constant index ----
4577 if (op2->IsCnsIntOrI())
4579 assert(op2->isContained());
4581 ssize_t index = op2->gtIntCon.gtIconVal;
4583 // We only need to generate code for the get if the index is valid
4584 // If the index is invalid, previously generated for the range check will throw
4585 if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
// Contained vector operand: load just the requested element from memory.
4587 if (op1->isContained())
// Byte offset of the element within the vector.
4589 int offset = (int)index * genTypeSize(baseType);
4590 instruction ins = ins_Load(baseType);
4591 baseTypeSize = varTypeIsFloating(baseType)
4593 : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
4595 assert(!op1->isUsedFromReg());
// Local variable: load from its stack slot at the element offset.
4597 if (op1->OperIsLocal())
4599 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4601 getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
// Indirection: load relative to the (non-contained) address register.
4605 assert(op1->OperGet() == GT_IND);
4607 GenTree* addr = op1->AsIndir()->Addr();
4608 assert(!addr->isContained());
4609 regNumber baseReg = addr->gtRegNum;
4611 // ldr targetReg, [baseReg, #offset]
4612 getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
// Vector operand lives in a register: move the selected element out of it.
4617 assert(op1->isUsedFromReg());
4618 regNumber srcReg = op1->gtRegNum;
4620 // mov targetReg, srcReg[#index]
4621 getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
// ---- Variable index ----
4627 assert(!op2->isContained());
4629 regNumber baseReg = REG_NA;
4630 regNumber indexReg = op2->gtRegNum;
4632 if (op1->isContained())
4634 // Optimize the case of op1 is in memory and trying to access ith element.
4635 assert(!op1->isUsedFromReg());
4636 if (op1->OperIsLocal())
4638 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4640 baseReg = simdNode->ExtractTempReg();
4642 // Load the address of varNum
4643 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
4647 // Require GT_IND addr to be not contained.
4648 assert(op1->OperGet() == GT_IND);
4650 GenTree* addr = op1->AsIndir()->Addr();
4651 assert(!addr->isContained());
4653 baseReg = addr->gtRegNum;
// Vector operand lives in a register: store it to the lvaSIMDInitTempVarNum local so the
// element can be read back from memory.
4658 assert(op1->isUsedFromReg());
4659 regNumber srcReg = op1->gtRegNum;
4661 unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
4662 noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
4664 baseReg = simdNode->ExtractTempReg();
4666 // Load the address of simdInitTempVarNum
4667 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
4669 // Store the vector to simdInitTempVarNum
4670 getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
// The indexed load below needs two distinct integer registers.
4673 assert(genIsValidIntReg(indexReg));
4674 assert(genIsValidIntReg(baseReg));
4675 assert(baseReg != indexReg);
4677 // Load item at baseReg[index]
4678 getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
4682 genProduceReg(simdNode);
4685 //------------------------------------------------------------------------------------
4686 // genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i.
//
// Arguments:
4689 // simdNode - The GT_SIMD node
//
// Notes:
//    The element index is implied by the intrinsic ID (SetX/SetY/SetZ/SetW). The source
//    vector is first copied to the target register, then the new value is inserted at that
//    index.
4694 void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
4696 // Determine index based on intrinsic ID
4698 switch (simdNode->gtSIMDIntrinsicID)
4700 case SIMDIntrinsicSetX:
4703 case SIMDIntrinsicSetY:
4706 case SIMDIntrinsicSetZ:
4709 case SIMDIntrinsicSetW:
// -1 means the intrinsic ID was not one of the Set* cases above.
4716 assert(index != -1);
4718 // op1 is the SIMD vector
4719 // op2 is the value to be set
4720 GenTree* op1 = simdNode->gtGetOp1();
4721 GenTree* op2 = simdNode->gtGetOp2();
4723 var_types baseType = simdNode->gtSIMDBaseType;
4724 regNumber targetReg = simdNode->gtRegNum;
4725 assert(targetReg != REG_NA);
4726 var_types targetType = simdNode->TypeGet();
4727 assert(varTypeIsSIMD(targetType));
4729 assert(op2->TypeGet() == baseType);
// The element being written must lie within the vector.
4730 assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
4732 genConsumeOperands(simdNode);
4733 regNumber op1Reg = op1->gtRegNum;
4734 regNumber op2Reg = op2->gtRegNum;
4736 assert(genIsValidFloatReg(targetReg));
4737 assert(genIsValidFloatReg(op1Reg));
4738 assert(genIsValidIntReg(op2Reg) || genIsValidFloatReg(op2Reg));
// targetReg is overwritten by the vector copy below before op2Reg is read.
4739 assert(targetReg != op2Reg);
4741 emitAttr attr = emitTypeSize(baseType);
4743 // Insert mov if register assignment requires it
4744 getEmitter()->emitIns_R_R(INS_mov, EA_16BYTE, targetReg, op1Reg);
// The value is in an integer register: insert it as element `index`.
4746 if (genIsValidIntReg(op2Reg))
4748 getEmitter()->emitIns_R_R_I(INS_ins, attr, targetReg, op2Reg, index);
// Element-to-element form: element 0 of op2Reg goes into element `index` of the target.
4752 getEmitter()->emitIns_R_R_I_I(INS_ins, attr, targetReg, op2Reg, index, 0);
4755 genProduceReg(simdNode);
4758 //-----------------------------------------------------------------------------
4759 // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to
4760 // the given register, if any, or to memory.
//
// Arguments:
4763 // simdNode - The GT_SIMD node
//
// Notes:
4769 // The upper half of all SIMD registers are volatile, even the callee-save registers.
4770 // When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic
4771 // to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
4772 // register. If such a register cannot be found, it will save it to an available caller-save register.
4773 // In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 8 byte
4774 // value to the stack. (Note that if there are no caller-save registers available, the entire 16 byte
4775 // value will be spilled to the stack.)
4777 void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
4779 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
// The operand must be a 16-byte local.
4781 GenTree* op1 = simdNode->gtGetOp1();
4782 assert(op1->IsLocal());
4783 assert(emitTypeSize(op1->TypeGet()) == 16);
4784 regNumber targetReg = simdNode->gtRegNum;
4785 regNumber op1Reg = genConsumeReg(op1);
4786 assert(op1Reg != REG_NA);
4787 assert(targetReg != REG_NA);
// Copy 8-byte element 1 (the upper half) of op1Reg into element 0 of targetReg.
4788 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, targetReg, op1Reg, 0, 1);
4790 genProduceReg(simdNode);
4793 //-----------------------------------------------------------------------------
4794 // genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to
4795 // the given register, if any, or to memory.
//
// Arguments:
4798 // simdNode - The GT_SIMD node
//
// Notes:
4804 // For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
4805 // have their home register, this node has its targetReg on the lclVar child, and its source
4807 // Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
4808 // an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
4809 // most spill scenarios, the saved tree will be different from the restored tree, but the spill
4810 // restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
4811 // spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
4812 // as it is in the spill descriptor for the register from which it was saved.
4814 void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
4816 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
// The operand must be a 16-byte local.
4818 GenTree* op1 = simdNode->gtGetOp1();
4819 assert(op1->IsLocal());
4820 assert(emitTypeSize(op1->TypeGet()) == 16);
// The node's own register is the source holding the saved upper half; the local's register
// is the destination of the restore.
4821 regNumber srcReg = simdNode->gtRegNum;
4822 regNumber lclVarReg = genConsumeReg(op1);
4823 unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
4824 assert(lclVarReg != REG_NA);
4825 assert(srcReg != REG_NA);
// If the saved half was spilled, look up the tree that saved it in srcReg's spill descriptor
// and consume that tree, which triggers the reload.
4826 if (simdNode->gtFlags & GTF_SPILLED)
4828 GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
4829 noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
4830 genConsumeReg(saveNode);
// Copy 8-byte element 0 of srcReg into element 1 (the upper half) of lclVarReg.
4832 getEmitter()->emitIns_R_R_I_I(INS_mov, EA_8BYTE, lclVarReg, srcReg, 1, 0);
4835 //-----------------------------------------------------------------------------
4836 // genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
4837 // Since Vector3 is not a hardware supported write size, it is performed
4838 // as two writes: 8 byte followed by 4-byte.
//
// Arguments:
4841 // treeNode - tree node that is attempting to store indirect
4847 void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
4849 assert(treeNode->OperGet() == GT_STOREIND);
4851 GenTree* addr = treeNode->gtOp.gtOp1;
4852 GenTree* data = treeNode->gtOp.gtOp2;
4854 // addr and data should not be contained.
4855 assert(!data->isContained());
4856 assert(!addr->isContained());
4859 // Should not require a write barrier
4860 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
4861 assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
4864 genConsumeOperands(treeNode->AsOp());
4866 // Need an additional integer register to extract upper 4 bytes from data.
4867 regNumber tmpReg = treeNode->GetSingleTempReg();
// The address register is still needed for the second store, so the temp must not alias it.
4868 assert(tmpReg != addr->gtRegNum);
// Store the lower 8 bytes of the vector at [addr].
4871 getEmitter()->emitIns_R_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum);
4873 // Extract upper 4-bytes from data
// (4-byte element 2 of the vector register, moved into the integer temp)
4874 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->gtRegNum, 2);
// Store those 4 bytes at [addr + 8].
4877 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4880 //-----------------------------------------------------------------------------
4881 // genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
4882 // Since Vector3 is not a hardware supported write size, it is performed
4883 // as two loads: 8 byte followed by 4-byte.
//
// Arguments:
4886 // treeNode - tree node of GT_IND
4892 void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
4894 assert(treeNode->OperGet() == GT_IND);
4896 GenTree* addr = treeNode->gtOp.gtOp1;
4897 regNumber targetReg = treeNode->gtRegNum;
4899 assert(!addr->isContained());
4901 regNumber operandReg = genConsumeReg(addr);
4903 // Need an additional int register to read upper 4 bytes, which is different from targetReg
4904 regNumber tmpReg = treeNode->GetSingleTempReg();
// Load the lower 8 bytes of the vector from [addr] into the target register.
4907 getEmitter()->emitIns_R_R(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, addr->gtRegNum);
// Load the remaining 4 bytes from [addr + 8] into the integer temp.
4910 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->gtRegNum, 8);
4912 // Insert upper 4-bytes into data
// (the temp becomes 4-byte element 2 of the target register)
4913 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, targetReg, tmpReg, 2);
4915 genProduceReg(treeNode);
4918 //-----------------------------------------------------------------------------
4919 // genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
4920 // Since Vector3 is not a hardware supported write size, it is performed
4921 // as two stores: 8 byte followed by 4-byte.
//
// Arguments:
4924 // treeNode - tree node that is attempting to store TYP_SIMD12 field
//
// Notes:
//    treeNode is either GT_STORE_LCL_VAR (the value is written at the start of the local)
//    or GT_STORE_LCL_FLD (the value is written at the field's offset within the local).
4929 void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
4931 assert((treeNode->OperGet() == GT_STORE_LCL_FLD) || (treeNode->OperGet() == GT_STORE_LCL_VAR));
4934 unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
4935 assert(varNum < compiler->lvaCount);
// Only the field form carries an offset. The node is a store oper (asserted above), so the
// field form to test for is GT_STORE_LCL_FLD; GT_LCL_FLD can never reach this function.
4937 if (treeNode->OperGet() == GT_STORE_LCL_FLD)
4939 offs = treeNode->gtLclFld.gtLclOffs;
4942 GenTree* op1 = treeNode->gtOp.gtOp1;
4943 assert(!op1->isContained());
4944 regNumber operandReg = genConsumeReg(op1);
4946 // Need an additional integer register to extract upper 4 bytes from data.
4947 regNumber tmpReg = treeNode->GetSingleTempReg();
4949 // store lower 8 bytes
4950 getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
4952 // Extract upper 4-bytes from data
// (4-byte element 2 of the vector register, moved into the integer temp)
4953 getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2);
// Store those 4 bytes immediately after the first 8.
4956 getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offs + 8);
4959 #endif // FEATURE_SIMD
4961 #ifdef FEATURE_HW_INTRINSICS
4962 #include "hwintrinsicArm64.h"
//------------------------------------------------------------------------
// getOpForHWIntrinsic: Look up the instruction for a hardware intrinsic node.
//
// Arguments:
//    node      - the GT_HWIntrinsic node
//    instrType - the type used to pick among the intrinsic's instruction variants
//
// Return Value:
//    The entry of the intrinsic's `instrs` table selected by instrType.
//
4964 instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType)
4966 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
// Table slot: 0 for floating-point types, 2 for unsigned types, 1 for everything else.
4968 unsigned int instrTypeIndex = varTypeIsFloating(instrType) ? 0 : varTypeIsUnsigned(instrType) ? 2 : 1;
4970 return compiler->getHWIntrinsicInfo(intrinsicID).instrs[instrTypeIndex];
4973 //------------------------------------------------------------------------
4974 // genHWIntrinsic: Produce code for a GT_HWIntrinsic node.
4976 // This is the main routine which in turn calls the genHWIntrinsicXXX() routines.
//
// Arguments:
4979 // node - the GT_HWIntrinsic node
//
// Notes:
//    Dispatch is on the `form` recorded for the intrinsic in the HW intrinsic info table.
//    A form with no case here reaches NYI.
4984 void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
4986 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
4988 switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
4990 case HWIntrinsicInfo::UnaryOp:
4991 genHWIntrinsicUnaryOp(node);
4993 case HWIntrinsicInfo::CrcOp:
4994 genHWIntrinsicCrcOp(node);
4996 case HWIntrinsicInfo::SimdBinaryOp:
4997 genHWIntrinsicSimdBinaryOp(node);
4999 case HWIntrinsicInfo::SimdExtractOp:
5000 genHWIntrinsicSimdExtractOp(node);
5002 case HWIntrinsicInfo::SimdInsertOp:
5003 genHWIntrinsicSimdInsertOp(node);
5005 case HWIntrinsicInfo::SimdSelectOp:
5006 genHWIntrinsicSimdSelectOp(node);
5008 case HWIntrinsicInfo::SimdSetAllOp:
5009 genHWIntrinsicSimdSetAllOp(node);
5011 case HWIntrinsicInfo::SimdUnaryOp:
5012 genHWIntrinsicSimdUnaryOp(node);
5014 case HWIntrinsicInfo::SimdBinaryRMWOp:
5015 genHWIntrinsicSimdBinaryRMWOp(node);
5018 NYI("HWIntrinsic form not implemented");
5022 //------------------------------------------------------------------------
5023 // genHWIntrinsicUnaryOp:
5025 // Produce code for a GT_HWIntrinsic node with form UnaryOp.
5027 // Consumes one scalar operand produces a scalar
//
// Arguments:
5030 // node - the GT_HWIntrinsic node
5035 void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node)
5037 GenTree* op1 = node->gtGetOp1();
5038 regNumber targetReg = node->gtRegNum;
// The operation size comes from the node's own (actual) type.
5039 emitAttr attr = emitActualTypeSize(node);
5041 assert(targetReg != REG_NA);
5042 var_types targetType = node->TypeGet();
5044 genConsumeOperands(node);
5046 regNumber op1Reg = op1->gtRegNum;
// The node's result type picks the instruction variant.
5048 instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
5049 assert(ins != INS_invalid);
5051 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg);
5053 genProduceReg(node);
5056 //------------------------------------------------------------------------
5057 // genHWIntrinsicCrcOp:
5059 // Produce code for a GT_HWIntrinsic node with form CrcOp.
5061 // Consumes two scalar operands and produces a scalar result
5063 // This form differs from BinaryOp because the attr depends on the size of op2
//
// Arguments:
5066 // node - the GT_HWIntrinsic node
//
// Notes:
//    Not implemented yet: the body only reports NYI.
5071 void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node)
5073 NYI("genHWIntrinsicCrcOp not implemented");
5076 //------------------------------------------------------------------------
5077 // genHWIntrinsicSimdBinaryOp:
5079 // Produce code for a GT_HWIntrinsic node with form SimdBinaryOp.
5081 // Consumes two SIMD operands and produces a SIMD result
//
// Arguments:
5084 // node - the GT_HWIntrinsic node
5089 void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node)
5091 GenTree* op1 = node->gtGetOp1();
5092 GenTree* op2 = node->gtGetOp2();
5093 var_types baseType = node->gtSIMDBaseType;
5094 regNumber targetReg = node->gtRegNum;
5096 assert(targetReg != REG_NA);
5097 var_types targetType = node->TypeGet();
5099 genConsumeOperands(node);
5101 regNumber op1Reg = op1->gtRegNum;
5102 regNumber op2Reg = op2->gtRegNum;
// All three registers must be floating-point/SIMD registers.
5104 assert(genIsValidFloatReg(op1Reg));
5105 assert(genIsValidFloatReg(op2Reg));
5106 assert(genIsValidFloatReg(targetReg));
// The element type picks the instruction variant.
5108 instruction ins = getOpForHWIntrinsic(node, baseType);
5109 assert(ins != INS_invalid);
// Vectors larger than 8 bytes use the full 16-byte register form.
5111 bool is16Byte = (node->gtSIMDSize > 8);
5112 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
5113 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
5115 getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
5117 genProduceReg(node);
5120 //------------------------------------------------------------------------
5121 // genHWIntrinsicSwitchTable:
5123 // Generate code for an immediate switch table
5125 // In cases where an instruction only supports const immediate operands, we
5126 // need to generate functionally correct code when the operand is not constant
5128 // This is required by the HW Intrinsic design to handle indirect calls, such as:
5133 // Generated code implements a switch of this form
5138 // ins0; // emitSwCase(0)
5141 // ins1; // emitSwCase(1)
5147 // insLast; // emitSwCase(swMax - 1)
5150 // throw ArgumentOutOfRangeException
5153 // Generated code looks like:
5155 // cmp swReg, #swMax
5156 // b.hs ThrowArgumentOutOfRangeExceptionHelper
5157 // adr tmpReg, labelFirst
5158 // add tmpReg, tmpReg, swReg, LSL #3
5162 // b labelBreakTarget
5164 // b labelBreakTarget
5169 // b labelBreakTarget
5170 // labelBreakTarget:
//
// Arguments:
5174 // swReg - register containing the switch case to execute
5175 // tmpReg - temporary integer register for calculating the switch indirect branch target
5176 // swMax - the number of switch cases. If swReg >= swMax throw SCK_ARG_RNG_EXCPN
5177 // emitSwCase - function like argument taking an immediate value and emitting one instruction
//
// Notes:
//    Every case occupies exactly two instructions (the case instruction and the branch to
//    labelBreakTarget), which the asserts in the loop check. Two 4-byte ARM64 instructions
//    make 8 bytes per case, hence the LSL #3 scaling of swReg.
5182 template <typename HWIntrinsicSwitchCaseBody>
5183 void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg,
5186 HWIntrinsicSwitchCaseBody emitSwCase)
// NOTE(review): the reason for the 256-case limit is not stated here - confirm.
5189 assert(swMax <= 256);
5191 assert(genIsValidIntReg(tmpReg));
5192 assert(genIsValidIntReg(swReg));
5194 BasicBlock* labelFirst = genCreateTempLabel();
5195 BasicBlock* labelBreakTarget = genCreateTempLabel();
5197 // Detect and throw out of range exception
5198 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, swReg, swMax);
// Unsigned >= comparison: any swReg value outside [0, swMax) takes the throw path.
5200 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
5201 genJumpToThrowHlpBlk(jmpGEU, SCK_ARG_RNG_EXCPN);
5203 // Calculate switch target
5204 labelFirst->bbFlags |= BBF_JMP_TARGET;
5206 // tmpReg = labelFirst
5207 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, labelFirst, tmpReg);
5209 // tmpReg = labelFirst + swReg * 8
5210 getEmitter()->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, tmpReg, swReg, 3, INS_OPTS_LSL);
// Indirect branch to the selected case.
5213 getEmitter()->emitIns_R(INS_br, EA_PTRSIZE, tmpReg);
5215 genDefineTempLabel(labelFirst);
5216 for (int i = 0; i < swMax; ++i)
// Snapshot the emitter's instruction count so the per-case size can be verified.
5218 unsigned prevInsCount = getEmitter()->emitInsCount;
// The case body must have emitted exactly one instruction.
5222 assert(getEmitter()->emitInsCount == prevInsCount + 1);
5224 inst_JMP(EJ_jmp, labelBreakTarget);
// Case instruction plus branch: exactly two instructions per case.
5226 assert(getEmitter()->emitInsCount == prevInsCount + 2);
5228 genDefineTempLabel(labelBreakTarget);
5231 //------------------------------------------------------------------------
5232 // genHWIntrinsicSimdExtractOp:
5234 // Produce code for a GT_HWIntrinsic node with form SimdExtractOp.
5236 // Consumes one SIMD operand and one scalar
5238 // The element index operand is typically a const immediate
5239 // When it is not, a switch table is generated
5241 // See genHWIntrinsicSwitchTable comments
//
// Arguments:
5244 // node - the GT_HWIntrinsic node
5249 void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
5251 GenTree* op1 = node->gtGetOp1();
5252 GenTree* op2 = node->gtGetOp2();
5253 var_types simdType = op1->TypeGet();
5254 var_types targetType = node->TypeGet();
5255 regNumber targetReg = node->gtRegNum;
5257 assert(targetReg != REG_NA);
5259 genConsumeOperands(node);
5261 regNumber op1Reg = op1->gtRegNum;
5263 assert(genIsValidFloatReg(op1Reg));
// The element size is taken from the node's result type.
5265 emitAttr baseTypeSize = emitTypeSize(targetType);
// Number of elements in the vector: vector size divided by element size.
5267 int elements = emitTypeSize(simdType) / baseTypeSize;
// Emits the single instruction that extracts element `element`. It is used directly for
// a constant index and once per case when a switch table is generated.
5269 auto emitSwCase = [&](int element) {
5270 assert(element >= 0);
5271 assert(element < elements);
// Floating-point result: copy the element into element 0 of the float target register.
5273 if (varTypeIsFloating(targetType))
5275 assert(genIsValidFloatReg(targetReg));
5276 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op1Reg, 0, element);
// Unsigned or 8-byte result: umov into an integer register.
5278 else if (varTypeIsUnsigned(targetType) || (baseTypeSize == EA_8BYTE))
5280 assert(genIsValidIntReg(targetReg));
5281 getEmitter()->emitIns_R_R_I(INS_umov, baseTypeSize, targetReg, op1Reg, element);
// Remaining result types: smov into an integer register.
5285 assert(genIsValidIntReg(targetReg));
5286 getEmitter()->emitIns_R_R_I(INS_smov, baseTypeSize, targetReg, op1Reg, element);
// Constant index: emit the extract directly.
5290 if (op2->isContainedIntOrIImmed())
5292 int element = (int)op2->AsIntConCommon()->IconValue();
5294 emitSwCase(element);
// Index in a register: generate a switch table with one case per element.
5298 regNumber elementReg = op2->gtRegNum;
5299 regNumber tmpReg = node->GetSingleTempReg();
5301 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5304 genProduceReg(node);
5307 //------------------------------------------------------------------------
5308 // genHWIntrinsicSimdInsertOp:
5310 // Produce code for a GT_HWIntrinsic node with form SimdInsertOp.
5312 // Consumes one SIMD operand (op1), an element index (op2) and the value to insert (op3)
5314 // The element index operand is typically a const immediate
5315 // When it is not, a switch table is generated
5317 // See genHWIntrinsicSwitchTable comments
5320 // node - the GT_HWIntrinsic node
5325 void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
// The three operands arrive as an argument list hanging off op1 of the node.
5327 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5328 GenTree* op1 = argList->Current();
5329 GenTree* op2 = argList->Rest()->Current();
5330 GenTree* op3 = argList->Rest()->Rest()->Current();
5331 var_types simdType = op1->TypeGet();
5332 var_types baseType = node->gtSIMDBaseType;
5333 regNumber targetReg = node->gtRegNum;
5335 assert(targetReg != REG_NA);
5337 genConsumeRegs(op1);
5338 genConsumeRegs(op2);
5339 genConsumeRegs(op3);
5341 regNumber op1Reg = op1->gtRegNum;
5343 assert(genIsValidFloatReg(targetReg));
5344 assert(genIsValidFloatReg(op1Reg));
5346 emitAttr baseTypeSize = emitTypeSize(baseType);
// Lane count: size of the SIMD operand divided by the element size.
5348 int elements = emitTypeSize(simdType) / baseTypeSize;
// The insert modifies targetReg in place, so the source vector must be copied there first.
// NOTE(review): if targetReg could alias op3's register this copy would clobber the value
// to insert -- presumably register allocation rules that out; confirm.
5350 if (targetReg != op1Reg)
5352 bool is16Byte = (node->gtSIMDSize > 8);
5353 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
// Copy the whole vector using the vector size (attr), not the element size: this matches the
// whole-vector copies in genHWIntrinsicSimdSelectOp and genHWIntrinsicSimdBinaryRMWOp.
5354 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5357 if (op3->isContained())
5359 // Handle vector element to vector element case
5361 // If op3 is contained this is because lowering found an opportunity to contain a Simd.Extract in a Simd.Insert
// The source vector is the first operand of the contained extract node.
5363 regNumber op3Reg = op3->gtGetOp1()->gtRegNum;
5365 assert(genIsValidFloatReg(op3Reg));
5367 // op3 containment currently only occurs when
5368 // + op3 is a Simd.Extract() (gtHWIntrinsicId == NI_ARM64_SIMD_GetItem)
5369 // + element & srcLane are immediate constants
5370 assert(op2->isContainedIntOrIImmed());
5371 assert(op3->OperIs(GT_HWIntrinsic));
5372 assert(op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_ARM64_SIMD_GetItem);
5373 assert(op3->gtGetOp2()->isContainedIntOrIImmed());
5375 int element = (int)op2->AsIntConCommon()->IconValue();
5376 int srcLane = (int)op3->gtGetOp2()->AsIntConCommon()->IconValue();
5378 // Emit mov targetReg[element], op3Reg[srcLane]
5379 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, srcLane);
5383 // Handle scalar to vector element case
5384 // TODO-ARM64-CQ handle containing op3 scalar const where possible
5385 regNumber op3Reg = op3->gtRegNum;
// Emits the one instruction that writes op3Reg into lane 'element' of targetReg.
// It is used directly for a constant index and as the per-case body of the switch table otherwise.
5387 auto emitSwCase = [&](int element) {
5388 assert(element >= 0);
5389 assert(element < elements);
// Floating-point element: the scalar lives in a SIMD/FP register, so copy lane 0 of op3Reg.
5391 if (varTypeIsFloating(baseType))
5393 assert(genIsValidFloatReg(op3Reg));
5394 getEmitter()->emitIns_R_R_I_I(INS_mov, baseTypeSize, targetReg, op3Reg, element, 0);
// Integer element: the scalar lives in a general-purpose register.
5398 assert(genIsValidIntReg(op3Reg));
5399 getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, op3Reg, element);
// Constant index: emit the single insert instruction directly.
5403 if (op2->isContainedIntOrIImmed())
5405 int element = (int)op2->AsIntConCommon()->IconValue();
5407 emitSwCase(element);
// Variable index: dispatch through a switch table with one entry per lane.
5411 regNumber elementReg = op2->gtRegNum;
5412 regNumber tmpReg = node->GetSingleTempReg();
5414 genHWIntrinsicSwitchTable(elementReg, tmpReg, elements, emitSwCase);
5418 genProduceReg(node);
5421 //------------------------------------------------------------------------
5422 // genHWIntrinsicSimdSelectOp:
5424 // Produce code for a GT_HWIntrinsic node with form SimdSelectOp.
5426 // Consumes three SIMD operands and produces a SIMD result
5428 // This intrinsic form requires one of the source registers to be the
5429 // destination register. Inserts a INS_mov if this requirement is not met.
5432 // node - the GT_HWIntrinsic node
5437 void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node)
// The three operands arrive as an argument list hanging off op1 of the node.
5439 GenTreeArgList* argList = node->gtGetOp1()->AsArgList();
5440 GenTree* op1 = argList->Current();
5441 GenTree* op2 = argList->Rest()->Current();
5442 GenTree* op3 = argList->Rest()->Rest()->Current();
5443 var_types baseType = node->gtSIMDBaseType;
5444 regNumber targetReg = node->gtRegNum;
5446 assert(targetReg != REG_NA);
5447 var_types targetType = node->TypeGet();
5449 genConsumeRegs(op1);
5450 genConsumeRegs(op2);
5451 genConsumeRegs(op3);
5453 regNumber op1Reg = op1->gtRegNum;
5454 regNumber op2Reg = op2->gtRegNum;
5455 regNumber op3Reg = op3->gtRegNum;
// All operands and the result live in SIMD/FP registers.
5457 assert(genIsValidFloatReg(op1Reg));
5458 assert(genIsValidFloatReg(op2Reg));
5459 assert(genIsValidFloatReg(op3Reg));
5460 assert(genIsValidFloatReg(targetReg));
// Operate on the whole vector: 16 bytes when the node's SIMD size exceeds 8, otherwise 8 bytes.
5462 bool is16Byte = (node->gtSIMDSize > 8);
5463 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
5465 // Arm64 has three bit select forms; each uses three source registers
5466 // One of the sources is also the destination
// Pick the form whose destination operand already matches targetReg, so no extra copy is needed.
5467 if (targetReg == op3Reg)
5469 // op3 is target use bit insert if true
5470 // op3 = op3 ^ (op1 & (op2 ^ op3))
5471 getEmitter()->emitIns_R_R_R(INS_bit, attr, op3Reg, op2Reg, op1Reg);
5473 else if (targetReg == op2Reg)
5475 // op2 is target use bit insert if false
5476 // op2 = op2 ^ (~op1 & (op2 ^ op3))
5477 getEmitter()->emitIns_R_R_R(INS_bif, attr, op2Reg, op3Reg, op1Reg);
// Remaining case: targetReg is op1Reg, or none of the sources.
5481 if (targetReg != op1Reg)
5483 // target is not one of the sources, copy op1 to use bit select form
5484 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
5487 // targetReg = op3 ^ (targetReg & (op2 ^ op3))
5488 getEmitter()->emitIns_R_R_R(INS_bsl, attr, targetReg, op2Reg, op3Reg);
5491 genProduceReg(node);
5494 //------------------------------------------------------------------------
5495 // genHWIntrinsicSimdSetAllOp:
5497 // Produce code for a GT_HWIntrinsic node with form SimdSetAllOp.
5499 // Consumes single scalar operand and produces a SIMD result
5502 // node - the GT_HWIntrinsic node
5507 void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node)
5509 GenTree* op1 = node->gtGetOp1();
5510 var_types baseType = node->gtSIMDBaseType;
5511 regNumber targetReg = node->gtRegNum;
5513 assert(targetReg != REG_NA);
5514 var_types targetType = node->TypeGet();
5516 genConsumeOperands(node);
5518 regNumber op1Reg = op1->gtRegNum;
// The result is a vector; the scalar source may be in either a general-purpose or a SIMD/FP register.
5520 assert(genIsValidFloatReg(targetReg));
5521 assert(genIsValidIntReg(op1Reg) || genIsValidFloatReg(op1Reg));
// The instruction is looked up from the intrinsic and its base type.
5523 instruction ins = getOpForHWIntrinsic(node, baseType);
5524 assert(ins != INS_invalid);
// Vector size (8 or 16 bytes) and the matching arrangement option for the base type.
5526 bool is16Byte = (node->gtSIMDSize > 8);
5527 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
5528 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
5530 // TODO-ARM64-CQ Support contained immediate cases
// Source in a general-purpose register: two-register form.
5532 if (genIsValidIntReg(op1Reg))
5534 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
// Source in a SIMD/FP register: the extra immediate 0 is presumably the source lane index --
// TODO confirm against the emitter.
5538 getEmitter()->emitIns_R_R_I(ins, attr, targetReg, op1Reg, 0, opt);
5541 genProduceReg(node);
5544 //------------------------------------------------------------------------
5545 // genHWIntrinsicSimdUnaryOp:
5547 // Produce code for a GT_HWIntrinsic node with form SimdUnaryOp.
5549 // Consumes single SIMD operand and produces a SIMD result
5552 // node - the GT_HWIntrinsic node
5557 void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node)
5559 GenTree* op1 = node->gtGetOp1();
5560 var_types baseType = node->gtSIMDBaseType;
5561 regNumber targetReg = node->gtRegNum;
5563 assert(targetReg != REG_NA);
5564 var_types targetType = node->TypeGet();
5566 genConsumeOperands(node);
5568 regNumber op1Reg = op1->gtRegNum;
// Source and result both live in SIMD/FP registers.
5570 assert(genIsValidFloatReg(op1Reg));
5571 assert(genIsValidFloatReg(targetReg));
// The instruction is looked up from the intrinsic and its base type.
5573 instruction ins = getOpForHWIntrinsic(node, baseType);
5574 assert(ins != INS_invalid);
// Vector size (8 or 16 bytes) and the matching arrangement option for the base type.
5576 bool is16Byte = (node->gtSIMDSize > 8);
5577 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
5578 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
// Single two-register instruction: targetReg = ins(op1Reg).
5580 getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
5582 genProduceReg(node);
5585 //------------------------------------------------------------------------
5586 // genHWIntrinsicSimdBinaryRMWOp:
5588 // Produce code for a GT_HWIntrinsic node with form SimdBinaryRMWOp.
5590 // Consumes two SIMD operands and produces a SIMD result.
5591 // First operand is both source and destination.
5594 // node - the GT_HWIntrinsic node
5599 void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node)
5601 GenTree* op1 = node->gtGetOp1();
5602 GenTree* op2 = node->gtGetOp2();
5603 var_types baseType = node->gtSIMDBaseType;
5604 regNumber targetReg = node->gtRegNum;
5606 assert(targetReg != REG_NA);
5608 genConsumeOperands(node);
5610 regNumber op1Reg = op1->gtRegNum;
5611 regNumber op2Reg = op2->gtRegNum;
// Both sources and the result live in SIMD/FP registers.
5613 assert(genIsValidFloatReg(op1Reg));
5614 assert(genIsValidFloatReg(op2Reg));
5615 assert(genIsValidFloatReg(targetReg));
// The instruction is looked up from the intrinsic and its base type.
5617 instruction ins = getOpForHWIntrinsic(node, baseType);
5618 assert(ins != INS_invalid);
// Vector size (8 or 16 bytes) and the matching arrangement option for the base type.
5620 bool is16Byte = (node->gtSIMDSize > 8);
5621 emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
5622 insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
// The instruction reads and writes its first register, so op1 must be in targetReg first.
// NOTE(review): if targetReg == op2Reg this copy would overwrite op2 before it is read --
// presumably register allocation prevents that; confirm.
5624 if (targetReg != op1Reg)
5626 getEmitter()->emitIns_R_R(INS_mov, attr, targetReg, op1Reg);
// targetReg = ins(targetReg, op2Reg)
5628 getEmitter()->emitIns_R_R(ins, attr, targetReg, op2Reg, opt);
5630 genProduceReg(node);
5633 #endif // FEATURE_HW_INTRINSICS
5635 /*****************************************************************************
5636 * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
5637 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
5638 * disassembler thinks the instructions are the same as we do.
5641 // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
5642 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
5643 //#define ALL_ARM64_EMITTER_UNIT_TESTS
5646 void CodeGen::genArm64EmitterUnitTests()
5653 if (!compiler->opts.altJit)
5655 // No point doing this in a "real" JIT.
5659 // Mark the "fake" instructions in the output.
5660 printf("*************** In genArm64EmitterUnitTests()\n");
5662 emitter* theEmitter = getEmitter();
5664 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5666 // genDefineTempLabel(genCreateTempLabel());
5667 // to create artificial labels to help separate groups of tests.
5670 // Loads/Stores basic general register
5673 genDefineTempLabel(genCreateTempLabel());
5675 // ldr/str Xt, [reg]
5676 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
5677 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5678 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5679 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
5680 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5681 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5683 // ldr/str Wt, [reg]
5684 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
5685 theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
5686 theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
5687 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
5688 theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
5689 theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
5691 theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
5692 theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
5693 theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
5694 theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
5695 theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
5697 theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
5698 theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
5699 theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
5700 theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
5701 theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
5702 theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
5703 theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
5704 theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
5705 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
5706 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
5707 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
5708 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
5709 theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
5712 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
5713 theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
5714 theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
5717 theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
5718 theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
5719 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
5720 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
5722 // pre-/post-indexed (unscaled)
5723 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5724 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5725 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
5726 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
5728 // ldar/stlr Rt, [reg]
5729 theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8);
5730 theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10);
5731 theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11);
5732 theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12);
5734 theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8);
5735 theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13);
5736 theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
5737 theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);
5740 theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
5741 theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);
5742 theEmitter->emitIns_R_R(INS_ldaxrb, EA_4BYTE, REG_R5, REG_R11);
5743 theEmitter->emitIns_R_R(INS_ldaxrh, EA_4BYTE, REG_R5, REG_R12);
5746 theEmitter->emitIns_R_R(INS_ldxr, EA_8BYTE, REG_R9, REG_R8);
5747 theEmitter->emitIns_R_R(INS_ldxr, EA_4BYTE, REG_R7, REG_R10);
5748 theEmitter->emitIns_R_R(INS_ldxrb, EA_4BYTE, REG_R5, REG_R11);
5749 theEmitter->emitIns_R_R(INS_ldxrh, EA_4BYTE, REG_R5, REG_R12);
5751 // stxr Ws, Rt, [reg]
5752 theEmitter->emitIns_R_R_R(INS_stxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5753 theEmitter->emitIns_R_R_R(INS_stxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5754 theEmitter->emitIns_R_R_R(INS_stxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5755 theEmitter->emitIns_R_R_R(INS_stxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5757 // stlxr Ws, Rt, [reg]
5758 theEmitter->emitIns_R_R_R(INS_stlxr, EA_8BYTE, REG_R1, REG_R9, REG_R8);
5759 theEmitter->emitIns_R_R_R(INS_stlxr, EA_4BYTE, REG_R3, REG_R7, REG_R13);
5760 theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14);
5761 theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15);
5763 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5765 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5770 genDefineTempLabel(genCreateTempLabel());
5773 theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
5774 theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
5777 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
5778 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
5779 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
5780 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
5782 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
5783 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
5784 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
5785 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
5787 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
5788 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
5789 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5790 theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5792 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
5793 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
5794 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
5795 theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
5797 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5799 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5803 genDefineTempLabel(genCreateTempLabel());
5805 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
5806 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
5807 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
5808 theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
5809 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
5810 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
5812 theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
5813 theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
5814 theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
5815 theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
5816 theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
5818 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5820 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5825 genDefineTempLabel(genCreateTempLabel());
5827 // mov reg, imm(i16,hw)
5828 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
5829 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
5830 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
5831 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
5832 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
5833 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
5834 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
5835 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
5837 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
5838 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
5839 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
5840 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
5842 // mov reg, imm(N,r,s)
5843 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5844 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
5845 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
5846 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
5847 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5848 theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
5850 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
5851 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
5852 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
5853 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
5854 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
5855 theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
5857 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
5858 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
5859 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
5860 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
5861 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
5862 theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
5864 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
5865 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
5866 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
5867 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
5868 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
5869 theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
5871 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5873 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5878 genDefineTempLabel(genCreateTempLabel());
5881 theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
5884 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
5885 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
5886 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
5888 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
5889 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
5890 theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
5892 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
5893 theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
5894 theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
5895 theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
5897 theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
5898 theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
5899 theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
5900 theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
5901 theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
5903 theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
5904 theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
5905 theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
5906 theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
5908 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5910 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5915 genDefineTempLabel(genCreateTempLabel());
5917 // mov reg, imm(i16,hw)
5918 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
5919 theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
5921 theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
5922 theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
5923 theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
5925 theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
5926 theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
5927 theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
5929 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
5931 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
5936 genDefineTempLabel(genCreateTempLabel());
5938 theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
5939 theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
5940 theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
5941 theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
5942 theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
5943 theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
5945 theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
5946 theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
5947 theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
5948 theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
5949 theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
5951 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
5952 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
5953 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
5954 theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
5956 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
5957 theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
5959 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
5960 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
5961 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
5962 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
5963 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
5964 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
5965 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
5966 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
5967 theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
5969 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
5970 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
5971 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
5972 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
5973 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
5974 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
5975 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
5976 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
5977 theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
5979 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
5980 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
5981 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
5982 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
5983 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
5984 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
5985 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
5986 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
5987 theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
5989 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
5990 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
5991 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
5992 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
5993 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
5994 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
5995 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
5996 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
5997 theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
5999 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6000 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
6001 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
6002 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6003 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6004 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6005 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6006 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6007 theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6009 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6010 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
6011 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
6012 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6013 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6014 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6015 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6016 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6017 theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6019 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
6020 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
6021 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
6022 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
6023 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
6024 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
6025 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
6026 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6027 theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6029 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
6030 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
6031 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
6032 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
6033 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
6034 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
6035 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
6036 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
6037 theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
6039 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6041 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6047 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
6048 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
6050 // CMP (shifted register)
6051 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6052 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6053 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6055 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6056 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6057 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6059 // TST (shifted register)
6060 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
6061 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
6062 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
6063 theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
6065 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
6066 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
6067 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
6068 theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
6070 // CMP (extended register)
6071 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6072 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
6073 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis
6074 // disassembles this as "cmp x8,x9",
6075 // which looks like an msdis issue.
6076 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
6078 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6079 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
6080 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
6081 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
6083 // CMP 64-bit (extended register) and left shift
6084 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
6085 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6086 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
6087 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
6089 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
6090 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6091 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
6092 theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
6094 // CMP 32-bit (extended register) and left shift
6095 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
6096 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
6097 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
6099 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
6100 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
6101 theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
6103 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6105 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6110 genDefineTempLabel(genCreateTempLabel());
6112 theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6113 theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6114 theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6115 theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6116 theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6117 theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6118 theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6119 theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6120 theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6121 theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6122 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6123 theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6124 theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6125 theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6126 theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6127 theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6128 theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6129 theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6130 theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6131 theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6132 theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6133 theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
6135 theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6136 theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6137 theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6138 theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6139 theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6140 theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6141 theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6142 theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6143 theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6144 theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6145 theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6146 theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6147 theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6148 theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6149 theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6150 theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6151 theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6152 theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6153 theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6154 theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6155 theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6156 theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
6158 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6160 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6165 genDefineTempLabel(genCreateTempLabel());
6167 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
6168 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
6169 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
6171 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
6172 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
6173 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
6175 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
6176 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
6177 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
6179 theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
6180 theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
6181 theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
6183 theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
6184 theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
6185 theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
6187 theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
6188 theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
6189 theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
6191 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6193 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6198 genDefineTempLabel(genCreateTempLabel());
6200 // ADD (extended register)
6201 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6202 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6203 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6204 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6205 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6206 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6207 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6208 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6210 // ADD (extended register) and left shift
6211 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6212 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6213 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6214 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6215 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6216 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6217 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6218 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6220 // ADD (shifted register)
6221 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6222 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
6223 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
6224 theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
6226 // EXTR (extract field from register pair)
6227 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
6228 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
6229 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
6230 theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
6232 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
6233 theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
6235 // SUB (extended register)
6236 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
6237 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
6238 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
6239 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
6240 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
6241 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
6242 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
6243 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
6245 // SUB (extended register) and left shift
6246 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
6247 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
6248 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
6249 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
6250 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
6251 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
6252 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
6253 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
6255 // SUB (shifted register)
6256 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6257 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
6258 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
6259 theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
6262 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6263 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6264 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6265 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6266 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6267 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6268 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6269 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6271 theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6272 theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6273 theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6274 theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6275 theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6276 theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6277 theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6278 theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6280 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6281 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6282 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6283 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6284 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6285 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6286 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6287 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6289 theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
6290 theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
6291 theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
6292 theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
6293 theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
6294 theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
6295 theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
6296 theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
6298 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6300 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6302 // R_R_R_I -- load/store pair
6305 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6306 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6307 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6308 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
6310 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6311 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6312 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6313 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
6315 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6316 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
6317 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6318 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
6319 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6320 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6321 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6322 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6324 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6325 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
6326 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6327 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
6328 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6329 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6330 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6331 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6333 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
6334 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
6335 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
6336 theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
6339 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6340 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6341 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
6342 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
6343 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
6344 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
6346 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6348 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6350 // R_R_R_Ext -- load/store shifted/extend
6353 genDefineTempLabel(genCreateTempLabel());
6356 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6357 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6358 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6359 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6360 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6361 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6362 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6363 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6364 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6365 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6366 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6368 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6369 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6370 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6371 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6372 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6373 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6374 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6375 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6376 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6377 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6378 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6380 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6381 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6382 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6383 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6384 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6385 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6386 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6387 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6388 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6389 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6390 theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6392 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6393 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6394 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6395 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6396 theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6398 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6399 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6400 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6401 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6402 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6403 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6404 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6405 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6406 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6407 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6408 theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6410 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6411 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6412 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6413 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6414 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6415 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6416 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6417 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6418 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6419 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6420 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6421 theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6423 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6424 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6425 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6426 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6427 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6428 theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6431 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
6432 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6433 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
6434 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6435 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
6436 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6437 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
6438 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6439 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
6440 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6441 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6443 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
6444 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6445 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
6446 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6447 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
6448 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6449 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
6450 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6451 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
6452 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6453 theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6455 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
6456 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
6457 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
6458 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6459 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
6460 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6461 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
6462 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6463 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
6464 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6465 theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6467 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
6468 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
6469 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
6470 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
6471 theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
6473 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6475 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6480 genDefineTempLabel(genCreateTempLabel());
6482 theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
6483 theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
6484 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
6485 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
6486 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
6487 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
6489 theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
6490 theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
6491 theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
6492 theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
6493 theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
6494 theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
6496 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6498 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6503 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
6504 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
6505 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
6506 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
6507 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
6508 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
6509 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
6510 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
6511 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
6512 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
6513 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
6514 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
6515 theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
6516 theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
6519 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
6520 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
6521 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
6522 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
6523 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
6524 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
6525 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
6526 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
6527 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
6528 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
6529 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
6530 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
6531 theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
6532 theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
6534 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6536 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6540 // cinc reg, reg, cond
6541 // cinv reg, reg, cond
6542 // cneg reg, reg, cond
6543 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
6544 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
6545 theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
6546 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
6547 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
6548 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
6549 theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
6550 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
6551 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
6552 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
6553 theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
6554 theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
6555 theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
6556 theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
6558 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6560 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6564 // csel reg, reg, reg, cond
6565 // csinc reg, reg, reg, cond
6566 // csinv reg, reg, reg, cond
6567 // csneg reg, reg, reg, cond
6568 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
6569 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
6570 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
6571 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
6572 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
6573 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
6574 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
6575 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
6576 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
6577 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
6578 theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
6579 theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
6580 theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
6581 theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
6583 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6585 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6589 // ccmp reg1, reg2, nzcv, cond
6590 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6591 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6592 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6593 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6594 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6595 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6596 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6597 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6598 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6599 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6600 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6601 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6602 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6603 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6605 // ccmp reg1, imm, nzcv, cond
6606 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6607 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6608 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6609 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6610 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6611 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6612 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6613 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6614 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6615 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6616 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6617 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6618 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6619 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6621 // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
6622 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
6623 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
6624 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
6625 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
6626 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
6627 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
6628 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
6629 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
6630 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
6631 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
6632 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
6633 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
6634 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
6635 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
6637 // ccmn reg1, reg2, nzcv, cond
6638 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
6639 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
6640 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
6641 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
6642 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
6643 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
6644 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
6645 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
6646 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
6647 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
6648 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
6649 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
6650 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
6651 theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
6653 // ccmn reg1, imm, nzcv, cond
6654 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
6655 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
6656 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
6657 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
6658 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
6659 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
6660 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
6661 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
6662 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
6663 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
6664 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
6665 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
6666 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
6667 theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
6669 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6671 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6673 // Branch to register
6676 genDefineTempLabel(genCreateTempLabel());
6678 theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
6679 theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
6680 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
6681 theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
6683 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6685 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6690 genDefineTempLabel(genCreateTempLabel());
6692 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
6693 theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
6695 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
6696 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
6697 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
6699 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
6700 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
6701 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
6703 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
6704 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
6705 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
6707 theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
6708 theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
6709 theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
6711 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6713 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6714 ////////////////////////////////////////////////////////////////////////////////
6716 // SIMD and Floating point
6718 ////////////////////////////////////////////////////////////////////////////////
6721 // Load/Stores vector register
6724 genDefineTempLabel(genCreateTempLabel());
6726 // ldr/str Vt, [reg]
6727 theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
6728 theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
6729 theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
6730 theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
6731 theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
6732 theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
6733 theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
6734 theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
6735 theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
6736 theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
6738 // ldr/str Vt, [reg+cns] -- scaled
6739 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
6740 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
6741 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
6742 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
6743 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
6745 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
6746 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
6747 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
6748 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
6749 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
6751 // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
6752 // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
6753 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6754 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6755 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6756 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6757 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6759 theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6760 theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6761 theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6762 theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6763 theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6765 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6766 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6767 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6768 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6769 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
6771 theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6772 theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6773 theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6774 theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6775 theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
6777 theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
6778 theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
6779 theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
6780 theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
6781 theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
6783 theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
6784 theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
6785 theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
6786 theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
6787 theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
6790 theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
6791 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
6792 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
6793 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
6795 theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
6796 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
6797 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
6798 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
6800 theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
6801 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
6802 theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
6803 theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
6805 theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
6806 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
6807 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
6808 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
6809 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
6810 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
6811 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
6812 theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
6814 theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
6815 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
6816 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
6817 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
6818 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
6819 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
6820 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
6821 theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
6823 theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
6824 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
6825 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
6826 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
6827 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
6828 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
6829 theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
6830 theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
6833 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
6834 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6835 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
6836 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6837 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
6838 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6839 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
6840 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6841 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
6842 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6843 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
6845 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
6846 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6847 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
6848 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6849 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
6850 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6851 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
6852 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6853 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
6854 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6855 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
6857 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
6858 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6859 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
6860 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6861 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
6862 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6863 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
6864 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6865 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
6866 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6867 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
6869 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
6870 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
6871 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
6872 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
6873 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
6874 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
6875 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
6876 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
6877 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
6878 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
6879 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
6881 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
6882 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
6883 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
6884 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
6885 theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
6887 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6889 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6891 // R_R mov and aliases for mov
6894 // mov vector to vector
6895 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
6896 theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
6898 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
6899 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
6900 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
6902 // mov vector to general
6903 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
6904 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
6905 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
6906 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
6908 // mov general to vector
6909 theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
6910 theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
6911 theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
6912 theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
6914 // mov vector[index] to vector
6915 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
6916 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
6917 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
6918 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
6920 // mov to general from vector[index]
6921 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
6922 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
6923 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
6924 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
6926 // mov to vector[index] from general
6927 theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
6928 theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
6929 theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
6930 theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
6932 // mov vector[index] to vector[index2]
6933 theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
6934 theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
6935 theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
6936 theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
6938 //////////////////////////////////////////////////////////////////////////////////
6941 theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
6942 theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
6943 theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
6944 theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
6946 // mov/ins vector element
6947 theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
6948 theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
6949 theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
6950 theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
6952 // umov to general from vector element
6953 theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
6954 theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
6955 theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
6956 theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
6958 // ins to vector element from general
6959 theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
6960 theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
6961 theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
6962 theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
6964 // smov to general from vector element
6965 theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
6966 theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
6967 theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
6969 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
6971 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
6973 // R_I movi and mvni
6976 // movi imm8 (vector)
6977 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
6978 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
6979 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
6980 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
6982 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
6983 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
6984 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
6985 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
6987 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
6988 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
6989 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
6990 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
6992 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
6993 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
6995 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
6996 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
6997 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
6998 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
7000 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
7001 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
7003 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
7004 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
7006 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
7007 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
7008 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
7009 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
7011 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
7012 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
7014 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
7015 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
7016 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
7017 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
7019 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7020 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7021 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7022 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7024 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7025 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7026 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7027 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7029 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
7030 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
7032 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7033 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7034 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7035 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7037 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
7038 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
7040 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7042 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7044 // R_I orr/bic vector immediate
7047 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7048 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7049 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7050 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7052 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7053 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7054 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7055 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7057 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7058 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7059 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7060 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7062 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
7063 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
7064 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
7065 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
7067 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
7068 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
7069 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
7070 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
7072 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
7073 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
7074 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
7075 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
7077 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7079 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7081 // R_F fcmp/fmov immediate
7084 // fmov imm8 (scalar)
7085 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
7086 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
7087 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
7088 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
7089 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
7090 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value
7091 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31);
7092 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
7093 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
7094 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
7095 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
7097 // fmov imm8 (vector)
7098 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
7099 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
7100 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
7101 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
7102 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
7103 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
7104 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
7105 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
7106 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
7107 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
7108 theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
7109 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
7110 theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
7113 theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
7114 theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
7115 theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
7116 theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
7118 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7120 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7122 // R_R fmov/fcmp/fcvt
7125 // fmov vector to vector
7126 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
7127 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
7129 // fmov vector to general
7130 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
7131 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
7132 // using the optional conversion specifier
7133 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
7134 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
7136 // fmov general to vector
7137 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
7138 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
7139 // using the optional conversion specifier
7140 theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
7141 theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
7144 theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
7145 theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
7146 theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
7147 theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
7150 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
7151 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
7153 theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
7154 theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
7156 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
7157 theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
7159 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7161 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7163 // R_R floating point conversions
7167 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
7168 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
7170 // fcvtas scalar to general
7171 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7172 theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7173 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7174 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7177 theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7178 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7179 theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7182 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
7183 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
7185 // fcvtau scalar to general
7186 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7187 theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7188 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7189 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7192 theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7193 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7194 theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7196 ////////////////////////////////////////////////////////////////////////////////
7199 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
7200 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
7202 // fcvtms scalar to general
7203 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7204 theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7205 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7206 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7209 theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7210 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7211 theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7214 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
7215 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
7217 // fcvtmu scalar to general
7218 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7219 theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7220 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7221 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7224 theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7225 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7226 theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7228 ////////////////////////////////////////////////////////////////////////////////
7231 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
7232 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
7234 // fcvtns scalar to general
7235 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7236 theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7237 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7238 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7241 theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7242 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7243 theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7246 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
7247 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
7249 // fcvtnu scalar to general
7250 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7251 theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7252 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7253 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7256 theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7257 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7258 theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7260 ////////////////////////////////////////////////////////////////////////////////
7263 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
7264 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
7266 // fcvtps scalar to general
7267 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7268 theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7269 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7270 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7273 theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7274 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7275 theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7278 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
7279 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
7281 // fcvtpu scalar to general
7282 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7283 theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7284 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7285 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7288 theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7289 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7290 theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7292 ////////////////////////////////////////////////////////////////////////////////
7295 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
7296 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
7298 // fcvtzs scalar to general
7299 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7300 theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7301 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7302 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7305 theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7306 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7307 theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7310 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
7311 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
7313 // fcvtzu scalar to general
7314 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
7315 theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
7316 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
7317 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
7320 theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7321 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7322 theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7324 ////////////////////////////////////////////////////////////////////////////////
7327 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
7328 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
7330 // scvtf scalar from general
7331 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7332 theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7333 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7334 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7337 theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7338 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7339 theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7342 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
7343 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
7345 // ucvtf scalar from general
7346 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
7347 theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
7348 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
7349 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
7352 theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7353 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7354 theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
7356 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7358 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7360 // R_R floating point operations, one dest, one source
7364 theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
7365 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
7368 theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7369 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7370 theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7373 theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
7374 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
7377 theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7378 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7379 theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7382 theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
7383 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
7386 theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7387 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7388 theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7390 genDefineTempLabel(genCreateTempLabel());
7393 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
7396 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7397 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7398 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7399 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7400 theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7401 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7402 theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7405 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
7408 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7409 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7410 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7411 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7412 theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7413 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7414 theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
7417 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
7418 theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
7419 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
7420 theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
7423 theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
7424 theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
7426 // not vector (the same encoding as mvn)
7427 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
7428 theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
7429 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
7430 theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
7433 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7434 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7435 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7436 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7437 theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7438 theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7441 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7442 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7443 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7444 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7445 theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7446 theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7449 theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7450 theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7453 theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7454 theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7457 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7458 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7459 theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7460 theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7463 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7464 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7465 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7466 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7467 theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7468 theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7471 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7472 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7473 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7474 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7475 theEmitter->emitIns_R_R(INS_addv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7476 theEmitter->emitIns_R_R(INS_addv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7479 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7480 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7481 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7482 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7483 theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7484 theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7487 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7488 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7489 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7490 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7491 theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7492 theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7495 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7496 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7497 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7498 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7499 theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7500 theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7503 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7504 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7505 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7506 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7507 theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7508 theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7511 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7512 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7513 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7514 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7515 theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7516 theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7519 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
7520 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
7521 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
7522 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
7523 theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
7524 theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
7527 theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1);
7528 theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3);
7531 theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1);
7534 theEmitter->emitIns_R_R(INS_fcvtl2, EA_4BYTE, REG_V0, REG_V1);
7537 theEmitter->emitIns_R_R(INS_fcvtn, EA_8BYTE, REG_V0, REG_V1);
7540 theEmitter->emitIns_R_R(INS_fcvtn2, EA_8BYTE, REG_V0, REG_V1);
7543 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7545 // R_R floating point round to int, one dest, one source
7549 theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
7550 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
7553 theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7554 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7555 theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7558 theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
7559 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
7562 theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7563 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7564 theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7567 theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
7568 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
7571 theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7572 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7573 theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7576 theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
7577 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
7580 theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7581 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7582 theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7585 theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
7586 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
7589 theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7590 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7591 theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7594 theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
7595 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
7598 theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7599 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7600 theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7603 theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
7604 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
7607 theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
7608 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
7609 theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
7611 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7613 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7615 // R_R_R floating point operations, one dest, two source
7618 genDefineTempLabel(genCreateTempLabel());
7620 theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7621 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7622 theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7623 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7624 theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7626 theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7627 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7628 theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7629 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7630 theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7632 theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7633 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7634 theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7635 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7636 theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7638 theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7639 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7640 theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7641 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7642 theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7644 theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7645 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7646 theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7647 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7648 theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7651 theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7652 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7653 theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7654 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7655 theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7657 genDefineTempLabel(genCreateTempLabel());
7659 theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7660 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7661 theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7662 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7663 theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7665 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7666 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7667 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7668 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7669 theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7671 theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7672 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7673 theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
7674 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
7675 theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
7677 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
7678 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
7679 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
7680 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
7681 theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
7683 theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
7684 theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
7686 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7688 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7690 // R_R_I vector operations, one dest, one source reg, one immediate
7693 genDefineTempLabel(genCreateTempLabel());
7696 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
7697 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
7698 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
7699 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
7700 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
7703 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7704 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7705 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7706 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7707 theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7708 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7709 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7710 theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7713 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
7714 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
7715 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
7716 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
7717 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
7720 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7721 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7722 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7723 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7724 theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7725 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7726 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7727 theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7730 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
7731 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
7732 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
7733 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
7734 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
7737 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7738 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7739 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7740 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7741 theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7742 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7743 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7744 theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7747 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
7748 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
7749 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
7750 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
7751 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
7754 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7755 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7756 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7757 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7758 theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7759 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7760 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7761 theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7764 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
7765 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
7766 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
7767 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
7768 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
7771 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7772 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7773 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7774 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7775 theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7776 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7777 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7778 theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7781 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
7782 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
7783 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
7784 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
7785 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
7788 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7789 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7790 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7791 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7792 theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7793 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7794 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7795 theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7798 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
7799 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
7800 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
7801 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
7802 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
7805 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7806 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7807 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7808 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7809 theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7810 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7811 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7812 theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7815 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
7816 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
7817 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
7818 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
7819 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
7822 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7823 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7824 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7825 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7826 theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7827 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7828 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7829 theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7832 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
7833 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
7834 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
7835 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
7836 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
7839 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7840 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7841 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7842 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7843 theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7844 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7845 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7846 theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7849 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
7850 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
7851 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
7852 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
7853 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
7856 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7857 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7858 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7859 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7860 theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7861 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7862 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7863 theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7866 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
7867 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
7868 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
7869 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
7870 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
7873 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7874 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7875 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7876 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7877 theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7878 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7879 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
7880 theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
7883 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7884 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7885 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7886 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7887 theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7888 theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7891 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7892 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7893 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7894 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7895 theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7896 theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7899 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7900 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7901 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7902 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7903 theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7904 theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7907 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
7908 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
7909 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
7910 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
7911 theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
7912 theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
7915 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7916 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7917 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
7918 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
7919 theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7920 theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7923 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
7924 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
7925 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
7926 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
7927 theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
7928 theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
7930 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
7932 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
7934 // R_R_R vector operations, one dest, two source regs
7937 genDefineTempLabel(genCreateTempLabel());
7939 // Specifying an Arrangement is optional
7941 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
7942 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
7943 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
7944 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
7945 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
7946 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
7947 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
7948 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
7949 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
7950 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
7952 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
7953 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
7954 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
7955 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
7956 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
7957 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
7959 // Default Arrangement as per the ARM64 manual
7961 theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
7962 theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
7963 theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
7964 theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
7965 theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
7966 theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
7967 theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
7968 theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
7969 theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
7970 theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
7972 theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
7973 theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
7974 theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
7975 theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
7976 theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
7977 theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
7979 genDefineTempLabel(genCreateTempLabel());
7981 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
7982 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
7983 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
7984 theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
7985 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
7986 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
7987 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
7988 theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
7990 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
7991 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
7992 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
7993 theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
7994 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
7995 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
7996 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
7997 theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
7999 genDefineTempLabel(genCreateTempLabel());
8002 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8003 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8004 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8005 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8006 theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8007 theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8010 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8011 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8012 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8013 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8014 theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8015 theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8018 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8019 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8020 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8021 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8022 theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8023 theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8026 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8027 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8028 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8029 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8030 theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8031 theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8032 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8034 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8036 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8037 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8038 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8039 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8040 theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8041 theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8044 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8045 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8046 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8047 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8048 theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8049 theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8052 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8053 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8054 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8055 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8056 theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8057 theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8060 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8061 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8062 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8063 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8064 theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8065 theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8068 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8069 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8070 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8071 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8072 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8073 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8074 theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8075 theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8078 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8079 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8080 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8081 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8082 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8083 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8084 theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8085 theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8088 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8089 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8090 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8091 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8092 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8093 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8094 theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8095 theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8098 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8099 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8100 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8101 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8102 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8103 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8104 theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8105 theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8108 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8109 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8110 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8111 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8112 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8113 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8114 theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8115 theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8118 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8119 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
8120 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
8121 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
8122 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8123 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8124 theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D);
8125 theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8128 theEmitter->emitIns_R_R_R(INS_faddp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8129 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8130 theEmitter->emitIns_R_R_R(INS_faddp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8133 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8134 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8135 theEmitter->emitIns_R_R_R(INS_fcmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8138 theEmitter->emitIns_R_R_R(INS_fcmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8139 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8140 theEmitter->emitIns_R_R_R(INS_fcmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8143 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
8144 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8145 theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D);
8146 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8148 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8150 // R_R_R vector multiply
8153 genDefineTempLabel(genCreateTempLabel());
8155 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
8156 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
8157 theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8158 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
8159 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
8160 theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
8162 theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
8163 theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
8165 // 'mul' vector by elem
8166 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8167 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8168 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8169 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8170 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8171 theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8172 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8173 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8174 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8175 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8176 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8177 theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8179 // 'mla' vector by elem
8180 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8181 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8182 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8183 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8184 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8185 theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8186 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8187 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8188 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8189 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8190 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8191 theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8193 // 'mls' vector by elem
8194 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
8195 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
8196 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
8197 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
8198 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
8199 theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
8200 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
8201 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
8202 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
8203 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
8204 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
8205 theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
8207 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8209 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8211 // R_R_R floating point operations, one source/dest, and two source
8214 genDefineTempLabel(genCreateTempLabel());
8216 theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8217 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8218 theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8220 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8221 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8222 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8223 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8224 theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8226 theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
8227 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
8228 theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
8230 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
8231 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
8232 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
8233 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
8234 theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
8236 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8238 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8240 // R_R_R_R floating point operations, one dest, and three source
8243 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
8244 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
8245 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
8246 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
8248 theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
8249 theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
8250 theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
8251 theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
8255 #ifdef ALL_ARM64_EMITTER_UNIT_TESTS
8257 BasicBlock* label = genCreateTempLabel();
8258 genDefineTempLabel(label);
8263 theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
8265 #endif // ALL_ARM64_EMITTER_UNIT_TESTS
8267 printf("*************** End of genArm64EmitterUnitTests()\n");
8269 #endif // defined(DEBUG)
8271 #endif // _TARGET_ARM64_
8273 #endif // !LEGACY_BACKEND