const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
- bool StreamingSVEModeDisabled)
+ bool StreamingSVEMode,
+ bool StreamingCompatibleSVEMode)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
- StreamingSVEModeDisabled(StreamingSVEModeDisabled),
+ StreamingSVEMode(StreamingSVEMode),
+ StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
if (!hasNEON())
return false;
- return !ForceStreamingCompatibleSVE;
+ // The 'force-streaming-compatible-sve' flag overrides the streaming
+ // function attributes.
+ if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
+ return !ForceStreamingCompatibleSVE;
+
+ return !isStreaming() && !isStreamingCompatible();
}
bool IsLittle;
- bool StreamingSVEModeDisabled;
+ bool StreamingSVEMode;
+ bool StreamingCompatibleSVEMode;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
StringRef FS, const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
- bool StreamingSVEModeDisabled = true);
+ bool StreamingSVEMode = false,
+ bool StreamingCompatibleSVEMode = false);
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
bool isXRaySupported() const override { return true; }
+ /// Returns true if the function has the "aarch64_pstate_sm_enabled" or
+ /// "aarch64_pstate_sm_body" attribute.
+ bool isStreaming() const { return StreamingSVEMode; }
+
+ /// Returns true if the function has the streaming-compatible attribute.
+ bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
+
/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
- if (!isStreamingSVEModeDisabled())
+ if (StreamingSVEMode || StreamingCompatibleSVEMode)
return 0;
return MinVectorRegisterBitWidth;
}
return "__security_check_cookie_arm64ec";
return "__security_check_cookie";
}
-
- bool isStreamingSVEModeDisabled() const { return StreamingSVEModeDisabled; }
};
} // End llvm namespace
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
- bool StreamingSVEModeDisabled =
- !F.hasFnAttribute("aarch64_pstate_sm_enabled") &&
- !F.hasFnAttribute("aarch64_pstate_sm_compatible") &&
- !F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+ F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingCompatibleSVEMode =
+ F.hasFnAttribute("aarch64_pstate_sm_compatible");
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
SmallString<512> Key;
raw_svector_ostream(Key) << "SVEMin" << MinSVEVectorSize << "SVEMax"
- << MaxSVEVectorSize << "StreamingSVEModeDisabled="
- << StreamingSVEModeDisabled << CPU << TuneCPU << FS;
+ << MaxSVEVectorSize
+ << "StreamingSVEMode=" << StreamingSVEMode
+ << "StreamingCompatibleSVEMode="
+ << StreamingCompatibleSVEMode << CPU << TuneCPU
+ << FS;
auto &I = SubtargetMap[Key];
if (!I) {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, StreamingSVEModeDisabled);
+ MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
}
+
+ assert((!StreamingSVEMode || I->hasSME()) &&
+ "Expected SME to be available");
+ assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
+ "Expected SVE or SME to be available");
+
return I.get();
}
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (!ST->isStreamingSVEModeDisabled() &&
- !EnableFixedwidthAutovecInStreamingMode)
+ if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
return TypeSize::getFixed(0);
if (ST->hasSVE())
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
- if (!ST->isStreamingSVEModeDisabled() && !EnableScalableAutovecInStreamingMode)
+ if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
+ !EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind #0 {
; CHECK-LABEL: streaming_compatible_with_neon_vectors:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: bl normal_callee_vec_arg
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
-; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #112
+; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
%fadd = fadd <2 x double> %res, %arg
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
+; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -enable-fixedwidth-autovec-in-streaming-mode -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
; RUN: opt < %s -passes=loop-vectorize -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=NO_SC_SVE
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"