"Image Gather4 D16 hardware bug"
>;
+def FeatureMADIntraFwdBug : SubtargetFeature<"mad-intra-fwd-bug", // feature name string
+ "HasMADIntraFwdBug", // subtarget field associated with this feature
+ "true", // value given to that field when the feature is enabled
+ "MAD_U64/I64 intra instruction forwarding bug" // human-readable description
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
FeatureImageInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
- FeatureBackOffBarrier]>;
+ FeatureBackOffBarrier,
+ FeatureMADIntraFwdBug]>;
def FeatureISAVersion11_0_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">,
AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;
+def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">; // holds when the subtarget reports the MAD intra-forwarding bug
+
+def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; // exact negation of HasMADIntraFwdBug, for subtargets without the bug
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
SDLoc SL(N);
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
unsigned Opc;
- if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
+ if (Subtarget->hasMADIntraFwdBug())
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
: AMDGPU::V_MAD_U64_U32_gfx11_e64;
else
SDLoc SL(N);
bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
unsigned Opc;
- if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
+ if (Subtarget->hasMADIntraFwdBug())
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
: AMDGPU::V_MAD_U64_U32_gfx11_e64;
else
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
bool HasGFX11FullVGPRs = false;
+ bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
// Dummy feature to use for assembler in tablegen.
bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
+ bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; } // returns the HasMADIntraFwdBug flag unchanged
+
bool hasNSAEncoding() const { return HasNSAEncoding; }
unsigned getNSAMaxSize() const { return NSAMaxSize; }
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus
-let isCommutable = 1 in {
-let SchedRW = [WriteIntMul, WriteSALU] in {
-let SubtargetPredicate = isGFX7GFX8GFX9GFX10 in {
-defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
-defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
-}
-let SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst" in {
-defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
-defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
-} // End SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst"
-} // End SchedRW = [WriteIntMul, WriteSALU]
-} // End isCommutable = 1
+let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { // 64-bit MAD definitions; both groups below share these settings
+ let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in { // plain variants: GFX7+ subtargets without the forwarding bug
+ defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
+ defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
+ }
+ let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug], // _gfx11 variants: GFX11 subtargets that have the bug
+ Constraints = "@earlyclobber $vdst" in { // NOTE(review): presumably keeps $vdst from overlapping a source operand - confirm
+ defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; // same mnemonic string as the plain variant
+ defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; // same mnemonic string as the plain variant
+ }
+} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU]
let FPDPRounding = 1 in {
>;
}
-let SubtargetPredicate = isGFX9GFX10 in // exclude pre-GFX9 where it was slow
-defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
-let SubtargetPredicate = isGFX11Only in
-defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
+// Exclude pre-GFX9 subtargets, where it was slow.
+let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in // GFX9+ without the forwarding bug
+ defm : IMAD32_Pats<V_MAD_U64_U32_e64>; // patterns instantiated with the plain opcode
+let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in // GFX11 with the forwarding bug
+ defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>; // patterns instantiated with the _gfx11 opcode
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,