switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set L1 cache policy to MISS_EVICT.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
- /// The L1 cache is write through so does not need to be bypassed. There is no
- /// bypass control for the L2 cache at the isa level.
+ /// Do not set GLC for RMW atomic operations, as the L0/L1 caches are
+ /// automatically bypassed and the GLC bit is instead used to indicate
+ /// whether they are return or no-return.
+ /// Note: there is no L2 cache coherent bypass control at the ISA level.
return Changed;
}
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L1 cache policy to MISS_LRU.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
bool Changed = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- /// TODO Do not set glc for rmw atomic operations as they
- /// implicitly bypass the L0/L1 caches.
-
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L0 and L1 cache policies to MISS_EVICT.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
break;
bool Changed = false;
if (IsVolatile) {
+ // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
if (Op == SIMemOp::LOAD) {
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
}
if (IsNonTemporal) {
- // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions.
+ // For loads, setting SLC configures the L0 and L1 cache policies to
+ // HIT_EVICT and the L2 cache policy to STREAM.
+ // For stores, setting both GLC and SLC configures the L0 and L1 cache
+ // policies to MISS_EVICT and the L2 cache policy to STREAM.
+ if (Op == SIMemOp::STORE)
+ Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
+
return Changed;
}
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 slc
+; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_nontemporal_store_0:
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 slc
+; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0:
; GFX10-WGP-NEXT: flat_load_dword v2, v[1:2]
; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 slc
+; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_nontemporal_store_1:
; GFX10-CU-NEXT: flat_load_dword v2, v[1:2]
; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 slc
+; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1:
; GFX10-WGP-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] slc
+; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_nontemporal_store_0:
; GFX10-CU-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] slc
+; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_nontemporal_store_0:
; GFX10-WGP-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] slc
+; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_nontemporal_store_1:
; GFX10-CU-NEXT: s_load_dword s0, s[0:1], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] slc
+; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_nontemporal_store_1:
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-WGP-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen slc
+; GFX10-WGP-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: private_nontemporal_store_0:
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-CU-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen slc
+; GFX10-CU-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_nontemporal_store_0:
; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s2
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-WGP-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen slc
+; GFX10-WGP-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen glc slc
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: private_nontemporal_store_1:
; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s2
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-CU-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen slc
+; GFX10-CU-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen glc slc
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: private_nontemporal_store_1: