Take advantage of D117117 to simplify all {{\[}} to [ and {{\]}} to ].
Differential Revision: https://reviews.llvm.org/D117298
; CHECK-DAG: ldr x20, [x20]
; CHECK-DAG: mov [[CSREG:x[1-9].*]], x8
; CHECK: bl {{_?}}thisreturn_attribute
-; CHECK: str x0, {{\[}}[[CSREG]]
+; CHECK: str x0, [[[CSREG]]
; CHECK: ret
define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret(i8*), i8** noalias nocapture readonly swiftself) {
entry:
; AS-DAG: add [[ADD:x[0-9]+]], [[LSL]], #64
; AS-DAG: and [[AND:x[0-9]+]], [[ADD]], #0xfffffffffffffff0
; AS-DAG: add [[ADR:x[0-9]+]], x0, [[AND]]
-; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, {{\[}}[[ADR]]{{\]}}
+; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, [[[ADR]]]
; AS-DAG: str q[[V0]]
; AS-DAG: str q[[V1]]
; AS-DAG: str q[[V2]]
; AS-DAG: add [[ADD:x[0-9]+]], x0, #4
; AS-DAG: and [[AND:x[0-9]+]], [[LSL]], #0xfffffffffffffff0
; AS-DAG: add [[ADR:x[0-9]+]], [[ADD]], [[AND]]
-; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, {{\[}}[[ADR]]{{\]}}
+; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, [[[ADR]]]
; AS-DAG: str q[[V0]]
; AS-DAG: str q[[V1]]
; AS-DAG: str q[[V2]]
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: mov w[[R1:[0-9]+]], #8
-; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
+; FAST: str w[[R1]], [x[[R0]]]
%0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
%call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i64 %0, i32 8) #5
; CHECK: test
; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp, #32]
; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp]
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32]
-; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]]
+; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [[[BASE:x[0-9]+]], #32]
+; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [[[BASE]]]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
; RUN: llc -mtriple=arm64-apple-darwin19 -code-model=large -O2 -o - %s | FileCheck %s
; CHECK: adrp [[REG1:x[0-9]+]], _bar@GOTPAGE
-; CHECK: ldr [[REG1]], {{\[}}[[REG1]], _bar@GOTPAGEOFF]
+; CHECK: ldr [[REG1]], [[[REG1]], _bar@GOTPAGEOFF]
; CHECK: blr [[REG1]]
declare void @bar()
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldrsw x0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], [x[[LDRGOT_REG]]]
; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
; CHECK-NEXT: str [[ADD]], [x[[LDRGOT_REG]]]
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _C@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr w0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ldr w0, [[[ADDGOT_REG]], #16]
; CHECK-NEXT: ret
; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
define i32 @getInternalCPlus4() {
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldrsw x0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ldrsw x0, [[[ADDGOT_REG]], #16]
; CHECK-NEXT: ret
; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
define i64 @getSExtInternalCPlus4() {
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
-; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], [[[ADDGOT_REG]], #16]
; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
-; CHECK-NEXT: str [[ADD]], {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: str [[ADD]], [[[ADDGOT_REG]], #16]
; CHECK-NEXT: ret
; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]]
define void @getSeveralInternalCPlus4(i32 %t) {
; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: str w0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: str w0, [[[ADDGOT_REG]], #16]
; CHECK-NEXT: ret
; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
define void @setInternalCPlus4(i32 %t) {
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ldr w0, [[[ADRP_REG]], _InternalC@PAGEOFF]
; CHECK-NEXT: ret
; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]]
define i32 @getInternalC() {
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldrsw x0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ldrsw x0, [[[ADRP_REG]], _InternalC@PAGEOFF]
; CHECK-NEXT: ret
; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]]
define i64 @getSExtInternalC() {
; there is not much we can do about it.
; CHECK-LABEL: _getSeveralInternalC
; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
-; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], [[[ADRP_REG]], _InternalC@PAGEOFF]
; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
-; CHECK-NEXT: str [[ADD]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: str [[ADD]], [[[ADRP_REG]], _InternalC@PAGEOFF]
; CHECK-NEXT: ret
define void @getSeveralInternalC(i32 %t) {
entry:
; Indeed, strs do not support literals.
; CHECK-LABEL: _setInternalC
; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
-; CHECK-NEXT: str w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: str w0, [[[ADRP_REG]], _InternalC@PAGEOFF]
; CHECK-NEXT: ret
define void @setInternalC(i32 %t) {
entry:
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _D@GOTPAGEOFF]
; CHECK-NEXT: ldrb w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _D@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: strb w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _D@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldrsb w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _D@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldrsb x0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _E@GOTPAGEOFF]
; CHECK-NEXT: ldrh w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _E@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldrsh w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _E@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: strh w0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _E@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldrsh x0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _F@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr x0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _F@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str x0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _G@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr s0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _G@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str s0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _H@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr h0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _H@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str h0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _I@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _I@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _J@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr d0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _J@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str d0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _K@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr q0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _K@GOTPAGEOFF]
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: str q0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _L@GOTPAGEOFF]
; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr b0, [x[[LDRGOT_REG]]]
; CHECK-NEXT: ret
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
; CHECK-NEXT: ; kill
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
-; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
+; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _L@GOTPAGEOFF]
; Ultimately we should generate str b0, but right now, we match the vector
; variant, which does not allow folding the immediate into the store.
; CHECK-NEXT: st1.b { v0 }[0], [x[[LDRGOT_REG]]]
; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]:
; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE
; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]:
-; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
+; CHECK: ldr q[[IDX:[0-9]+]], [[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
; The tuple comes from the next instruction.
; CHECK: ext.16b v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, #1
; CHECK: ret
; CHECK-LABEL: @test
; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3
-; CHECK: ldp [[CPLX1_I:s[0-9]+]], [[CPLX1_R:s[0-9]+]], {{\[}}[[BASE]]]
-; CHECK: ldp [[CPLX2_I:s[0-9]+]], [[CPLX2_R:s[0-9]+]], {{\[}}[[BASE]], #64]
+; CHECK: ldp [[CPLX1_I:s[0-9]+]], [[CPLX1_R:s[0-9]+]], [[[BASE]]]
+; CHECK: ldp [[CPLX2_I:s[0-9]+]], [[CPLX2_R:s[0-9]+]], [[[BASE]], #64]
; CHECK: fadd {{s[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
; CHECK: fadd {{s[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
; CHECK: ret
; CHECK-LABEL: @test_int
; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3
-; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]]
-; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]], #64]
+; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], [[[BASE]]]
+; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], [[[BASE]], #64]
; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
; CHECK: ret
; CHECK-LABEL: @test_long
; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4
-; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]]
-; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]], #128]
+; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], [[[BASE]]]
+; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], [[[BASE]], #128]
; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
; CHECK: ret
; CHECK: bl _call0
; LARGE-LABEL: foo0
; LARGE: adrp [[REG0:x[0-9]+]], _call0@GOTPAGE
-; LARGE: ldr [[REG1:x[0-9]+]], {{\[}}[[REG0]], _call0@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG1:x[0-9]+]], [[[REG0]], _call0@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG1]]
call void @call0()
ret void
entry:
; CHECK: @Initrand
; CHECK: adrp [[REG:x[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr [[REG2:x[0-9]+]], {{\[}}[[REG]], _seed@GOTPAGEOFF{{\]}}
-; CHECK: str {{x[0-9]+}}, {{\[}}[[REG2]]{{\]}}
+; CHECK: ldr [[REG2:x[0-9]+]], [[[REG]], _seed@GOTPAGEOFF]
+; CHECK: str {{x[0-9]+}}, [[[REG2]]]
store i64 74755, i64* @seed, align 8
ret void
}
entry:
; CHECK: @Rand
; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr [[REG2:x[0-9]+]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}}
-; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}}
+; CHECK: ldr [[REG2:x[0-9]+]], [[[REG1]], _seed@GOTPAGEOFF]
+; CHECK: ldr [[REG5:x[0-9]+]], [[[REG2]]]
; CHECK: mov [[REG4:x[0-9]+]], #1309
; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]]
; CHECK: mov [[REG3:x[0-9]+]], #13849
; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]]
; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff
; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr [[REG1]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}}
-; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}}
+; CHECK: ldr [[REG1]], [[[REG1]], _seed@GOTPAGEOFF]
+; CHECK: str [[REG8]], [[[REG1]]]
; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr [[REG1]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}}
-; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
+; CHECK: ldr [[REG1]], [[[REG1]], _seed@GOTPAGEOFF]
+; CHECK: ldr {{x[0-9]+}}, [[[REG1]]]
%0 = load i64, i64* @seed, align 8
%mul = mul nsw i64 %0, 1309
%add = add nsw i64 %mul, 13849
; ARM64: ldr [[REG0:x[0-9]+]], [x8, _temp@GOTPAGEOFF]
; ARM64: adrp [[REG1:x[0-9]+]], _message@PAGE
; ARM64: add [[REG2:x[0-9]+]], [[REG1]], _message@PAGEOFF
-; ARM64: ldr x10, {{\[}}[[REG2]]{{\]}}
-; ARM64: str x10, {{\[}}[[REG0]]{{\]}}
-; ARM64: ldr x10, {{\[}}[[REG2]], #8]
-; ARM64: str x10, {{\[}}[[REG0]], #8]
-; ARM64: ldrb [[REG3:w[0-9]+]], {{\[}}[[REG2]], #16]
-; ARM64: strb [[REG3]], {{\[}}[[REG0]], #16]
+; ARM64: ldr x10, [[[REG2]]]
+; ARM64: str x10, [[[REG0]]]
+; ARM64: ldr x10, [[[REG2]], #8]
+; ARM64: str x10, [[[REG0]], #8]
+; ARM64: ldrb [[REG3:w[0-9]+]], [[[REG2]], #16]
+; ARM64: strb [[REG3]], [[[REG0]], #16]
; ARM64: ret
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
ret void
; ARM64: ldr [[REG0:x[0-9]+]], [x8, _temp@GOTPAGEOFF]
; ARM64: adrp [[REG3:x[0-9]+]], _message@PAGE
; ARM64: add [[REG1:x[0-9]+]], [[REG3]], _message@PAGEOFF
-; ARM64: ldr x10, {{\[}}[[REG1]]]
-; ARM64: str x10, {{\[}}[[REG0]]]
-; ARM64: ldr x10, {{\[}}[[REG1]], #8]
-; ARM64: str x10, {{\[}}[[REG0]], #8]
-; ARM64: ldrb [[REG4:w[0-9]+]], {{\[}}[[REG1]], #16]
-; ARM64: strb [[REG4]], {{\[}}[[REG0]], #16]
+; ARM64: ldr x10, [[[REG1]]]
+; ARM64: str x10, [[[REG0]]]
+; ARM64: ldr x10, [[[REG1]], #8]
+; ARM64: str x10, [[[REG0]], #8]
+; ARM64: ldrb [[REG4:w[0-9]+]], [[[REG1]], #16]
+; ARM64: strb [[REG4]], [[[REG0]], #16]
; ARM64: ret
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false)
ret void
; ARM64: ldr [[REG0:x[0-9]+]], [x8, _temp@GOTPAGEOFF]
; ARM64: adrp [[REG1:x[0-9]+]], _message@PAGE
; ARM64: add [[REG2:x[0-9]+]], [[REG1]], _message@PAGEOFF
-; ARM64: ldr w10, {{\[}}[[REG2]]]
-; ARM64: str w10, {{\[}}[[REG0]]]
-; ARM64: ldr w10, {{\[}}[[REG2]], #4]
-; ARM64: str w10, {{\[}}[[REG0]], #4]
-; ARM64: ldrb [[REG3:w[0-9]+]], {{\[}}[[REG2]], #8]
-; ARM64: strb [[REG3]], {{\[}}[[REG0]], #8]
+; ARM64: ldr w10, [[[REG2]]]
+; ARM64: str w10, [[[REG0]]]
+; ARM64: ldr w10, [[[REG2]], #4]
+; ARM64: str w10, [[[REG0]], #4]
+; ARM64: ldrb [[REG3:w[0-9]+]], [[[REG2]], #8]
+; ARM64: strb [[REG3]], [[[REG0]], #8]
; ARM64: ret
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false)
ret void
; ARM64: ldr [[REG0:x[0-9]+]], [x8, _temp@GOTPAGEOFF]
; ARM64: adrp [[REG1:x[0-9]+]], _message@PAGE
; ARM64: add [[REG2:x[0-9]+]], [[REG1]], _message@PAGEOFF
-; ARM64: ldrh w10, {{\[}}[[REG2]]]
-; ARM64: strh w10, {{\[}}[[REG0]]]
-; ARM64: ldrh w10, {{\[}}[[REG2]], #2]
-; ARM64: strh w10, {{\[}}[[REG0]], #2]
-; ARM64: ldrh w10, {{\[}}[[REG2]], #4]
-; ARM64: strh w10, {{\[}}[[REG0]], #4]
-; ARM64: ldrb [[REG3:w[0-9]+]], {{\[}}[[REG2]], #6]
-; ARM64: strb [[REG3]], {{\[}}[[REG0]], #6]
+; ARM64: ldrh w10, [[[REG2]]]
+; ARM64: strh w10, [[[REG0]]]
+; ARM64: ldrh w10, [[[REG2]], #2]
+; ARM64: strh w10, [[[REG0]], #2]
+; ARM64: ldrh w10, [[[REG2]], #4]
+; ARM64: strh w10, [[[REG0]], #4]
+; ARM64: ldrb [[REG3:w[0-9]+]], [[[REG2]], #6]
+; ARM64: strb [[REG3]], [[[REG0]], #6]
; ARM64: ret
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false)
ret void
; ARM64: ldr [[REG0:x[0-9]+]], [x8, _temp@GOTPAGEOFF]
; ARM64: adrp [[REG1:x[0-9]+]], _message@PAGE
; ARM64: add [[REG2:x[0-9]+]], [[REG1:x[0-9]+]], _message@PAGEOFF
-; ARM64: ldrb w10, {{\[}}[[REG2]]]
-; ARM64: strb w10, {{\[}}[[REG0]]]
-; ARM64: ldrb w10, {{\[}}[[REG2]], #1]
-; ARM64: strb w10, {{\[}}[[REG0]], #1]
-; ARM64: ldrb w10, {{\[}}[[REG2]], #2]
-; ARM64: strb w10, {{\[}}[[REG0]], #2]
-; ARM64: ldrb [[REG3:w[0-9]+]], {{\[}}[[REG2]], #3]
-; ARM64: strb [[REG3]], {{\[}}[[REG0]], #3]
+; ARM64: ldrb w10, [[[REG2]]]
+; ARM64: strb w10, [[[REG0]]]
+; ARM64: ldrb w10, [[[REG2]], #1]
+; ARM64: strb w10, [[[REG0]], #1]
+; ARM64: ldrb w10, [[[REG2]], #2]
+; ARM64: strb w10, [[[REG0]], #2]
+; ARM64: ldrb [[REG3:w[0-9]+]], [[[REG2]], #3]
+; ARM64: strb [[REG3]], [[[REG0]], #3]
; ARM64: ret
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false)
ret void
define float @cp_float() {
; CHECK-LABEL: cp_float
; CHECK: adrp [[REG:x[0-9]+]], {{lCPI[0-9]+_0}}@PAGE
-; CHECK-NEXT: ldr s0, {{\[}}[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF{{\]}}
+; CHECK-NEXT: ldr s0, [[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF]
ret float 0x400921FB60000000
}
define double @cp_double() {
; CHECK-LABEL: cp_double
; CHECK: adrp [[REG:x[0-9]+]], {{lCPI[0-9]+_0}}@PAGE
-; CHECK-NEXT: ldr d0, {{\[}}[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF{{\]}}
+; CHECK-NEXT: ldr d0, [[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF]
ret double 0x400921FB54442D18
}
; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], __PromotedConst@PAGE
; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], __PromotedConst@PAGEOFF
; Destination registers are defined by the ABI
-; PROMOTED-NEXT: ldp q0, q1, {{\[}}[[BASEADDR]]]
-; PROMOTED-NEXT: ldp q2, q3, {{\[}}[[BASEADDR]], #32]
+; PROMOTED-NEXT: ldp q0, q1, [[[BASEADDR]]]
+; PROMOTED-NEXT: ldp q2, q3, [[[BASEADDR]], #32]
; PROMOTED-NEXT: ret
; REGULAR-LABEL: test1:
; the structure
; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
; Destination registers are defined by the ABI
-; REGULAR: ldr q0, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR: ldr q0, [[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR: ldr q1, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR: ldr q1, [[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
; REGULAR: adrp [[PAGEADDR2:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
-; REGULAR: ldr q2, {{\[}}[[PAGEADDR2]], [[CSTLABEL2]]@PAGEOFF]
+; REGULAR: ldr q2, [[[PAGEADDR2]], [[CSTLABEL2]]@PAGEOFF]
; REGULAR: adrp [[PAGEADDR3:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
-; REGULAR: ldr q3, {{\[}}[[PAGEADDR3]], [[CSTLABEL3]]@PAGEOFF]
+; REGULAR: ldr q3, [[[PAGEADDR3]], [[CSTLABEL3]]@PAGEOFF]
; REGULAR-NEXT: ret
entry:
ret %struct.uint8x16x4_t { [4 x <16 x i8>] [<16 x i8> <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, <16 x i8> <i8 32, i8 124, i8 121, i8 120, i8 8, i8 117, i8 -56, i8 113, i8 -76, i8 110, i8 -53, i8 107, i8 7, i8 105, i8 103, i8 102>, <16 x i8> <i8 -24, i8 99, i8 -121, i8 97, i8 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>, <16 x i8> <i8 -104, i8 83, i8 -20, i8 81, i8 81, i8 80, i8 -59, i8 78, i8 73, i8 77, i8 -37, i8 75, i8 122, i8 74, i8 37, i8 73>] }
; PROMOTED-LABEL: test2:
; In stress mode, constant vectors are promoted
; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst.[0-9]+]]@PAGE
-; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
+; PROMOTED: ldr q[[REGNUM:[0-9]+]], [[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
; Destination register is defined by ABI
; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
; The difference is that the address (and thus the space in memory) is not
; shared between constants
; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR: ldr q[[REGNUM:[0-9]+]], [[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
; Destination register is defined by ABI
; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
; REGULAR-NEXT: mla.16b v0, v0, v[[REGNUM]]
; CHECK: .p2align 2
; CHECK:_foo: ; @foo
; CHECK: adrp [[BASE:x[0-9]+]], lCPI0_0@PAGE
-; CHECK: ldr q[[REG:[0-9]+]], {{\[}}[[BASE]], lCPI0_0@PAGEOFF]
+; CHECK: ldr q[[REG:[0-9]+]], [[[BASE]], lCPI0_0@PAGEOFF]
; CHECK: tbl.16b v0, { v0 }, v[[REG]]
; CHECK: ret
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
%tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
%tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+; CHECK: str [[DEST]], [[[BASE]], #80]
%arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
%tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
%tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
%tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
%tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+; CHECK: str [[DEST]], [[[BASE]], #80]
%arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
%tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
%tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
%tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
%tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+; CHECK: str [[DEST]], [[[BASE]], #80]
%arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
%tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
%tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
%tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
%tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+; CHECK: str [[DEST]], [[[BASE]], #80]
%arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
%tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
%tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
%tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
%tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+; CHECK: str [[DEST]], [[[BASE]], #40]
%arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
%tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
%tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
%tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
%tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+; CHECK: str [[DEST]], [[[BASE]], #40]
%arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
%tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
%tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
%tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
%tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+; CHECK: str [[DEST]], [[[BASE]], #40]
%arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
%tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
%tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+; CHECK: str [[DEST]], [[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
%tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
%tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
define void @fct8(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct8:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:d[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <1 x i64>*
%0 = load <1 x i64>, <1 x i64>* %q, align 8
define void @fct9(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct9:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:d[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <2 x i32>*
%0 = load <2 x i32>, <2 x i32>* %q, align 8
define void @fct10(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct10:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:d[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <4 x i16>*
%0 = load <4 x i16>, <4 x i16>* %q, align 8
define void @fct11(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct11:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:d[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <8 x i8>*
%0 = load <8 x i8>, <8 x i8>* %q, align 8
define void @fct12(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct12:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:q[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <2 x i64>*
%0 = load <2 x i64>, <2 x i64>* %q, align 16
define void @fct13(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct13:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:q[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <4 x i32>*
%0 = load <4 x i32>, <4 x i32>* %q, align 16
define void @fct14(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct14:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:q[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <8 x i16>*
%0 = load <8 x i16>, <8 x i16>* %q, align 16
define void @fct15(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct15:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+; CHECK: ldur [[DESTREG:q[0-9]+]], [[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], [[[BASEREG]], #4]
%p = getelementptr inbounds i8, i8* %str, i64 3
%q = bitcast i8* %p to <16 x i8>*
%0 = load <16 x i8>, <16 x i8>* %q, align 16
; CHECK: Precompute_Patch_Values
; CHECK: ldr [[VAL2:q[0-9]+]], [x0, #272]
; CHECK-NEXT: ldr [[VAL:x[0-9]+]], [x0, #288]
-; CHECK-NEXT: stur [[VAL2]], {{\[}}sp, #216]
+; CHECK-NEXT: stur [[VAL2]], [sp, #216]
; CHECK-NEXT: str [[VAL]], [sp, #232]
entry:
%Control_Points = alloca [16 x [3 x double]], align 8
; CHECK-LABEL: test_cmpxchg_8:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxrb [[OLD:w[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], w1, uxtb
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1, uxtb
; CHECK-LABEL: test_cmpxchg_16:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxrh [[OLD:w[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], w1, uxth
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
; CHECK-LABEL: test_cmpxchg_32:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxr [[OLD:w[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], w1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
; CHECK-LABEL: test_cmpxchg_64:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[OLD:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxr [[OLD:x[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], x1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxr [[STATUS:w[0-9]+]], x2, [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
; CHECK-LABEL: test_cmpxchg_128:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD_LO]], x2
; CHECK: cset [[CMP_TMP:w[0-9]+]], ne
; CHECK: cmp [[OLD_HI]], x3
; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
%res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic
; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]]
; CHECK: cset [[CMP_TMP:w[0-9]+]], ne
; CHECK: cmp [[OLD_HI]], [[DESIRED_HI]]
; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [[[ADDR]]]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK-LABEL: get_var_pointer
; CHECK: adrp [[REG1:x[0-9]+]], __imp_var
-; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], :lo12:__imp_var]
+; CHECK: ldr {{x[0-9]+}}, [[[REG1]], :lo12:__imp_var]
; CHECK: ret
define i32 @call_external() {
; CHECK-LABEL: atomic_store_release_8_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #1
-; CHECK-NEXT: stlrb w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlrb w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_release_8_off(i8* %p, i8 %val) #0 {
%tmp0 = getelementptr i8, i8* %p, i32 1
; CHECK-LABEL: atomic_store_release_16_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #2
-; CHECK-NEXT: stlrh w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlrh w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_release_16_off(i16* %p, i16 %val) #0 {
%tmp0 = getelementptr i16, i16* %p, i32 1
; CHECK-LABEL: atomic_store_release_32_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #4
-; CHECK-NEXT: stlr w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlr w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_release_32_off(i32* %p, i32 %val) #0 {
%tmp0 = getelementptr i32, i32* %p, i32 1
; CHECK-LABEL: atomic_store_release_64_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #8
-; CHECK-NEXT: stlr x1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlr x1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_release_64_off(i64* %p, i64 %val) #0 {
%tmp0 = getelementptr i64, i64* %p, i32 1
; CHECK-LABEL: atomic_store_seq_cst_8_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #1
-; CHECK-NEXT: stlrb w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlrb w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_seq_cst_8_off(i8* %p, i8 %val) #0 {
%tmp0 = getelementptr i8, i8* %p, i32 1
; CHECK-LABEL: atomic_store_seq_cst_16_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #2
-; CHECK-NEXT: stlrh w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlrh w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_seq_cst_16_off(i16* %p, i16 %val) #0 {
%tmp0 = getelementptr i16, i16* %p, i32 1
; CHECK-LABEL: atomic_store_seq_cst_32_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #4
-; CHECK-NEXT: stlr w1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlr w1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_seq_cst_32_off(i32* %p, i32 %val) #0 {
%tmp0 = getelementptr i32, i32* %p, i32 1
; CHECK-LABEL: atomic_store_seq_cst_64_off:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: add [[REG0:x[0-9]+]], x0, #8
-; CHECK-NEXT: stlr x1, {{\[}}[[REG0]]]
+; CHECK-NEXT: stlr x1, [[[REG0]]]
; CHECK-NEXT: ret
define void @atomic_store_seq_cst_64_off(i64* %p, i64 %val) #0 {
%tmp0 = getelementptr i64, i64* %p, i32 1
define void @test(i64 %a, i64 %b, i2* %c) {
; CHECK-LABEL: test
; CHECK: and [[REG1:w[0-9]+]], {{w[0-9]+}}, #0x3
-; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}}
+; CHECK-NEXT: strb [[REG1]], [x2]
; CHECK-NEXT: tbz {{w[0-9]+}}, #0,
%1 = trunc i64 %a to i2
%2 = trunc i64 %b to i1
; CHECK-LABEL: cmpxchg_monotonic_32:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: ldaxr w0, [[[ADDR]]]
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp w0, w1
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: ldaxr w0, [[[ADDR]]]
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp w0, w1
; CHECK-LABEL: cmpxchg_seq_cst_64:
; CHECK: mov [[ADDR:x[0-9]+]], x0
; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT: ldaxr x0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: ldaxr x0, [[[ADDR]]]
; CHECK-NEXT: cmp x0, x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT: stlxr [[STATUS]], x2, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT: stlxr [[STATUS]], x2, [[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp x0, x1
; SMALL: bl _fmodf
; LARGE-LABEL: frem_f32
; LARGE: adrp [[REG:x[0-9]+]], _fmodf@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _fmodf@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _fmodf@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = frem float %a, %b
ret float %1
; SMALL: bl _fmod
; LARGE-LABEL: frem_f64
; LARGE: adrp [[REG:x[0-9]+]], _fmod@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _fmod@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _fmod@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = frem double %a, %b
ret double %1
; SMALL: bl _sinf
; LARGE-LABEL: sin_f32
; LARGE: adrp [[REG:x[0-9]+]], _sinf@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _sinf@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _sinf@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call float @llvm.sin.f32(float %a)
ret float %1
; SMALL: bl _sin
; LARGE-LABEL: sin_f64
; LARGE: adrp [[REG:x[0-9]+]], _sin@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _sin@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _sin@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call double @llvm.sin.f64(double %a)
ret double %1
; SMALL: bl _cosf
; LARGE-LABEL: cos_f32
; LARGE: adrp [[REG:x[0-9]+]], _cosf@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _cosf@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _cosf@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call float @llvm.cos.f32(float %a)
ret float %1
; SMALL: bl _cos
; LARGE-LABEL: cos_f64
; LARGE: adrp [[REG:x[0-9]+]], _cos@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _cos@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _cos@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call double @llvm.cos.f64(double %a)
ret double %1
; SMALL: bl _powf
; LARGE-LABEL: pow_f32
; LARGE: adrp [[REG:x[0-9]+]], _powf@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _powf@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _powf@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call float @llvm.pow.f32(float %a, float %b)
ret float %1
; SMALL: bl _pow
; LARGE-LABEL: pow_f64
; LARGE: adrp [[REG:x[0-9]+]], _pow@GOTPAGE
-; LARGE: ldr [[REG]], {{\[}}[[REG]], _pow@GOTPAGEOFF{{\]}}
+; LARGE: ldr [[REG]], [[[REG]], _pow@GOTPAGEOFF]
; LARGE-NEXT: blr [[REG]]
%1 = call double @llvm.pow.f64(double %a, double %b)
ret double %1
; CHECK-LABEL: ldst_double:
; CHECK: adrp [[RD:x[0-9]+]], var_double
-; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+; CHECK-NEXT: ldr {{d[0-9]+}}, [[[RD]], {{#?}}:lo12:var_double]
; CHECK: adrp [[RQ:x[0-9]+]], var_double2
-; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+; CHECK-NEXT: str {{q[0-9]+}}, [[[RQ]], {{#?}}:lo12:var_double2]
}
define dso_local void @ldst_double_tune_a53() #0 {
; CHECK-LABEL: ldst_double_tune_a53:
; CHECK: adrp [[RD:x[0-9]+]], var_double
-; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+; CHECK-NEXT: ldr {{d[0-9]+}}, [[[RD]], {{#?}}:lo12:var_double]
; CHECK-NEXT: adrp [[RQ:x[0-9]+]], var_double2
; CHECK: fcvt
-; CHECK: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+; CHECK: str {{q[0-9]+}}, [[[RQ]], {{#?}}:lo12:var_double2]
}
attributes #0 = { "tune-cpu"="cortex-a53" }
; CHECK-LABEL: ldst_8bit:
; CHECK: adrp [[RB:x[0-9]+]], var_8bit
-; CHECK-NEXT: ldrb {{w[0-9]+}}, {{\[}}[[RB]], {{#?}}:lo12:var_8bit{{\]}}
+; CHECK-NEXT: ldrb {{w[0-9]+}}, [[[RB]], {{#?}}:lo12:var_8bit]
; CHECK: adrp [[RH:x[0-9]+]], var_16bit
-; CHECK-NEXT: strh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+; CHECK-NEXT: strh {{w[0-9]+}}, [[[RH]], {{#?}}:lo12:var_16bit]
}
define dso_local void @ldst_16bit() {
; CHECK-LABEL: ldst_16bit:
; CHECK: adrp [[RH:x[0-9]+]], var_16bit
-; CHECK-NEXT: ldrh {{w[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_16bit{{\]}}
+; CHECK-NEXT: ldrh {{w[0-9]+}}, [[[RH]], {{#?}}:lo12:var_16bit]
; CHECK: adrp [[RW:x[0-9]+]], var_32bit
-; CHECK-NEXT: str {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+; CHECK-NEXT: str {{w[0-9]+}}, [[[RW]], {{#?}}:lo12:var_32bit]
}
define dso_local void @ldst_32bit() {
; CHECK-LABEL: ldst_32bit:
; CHECK: adrp [[RW:x[0-9]+]], var_32bit
-; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[RW]], {{#?}}:lo12:var_32bit{{\]}}
+; CHECK-NEXT: ldr {{w[0-9]+}}, [[[RW]], {{#?}}:lo12:var_32bit]
; CHECK: adrp [[RL:x[0-9]+]], var_64bit
-; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+; CHECK-NEXT: str {{x[0-9]+}}, [[[RL]], {{#?}}:lo12:var_64bit]
}
define dso_local void @ldst_64bit() {
; CHECK-LABEL: ldst_64bit:
; CHECK: adrp [[RL:x[0-9]+]], var_64bit
-; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
+; CHECK-NEXT: ldr {{x[0-9]+}}, [[[RL]], {{#?}}:lo12:var_64bit]
; CHECK: adrp [[RQ:x[0-9]+]], var_128bit
; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit
}
; CHECK-LABEL: ldst_half:
; CHECK: adrp [[RH:x[0-9]+]], var_half
-; CHECK-NEXT: ldr {{h[0-9]+}}, {{\[}}[[RH]], {{#?}}:lo12:var_half{{\]}}
+; CHECK-NEXT: ldr {{h[0-9]+}}, [[[RH]], {{#?}}:lo12:var_half]
; CHECK: adrp [[RF:x[0-9]+]], var_float
-; CHECK-NEXT: str {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+; CHECK-NEXT: str {{s[0-9]+}}, [[[RF]], {{#?}}:lo12:var_float]
}
define dso_local void @ldst_float() {
; CHECK-LABEL: ldst_float:
; CHECK: adrp [[RF:x[0-9]+]], var_float
-; CHECK-NEXT: ldr {{s[0-9]+}}, {{\[}}[[RF]], {{#?}}:lo12:var_float{{\]}}
+; CHECK-NEXT: ldr {{s[0-9]+}}, [[[RF]], {{#?}}:lo12:var_float]
; CHECK: adrp [[RD:x[0-9]+]], var_double
-; CHECK-NEXT: str {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+; CHECK-NEXT: str {{d[0-9]+}}, [[[RD]], {{#?}}:lo12:var_double]
}
define dso_local void @ldst_double() {
; CHECK-LABEL: ldst_double:
; CHECK: adrp [[RD:x[0-9]+]], var_double
-; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
+; CHECK-NEXT: ldr {{d[0-9]+}}, [[[RD]], {{#?}}:lo12:var_double]
; CHECK: adrp [[RQ:x[0-9]+]], var_double2
-; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
+; CHECK-NEXT: str {{q[0-9]+}}, [[[RQ]], {{#?}}:lo12:var_double2]
}
; frame, covering the locals.
; CHECK-LABEL: fn:
; CHECK: adrp [[REG:x[0-9]+]], __stack_chk_guard
-; CHECK-NEXT: ldr [[REG]], {{\[}}[[REG]], :lo12:__stack_chk_guard]
+; CHECK-NEXT: ldr [[REG]], [[[REG]], :lo12:__stack_chk_guard]
; CHECK-NEXT: stur [[REG]], [x29, #-8]
; CHECK: addvl sp, sp, #-2
; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-1
; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
-; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
+; CHECK-DAG: str [[VAL]], [[[ADDR]]]
; CHECK-DAG: addvl x0, x29, #-2
; CHECK: bl ptr_fn
define void @call_ptr_strong() #1 {
; CHECK: addvl sp, sp, #-3
; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-1
; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
-; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
+; CHECK-DAG: str [[VAL]], [[[ADDR]]]
; CHECK-DAG: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [x29, #-2, mul vl]
; CHECK: bl val_fn
; CHECK: addvl x0, x29, #-3
; CHECK: addvl sp, sp, #-1
; CHECK-NOT: __stack_chk_guard
; CHECK: addvl [[REG:x[0-9]+]], x29, #-11
-; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, {{\[}}[[REG]], #-8, mul vl]
+; CHECK: st1w { {{z[0-9]+.s}} }, {{p[0-9]+}}, [[[REG]], #-8, mul vl]
define void @callee_save(<vscale x 4 x float> %x) #0 {
entry:
%x.addr = alloca <vscale x 4 x float>, align 16
; CHECK: addvl sp, sp, #-2
; CHECK-DAG: addvl [[ADDR:x[0-9]+]], x29, #-19
; CHECK-DAG: ldr [[VAL:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
-; CHECK-DAG: str [[VAL]], {{\[}}[[ADDR]]]
+; CHECK-DAG: str [[VAL]], [[[ADDR]]]
; CHECK-DAG: addvl [[ADDR2:x[0-9]+]], x29, #-12
-; CHECK-DAG: st1w { z0.s }, p0, {{\[}}[[ADDR2]], #-8, mul vl]
+; CHECK-DAG: st1w { z0.s }, p0, [[[ADDR2]], #-8, mul vl]
define void @callee_save_strong(<vscale x 4 x float> %x) #1 {
entry:
%x.addr = alloca <vscale x 4 x float>, align 16
; Stack guard is placed below the SVE stack area
; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
; CHECK-DAG: addvl [[STACK_GUARD_POS:x[0-9]+]], x29, #-2
-; CHECK-DAG: stur [[STACK_GUARD]], {{\[}}[[STACK_GUARD_POS]], #-8]
+; CHECK-DAG: stur [[STACK_GUARD]], [[[STACK_GUARD_POS]], #-8]
; char_arr is below the stack guard
; CHECK-DAG: sub [[CHAR_ARR_1:x[0-9]+]], x29, #16
; CHECK-DAG: addvl [[CHAR_ARR_2:x[0-9]+]], [[CHAR_ARR_1]], #-2
-; CHECK-DAG: strb wzr, {{\[}}[[CHAR_ARR_2]]]
+; CHECK-DAG: strb wzr, [[[CHAR_ARR_2]]]
; large1 is accessed via a virtual base register
; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12
-; CHECK-DAG: stp x0, x0, {{\[}}[[LARGE1]]]
+; CHECK-DAG: stp x0, x0, [[[LARGE1]]]
; large2 is at the bottom of the stack
; CHECK-DAG: stp x0, x0, [sp]
; Stack guard is placed at the top of the SVE stack area
; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
; CHECK-DAG: addvl [[STACK_GUARD_POS:x[0-9]+]], x29, #-1
-; CHECK-DAG: str [[STACK_GUARD]], {{\[}}[[STACK_GUARD_POS]]]
+; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_POS]]]
; char_arr is below the SVE stack area
; CHECK-DAG: addvl [[CHAR_ARR:x[0-9]+]], x29, #-3
-; CHECK-DAG: sturb wzr, {{\[}}[[CHAR_ARR]], #-8]
+; CHECK-DAG: sturb wzr, [[[CHAR_ARR]], #-8]
; large1 is accessed via a virtual base register
; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12
-; CHECK-DAG: stp x0, x0, {{\[}}[[LARGE1]], #8]
+; CHECK-DAG: stp x0, x0, [[[LARGE1]], #8]
; large2 is at the bottom of the stack
; CHECK-DAG: stp x0, x0, [sp, #8]
declare void @_Z7CapturePi(i32*)
; ANDROID-AARCH64: mrs [[A:.*]], TPIDR_EL0
-; ANDROID-AARCH64: ldr [[B:.*]], {{\[}}[[A]], #40]
+; ANDROID-AARCH64: ldr [[B:.*]], [[[A]], #40]
; ANDROID-AARCH64: str [[B]], [sp,
-; ANDROID-AARCH64: ldr [[C:.*]], {{\[}}[[A]], #40]
+; ANDROID-AARCH64: ldr [[C:.*]], [[[A]], #40]
; ANDROID-AARCH64: ldr [[D:.*]], [sp,
; ANDROID-AARCH64: cmp [[C]], [[D]]
; FUCHSIA-AARCH64-USER: mrs [[A:.*]], TPIDR_EL0
; FUCHSIA-AARCH64-KERNEL: mrs [[A:.*]], TPIDR_EL1
-; FUCHSIA-AARCH64-COMMON: ldur [[B:.*]], {{\[}}[[A]], #-16]
+; FUCHSIA-AARCH64-COMMON: ldur [[B:.*]], [[[A]], #-16]
; FUCHSIA-AARCH64-COMMON: str [[B]], [sp,
-; FUCHSIA-AARCH64-COMMON: ldur [[C:.*]], {{\[}}[[A]], #-16]
+; FUCHSIA-AARCH64-COMMON: ldur [[C:.*]], [[[A]], #-16]
; FUCHSIA-AARCH64-COMMON: ldr [[D:.*]], [sp,
; FUCHSIA-AARCH64-COMMON: cmp [[C]], [[D]]
; ALWAYS-DAG: ldg [[PA:x.*]], [x{{.*}}]
; ALWAYS-DAG: ldrb [[B:w.*]], [sp]
-; ALWAYS-DAG: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
+; ALWAYS-DAG: ldrb [[A:w.*]], [[[PA]]]
; COMMON: ret
; DARWIN: foo2
; DARWIN: adrp [[R0:x[0-9]+]], ___stack_chk_guard@GOTPAGE
-; DARWIN: ldr [[R1:x[0-9]+]], {{\[}}[[R0]], ___stack_chk_guard@GOTPAGEOFF{{\]}}
-; DARWIN: ldr {{x[0-9]+}}, {{\[}}[[R1]]{{\]}}
+; DARWIN: ldr [[R1:x[0-9]+]], [[[R0]], ___stack_chk_guard@GOTPAGEOFF]
+; DARWIN: ldr {{x[0-9]+}}, [[[R1]]]
; PIC-LINUX: foo2
; PIC-LINUX: adrp [[R0:x[0-9]+]], :got:__stack_chk_guard
-; PIC-LINUX: ldr [[R1:x[0-9]+]], {{\[}}[[R0]], :got_lo12:__stack_chk_guard{{\]}}
-; PIC-LINUX: ldr {{x[0-9]+}}, {{\[}}[[R1]]{{\]}}
+; PIC-LINUX: ldr [[R1:x[0-9]+]], [[[R0]], :got_lo12:__stack_chk_guard]
+; PIC-LINUX: ldr {{x[0-9]+}}, [[[R1]]]
; STATIC-LARGE: foo2
; STATIC-LARGE: movz [[R0:x[0-9]+]], #:abs_g0_nc:__stack_chk_guard
; STATIC-LARGE: movk [[R0]], #:abs_g1_nc:__stack_chk_guard
; STATIC-LARGE: movk [[R0]], #:abs_g2_nc:__stack_chk_guard
; STATIC-LARGE: movk [[R0]], #:abs_g3:__stack_chk_guard
-; STATIC-LARGE: ldr {{x[0-9]+}}, {{\[}}[[R0]]{{\]}}
+; STATIC-LARGE: ldr {{x[0-9]+}}, [[[R0]]]
; STATIC-SMALL: foo2
; STATIC-SMALL: adrp [[R0:x[0-9]+]], __stack_chk_guard
-; STATIC-SMALL: ldr {{x[0-9]+}}, {{\[}}[[R0]], :lo12:__stack_chk_guard{{\]}}
+; STATIC-SMALL: ldr {{x[0-9]+}}, [[[R0]], :lo12:__stack_chk_guard]
; FALLBACK-NOT: remark:{{.*}}llvm.lifetime.end
; FALLBACK-NOT: remark:{{.*}}llvm.lifetime.start
entry:
; CHECK-LABEL: stgp1004:
; CHECK: add [[R:x[0-9]+]], x2, #1004
-; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: stgp x0, x1, [[[R]]]
; CHECK: ret
%q = getelementptr i8, i8* %p, i32 1004
call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
entry:
; CHECK-LABEL: stgp1024:
; CHECK: add [[R:x[0-9]+]], x2, #1024
-; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: stgp x0, x1, [[[R]]]
; CHECK: ret
%q = getelementptr i8, i8* %p, i32 1024
call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
; OPTAARCH64-DAG: ldr x20, [x20]
; OPTAARCH64-DAG: mov [[CSREG:x[1-9].*]], x8
; OPTAARCH64: bl {{_?}}thisreturn_attribute
-; OPTAARCH64: str x0, {{\[}}[[CSREG]]
+; OPTAARCH64: str x0, [[[CSREG]]
; OPTAARCH64: ret
; OPTARM64_32-LABEL: swiftself_nothisreturn:
; OPTARM64_32-DAG: ldr w20, [x20]
; OPTARM64_32-DAG: mov [[CSREG:x[1-9].*]], x8
; OPTARM64_32: bl {{_?}}thisreturn_attribute
-; OPTARM64_32: str w0, {{\[}}[[CSREG]]
+; OPTARM64_32: str w0, [[[CSREG]]
; OPTARM64_32: ret
define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret(i8*), i8** noalias nocapture readonly swiftself) {
entry:
define i32* @global_addr() #0 {
; CHECK-PIC: global_addr:
; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
- ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:global]
+ ; CHECK-PIC: ldr x0, [[[REG]], :got_lo12:global]
; CHECK-PIC: ret
ret i32* @global
define i32 @global_load() #0 {
; CHECK-SELECTIONDAGISEL: global_load:
; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
- ; CHECK-SELECTIONDAGISEL: ldr w0, {{\[}}[[REG]], :lo12:global{{\]}}
+ ; CHECK-SELECTIONDAGISEL: ldr w0, [[[REG]], :lo12:global]
; CHECK-SELECTIONDAGISEL: ret
; CHECK-GLOBALISEL: global_load:
; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
- ; CHECK-GLOBALISEL: ldr w0, {{\[}}[[REG]]{{\]}}
+ ; CHECK-GLOBALISEL: ldr w0, [[[REG]]]
; CHECK-GLOBALISEL: ret
; CHECK-PIC: global_load:
; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
- ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global]
- ; CHECK-PIC: ldr w0, {{\[}}[[REG]]{{\]}}
+ ; CHECK-PIC: ldr [[REG]], [[[REG]], :got_lo12:global]
+ ; CHECK-PIC: ldr w0, [[[REG]]]
; CHECK-PIC: ret
%load = load i32, i32* @global
define void @global_store() #0 {
; CHECK-SELECTIONDAGISEL: global_store:
; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
- ; CHECK-SELECTIONDAGISEL: str wzr, {{\[}}[[REG]], :lo12:global{{\]}}
+ ; CHECK-SELECTIONDAGISEL: str wzr, [[[REG]], :lo12:global]
; CHECK-SELECTIONDAGISEL: ret
; CHECK-GLOBALISEL: global_store:
; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
- ; CHECK-GLOBALISEL: str wzr, {{\[}}[[REG]]{{\]}}
+ ; CHECK-GLOBALISEL: str wzr, [[[REG]]]
; CHECK-GLOBALISEL: ret
; CHECK-PIC: global_store:
; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
- ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global]
- ; CHECK-PIC: str wzr, {{\[}}[[REG]]{{\]}}
+ ; CHECK-PIC: ldr [[REG]], [[[REG]], :got_lo12:global]
+ ; CHECK-PIC: str wzr, [[[REG]]]
; CHECK-PIC: ret
store i32 0, i32* @global
define void ()* @func_addr() #0 {
; CHECK-PIC: func_addr:
; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:func
- ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:func]
+ ; CHECK-PIC: ldr x0, [[[REG]], :got_lo12:func]
; CHECK-PIC: ret
ret void ()* @func
define i32 @global_load() #0 {
; CHECK-SELECTIONDAGISEL: global_load:
; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
- ; CHECK-SELECTIONDAGISEL: ldr w0, {{\[}}[[REG]], :lo12:global{{\]}}
+ ; CHECK-SELECTIONDAGISEL: ldr w0, [[[REG]], :lo12:global]
; CHECK-SELECTIONDAGISEL: ret
; CHECK-GLOBALISEL: global_load:
; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
- ; CHECK-GLOBALISEL: ldr w0, {{\[}}[[REG]]{{\]}}
+ ; CHECK-GLOBALISEL: ldr w0, [[[REG]]]
; CHECK-GLOBALISEL: ret
%load = load i32, i32* @global
define void @global_store() #0 {
; CHECK-SELECTIONDAGISEL: global_store:
; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
- ; CHECK-SELECTIONDAGISEL: str wzr, {{\[}}[[REG]], :lo12:global{{\]}}
+ ; CHECK-SELECTIONDAGISEL: str wzr, [[[REG]], :lo12:global]
; CHECK-SELECTIONDAGISEL: ret
; CHECK-GLOBALISEL: global_store:
; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
- ; CHECK-GLOBALISEL: str wzr, {{\[}}[[REG]]{{\]}}
+ ; CHECK-GLOBALISEL: str wzr, [[[REG]]]
; CHECK-GLOBALISEL: ret
store i32 0, i32* @global
; CHECK-LABEL: getVar
; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index
; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88]
-; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index]
+; CHECK: ldr w[[TLS_INDEX:[0-9]+]], [[[TLS_INDEX_ADDR]], :lo12:_tls_index]
-; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
+; CHECK: ldr [[TLS:x[0-9]+]], [[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar
-; CHECK: ldr w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar{{\]}}
+; CHECK: ldr w0, [[[TLS]], :secrel_lo12:tlsVar]
; CHECK-LABEL: getPtr
; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index
; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88]
-; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index]
+; CHECK: ldr w[[TLS_INDEX:[0-9]+]], [[[TLS_INDEX_ADDR]], :lo12:_tls_index]
-; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
+; CHECK: ldr [[TLS:x[0-9]+]], [[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar
; CHECK: add x0, [[TLS]], :secrel_lo12:tlsVar
; CHECK-LABEL: setVar
; CHECK: adrp [[TLS_INDEX_ADDR:x[0-9]+]], _tls_index
; CHECK: ldr [[TLS_POINTER:x[0-9]+]], [x18, #88]
-; CHECK: ldr w[[TLS_INDEX:[0-9]+]], {{\[}}[[TLS_INDEX_ADDR]], :lo12:_tls_index]
+; CHECK: ldr w[[TLS_INDEX:[0-9]+]], [[[TLS_INDEX_ADDR]], :lo12:_tls_index]
-; CHECK: ldr [[TLS:x[0-9]+]], {{\[}}[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
+; CHECK: ldr [[TLS:x[0-9]+]], [[[TLS_POINTER]], x[[TLS_INDEX]], lsl #3]
; CHECK: add [[TLS]], [[TLS]], :secrel_hi12:tlsVar
-; CHECK: str w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar{{\]}}
+; CHECK: str w0, [[[TLS]], :secrel_lo12:tlsVar]
; CHECK-LABEL: getVar8
; CHECK: add [[TLS:x[0-9]+]], [[TLS]], :secrel_hi12:tlsVar8
-; CHECK: ldrb w0, {{\[}}[[TLS]], :secrel_lo12:tlsVar8{{\]}}
+; CHECK: ldrb w0, [[[TLS]], :secrel_lo12:tlsVar8]
; CHECK-LABEL: getVar64
; CHECK: add [[TLS:x[0-9]+]], [[TLS]], :secrel_hi12:tlsVar64
-; CHECK: ldr x0, {{\[}}[[TLS]], :secrel_lo12:tlsVar64{{\]}}
+; CHECK: ldr x0, [[[TLS]], :secrel_lo12:tlsVar64]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, i64 addrspace(1)* %out
; GCN-LABEL: {{^}}test_load1_mfma_store1:
; GCN: global_load_dword a{{[0-9]+}}, v{{[0-9:]+}}, s[{{[0-9:]+}}]
; GCN-NOT: v_accvgpr_read
-; GCN: v_mfma_f32_32x32x1f32 a{{\[}}[[N:[0-9]+]]:
+; GCN: v_mfma_f32_32x32x1f32 a[[[N:[0-9]+]]:
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 2
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_mfma_i32_4x4x4i8 a{{\[}}[[N:[0-9]+]]:
+; GCN: v_mfma_i32_4x4x4i8 a[[[N:[0-9]+]]:
; GCN: v_accvgpr_read_b32 [[V:v[0-9]+]], a[[N]]{{$}}
; GCN: global_atomic_add v{{[0-9]+}}, v{{[0-9:]+}}, [[V]], s[{{[0-9:]+}}] glc
; GCN: global_store_dword v{{[0-9]+}}, v{{[0-9]+}},
; GCN-LABEL: {{^}}test_atomic_mfma_4xi32_atomic64_store:
; GCN: global_atomic_sub_x2 v[{{[0-9:]+}}], v{{[0-9:]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] glc
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_mfma_i32_4x4x4i8 a{{\[}}[[N:[0-9]+]]:
+; GCN: v_mfma_i32_4x4x4i8 a[[[N:[0-9]+]]:
; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
; GCN: global_atomic_add_x2 v[{{[0-9:]+}}], v{{[0-9:]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] glc
; GCN-LABEL: {{^}}test_load_mfma_ds2_store:
; GCN-DAG: ds_read_b128 [[IN:a\[[0-9:]+\]]], v{{[0-9:]+}}
; GCN-NOT: v_accvgpr_write
-; GCN-DAG: v_mfma_i32_4x4x4i8 a{{\[}}[[N:[0-9]+]]:{{[0-9]+}}], v{{[0-9:]+}}, v{{[0-9:]+}}, [[IN]]
+; GCN-DAG: v_mfma_i32_4x4x4i8 a[[[N:[0-9]+]]:{{[0-9]+}}], v{{[0-9:]+}}, v{{[0-9:]+}}, [[IN]]
; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: v_accvgpr_read
; GCN: ds_write_b32 v{{[0-9]+}}, a[[N]] offset:128
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
-; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; VI: buffer_store_dwordx2 v[[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; VI-NEXT: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @v_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
; GFX9PLUS: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9PLUS-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9PLUS-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9PLUS: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}
+; GFX9PLUS: buffer_store_dwordx2 v[[[ELT0]]:[[ELT1]]]
; VI: flat_load_dword v[[A:[0-9]+]]
; VI: flat_load_dword v[[B:[0-9]+]]
; VI: v_add_u16_sdwa v[[ADD_HI:[0-9]+]], v[[A]], v[[B]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
; VI-NOT: shl
-; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
+; VI: buffer_store_dwordx2 v[[[ADD_LO]]:[[ADD_HI]]]
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
; GFX9PLUS: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9PLUS-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16
; GFX9PLUS-DAG: v_ashrrev_i32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9PLUS: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}
+; GFX9PLUS: buffer_store_dwordx2 v[[[ELT0]]:[[ELT1]]]
; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_add_u16_e32
; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN-NEXT: v_addc_u32_e32 v[[HI:[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]],
+; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]],
define amdgpu_kernel void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %inA, i128 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%a_ptr = getelementptr i128, i128 addrspace(1)* %inA, i32 %tid
; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
-; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
-; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32*
store volatile i32 7, i32* %stof
; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
-; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
-; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
+; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
+; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(1)* %ptr to i32*
store volatile i32 7, i32* %stof
; no-op
; HSA-LABEl: {{^}}use_constant_to_flat_addrspacecast:
-; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
+; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
-; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
+; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
}
; HSA-LABEl: {{^}}use_constant_to_global_addrspacecast:
-; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
+; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
-; CI: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
+; CI: {{flat|global}}_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}
+; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]]
define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
%ld = load volatile i32, i32 addrspace(1)* %stof
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
-; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; CI-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
+; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
+; CI-DAG: v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
; CI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; CI-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
-; GFX9-DAG: s_cmp_lg_u64 s{{\[}}[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]{{\]}}, 0
+; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
-; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; CI-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
+; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
+; CI-DAG: v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
; CI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; CI-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
-; GFX9-DAG: s_cmp_lg_u64 s{{\[}}[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]{{\]}}, 0
+; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
-; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
+; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
-; CI: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
+; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
-; GFX9: global_store_dword [[ZERO]], [[ZERO]], s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]$}}
+; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
-; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
-; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
+; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
+; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
load volatile i32, i32 addrspace(4)* %ftos
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* null to i32*
store volatile i32 7, i32* %cast
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
store volatile i32 7, i32* %cast
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(5)* null to i32*
store volatile i32 7, i32* %cast
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32*
store volatile i32 7, i32* %cast
; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
-; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}}
+; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
-; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}}
+; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
%ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr
%addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)*
; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
-; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}}
+; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
-; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}}
+; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
%ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr
%addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)*
; GCN: s_load_dword [[PTR:s[0-9]+]],
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
-; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspace(6)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(6)* %ptr to i32*
%load = load volatile i32, i32* %stof
; GCN: s_load_dword [[PTR:s[0-9]+]],
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
-; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(i32 addrspace(6)* %ptr) #3 {
%stof = addrspacecast i32 addrspace(6)* %ptr to i32*
%load = load volatile i32, i32* %stof
; GCN-LABEL: {{^}}alignbit_shr_pat:
; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
-; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
; GCN-LABEL: {{^}}alignbit_shr_pat_v:
; GCN-DAG: load_dword v[[SHR:[0-9]+]],
-; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
define amdgpu_kernel void @alignbit_shr_pat_v(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
}
; GCN-LABEL: {{^}}alignbit_shr_pat_const30:
-; GCN: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
define amdgpu_kernel void @alignbit_shr_pat_const30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
declare i32 @llvm.amdgcn.readfirstlane(i32)
; GCN-LABEL: readfirstlane_uniform
-; GCN: s_load_dwordx2 s{{\[}}[[IN_ADDR:[0-9]+]]:1{{\]}}, s[4:5], 0x0
+; GCN: s_load_dwordx2 s[[[IN_ADDR:[0-9]+]]:1], s[4:5], 0x0
; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0
; GCN: s_add_u32 s[[LOAD_ADDR:[0-9]+]], s[[IN_ADDR]], s[[SCALAR]]
-; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[LOAD_ADDR]]
+; GCN: s_load_dword s{{[0-9]+}}, s[[[LOAD_ADDR]]
define amdgpu_kernel void @readfirstlane_uniform(float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly) {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
; where the high half of the address comes from s_getpc.
; PAL-LABEL: {{^}}scratch:
-; PAL: s_getpc_b64 s{{\[}}[[GITPTR:[0-9]+]]:
+; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
; PAL: s_mov_b32 s[[GITPTR]], s0
-; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
+; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
+; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) {
entry:
; PAL-LABEL: {{^}}scratch2:
; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
+; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
+; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) #0 {
entry:
; PAL-LABEL: {{^}}scratch2_cs:
; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; CI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x4
-; VI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x10
-; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]:
+; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
+; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
+; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
entry:
; than s0.
; GCN-LABEL: {{^}}_amdgpu_hs_main:
-; GCN: s_getpc_b64 s{{\[}}[[GITPTR:[0-9]+]]:
+; GCN: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
; PREGFX9: s_mov_b32 s[[GITPTR]], s0
; GFX9: s_mov_b32 s[[GITPTR]], s8
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
-; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[KLO]]:[[KHI]]]
define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and0 = and i64 %a, 549756338176
%and1 = and i64 %b, 549756338176
}
; FUNC-LABEL: {{^}}v_and_multi_use_constant_i64:
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
+; SI-DAG: buffer_load_dwordx2 v[[[LO0:[0-9]+]]:[[HI0:[0-9]+]]]
+; SI-DAG: buffer_load_dwordx2 v[[[LO1:[0-9]+]]:[[HI1:[0-9]+]]]
; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207{{$}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO0]]
}
; FUNC-LABEL: {{^}}v_and_multi_use_inline_imm_i64:
-; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
+; SI: buffer_load_dwordx2 v[[[LO0:[0-9]+]]:[[HI0:[0-9]+]]]
; SI-NOT: and
-; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
+; SI: buffer_load_dwordx2 v[[[LO1:[0-9]+]]:[[HI1:[0-9]+]]]
; SI-NOT: and
; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
; SI-NOT: and
-; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
+; SI: buffer_store_dwordx2 v[[[RESLO0]]
+; SI: buffer_store_dwordx2 v[[[RESLO1]]
define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load volatile i64, i64 addrspace(1)* %aptr
%b = load volatile i64, i64 addrspace(1)* %aptr
; FIXME: Should be able to reduce load width
; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
-; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: {{buffer|flat}}_load_dwordx2 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; SI-NOT: and
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
-; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[VAL_LO]]:[[VAL_HI]]]
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; SICIVI-DAG: s_mov_b32 m0
; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SICI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SICI-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GFX89-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
+; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVCMP]]:[[HIVCMP]]], v[[[LOSWAPV]]:[[HISWAPV]]] offset:32
; GCN: [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
; SICIVI-DAG: s_mov_b32 m0
; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SICI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SICI-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
-; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GFX89-DAG: s_load_dwordx2 s[[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
+; GCN: ds_cmpst_b64 [[VPTR]], v[[[LOVCMP]]:[[HIVCMP]]], v[[[LOSWAPV]]:[[HISWAPV]]] offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
; GCN-LABEL: add_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[value:[0-9]+]], s[[popcount]], 5
; GCN: v_mov_b32_e32 v[[data:[0-9]+]], s[[value]]
; GCN: buffer_atomic_add v[[data]]
; GCN-LABEL: add_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_add v[[value]]
; GCN-LABEL: sub_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[value:[0-9]+]], s[[popcount]], 5
; GCN: v_mov_b32_e32 v[[data:[0-9]+]], s[[value]]
; GCN: buffer_atomic_sub v[[data]]
; GCN-LABEL: sub_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_sub v[[value]]
; GCN-LABEL: add_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[popcount]], s[[popcount]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]]
; GCN: buffer_atomic_add v[[value]]
; GCN-LABEL: add_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_add v[[value]]
; GCN-LABEL: sub_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[popcount]], s[[popcount]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]]
; GCN: buffer_atomic_sub v[[value]]
; GCN-LABEL: sub_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_sub v[[value]]
; GCN-LABEL: add_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[popcount]], s[[popcount]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]]
; GCN: buffer_atomic_add v[[value]]
; GCN-LABEL: add_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_add v[[value]]
; GCN-LABEL: sub_i32_constant:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[popcount]], s[[popcount]], 5
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]]
; GCN: buffer_atomic_sub v[[value]]
; GCN-LABEL: sub_i32_uniform:
; GCN32: s_mov_b32 s[[exec_lo:[0-9]+]], exec_lo
-; GCN64: s_mov_b64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, exec
+; GCN64: s_mov_b64 s[[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]], exec
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
; GCN64: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt]], s[[exec_hi]], v[[mbcnt]]
; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc{{(_lo)?}}, 0, v[[mbcnt]]
; GCN: s_and_saveexec_b{{32|64}} s[[exec:\[?[0-9:]+\]?]], vcc
; GCN32: s_bcnt1_i32_b32 s[[popcount:[0-9]+]], s[[exec_lo]]
-; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
+; GCN64: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s[[[exec_lo]]:[[exec_hi]]]
; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GCN: buffer_atomic_sub v[[value]]
; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
-; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
+; GCN: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx = add i32 %x, %id
; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], {{[^,]+}}, v[[BFE]]
; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
+; VI-SDWA: v_lshlrev_b64 v[[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v[[[ADDRBASE1]]:{{[^\]+}}]
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
-; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
+; CI: v_lshl_b64 v[[[ADDRLO:[0-9]+]]:{{[^\]+}}], v[[[AND]]:{{[^\]+}}], 2
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
-; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
+; GCN: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx = add i32 %x, %id
}
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
-; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
+; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]]
}
; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
-; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
+; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
}
; GCN-LABEL: {{^}}s_sbfe_sub_i32:
-; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
+; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]]
}
; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
-; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
+; GCN: s_load_dwordx2 s[[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]], s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
; GCN-LABEL: {{^}}materialize_0_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_0_i64(i64 addrspace(1)* %out) {
store i64 0, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_neg1_i64:
; GCN: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN: v_mov_b32_e32 v[[HIK:[0-9]+]], v[[LOK]]{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_neg1_i64(i64 addrspace(1)* %out) {
store i64 -1, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_signbit_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 1{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_signbit_i64(i64 addrspace(1)* %out) {
store i64 -9223372036854775808, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_neg16_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], -16{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_neg16_i64(i64 addrspace(1)* %out) {
store i64 1152921504606846975, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_neg17_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0xf7ffffff{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_neg17_i64(i64 addrspace(1)* %out) {
store i64 -576460752303423489, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_64_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HIK:[0-9]+]], 64{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_64_i64(i64 addrspace(1)* %out) {
store i64 144115188075855872, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_65_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0x82000000{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_65_i64(i64 addrspace(1)* %out) {
store i64 -9079256848778919936, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_3_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], -2.0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_3_i64(i64 addrspace(1)* %out) {
store i64 -4611686018427387904, i64 addrspace(1)* %out
ret void
; GCN-LABEL: {{^}}materialize_rev_1.0_i64:
; GCN-DAG: v_mov_b32_e32 v[[LOK:[0-9]+]], 0x1fc{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIK:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOK]]:[[HIK]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LOK]]:[[HIK]]]
define amdgpu_kernel void @materialize_rev_1.0_i64(i64 addrspace(1)* %out) {
store i64 508, i64 addrspace(1)* %out
ret void
# GCN: .LBB0_5: ; %bb
# GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
# GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
-# GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+# GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
# GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
# GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.LBB0_4-[[POST_GETPC]])&4294967295
# GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.LBB0_4-[[POST_GETPC]])>>32
; GCN-NEXT: s_cbranch_scc0 [[LONGBB:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[LONGBB]]:
; GCN-NEXT: ;;#ASMSTART
; GCN: s_cbranch_vccz [[LONGBB:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[LONGBB]]:
; GCN: v_nop_e64
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb2
; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOPBB]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOPBB]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[ENDBB]]:
; GCN-NEXT: s_endpgm
; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC0_LO]]:[[PC0_HI]]]
; GCN: [[BB1]]:
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
entry:
; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]_[0-9]+]]
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}:
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC1_LO]]:[[PC1_HI]]]
; GCN-NEXT: [[BB2]]: ; %bb2
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_cbranch_execnz [[IF:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %entry
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB2:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[IF]]: ; %if
; GCN: s_cmp_lg_u32
; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
-; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (.L[[LOOP_BODY]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (.L[[LOOP_BODY]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_BR_DEST0:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LONG_BR_DEST0]]-[[POST_GETPC]])>>32
-; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[LONG_BR_0]]:
; GCN: [[LONG_BR_DEST0]]:
; R600-NOT: MOV
; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; GFX678: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
+; GFX678: buffer_store_dwordx2 v[[[X]]:[[Y]]]
; GFX10: global_store_dwordx2 v2, v[0:1], s[0:1]
define amdgpu_kernel void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
-; GFX678: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
+; GFX678: buffer_store_dwordx4 v[[[X]]:[[W]]]
; GFX10: global_store_dwordx4 v4, v[0:3], s[0:1]
define amdgpu_kernel void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
; MESA-DAG: s_mov_b64 s[0:1], s[36:37]
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
call void @external_void_func_i1(i1 true)
; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
; MESA-DAG: s_mov_b32 s32, 0{{$}}
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+12
; GCN-NEXT: v_bfe_i32 v0, [[VAR]], 0, 1
-; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
; MESA: buffer_load_ubyte [[VAL:v[0-9]+]]
; MESA-DAG: s_mov_b32 s32, 0{{$}}
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+12
; GCN-NEXT: v_and_b32_e32 v0, 1, [[VAL]]
-; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 0x7b
; GCN-DAG: s_mov_b32 s32, 0{{$}}
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
call void @external_void_func_i8(i8 123)
; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext:
; GCN-DAG: buffer_load_sbyte [[VAL:v[0-9]+]]
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+12
; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s_waitcnt
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext:
; GCN-DAG: buffer_load_ubyte [[VAL:v[0-9]+]]
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+12
; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s_waitcnt
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext:
; GCN-DAG: buffer_load_sshort [[VAL:v[0-9]+]]
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+12
; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s_waitcnt
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext:
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+12
; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s_waitcnt
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+12
; GCN-DAG: v_mov_b32_e32 v0, 42
; GCN-DAG: s_mov_b32 s32, 0
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
call void @external_void_func_i32(i32 42)
; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm:
; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 v1, 0{{$}}
-; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+12
-; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
+; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
call void @external_void_func_i64(i64 123)
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
-; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
-; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]]
+; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
%asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
store volatile i32 0, i32* %asc
; on the leftover AssertZext's ValueType operand.
; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
-; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC_LO]], g1@gotpcrel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+12
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
}
; GCN-LABEL: {{^}}v_clamp_add_src_v2f32:
-; GCN: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
+; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[A:[0-9]+]]:[[B:[0-9]+]]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[A]], 1.0 clamp{{$}}
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[B]], 1.0 clamp{{$}}
define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %aptr) #0 {
; GCN: global_load_dwordx4 v[{{[0-9]*[02468]:[0-9]*[13579]}}], v{{[0-9]+}}, s[{{[0-9:]+}}]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]*}}[[LO:[02468]]], v{{[0-9]+}}
-; GCN-NEXT: global_store_dwordx2 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+\]}}, s[{{[0-9:]+}}]
+; GCN-NEXT: global_store_dwordx2 v{{[0-9]+}}, v[[[LO]]:{{[0-9]+\]}}, s[{{[0-9:]+}}]
define amdgpu_kernel void @test_odd_int4(<4 x i32> addrspace(1)* %arg, <2 x i32> addrspace(1)* %arg1) {
bb:
; GCN-LABEL: {{^}}combine_ftrunc_frint_v2f32:
; GCN: s_load_dwordx2
-; GCN: s_load_dwordx2 s{{\[}}[[SRC1:[0-9]+]]:[[SRC2:[0-9]+]]{{\]}}
+; GCN: s_load_dwordx2 s[[[SRC1:[0-9]+]]:[[SRC2:[0-9]+]]]
; GCN-DAG: v_rndne_f32_e32 v[[RND1:[0-9]+]], s[[SRC1]]
; GCN-DAG: v_rndne_f32_e32 v[[RND2:[0-9]+]], s[[SRC2]]
-; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RND1]]:[[RND2]]{{\]}}
+; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v[[[RND1]]:[[RND2]]]
define amdgpu_kernel void @combine_ftrunc_frint_v2f32(<2 x float> addrspace(1)* %p) {
%v = load <2 x float>, <2 x float> addrspace(1)* %p, align 8
%round = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %v)
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
-; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_cmp_gt_u64_e32 vcc, s[[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
; GCN-LABEL: {{^}}vgpr_arg_src:
; GCN: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
; GCN: s_mov_b32 s[[ZERO:[0-9]+]]
-; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
+; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[[[READLANE]]:[[ZERO]]]
define amdgpu_vs float @vgpr_arg_src(<4 x i32> addrspace(6)* %arg) {
main_body:
%tmp9 = load <4 x i32>, <4 x i32> addrspace(6)* %arg
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
-; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN-NEXT: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
%vreg = load volatile i64, i64 addrspace(1)* undef
%ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
; The neg1 appears after folding the not 0
; GCN-LABEL: {{^}}fold_mi_or_neg1:
; GCN: buffer_load_dwordx2
-; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]]
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
%vreg0 = load volatile i64, i64 addrspace(1)* undef
%vreg1 = load volatile i64, i64 addrspace(1)* undef
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, s{{[0-9]+}}
; Spill saved exec
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
+; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
-; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
+; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]], [[CMP0]]
+; GCN: s_mov_b64 exec, s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]]
; GCN: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
-; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
+; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; Restore val
; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
+; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
-; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
+; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], [[CMP0]]
+; GCN: s_mov_b64 exec, s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]]
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
-; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
+; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, [[ZERO]]
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
-; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
+; GCN: s_mov_b64 s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], exec
+; GCN: s_and_b64 s[[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]], s[[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]], [[CMP0]]
+; GCN: s_xor_b64 s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]], s[[[ANDEXEC_LO]]:[[ANDEXEC_HI]]], s[[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]]
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 1
-; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
+; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]]
; Regular spill value restored after exec modification
; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
; Followed by spill
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_and_b64 s{{\[}}[[FLOW_AND_EXEC_LO:[0-9]+]]:[[FLOW_AND_EXEC_HI:[0-9]+]]{{\]}}, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]{{\]}}
+; GCN: s_and_b64 s[[[FLOW_AND_EXEC_LO:[0-9]+]]:[[FLOW_AND_EXEC_HI:[0-9]+]]], exec, s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]]
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_AND_EXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]]
; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_AND_EXEC_HI]], 1
; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]{{\]}}
+; GCN: s_xor_b64 exec, exec, s[[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
-; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}}
+; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
-; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: {{buffer|flat}}_load_dwordx2 v[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
}
; FUNC-LABEL: {{^}}v_ctpop_i64_user:
-; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: {{buffer|flat}}_load_dwordx2 v[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]],
; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
}
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
-; VI-DAG: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
-; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; SI-DAG: s_load_dwordx2 s[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xd
+; VI-DAG: s_load_dwordx2 s[[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], s[[[LOVAL]]:[[HIVAL]]]
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[ZERO]]
-; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
; GCN: s_endpgm
define amdgpu_kernel void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
; FIXME: Should not have extra add
; FUNC-LABEL: {{^}}v_ctpop_i128:
-; SI: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-; VI: flat_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}
+; SI: buffer_load_dwordx4 v[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; VI: flat_load_dwordx4 v[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], v{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0
; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]]
}
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_Y]], v[[REG_W]]
; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD0]], v[[ADD1]]
}
; CI-LABEL: {{^}}simple_read2_v3f32_superreg_align4:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-DAG: ds_read_b32 v[[REG_Z:[0-9]+]], v{{[0-9]+}} offset:8{{$}}
; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_X]], v[[REG_Z]]
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[ADD0]], v[[REG_Y]]
; Do scalar loads into the super register we need.
; CI-LABEL: {{^}}simple_read2_v2f32_superreg_scalar_loads_align4:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
-; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}}
+; CI: buffer_store_dwordx2 v[[[REG_ELT0]]:[[REG_ELT1]]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; Do scalar loads into the super register we need.
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_scalar_loads_align4:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT2:[0-9]+]]:[[REG_ELT3:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
+; CI-DAG: ds_read2_b32 v[[[REG_ELT2:[0-9]+]]:[[REG_ELT3:[0-9]+]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
-; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}}
+; CI: buffer_store_dwordx4 v[[[REG_ELT0]]:[[REG_ELT3]]]
; CI: s_endpgm
define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
+; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
+; GCN: ds_read2st64_b32 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:255
; GCN: s_waitcnt lgkmcnt(0)
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
+; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset1:1
; GCN: s_waitcnt lgkmcnt(0)
-; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:1 offset1:2
; GCN: s_waitcnt lgkmcnt(0)
-; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
; CI: s_mov_b32 m0
; GFX9-NOT: m0
-; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
+; GCN: ds_read2st64_b64 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]], v{{[0-9]+}} offset0:4 offset1:127
; GCN: s_waitcnt lgkmcnt(0)
-; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
; heuristics. Should not need -stress-early-ifcvt
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle64:
-; GCN: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
-; GCN: v_cmp_neq_f64_e32 vcc, 1.0, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
-; GCN: v_add_f64 v{{\[}}[[ADD_LO:[0-9]+]]:[[ADD_HI:[0-9]+]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}, v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
+; GCN: v_cmp_neq_f64_e32 vcc, 1.0, v[[[VAL_LO]]:[[VAL_HI]]]
+; GCN: v_add_f64 v[[[ADD_LO:[0-9]+]]:[[ADD_HI:[0-9]+]]], v[[[VAL_LO]]:[[VAL_HI]]], v[[[VAL_LO]]:[[VAL_HI]]]
; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_LO:[0-9]+]], v[[ADD_LO]], v[[VAL_LO]], vcc
; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_HI:[0-9]+]], v[[ADD_HI]], v[[VAL_HI]], vcc
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
entry:
%v = load double, double addrspace(1)* %in
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
-; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
+; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
-; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
+; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
-; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
+; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
-; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
+; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
-; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
+; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
entry:
%ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
-; GCN: s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
+; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
; GCN-LABEL: {{^}}v_insertelement_v4f16_dynamic_vgpr:
; GCN-DAG: {{flat|global|buffer}}_load_dword [[IDX:v[0-9]+]],
-; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]]
-; GFX89: v_lshrrev_b64 v{{\[}}[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]{{\]}}, [[SCALED_IDX]], v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GFX89: v_lshrrev_b64 v[[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]], [[SCALED_IDX]], v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[SHIFT_LO]]
-; SI: v_lshr_b64 v{{\[}}[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]{{\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}, [[SCALED_IDX]]
+; SI: v_lshr_b64 v[[[SHIFT_LO:[0-9]+]]:[[SHIFT_HI:[0-9]+]]], v[[[LO]]:[[HI]]], [[SCALED_IDX]]
; SI: buffer_store_short v[[SHIFT_LO]]
define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(half addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
; SI: buffer_store_short
; SI: buffer_store_short
-; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x2c
+; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x2c
; GFX89-DAG: v_mov_b32_e32 [[VLOAD0:v[0-9]+]], s[[LOAD0]]
; GFX89-DAG: buffer_store_short [[VLOAD0]], off
; GFX89-DAG: v_mov_b32_e32 [[VLOAD1:v[0-9]+]], s[[LOAD1]]
; SI: s_load_dwordx2 s
; SI: s_load_dwordx2 s
-; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x24
-; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]{{\]}}, s[0:1], 0x4c
+; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x24
+; GFX89-DAG: s_load_dwordx2 s[[[LOAD0:[0-9]+]]:[[LOAD1:[0-9]+]]], s[0:1], 0x4c
; GFX89-DAG: s_load_dword s{{[0-9]+}}, s[0:1], 0x54
; GCN-NOT: {{buffer|flat|global}}
; VI: s_load_dwordx2 [[VEC8:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
; VI: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 3
-; VI: s_lshr_b64 s{{\[}}[[EXTRACT_LO:[0-9]+]]:{{[0-9]+\]}}, [[VEC8]], [[SCALED_IDX]]
+; VI: s_lshr_b64 s[[[EXTRACT_LO:[0-9]+]]:{{[0-9]+\]}}, [[VEC8]], [[SCALED_IDX]]
; VI: v_mov_b32_e32 [[V_EXTRACT:v[0-9]+]], s[[EXTRACT_LO]]
; VI: buffer_store_byte [[V_EXTRACT]]
define amdgpu_kernel void @dynamic_extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> addrspace(4)* %vec.ptr, i32 %idx) #0 {
}
; GCN-LABEL: {{^}}s_fabs_v4f16:
-; CI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2
-; GFX89: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
+; CI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2
+; GFX89: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x8
; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x7fff7fff
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s[[LO]], [[MASK]]
}
; GCN-LABEL: {{^}}fabs_fn_fold:
-; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
-; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
+; SI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
}
; FUNC-LABEL: {{^}}fabs_fold:
-; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
-; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
+; SI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dwordx2 s[[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0.0)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -0.0)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 1.0)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double -1.0)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 16.0)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
store double %canonicalized, double addrspace(1)* %out
; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
store double %canonicalized, double addrspace(1)* %out
; VI: v_cmp_nlt_f16_e32 vcc, v[[B_F16_1]], v[[A_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[R_I32_0]]:[[R_I32_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fcmp_v2f16_nlt(
<2 x i32> addrspace(1)* %r,
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f16_sign_f64:
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; GCN-DAG: v_cvt_f32_f16_e32 v[[MAG_EXT:[0-9]+]], v[[MAG]]
-; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]{{\]}}, v[[MAG_EXT]]
+; GCN-DAG: v_cvt_f64_f32_e32 v[[[MAG_EXT_LO:[0-9]+]]:[[MAG_EXT_HI:[0-9]+]]], v[[MAG_EXT]]
; GCN: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_EXT_HI]], v[[SIGN_HI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_EXT_LO]]:[[OUT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[MAG_EXT_LO]]:[[OUT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_out_f64_mag_f16_sign_f64(
double addrspace(1)* %arg_out,
}
; GCN-LABEL: {{^}}test_copysign_out_f64_mag_f64_sign_f16:
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[MAG_LO:[0-9]+]]:[[MAG_HI:[0-9]+]]]
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[SIGN:[0-9]+]]
; GCN-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
; SI: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_F32]]
; GFX89-DAG: v_lshlrev_b32_e32 v[[SIGN_SHIFT:[0-9]+]], 16, v[[SIGN]]
; GFX89: v_bfi_b32 v[[OUT_HI:[0-9]+]], s[[CONST]], v[[MAG_HI]], v[[SIGN_SHIFT]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[MAG_LO]]:[[OUT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[MAG_LO]]:[[OUT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_out_f64_mag_f64_sign_f16(
double addrspace(1)* %arg_out,
; GCN-LABEL: {{^}}test_copysign_out_f16_mag_f16_sign_f64:
; GCN-DAG: {{buffer|flat|global}}_load_ushort v[[MAG:[0-9]+]]
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[SIGN_LO:[0-9]+]]:[[SIGN_HI:[0-9]+]]]
; SI-DAG: s_brev_b32 s[[CONST:[0-9]+]], -2
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_HI]]
; Try to identify arg based on higher address.
; FUNC-LABEL: {{^}}test_copysign_f32:
-; SI: s_load_dwordx2 s{{\[}}[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]{{\]}}, {{.*}} 0xb
-; VI: s_load_dwordx2 s{{\[}}[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]{{\]}}, {{.*}} 0x2c
+; SI: s_load_dwordx2 s[[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]], {{.*}} 0xb
+; VI: s_load_dwordx2 s[[[SMAG:[0-9]+]]:[[SSIGN:[0-9]+]]], {{.*}} 0x2c
; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], s[[SSIGN]]
; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], s[[SMAG]]
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
; FUNC-LABEL: {{^}}test_copysign_f64:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x1d
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x4c
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x74
+; SI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
+; SI-DAG: s_load_dwordx2 s[[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x1d
+; VI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x4c
+; VI-DAG: s_load_dwordx2 s[[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x74
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VMAG_LO]]:[[VRESULT_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @test_copysign_f64(double addrspace(1)* %out, [8 x i32], double %mag, [8 x i32], double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
}
; FUNC-LABEL: {{^}}test_copysign_f64_f32:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x4c
+; SI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
+; VI-DAG: s_load_dwordx2 s[[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN-DAG: s_load_dword s[[SSIGN:[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VSIGN:[0-9]+]], s[[SSIGN]]
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VMAG_LO]]:[[VRESULT_HI]]]
define amdgpu_kernel void @test_copysign_f64_f32(double addrspace(1)* %out, [8 x i32], double %mag, float %sign) nounwind {
%c = fpext float %sign to double
%result = call double @llvm.copysign.f64(double %mag, double %c)
; GCN-LABEL: {{^}}div_fast_k_x_pat_f64:
; GCN-DAG: v_mov_b32_e32 v[[K_LO:[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0x3fb99999
-; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
define amdgpu_kernel void @div_fast_k_x_pat_f64(double addrspace(1)* %out) #1 {
%x = load double, double addrspace(1)* undef
; GCN-LABEL: {{^}}div_fast_neg_k_x_pat_f64:
; GCN-DAG: v_mov_b32_e32 v[[K_LO:[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0xbfb99999
-; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(double addrspace(1)* %out) #1 {
%x = load double, double addrspace(1)* undef
}
; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
-; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
+; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
-; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
+; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
}
; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
-; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
+; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
}
; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
-; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
+; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64
; GCN-FLUSH: v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
-; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
+; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fneg <4 x float> %load
}
; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
-; GCN-DAG: s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
+; GCN-DAG: s_load_dwordx4 s[[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32
; GCN-FLUSH: v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
-; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
+; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fneg <4 x float> %load
; GCN: s_not_b64 exec, exec
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)
-; GCN: s_or_saveexec_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -1
+; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
%tmp1191 = mul i32 %tmp1189, 4
-; GCN: s_mov_b64 exec, s{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
%tmp1196 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp1191)
%tmp34 = icmp eq i32 %arg, 0
; GCN: s_not_b64 exec, exec
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)
-; GCN: s_or_saveexec_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -1
+; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
%tmp1191 = mul i32 %tmp1189, 4
-; GCN: s_mov_b64 exec, s{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
%tmp1196 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp1191)
%tmp34 = icmp eq i32 %arg, 0
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; CHECK-LABEL: {{^}}store_flat_i32:
-; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
+; CHECK-DAG: s_load_dwordx2 s[[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
-; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
+; CHECK: flat_store_dword v[[[LO_VREG]]:[[HI_VREG]]], v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
store volatile i32 %x, i32* %fptr, align 4
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
-; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
+; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
%gep = getelementptr i64, i64* %out, i64 4
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
-; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
+; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64* %out, i64 %index
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
-; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
+; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
-; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
+; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64* %out, i64 %index
}
; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s[[[A:[0-9]+]]:[[B:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
; Nsz also needed
; FIXME: Should separate tests
; GCN-LABEL: {{^}}s_test_fmin_legacy_ule_f32_nnan_src:
-; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN: s_load_dwordx2 s[[[A:[0-9]+]]:[[B:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_add_f32_e64 [[ADD_A:v[0-9]+]], s[[A]], 1.0
; GCN-DAG: v_add_f32_e64 [[ADD_B:v[0-9]+]], s[[B]], 2.0
}
; GCN-LABEL: {{^}}fmul_v4f16:
-; GFX9: buffer_load_dwordx2 v{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}
-; GFX9: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
+; GFX9: buffer_load_dwordx2 v[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]]
+; GFX9: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; GFX9-DAG: v_pk_mul_f16 v[[MUL_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; GFX9-DAG: v_pk_mul_f16 v[[MUL_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
-; GFX9: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[MUL_HI]]{{\]}}
+; GFX9: buffer_store_dwordx2 v[[[MUL_LO]]:[[MUL_HI]]]
-; VI: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
-; VI: buffer_load_dwordx2 v{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}
+; VI: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
+; VI: buffer_load_dwordx2 v[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]]
; VI: v_mul_f16_sdwa
; VI: v_mul_f16_e32
; VI: v_mul_f16_sdwa
}
; GCN-LABEL: {{^}}fmul_v4f16_imm_a:
-; GFX89-DAG: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
+; GFX89-DAG: buffer_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; GFX9-DAG: s_mov_b32 [[K1:s[0-9]+]], 0x44004200
; GFX9-DAG: s_mov_b32 [[K0:s[0-9]+]], 0x40004800
; GFX9-DAG: v_pk_mul_f16 v[[MUL_LO:[0-9]+]], v[[A_LO]], [[K0]]
; GFX9-DAG: v_pk_mul_f16 v[[MUL_HI:[0-9]+]], v[[A_HI]], [[K1]]
-; GFX9: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[MUL_HI]]{{\]}}
+; GFX9: buffer_store_dwordx2 v[[[MUL_LO]]:[[MUL_HI]]]
; VI-DAG: v_mov_b32_e32 [[K4:v[0-9]+]], 0x4400
; VI-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[MUL_LO_LO]], v[[MUL_LO_HI]]
; VI-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], v[[MUL_HI_LO]], v[[MUL_HI_HI]]
-; VI: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}
+; VI: buffer_store_dwordx2 v[[[OR0]]:[[OR1]]]
define amdgpu_kernel void @fmul_v4f16_imm_a(
<4 x half> addrspace(1)* %r,
<4 x half> addrspace(1)* %b) {
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
-; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; SI: v_max_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]
-; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
+; VI: v_min_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
-; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]
; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
-; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
+; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
-; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
+; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
-; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
+; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
}
; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
-; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
-; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
+; GCN: {{buffer|flat}}_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
+; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v[[[A_LO]]:[[A_HI]]]
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
-; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
+; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s[
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
}
; GCN-LABEL: {{^}}fneg_fabs_f64:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x13
-; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x4c
+; SI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x13
+; VI-DAG: s_load_dwordx2 s[[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]], s[{{[0-9]+:[0-9]+}}], 0x4c
; GCN-DAG: s_bitset1_b32 s[[HI_X]], 31
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO_V]]:[[HI_V]]]
define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
-; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]]
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
-; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]], [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
-; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
-; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]]
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
-; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]], [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
-; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr double, double addrspace(1)* %in, i32 %tid
; GCN-LABEL: {{^}}fpext_f16_to_f64
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; GCN: v_cvt_f64_f32_e32 v{{\[}}[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]{{\]}}, v[[A_F32]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_F64_0]]:[[R_F64_1]]{{\]}}
+; GCN: v_cvt_f64_f32_e32 v[[[R_F64_0:[0-9]+]]:[[R_F64_1:[0-9]+]]], v[[A_F32]]
+; GCN: buffer_store_dwordx2 v[[[R_F64_0]]:[[R_F64_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f64(
double addrspace(1)* %r,
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[R_F32_1:[0-9]+]], v[[A_F16_1]]
; GFX89: v_cvt_f32_f16_sdwa v[[R_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_F32_0]]:[[R_F32_1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[R_F32_0]]:[[R_F32_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_v2f16_to_v2f32(
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_i32_f32_e32 v[[R_I64_Low:[0-9]+]], v[[A_F32]]
; GCN: v_ashrrev_i32_e32 v[[R_I64_High:[0-9]+]], 31, v[[R_I64_Low]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_I64_Low]]{{\:}}[[R_I64_High]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i64(
i64 addrspace(1)* %r,
; VI-NOT: DEADBEEF
; VI-DAG: v_ashrrev_i32_e32 v[[R_I64_1_High:[0-9]+]], 31, v[[R_I64_1_Low]]
; VI-DAG: v_ashrrev_i32_e32 v[[R_I64_0_High:[0-9]+]], 31, v[[R_I64_0_Low]]
-; GCN: buffer_store_dwordx4 v{{\[}}[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]{{\]}}
+; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i64(
<2 x i64> addrspace(1)* %r,
; GCN: v_mov_b32_e32 v[[R_I64_High:[0-9]+]], 0
; GCN: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: v_cvt_u32_f32_e32 v[[R_I64_Low:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_I64_Low]]{{\:}}[[R_I64_High]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i64(
i64 addrspace(1)* %r,
; VI: v_cvt_u32_f32_e32 v[[R_I64_0_Low:[0-9]+]], v[[A_F32_0]]
; VI: v_cvt_u32_f32_e32 v[[R_I64_1_Low:[0-9]+]], v[[A_F32_1]]
; GCN: v_mov_b32_e32 v[[R_I64_0_High:[0-9]+]], 0
-; GCN: buffer_store_dwordx4 v{{\[}}[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]{{\]}}
+; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_v2f16_to_v2i64(
<2 x i64> addrspace(1)* %r,
}
; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
-; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
-; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]]
+; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v[[[A_F64_0]]:[[A_F64_1]]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
}
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
-; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
}
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
-; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
-; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
-; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
+; GCN: buffer_load_dwordx4 v[[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]]
+; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v[[[A_F64_0]]:{{[0-9]+}}]
+; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v[{{[0-9]+}}:[[A_F64_3]]]
; VI: v_cvt_f16_f32_sdwa v[[R_F16_HI:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
;
declare double @llvm.floor.f64(double) #0
; FUNC-LABEL: {{^}}fract_f64:
-; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
-; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
-; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
-; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v[[[LO]]:[[HI]]], -v[[[RESLO]]:[[RESHI]]]
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v[[[LO]]:[[HI]]], -[[SUB0]]
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e32 [[FLOORX:v\[[0-9]+:[0-9]+\]]], [[X]]
}
; FUNC-LABEL: {{^}}fract_f64_neg:
-; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
-; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
-; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
-; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v[[[LO]]:[[HI]]], -v[[[RESLO]]:[[RESHI]]]
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v[[[LO]]:[[HI]]], -[[SUB0]]
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -[[X]]
}
; FUNC-LABEL: {{^}}fract_f64_neg_abs:
-; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
+; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
-; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]]
-; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI-DAG: v_min_f64 v[[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v[[[UPLO]]:[[UPHI]]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v[[[LO]]:[[HI]]], 3
; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
-; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
-; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]
+; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v[[[LO]]:[[HI]]]|, -v[[[RESLO]]:[[RESHI]]]
+; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v[[[LO]]:[[HI]]]|, -[[SUB0]]
; CI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]]
; CI: v_floor_f64_e64 [[FLOORX:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off
+; GCN-DAG: buffer_store_dwordx2 v[[[ARG1_LOAD0]]:[[ARG1_LOAD1]]], off
define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 {
%arg0.load = load i32, i32 addrspace(5)* %arg0
%arg1.load = load i64, i64 addrspace(5)* %arg1
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:12
; GCN: buffer_store_dword v[[LOAD_ARG1]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
+; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile i32 %arg1, i32 addrspace(1)* undef
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]{{\]}}, off
-; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
+; GCN: buffer_store_dwordx2 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]], off
+; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s32 offset:60{{$}}
; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s32 offset:64{{$}}
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]{{\]}}, off
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]{{\]}}, off
-; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]], off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]], off
+; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef
declare hidden void @hidden_func(i32 addrspace(1)* %out)
; CHECK-LABEL: call_func:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], func@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
-; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]], 0x0
+; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
; Kernel under test: makes one ordinary call to @func, forwarding %out
; unchanged, and then returns. The directives above expect the callee address
; to be fetched with an s_load_dwordx2 from a location formed with
; func@gotpcrel32 relocations before the s_swappc_b64.
define amdgpu_kernel void @call_func(i32 addrspace(1)* %out) {
call void @func(i32 addrspace(1)* %out)
ret void
}
; CHECK-LABEL: call_protected_func:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], protected_func@rel32@hi+12
-; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
+; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
; Kernel under test: makes one ordinary call to @protected_func, forwarding
; %out unchanged, and then returns. The directives above expect the callee
; address to be formed from the s_getpc_b64 result with
; protected_func@rel32 relocations and passed straight to s_swappc_b64.
define amdgpu_kernel void @call_protected_func(i32 addrspace(1)* %out) {
call void @protected_func(i32 addrspace(1)* %out)
ret void
}
; CHECK-LABEL: call_hidden_func:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+12
-; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
+; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]]
define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) {
call void @hidden_func(i32 addrspace(1)* %out)
ret void
declare i64 @funci()
; CHECK-LABEL: tail_call_func:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], funci@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], funci@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
-; CHECK: s_setpc_b64 s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]], 0x0
+; CHECK: s_setpc_b64 s[[[ADDR_LO]]:[[ADDR_HI]]]
define i64 @tail_call_func() {
%ret = tail call i64 @funci()
ret i64 %ret
@available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer
; GCN-LABEL: {{^}}private_test:
-; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]
; Non-R600 OSes use relocations.
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+12
-; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+12
}
; GCN-LABEL: {{^}}available_externally_test:
-; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
+; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]]
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+12
; R600-LABEL: available_externally_test
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i64
@external_w_init = addrspace(1) global [256 x i32] zeroinitializer
; CHECK-LABEL: private_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private@rel32@lo+8
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], private@rel32@hi+16
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]]
}
; CHECK-LABEL: internal_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal@rel32@lo+8
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], internal@rel32@hi+16
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]]
}
; CHECK-LABEL: available_externally_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], available_externally@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
}
; CHECK-LABEL: linkonce_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], linkonce@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @linkonce_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
}
; CHECK-LABEL: weak_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], weak@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @weak_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
}
; CHECK-LABEL: common_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], common@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @common_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
}
; CHECK-LABEL: extern_weak_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], extern_weak@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @extern_weak_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
}
; CHECK-LABEL: linkonce_odr_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], linkonce_odr@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @linkonce_odr_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
}
; CHECK-LABEL: weak_odr_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], weak_odr@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @weak_odr_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
}
; CHECK-LABEL: external_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], external@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @external_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
}
; CHECK-LABEL: external_w_init_test:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init@gotpcrel32@lo+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], external_w_init@gotpcrel32@hi+12
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOTADDR_LO]]:[[GOTADDR_HI]]], 0x0
; CHECK: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO]]:[[ADDR_HI]]], 0x4
define amdgpu_kernel void @external_w_init_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
-; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; SI: buffer_atomic_add v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add
; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
-; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[LOW_K]]:[[HIGH_K]]]{{$}}
+; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
-; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword v[[RET]]
; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
-; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
-; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword v[[RET]]
; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
-; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
-; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
-; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
+; CIVI: buffer_atomic_cmpswap_x2 v[[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; CIVI: buffer_store_dwordx2 v[[[RET]]:
; GFX9: global_atomic_cmpswap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
-; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
-; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
+; CI: buffer_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CIVI: buffer_store_dwordx2 v[[[RET]]:
-; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
+; GFX9: global_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
-; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
+; CIVI: buffer_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; CIVI: buffer_store_dwordx2 v[[[RET]]:
-; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GFX9: global_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) {
entry:
%val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst
}
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
-; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
-; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:
+; CI: buffer_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CIVI: buffer_store_dwordx2 v[[[RET]]:
-; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GFX9: global_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
; GCN-LABEL: {{^}}atomic_load_i64_neg_offset:
; CI: v_mov_b32_e32 v[[LO:[0-9]+]], 0xffffffe0
; CI: v_mov_b32_e32 v[[HI:[0-9]+]], -1
-; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[[[LO]]:[[HI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffffffe0
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; uniform load dominated by no-alias store - scalarize
; CHECK-LABEL: @no_memdep_alias_arg
-; CHECK: s_load_dwordx2 s{{\[}}[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]], s[4:5], 0x0
-; CHECK: s_load_dword [[SVAL:s[0-9]+]], s{{\[}}[[IN_LO]]:[[IN_HI]]], 0x0
+; CHECK: s_load_dwordx2 s[[[IN_LO:[0-9]+]]:[[IN_HI:[0-9]+]]], s[4:5], 0x0
+; CHECK: s_load_dword [[SVAL:s[0-9]+]], s[[[IN_LO]]:[[IN_HI]]], 0x0
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; CHECK: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[VVAL]]
; CHECK: flat_store_dword
; CHECK: v_mov_b32_e32 v[[ADDR_LO:[0-9]+]], s{{[0-9]+}}
; CHECK: v_mov_b32_e32 v[[ADDR_HI:[0-9]+]], s{{[0-9]+}}
-; CHECK: flat_load_dwordx2 [[A_ADDR:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
+; CHECK: flat_load_dwordx2 [[A_ADDR:v\[[0-9]+:[0-9]+\]]], v[[[ADDR_LO]]:[[ADDR_HI]]]
; CHECK: flat_load_dword [[VVAL:v[0-9]+]], [[A_ADDR]]
; CHECK: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[VVAL]]
define amdgpu_kernel void @global_array_alias_store(i32 addrspace(1)* nocapture %out, [8 x i32], i32 %n) {
; ERR: error: <unknown>:0:0: in function illegal_agpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy
; GCN-DAG: v_accvgpr_read_b32 v[[COPY1L:[0-9]+]], a0
; GCN-DAG: v_accvgpr_read_b32 v[[COPY1H:[0-9]+]], a1
-; GCN: ; illegal copy v{{\[}}[[COPY1L]]:[[COPY1H]]] to s[10:11]
+; GCN: ; illegal copy v[[[COPY1L]]:[[COPY1H]]] to s[10:11]
define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_v2i32() #1 {
%vgpr = call <2 x i32> asm sideeffect "; def $0", "=${a[0:1]}"()
call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
; GCN-NEXT: v_readfirstlane_b32 s[[SREG1:[0-9]+]], v[[VREG1:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG3:[0-9]+]], v[[VREG3:[0-9]+]]
-; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP0:vcc]], s{{\[}}[[SREG0]]:[[SREG1]]{{\]}}, v{{\[}}[[VREG0]]:[[VREG1]]{{\]}}
-; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SREG2]]:[[SREG3]]{{\]}}, v{{\[}}[[VREG2]]:[[VREG3]]{{\]}}
+; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP0:vcc]], s[[[SREG0]]:[[SREG1]]], v[[[VREG0]]:[[VREG1]]]
+; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SREG2]]:[[SREG3]]], v[[[VREG2]]:[[VREG3]]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG4:[0-9]+]], v[[VREG4:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG5:[0-9]+]], v[[VREG5:[0-9]+]]
; GCN-NEXT: s_and_b64 [[AND0:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
-; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP2:vcc]], s{{\[}}[[SREG4]]:[[SREG5]]{{\]}}, v{{\[}}[[VREG4]]:[[VREG5]]{{\]}}
+; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP2:vcc]], s[[[SREG4]]:[[SREG5]]], v[[[VREG4]]:[[VREG5]]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG6:[0-9]+]], v[[VREG6:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG7:[0-9]+]], v[[VREG7:[0-9]+]]
-; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP3:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SREG6]]:[[SREG7]]{{\]}}, v{{\[}}[[VREG6]]:[[VREG7]]{{\]}}
+; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP3:s\[[0-9]+:[0-9]+\]]], s[[[SREG6]]:[[SREG7]]], v[[[VREG6]]:[[VREG7]]]
; GCN-NEXT: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[AND0]], [[CMP2]]
; GCN-NEXT: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[AND1]], [[CMP3]]
; GCN-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
; GCN-NEXT: s_nop 0
-; GCN-NEXT: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[SREG0]]:[[SREG7]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN-NEXT: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, s[[[SREG0]]:[[SREG7]]], {{s\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: ; implicit-def: $vgpr8_vgpr9
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVE]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG3:[0-9]+]], v[[VREG3:[0-9]+]]
-; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP0:vcc]], s{{\[}}[[SREG0]]:[[SREG1]]{{\]}}, v{{\[}}[[VREG0]]:[[VREG1]]{{\]}}
-; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SREG2]]:[[SREG3]]{{\]}}, v{{\[}}[[VREG2]]:[[VREG3]]{{\]}}
+; GCN-NEXT: v_cmp_eq_u64_e32 [[CMP0:vcc]], s[[[SREG0]]:[[SREG1]]], v[[[VREG0]]:[[VREG1]]]
+; GCN-NEXT: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SREG2]]:[[SREG3]]], v[[[VREG2]]:[[VREG3]]]
; GCN-NEXT: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
; GCN-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
; GCN-NEXT: s_nop 0
-; GCN-NEXT: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[SREG0]]:[[SREG3]]{{\]}} dmask:0x1
+; GCN-NEXT: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s[[[SREG0]]:[[SREG3]]] dmask:0x1
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVE]]
; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
-; GCN-DAG: s_load_dwordx16 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]{{\]}}
+; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
-; GCN-DAG: s_load_dwordx16 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]{{\]}}
+; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
; GCN-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x41880000
; MOVREL: v_movreld_b32_e32 v[[ELT0]], v[[INS]]
-; MOVREL: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
+; MOVREL: buffer_store_dwordx4 v[[[ELT0]]:[[ELT3]]]
define amdgpu_kernel void @insert_w_offset(<16 x float> addrspace(1)* %out, i32 %in) {
entry:
%add = add i32 %in, 1
; IDXMODE-NEXT: v_mov_b32_e32 v[[ELT0:[0-9]+]], v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
-; GCN: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
+; GCN: buffer_store_dwordx4 v[[[ELT0]]:
define amdgpu_kernel void @insert_wo_offset(<16 x float> addrspace(1)* %out, i32 %in) {
entry:
%ins = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %in
; The offset falls outside the superregister boundaries, so clamp to the 1st element.
; GCN-LABEL: {{^}}extract_largest_inbounds_offset:
-; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]
+; GCN-DAG: buffer_load_dwordx4 v[[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]
; GCN-DAG: s_load_dword [[IDX0:s[0-9]+]]
; GCN-DAG: s_add_i32 [[IDX:s[0-9]+]], [[IDX0]], 15
}
; GCN-LABEL: {{^}}extract_out_of_bounds_offset:
-; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
+; GCN-DAG: buffer_load_dwordx4 v[[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]]
; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
; GCN: s_add_i32 [[ADD_IDX:s[0-9]+]], [[IDX]], 16
; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
-; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
+; CHECK: v_cmp_ne_u32_e64 s[[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]], 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
-; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+; CHECK: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
%sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
store i64 %sgpr, i64 addrspace(1)* %out
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b64 [[CC4:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC4]]
-; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
+; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v[[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float4_inselt(<4 x float> addrspace(1)* %out, <4 x float> %vec, i32 %sel) {
entry:
%v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel
; GCN-DAG: v_mov_b32_e32 v[[VELT_1:[0-9]+]], s[[ELT_1]]
; GCN-DAG: v_mov_b32_e32 v[[VELT_2:[0-9]+]], s[[ELT_2]]
; GCN-DAG: v_mov_b32_e32 v[[VELT_3:[0-9]+]], s[[ELT_3]]
-; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[VELT_0]]:[[VELT_3]]]
+; GCN: flat_store_dwordx4 v[{{[0-9:]+}}], v[[[VELT_0]]:[[VELT_3]]]
define amdgpu_kernel void @int4_inselt(<4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %sel) {
entry:
%v = insertelement <4 x i32> %vec, i32 1, i32 %sel
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 0
; GCN-DAG: s_cselect_b64 [[CC2:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC2]]
-; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST]]:[[ELT_LAST]]]
+; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v[[[ELT_FIRST]]:[[ELT_LAST]]]
define amdgpu_kernel void @float2_inselt(<2 x float> addrspace(1)* %out, <2 x float> %vec, i32 %sel) {
entry:
%v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[CC8:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e32 v[[ELT_FIRST1:[0-9]+]], 1.0, v{{[0-9]+}}, [[CC8]]
-; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST0]]:[[ELT_LAST0]]]
-; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[ELT_FIRST1]]:[[ELT_LAST1]]]
+; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v[[[ELT_FIRST0]]:[[ELT_LAST0]]]
+; GCN-DAG: flat_store_dwordx4 v[{{[0-9:]+}}], v[[[ELT_FIRST1]]:[[ELT_LAST1]]]
define amdgpu_kernel void @float8_inselt(<8 x float> addrspace(1)* %out, <8 x float> %vec, i32 %sel) {
entry:
%v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel
; GCN: s_mov_b32 s[[KLO:[0-9]+]], 0x3c003c00
; GCN: s_mov_b32 s[[KHI:[0-9]+]], s[[KLO]]
; GCN: s_andn2_b64
-; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{\[}}[[KLO]]:[[KHI]]]
+; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[[[KLO]]:[[KHI]]]
; GCN: s_or_b64
define amdgpu_kernel void @half4_inselt(<4 x half> addrspace(1)* %out, <4 x half> %vec, i32 %sel) {
entry:
; GCN: s_mov_b32 s[[KLO:[0-9]+]], 0x10001
; GCN: s_mov_b32 s[[KHI:[0-9]+]], s[[KLO]]
; GCN: s_andn2_b64
-; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{\[}}[[KLO]]:[[KHI]]]
+; GCN: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[[[KLO]]:[[KHI]]]
; GCN: s_or_b64
define amdgpu_kernel void @short4_inselt(<4 x i16> addrspace(1)* %out, <4 x i16> %vec, i32 %sel) {
entry:
}
; GCN-LABEL: {{^}}test_merge_store_constant_i16_invariant_constant_pointer_load:
-; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
+; GCN: s_load_dwordx2 s[[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
-; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
+; GCN: buffer_store_dword [[K]], off, s[[[SPTR_LO]]:
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(4)* dereferenceable(4096) nonnull %in) #0 {
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(4)* %in, !invariant.load !0
%ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
; GCN-LABEL: {{^}}byref_constant_32bit_i32_arg:
; GCN: s_add_i32 s[[PTR_LO:[0-9]+]], s4, 8
; GCN: s_mov_b32 s[[PTR_HI:[0-9]+]], 0{{$}}
-; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}}
+; GCN: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) %in.byref) {
%in = load i32, i32 addrspace(6)* %in.byref
store i32 %in, i32 addrspace(1)* %out, align 4
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
+; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64* %out
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
+; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
+; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
%gep = getelementptr i64, i64* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
+; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
+; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
+; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
%gep = getelementptr i64, i64* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
+; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
+; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64* %ptr, i32 %id
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
+; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
+; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64* %ptr, i32 %id
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64 addrspace(1)* %out
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
+; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
+; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
-; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64 addrspace(1)* %out
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
-; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
+; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX9: global_atomic_dec_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
+; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
-; GFX9: global_atomic_dec_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
-; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
+; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
+; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
-; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
+; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64 addrspace(1)* %out
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
+; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; GCN: ds_inc_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
+; GCN: ds_inc_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
-; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
+; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64 addrspace(1)* %out
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
-; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
+; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
+; GFX9: global_atomic_inc_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
-; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GFX9: global_atomic_inc_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
-; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
+; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
+; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
-; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
+; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
+; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
+; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, i64* %out
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
+; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
+; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
%gep = getelementptr i64, i64* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
+; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
ret void
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
+; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
+; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
%gep = getelementptr i64, i64* %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
-; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
+; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
+; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64* %ptr, i32 %id
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
-; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
+; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
+; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, i64* %ptr, i32 %id
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xy:
-; UNPACKED: buffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyz:
-; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
-; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @buffer_load_format_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; CHECK-LABEL: {{^}}no_fold_fi_reg_soffset:
; CHECK-DAG: v_mov_b32_e32 v[[FI:[0-9]+]], 4{{$}}
; CHECK-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s
-; CHECK: buffer_load_dword v0, v{{\[}}[[FI]]:[[HI]]
+; CHECK: buffer_load_dword v0, v[[[FI]]:[[HI]]
define amdgpu_ps float @no_fold_fi_reg_soffset(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
%alloca = alloca i32, addrspace(5)
%alloca.cast = ptrtoint i32 addrspace(5)* %alloca to i32
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %index) {
}
; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body:
call void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}s_cvt_pk_i16_i32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SX:[0-9]+]]:[[SY:[0-9]+]]], s[0:1], 0x{{b|2c}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, s[[SX]], [[VY]]
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}s_cvt_pk_u16_u32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SX:[0-9]+]]:[[SY:[0-9]+]]], s[0:1], 0x{{b|2c}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, s[[SX]], [[VY]]
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}s_cvt_pknorm_i16_f32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SX:[0-9]+]]:[[SY:[0-9]+]]], s[0:1], 0x{{b|2c}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}s_cvt_pknorm_u16_f32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SX:[0-9]+]]:[[SY:[0-9]+]]], s[0:1], 0x{{b|2c}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, i64 addrspace(1)* %out
}
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
+; SI-DAG: s_load_dwordx2 s[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v[[[VA_LO]]:[[VA_HI]]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
-; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x1d
+; SI-DAG: s_load_dwordx2 s[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v[[[VB_LO]]:[[VB_HI]]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
-; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, v[0:1], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]], v[0:1], s[[[K_LO]]:[[K_HI]]]
define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 {
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
%result0 = extractvalue { double, i1 } %result, 0
; FIXME: Should be able to shift directly into m0
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
-; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
+; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; Variable offset in SGPR with constant add
; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
-; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
+; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; FIXME: Should be able to shift directly into m0
; GCN-LABEL: {{^}}gws_init_sgpr_offset:
-; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
+; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; Variable offset in SGPR with constant add
; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
-; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
+; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}is_private_vgpr:
-; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}is_local_vgpr:
-; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]{{\]}}
+; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 {
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 0x40004000
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 0x3c003c00
; NOLIT-SRCC-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
-; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}]
-; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
-; GFX90A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], 1.0
+; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}]
+; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0
+; GFX90A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0
; GFX908-COUNT-16: v_accvgpr_read_b32
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xy:
-; UNPACKED: buffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyz:
-; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
%data = call <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
-; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @buffer_load_format_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %voffset) {
}
; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-; UNPACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+; UNPACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-; PACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
}
; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
main_body:
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; GFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}},
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}},
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
; PACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
-; GFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; GFX10-PACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}},
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}},
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
-; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED]
+; GFX10-PACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED]
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data) {
main_body:
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
; VI-DAG: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
-; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
-; VI-DAG: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW1]]:[[HIGH2]]]
+; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s[[[LOW1]]:[[HIGH1]]]
+; VI-DAG: v_max_f64 v[0:1], v[0:1], s[[[LOW1]]:[[HIGH2]]]
define amdgpu_kernel void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
store double %rsq_clamp, double addrspace(1)* %out
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xy:
-; UNPACKED: buffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyz:
-; UNPACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
define amdgpu_ps half @buffer_load_format_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
}
; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
-; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @buffer_load_format_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %index) {
}
; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-; UNPACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-; PACKED: buffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
}
; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body:
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10-PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; GFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
+; GFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; GCN-LABEL: {{^}}tbuffer_store_d16_x:
; GCN-DAG: s_load_dwordx4
-; GCN-DAG: s_load_dword{{[x0-2]*}} s{{\[}}[[S_LO:[0-9]+]]
+; GCN-DAG: s_load_dword{{[x0-2]*}} s[[[S_LO:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
; PREGFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; GFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
-; GFX10-PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; GFX10-PACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
-; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; GFX10-PACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
-; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; UNPACKED: tbuffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xyz:
-; UNPACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xyz v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
-; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
+; PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; UNPACKED: tbuffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; UNPACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PACKED-DAG: s_and_b32 [[SHR0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR0]]
-; PACKED: tbuffer_store_format_d16_xyz v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; PACKED: tbuffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <3 x half> %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.tbuffer.store.v3f16(<3 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; UNPACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
+; PACKED: tbuffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
}
; GCN-LABEL: {{^}}update_dpp64_test:
-; GCN: load_dwordx2 v{{\[}}[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
+; GCN: load_dwordx2 v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) {
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x3afaedd9
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x7047
-; GCN-DAG: load_dwordx2 v{{\[}}[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
+; GCN-DAG: load_dwordx2 v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_LO]], v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
}
; GCN-LABEL: {{^}}fma_v4f16
-; GCN: buffer_load_dwordx2 v{{\[}}[[A_V4_F16_LO:[0-9]+]]:[[A_V4_F16_HI:[0-9]+]]{{\]}}
-; GCN: buffer_load_dwordx2 v{{\[}}[[B_V4_F16_LO:[0-9]+]]:[[B_V4_F16_HI:[0-9]+]]{{\]}}
-; GCN: buffer_load_dwordx2 v{{\[}}[[C_V4_F16_LO:[0-9]+]]:[[C_V4_F16_HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[A_V4_F16_LO:[0-9]+]]:[[A_V4_F16_HI:[0-9]+]]]
+; GCN: buffer_load_dwordx2 v[[[B_V4_F16_LO:[0-9]+]]:[[B_V4_F16_HI:[0-9]+]]]
+; GCN: buffer_load_dwordx2 v[[[C_V4_F16_LO:[0-9]+]]:[[C_V4_F16_HI:[0-9]+]]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V4_F16_LO]]
; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_0:[0-9]+]], 16, v[[A_V4_F16_LO]]
; GFX9-DAG: v_pk_fma_f16 v[[R_V4_F16_LO:[0-9]+]], v[[A_V4_F16_LO]], v[[B_V4_F16_LO]], v[[C_V4_F16_LO]]
; GFX9-DAG: v_pk_fma_f16 v[[R_V4_F16_HI:[0-9]+]], v[[A_V4_F16_HI]], v[[B_V4_F16_HI]], v[[C_V4_F16_HI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[R_V4_F16_LO]]:[[R_V4_F16_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[R_V4_F16_LO]]:[[R_V4_F16_HI]]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v4f16(
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
-; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
; EG: MEM_RAT
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
-; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; FUNC-LABEL: {{^}}local_load_v16i8:
; GFX9-NOT: m0
-; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
-; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}
+; GCN: ds_read2_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]], v{{[0-9]+}} offset1:1{{$}}
+; GCN: ds_write2_b64 v{{[0-9]+}}, v[[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]] offset1:1{{$}}
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
-; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; GCN: ds_read_i8 v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
+; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
-; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
-; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
+; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
-; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
-; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: ds_sub_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy:
-; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memcpy on %p.
; CHECK-LABEL: test_memcpy_inline:
-; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]], off
define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memmove on %p.
; CHECK-LABEL: test_memmove:
-; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: global_load_dwordx4 [[PVAL:v\[[0-9]+:[0-9]+\]]], v[0:1], off offset:16
; CHECK-DAG: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: global_store_dwordx4 v[0:1], [[PVAL]]
define i32 @test_memset(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) {
; Check loads of %q are scheduled ahead of that store of the memset on %p.
; CHECK-LABEL: test_memset:
-; CHECK-DAG: global_load_dwordx2 v{{\[}}[[Q0:[0-9]+]]:[[Q1:[0-9]+]]{{\]}}, v[2:3], off
+; CHECK-DAG: global_load_dwordx2 v[[[Q0:[0-9]+]]:[[Q1:[0-9]+]]], v[2:3], off
; CHECK-DAG: v_mov_b32_e32 v[[PVAL:[0-9]+]], 0xaaaaaaaa
-; CHECK: global_store_dwordx4 v[0:1], v{{\[}}[[PVAL]]{{:[0-9]+\]}}, off
+; CHECK: global_store_dwordx4 v[0:1], v[[[PVAL]]{{:[0-9]+\]}}, off
; CHECK: v_add_nc_u32_e32 v{{[0-9]+}}, v[[Q0]], v[[Q1]]
; CHECK: s_setpc_b64 s[30:31]
%p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b
-; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
-; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
-; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
+; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI4]]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
; GCN: buffer_store_dword v[[HI]]
define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
; SI-DAG: ds_read_b64
; SI-DAG: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; SI-DAG: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
-; SI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
+; SI-DAG: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]]
; SI-DAG: buffer_store_dword
; SI-DAG: buffer_store_dword
; SI: s_endpgm
; FIXME: We should be able to use the SGPR directly as src0 to v_add_i32
; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add:
-; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}}
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v[[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]]
+; GCN-DAG: s_load_dwordx2 s[[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]]
; GCN-DAG: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]]
; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]]
; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]]
-; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}},
+; GCN: buffer_load_ubyte v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]],
define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 {
bb:
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W64: v_cmp_eq_u64_e32 vcc, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W64: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W64: s_xor_b64 exec, exec, [[AND]]
; W64: s_cbranch_execnz [[LOOPBB]]
; W64: s_mov_b64 exec, [[SAVEEXEC]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W32: v_cmp_eq_u64_e32 vcc_lo, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W32: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB]]
; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W64: v_cmp_eq_u64_e32 vcc, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W64: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB0]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W64: v_cmp_eq_u64_e32 vcc, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: buffer_load_format_x [[RES1:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W64: buffer_load_format_x [[RES1:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB1]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W32: v_cmp_eq_u64_e32 vcc_lo, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W32: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB0]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W32: v_cmp_eq_u64_e32 vcc_lo, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB1]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W64: v_cmp_eq_u64_e32 vcc, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB0]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W64: v_cmp_eq_u64_e32 vcc, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W64: buffer_load_format_x [[RES]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB1]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W32: v_cmp_eq_u64_e32 vcc_lo, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB0]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]]
; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]]
-; W32: v_cmp_eq_u64_e32 vcc_lo, s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
-; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
+; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
+; W32: buffer_load_format_x [[RES]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB1]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
-; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
+; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
-; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
+; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_xor_b64 exec, exec, [[SAVE]]
; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1
; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
-; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
+; W64-O0-DAG: s_mov_b64 s[[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]], exec
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
-; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
+; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
-; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
+; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]]
; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
+; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_xor_b64 exec, exec, [[SAVE]]
; W64-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]]
; W64-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]]
-; W64-O0: s_mov_b64 exec, s{{\[}}[[SAVEEXEC0]]:[[SAVEEXEC1]]{{\]}}
+; W64-O0: s_mov_b64 exec, s[[[SAVEEXEC0]]:[[SAVEEXEC1]]]
; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF]] ; 4-byte Folded Spill
; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: @volatile_load
-; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
+; GCN: s_load_dwordx2 s[[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
-; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO_VREG]]:[[HI_VREG]]]
define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, [8 x i32], i32 addrspace(1)* nocapture %arg1) {
bb:
; CHECK-DAG: s_addc_u32 [[HI:s[0-9]+]], s{{[0-9]+}}, 0
; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[LO]]
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
-; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
+; CHECK: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]],
define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) #1 {
entry:
; A subregister use operand should not be tied.
; CHECK-LABEL: {{^}}no_fold_tied_subregister:
-; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
; CHECK: buffer_store_dword v[[LO]]
define amdgpu_kernel void @no_fold_tied_subregister() #1 {
}
; FUNC-LABEL: {{^}}scalar_or_literal_i64:
-; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; SI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SI-DAG: s_or_b32 s[[RES_HI:[0-9]+]], s[[HI]], 0xf237b
; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
}
; FUNC-LABEL: {{^}}scalar_or_literal_multi_use_i64:
-; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; SI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
-; SI: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; SI: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
}
; FUNC-LABEL: {{^}}scalar_or_inline_imm_i64:
-; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; SI: s_load_dwordx2 s[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SI-NOT: or_b32
; SI: s_or_b32 s[[VAL_LO]], s[[VAL_LO]], 63
; SI-NOT: or_b32
; SI-NOT: or_b32
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
; SI-NOT: or_b32
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
%or = or i64 %a, 63
store i64 %or, i64 addrspace(1)* %out
; SI-DAG: s_or_b32 [[VAL]], [[VAL]], -8
; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
-; SI: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
%or = or i64 %a, -8
store i64 %or, i64 addrspace(1)* %out
}
; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
; FIXME: The or 0 should really be removed.
; FUNC-LABEL: {{^}}vector_or_i64_imm:
-; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI: v_or_b32_e32 v[[LO_RESULT:[0-9]+]], 8, v[[LO_VREG]]
; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
-; SI: buffer_store_dwordx2 v{{\[}}[[LO_RESULT]]:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[LO_RESULT]]:[[HI_VREG]]]
; SI: s_endpgm
define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
; SI-DAG: buffer_load_dword v[[LO_VREG:[0-9]+]]
; SI-DAG: v_or_b32_e32 v[[RES_LO:[0-9]+]], -8, v[[LO_VREG]]
; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[RES_LO]]:[[RES_HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[RES_LO]]:[[RES_HI]]]
; SI: s_endpgm
define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
; GCN-LABEL: {{^}}fadd_v2_v_imm:
; GCN: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, s[[K]], v{{[0-9]+}}
-; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s{{\[}}[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
+; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @fadd_v2_v_imm(<2 x float> addrspace(1)* %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i32 %id
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX90A-DAG: s_mov_b32 s[[LO:[0-9]+]], 0
; GFX90A-DAG: s_mov_b32 s[[HI:[0-9]+]], 1.0
-; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s{{\[}}[[LO]]:[[HI]]]{{$}}
+; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[LO]]:[[HI]]]{{$}}
define amdgpu_kernel void @fadd_v2_v_lit_lo0(<2 x float> addrspace(1)* %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i32 %id
; GCN-LABEL: {{^}}fmul_v2_v_imm:
; GCN: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, s[[K]], v{{[0-9]+}}
-; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s{{\[}}[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
+; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @fmul_v2_v_imm(<2 x float> addrspace(1)* %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i32 %id
; GCN-DAG: s_mov_b32 s[[K1:[0-9]+]], 0x42c80000
; GCN-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[[K1]], v[[K2]]
-; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s{{\[}}[[K1]]:{{[0-9:]+}}], v{{\[}}[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}}
+; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}}
define amdgpu_kernel void @fma_v2_v_imm(<2 x float> addrspace(1)* %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i32 %id
; shifted down to the end of the used registers.
; GCN-LABEL: {{^}}store_to_undef:
-; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
-; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, 0 offen{{$}}
+; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
+; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
+; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offen{{$}}
; -O0 should assume spilling, so the input scratch resource descriptor
; -should be used directly without any copies.
}
; GCN-LABEL: {{^}}store_to_inttoptr:
-; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
-; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, 0 offset:124{{$}}
+; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
+; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
+; OPT: buffer_store_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124{{$}}
define amdgpu_kernel void @store_to_inttoptr() #0 {
store volatile i32 0, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
}
; GCN-LABEL: {{^}}load_from_undef:
-; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
-; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, 0 offen glc{{$}}
+; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
+; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
+; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offen glc{{$}}
define amdgpu_kernel void @load_from_undef() #0 {
%ld = load volatile i32, i32 addrspace(5)* undef
ret void
}
; GCN-LABEL: {{^}}load_from_inttoptr:
-; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
-; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
-; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, 0 offset:124 glc{{$}}
+; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
+; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
+; OPT: buffer_load_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124 glc{{$}}
define amdgpu_kernel void @load_from_inttoptr() #0 {
%ld = load volatile i32, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*)
ret void
; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
-; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
-; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
-; GCN: v_lshrrev_b64 v[{{[0-9:]+}}], v{{[0-9]+}}, s{{\[}}[[SL]]:[[SH]]]
+; GCN: v_lshrrev_b64 v[{{[0-9:]+}}], v{{[0-9]+}}, s[[[SL]]:[[SH]]]
; OPT: %gep = getelementptr inbounds <4 x half>, <4 x half> addrspace(5)* %alloca, i32 0, i32 %sel2
; OPT: store <4 x half> <half 0xH3C00, half 0xH4000, half 0xH4200, half 0xH4400>, <4 x half> addrspace(5)* %alloca, align 2
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x40003
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x20001
-; GCN: v_lshrrev_b64 v[{{[0-9:]+}}], v{{[0-9]+}}, s{{\[}}[[SL]]:[[SH]]]
+; GCN: v_lshrrev_b64 v[{{[0-9:]+}}], v{{[0-9]+}}, s[[[SL]]:[[SH]]]
; OPT: %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(5)* %alloca, i32 0, i32 %sel2
; OPT: store <4 x i16> <i16 1, i16 2, i16 3, i16 4>, <4 x i16> addrspace(5)* %alloca, align 2
; CHECK-LABEL: {{^}}test_read_exec:
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
-; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_read_exec(i64 addrspace(1)* %out) #0 {
%exec = call i64 @llvm.read_register.i64(metadata !1)
store i64 %exec, i64 addrspace(1)* %out
; CHECK-LABEL: {{^}}test_read_flat_scratch:
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
-; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
%flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
store i64 %flat_scratch, i64 addrspace(1)* %out
; GETREG-SDAG-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; GETREG-DAG: s_getreg_b32 [[CNT1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GETREG-DAG: v_mov_b32_e32 v[[VCNT1:[0-9]+]], [[CNT1]]
-; GETREG: global_store_dwordx2 v{{.+}}, v{{\[}}[[VCNT1]]:[[ZERO]]]
+; GETREG: global_store_dwordx2 v{{.+}}, v[[[VCNT1]]:[[ZERO]]]
; GETREG: s_getreg_b32 [[CNT2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GETREG: v_mov_b32_e32 v[[VCNT2:[0-9]+]], [[CNT2]]
-; GETREG: global_store_dwordx2 v{{.+}}, v{{\[}}[[VCNT2]]:[[ZERO]]]
+; GETREG: global_store_dwordx2 v{{.+}}, v[[[VCNT2]]:[[ZERO]]]
define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
%cycle0 = call i64 @llvm.readcyclecounter()
@g = protected local_unnamed_addr addrspace(4) externally_initialized global i32 0, align 4
; CHECK-LABEL: rel32_neg_offset:
-; CHECK: s_getpc_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{]}}
+; CHECK: s_getpc_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]{{]}}
; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo-4
; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi+4
define i32 addrspace(4)* @rel32_neg_offset() {
; GCN-LABEL: {{^}}multi_use:
; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30
; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31
-; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; GCN: s_swappc_b64
-; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
define void @multi_use() nounwind {
entry:
%ret0 = tail call i8* @llvm.returnaddress(i32 0)
; SI-LABEL: {{^}}s_movk_i32_k0:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k1:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k2:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k3:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k4:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k5:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k6:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k7:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k8:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k9:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k10:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k11:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; SI-LABEL: {{^}}s_movk_i32_k12:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
; SI-DAG: s_mov_b32
-; SI-DAG: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
+; SI-DAG: s_load_dword [[OUT:s[0-9]+]], s[[[PTR_LO]]:[[PTR_HI]]], [[OFFSET]]
-; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
+; CI: s_load_dword [[OUT:s[0-9]+]], s[[[PTR_LO]]:[[PTR_HI]]], 0xbb8
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
; GCN: s_and_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
; GCN: s_or_b64 vcc, vcc, [[CMP1]]
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]]
define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
}
; GCN-LABEL: {{^}}s_select_v2f32:
-; GCN-DAG: s_load_dwordx4 s{{\[}}[[ALO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx4 s[[[ALO:[0-9]+]]:[[BHI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; GCN: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; GCN-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
+; GCN-DAG: s_bfe_i64 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], [[VAL]], 0x10000
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; GCN: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; GCN-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
+; GCN-DAG: s_bfe_i64 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], [[VAL]], 0x80000
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; GCN: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; GCN-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
+; GCN-DAG: s_bfe_i64 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], [[VAL]], 0x100000
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; GCN: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
-; GCN-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
+; GCN-DAG: s_bfe_i64 s[[[SLO:[0-9]+]]:[[SHI:[0-9]+]]], [[VAL]], 0x200000
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_lshl_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; GFX89: v_lshlrev_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GCN: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_lshl_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; GFX89: v_lshlrev_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GCN: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_lshl_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; GFX89: v_lshlrev_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GCN: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI: v_lshl_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]],
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; GFX89: v_lshlrev_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]],
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[SHR]]{{\]}}
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[SHR]]]
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_lshl_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; GFX89: v_lshlrev_b64 v[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]]
; GCN-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
-; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI: v_lshl_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]],
; GFX89: {{flat|global}}_load_dwordx2
-; GFX89: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; GFX89: v_lshlrev_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]],
; GCN-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GCN-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
-; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
-; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
+; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
target triple = "amdgcn-amd-amdhsa"
; CHECK-LABEL: {{^}}t0:
-; CHECK: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], s[4:5], 0x0
+; CHECK: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], s[4:5], 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, s[[PTR_HI]]
; There should be no redundant copies from PTR_HI.
; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, s[[PTR_HI]]
; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
-; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v[[[SAMPLE_LO]]:[[SAMPLE_HI]]]
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* inreg %arg, [32 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <8 x i32>] addrspace(4)* inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
; [[END]]:
; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
-; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]]
; CHECK: s_branch
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <4 x i32>] addrspace(4)* inreg %arg2, [32 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; Spans the dword boundary, so requires full shift.
; Truncated after the shift, so only low shift result is used.
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
-; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
}
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
-; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
; GCN-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
+; GCN-DAG: v_lshr_b64 v[[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]], [[VAL]], 27
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
+; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]]
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
-; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO_SHR]]{{\]}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO_BFE]]{{\]}}
+; GCN-DAG: buffer_store_dwordx2 v[[[SHR]]:[[ZERO_SHR]]]
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO_BFE]]]
define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:{{[0-9]+\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 35
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 63
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 33
; GCN-LABEL: {{^}}lshr_i64_32:
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 32
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[LO]], 8, 23
-; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%and = and i64 %val, 9223372036854775807 ; 0x7fffffffffffffff
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 35
; GCN-LABEL: {{^}}shl_i64_const_32:
; GCN-DAG: buffer_load_dword v[[HI:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 32
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 63
}
; GCN-LABEL: {{^}}trunc_shl_16_v2i32_v2i64:
-; GCN: buffer_load_dwordx4 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx4 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN-DAG: v_lshlrev_b32_e32 v[[RESHI:[0-9]+]], 16, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 v[[RESLO:[0-9]+]], 16, v[[LO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in
%shl = shl <2 x i64> %val, <i64 16, i64 16>
; GCN-LABEL: {{^}}trunc_shl_31_i32_i64_multi_use:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_lshl_b64 v{{\[}}[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]{{\]}}, [[VAL]], 31
+; GCN: v_lshl_b64 v[[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]], [[VAL]], 31
; GCN: buffer_store_dword v[[RESLO]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 31
; CHECK: v_add_u32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
-; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
+; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]]
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
-; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
+; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
}
; SI-LABEL: {{^}}test_add_shl_add_constant:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
+; SI-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], s[[Y]]
; SI: s_addk_i32 [[RESULT]], 0x3d8
}
; SI-LABEL: {{^}}test_add_shl_add_constant_inv:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
+; SI-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]]
; SI: s_addk_i32 [[TMP]], 0x3d8
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
; GCN-LABEL: {{^}}shl_base_atomicrmw_global_atomic_csub_ptr:
-; GCN-DAG: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, 2, v[4:5]
+; GCN-DAG: v_lshlrev_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]], 2, v[4:5]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 43
; GCN: v_add_co_u32 v[[EXTRA_LO:[0-9]+]], vcc_lo, 0x80, v4
; GCN: v_add_co_ci_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc_lo, 0, v5, vcc_lo
-; GCN: global_atomic_csub v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]], off offset:512 glc
-; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[EXTRA_LO]]:[[EXTRA_HI]]{{\]}}
+; GCN: global_atomic_csub v{{[0-9]+}}, v[[[LO]]:[[HI]]], [[K]], off offset:512 glc
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
%cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
; GCN-LABEL: {{^}}shl_base_atomicrmw_global_ptr:
; GCN-DAG: v_add_co_u32_e32 v[[EXTRA_LO:[0-9]+]], vcc, 0x80, v4
; GCN-DAG: v_addc_co_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc, 0, v5, vcc
-; GCN-DAG: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, 2, v[4:5]
+; GCN-DAG: v_lshlrev_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]], 2, v[4:5]
; GCN-DAG: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
-; GCN-DAG: global_atomic_and v{{\[}}[[LO]]:[[HI]]{{\]}}, [[THREE]], off offset:512
-; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[EXTRA_LO]]:[[EXTRA_HI]]{{\]}}
+; GCN-DAG: global_atomic_and v[[[LO]]:[[HI]]], [[THREE]], off offset:512
+; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
define void @shl_base_atomicrmw_global_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
%cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
; GCN-LABEL: {{^}}shl_base_global_ptr_global_atomic_fadd:
; GCN-DAG: v_add_co_u32_e32 v[[EXTRA_LO:[0-9]+]], vcc, 0x80, v4
; GCN-DAG: v_addc_co_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc, 0, v5, vcc
-; GCN-DAG: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, 2, v[4:5]
+; GCN-DAG: v_lshlrev_b64 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]], 2, v[4:5]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x42c80000
-; GCN-DAG: global_atomic_add_f32 v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]], off offset:512
-; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[EXTRA_LO]]:[[EXTRA_HI]]{{\]}}
+; GCN-DAG: global_atomic_add_f32 v[[[LO]]:[[HI]]], [[K]], off offset:512
+; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
define void @shl_base_global_ptr_global_atomic_fadd(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
%cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
; GCN-DAG: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
-; CI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
+; CI: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x1
; CI: buffer_store_dword
-; CI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
+; CI: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x3
-; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x4
+; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x4
; GFX9: global_store_dword
-; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xc
+; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0xc
; CI: buffer_store_dword
; GFX9: global_store_dword
; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
-; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
-; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
+; CI-DAG: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x1
+; CI-DAG: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x3
-; GFX9-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x4
-; GFX9-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xc
+; GFX9-DAG: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x4
+; GFX9-DAG: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0xc
; GCN-DAG: ds_write_b32
; CI: buffer_store_dword
; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0
; VI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]]
-; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
+; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]]
; VI: s_endpgm
; SI-DAG: s_cmp_eq_u32
; SI-DAG: s_cselect_b64 vcc, -1, 0
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}, vcc
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; SI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
+; SI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]]
; SI: s_endpgm
define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
}
; GCN-LABEL: @v_sint_to_fp_i64_to_f64
-; GCN: flat_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: flat_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; GCN-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; GCN-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
}
; GCN-LABEL: {{^}}s_abs_v4i16:
-; GFX9: s_load_dwordx2 s{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}, s[0:1], 0x2c
+; GFX9: s_load_dwordx2 s[[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]], s[0:1], 0x2c
; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, s[[VAL0]]
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, s[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], s[[VAL0]], [[SUB0]]
}
; GCN-LABEL: {{^}}v_abs_v4i16:
-; GFX9: global_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; GFX9: global_load_dwordx2 v[[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]]
; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, v[[VAL0]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], v[[VAL0]], [[SUB0]]
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; GCN-LABEL: ; %bb.0:
-; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[ADDR_LO:[0-9]+]]{{\:}}[[ADDR_HI:[0-9]+]]{{\]}}, 0x0
+; GCN: s_load_dword s{{[0-9]+}}, s[[[ADDR_LO:[0-9]+]]{{\:}}[[ADDR_HI:[0-9]+]]], 0x0
; GCN: s_waitcnt lgkmcnt(0)
; GCN: global_store_dword v
; GCN-LABEL: {{^}}br_scc_eq_i64_simm16:
; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 1
-; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: v_cmp_eq_u64_e32
define amdgpu_kernel void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
; GCN-LABEL: {{^}}br_scc_ne_i64_simm16:
; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 1
-; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: v_cmp_ne_u64_e32
define amdgpu_kernel void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
-; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; VI: buffer_store_dwordx2 v[[[ADD]]:[[VZERO]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
-; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; VI-NEXT: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
; GCN-LABEL: {{^}}s_sub_i32:
-; GCN: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
+; GCN: s_load_dwordx2 s[[[A:[0-9]+]]:[[B:[0-9]+]]]
; GCN: s_load_dwordx2
; GCN: s_sub_i32 s{{[0-9]+}}, s[[A]], s[[B]]
define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
}
; GCN-LABEL: {{^}}trunc_shl_i64:
-; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; VI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
-; GCN: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: s_load_dwordx2 s[[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; VI: s_load_dwordx2 s[[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GCN: s_lshl_b64 s[[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s[[[LO_SREG]]:{{[0-9]+\]}}, 2
; GCN: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: buffer_store_dword v[[LO_VREG]],
}
; GCN-LABEL: {{^}}s_trunc_i64_to_i1:
-; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
-; VI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4c
+; SI: s_load_dwordx2 s[[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
+; VI: s_load_dwordx2 s[[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4c
; GCN: s_bitcmp1_b32 s[[SLO]], 0
-; SI: s_cselect_b64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], -1, 0
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
+; SI: s_cselect_b64 s[[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], -1, 0
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s[[[VLO]]:[[VHI]]]
; VI: s_cselect_b32 {{s[0-9]+}}, 63, -12
define amdgpu_kernel void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, [8 x i32], i64 %x) {
%trunc = trunc i64 %x to i1
}
; GCN-LABEL: {{^}}v_trunc_i64_to_i1:
-; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
-; VI: flat_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
+; SI: buffer_load_dwordx2 v[[[VLO:[0-9]+]]:{{[0-9]+\]}}
+; VI: flat_load_dwordx2 v[[[VLO:[0-9]+]]:{{[0-9]+\]}}
; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]]
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]]
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}v_uint_to_fp_i64_to_f64
-; GCN: flat_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: flat_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; GCN-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; GCN-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI-DAG: s_cselect_b64 vcc, -1, 0
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, {{v[0-9]+}}, vcc
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
-; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]]
; GCN: s_endpgm
define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
}
; SI-LABEL: {{^}}constant_align4_merge_load_2_i32:
-; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; SI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(4)* %p, i32 addrspace(1)* %r) #0 {
%gep0 = getelementptr i32, i32 addrspace(4)* %p, i64 1
%v0 = load i32, i32 addrspace(4)* %p, align 4
}
; GCN-LABEL: {{^}}icmp_users_different_blocks:
-; GCN: s_load_dwordx2 s{{\[}}[[COND0:[0-9]+]]:[[COND1:[0-9]+]]{{\]}}
+; GCN: s_load_dwordx2 s[[[COND0:[0-9]+]]:[[COND1:[0-9]+]]]
; GCN: s_cmp_lt_i32 s[[COND0]], 1
; GCN: s_cbranch_scc1 [[EXIT:.L[0-9_A-Za-z]+]]
; GCN: s_cmp_gt_i32 s[[COND1]], 0{{$}}
}
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; SI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], s[[SGPR0]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
}
; GCN-LABEL: {{^}}test_use_s_v_s:
-; SI: s_load_dwordx2 s{{\[}}[[SA:[0-9]+]]:[[SB:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI: s_load_dwordx2 s[[[SA:[0-9]+]]:[[SB:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI: buffer_load_dword [[VA0:v[0-9]+]]
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI: s_load_dwordx2 s{{\[}}[[SA:[0-9]+]]:[[SB:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; VI: s_load_dwordx2 s[[[SA:[0-9]+]]:[[SB:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-NOT: v_mov_b32
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[SB]]
}
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; SI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], [[VGPR1]], s[[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
}
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; SI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[SGPR0]], s[[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
}
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s[[SGPR0]]
; GCN-DAG: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
}
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s[[SGPR0]]
; GCN-DAG: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
}
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]{{\:}}[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s[[SGPR0]]
; GCN-DAG: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[SGPR1]]
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
}
; GCN-LABEL: {{^}}test_s0_s1_k_f32:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; SI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI-DAG: s_load_dwordx2 s[[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
; FIXME: Immediate in SGPRs just copied to VGPRs
; GCN-LABEL: {{^}}test_s0_s1_k_f64:
; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x1d|0x74}}
+; GCN-DAG: s_load_dwordx2 s[[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x1d|0x74}}
; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
-; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
+; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[[[VZERO]]:[[VK0_SUB1]]]
; Same zero component is re-used for half of each immediate.
; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
-; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
+; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v[[[VS1_SUB0]]:[[VS1_SUB1]]], v[[[VZERO]]:[[VK1_SUB1]]]
; GCN: buffer_store_dwordx2 [[RESULT0]]
; GCN: buffer_store_dwordx2 [[RESULT1]]
; (select (cmp (sgprX, constant)), constant, sgprZ)
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
-; GCN: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s[0:1], {{0x4c|0x13}}
+; GCN: s_load_dwordx2 s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s[0:1], {{0x4c|0x13}}
; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
}
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; GCN-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
}
; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
-; GCN: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
-; GCN-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
+; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]]
+; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]]
+; GCN-DAG: v_cmp_lt_i64_e32 vcc, -1, v[[[X_LO]]:[[X_HI]]]
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
; A little more complicated case where more sub-dword loads could be coalesced
; if they are not widening earlier.
; GCN-LABEL: {{^}}load_4i16:
-; GCN: s_load_dwordx2 s{{\[}}[[D0:[0-9]+]]:[[D1:[0-9]+]]{{\]}}, s[4:5], 0x4
+; GCN: s_load_dwordx2 s[[[D0:[0-9]+]]:[[D1:[0-9]+]]], s[4:5], 0x4
; GCN-NOT: s_load_dword {{s[0-9]+}}, s[4:5], 0x4
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s[[D0]], 16
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s[[D1]], 16
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
; OFFREG is offset system SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 768
; GFX1032-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], 0{{$}}
; GFX1032-DAG: v_cmp_eq_f32_e64 s[[C_LO:[0-9]+]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
-; GFX1064: v_cmp_eq_f32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
+; GFX1064: v_cmp_eq_f32_e64 s[[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]]
-; GCN: store_dwordx2 v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]], s
+; GCN: store_dwordx2 v{{[0-9]+}}, v[[[V_LO]]:[[V_HI]]], s
define amdgpu_kernel void @test_intr_fcmp_i64(i64 addrspace(1)* %out, float %src, float %a) {
%temp = call float @llvm.fabs.f32(float %a)
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %src, float %temp, i32 1)
; GFX1032-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], 0{{$}}
; GFX1032-DAG: v_cmp_eq_u32_e64 [[C_LO:vcc_lo|s[0-9]+]], 0x64, {{s[0-9]+}}
; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[C_LO]]
-; GFX1064: v_cmp_eq_u32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], 0x64, {{s[0-9]+}}
+; GFX1064: v_cmp_eq_u32_e64 s[[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], 0x64, {{s[0-9]+}}
; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]]
-; GCN: store_dwordx2 v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]], s
+; GCN: store_dwordx2 v{{[0-9]+}}, v[[[V_LO]]:[[V_HI]]], s
define amdgpu_kernel void @test_intr_icmp_i64(i64 addrspace(1)* %out, i32 %src) {
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
store i64 %result, i64 addrspace(1)* %out
; GCN-LABEL: {{^}}test_intr_fcmp_i32:
; GFX1032-DAG: v_cmp_eq_f32_e64 s[[C_LO:[0-9]+]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
-; GFX1064: v_cmp_eq_f32_e64 s{{\[}}[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
+; GFX1064: v_cmp_eq_f32_e64 s[[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}|
; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]
; GCN: store_dword v{{[0-9]+}}, v[[V_LO]], s
define amdgpu_kernel void @test_intr_fcmp_i32(i32 addrspace(1)* %out, float %src, float %a) {
; GCN-LABEL: {{^}}test_intr_icmp_i32:
; GFX1032-DAG: v_cmp_eq_u32_e64 s[[C_LO:[0-9]+]], 0x64, {{s[0-9]+}}
; GFX1032-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]{{$}}
-; GFX1064: v_cmp_eq_u32_e64 s{{\[}}[[C_LO:[0-9]+]]:{{[0-9]+}}], 0x64, {{s[0-9]+}}
+; GFX1064: v_cmp_eq_u32_e64 s[[[C_LO:[0-9]+]]:{{[0-9]+}}], 0x64, {{s[0-9]+}}
; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]{{$}}
; GCN: store_dword v{{[0-9]+}}, v[[V_LO]], s
define amdgpu_kernel void @test_intr_icmp_i32(i32 addrspace(1)* %out, i32 %src) {
; GCN: v_cmp_u_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
-; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v[[[VSEL]]:[[VSEL_EXT]]]
define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
bb:
%tmp = extractelement <4 x double> %arg, i64 0
; GCN: v_cmp_eq_u64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
-; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v[[[VSEL]]:[[VSEL_EXT]]]
define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
bb:
%tmp = extractelement <4 x i64> %arg, i64 0
%tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0)
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0)
-; GFX9: s_or_saveexec_b64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, -1
+; GFX9: s_or_saveexec_b64 s[{{[0-9]+}}:{{[0-9]+}}], -1
; GFX9-DAG: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
; GFX9-O3-DAG: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
%tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]]
%tmp138 = icmp eq i32 %tmp122, %tmp137
%tmp139 = sext i1 %tmp138 to i32
%tmp140 = shl nsw i32 %tmp139, 1
; GFX9-O3: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
; GFX9-O0: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]]
-; GFX9-O0: buffer_store_dword v[[FIRST]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]]
+; GFX9-O0: buffer_store_dword v[[FIRST]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]]
%tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false)
%tmp121 = add i32 %tmp105, %tmp120
%tmp122 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp121)
; GFX9-O3: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
; GFX9-O0: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]]
-; GFX9-O0: buffer_store_dword v[[SECOND]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]]
+; GFX9-O0: buffer_store_dword v[[SECOND]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]]
%tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false)
%tmp136 = add i32 %tmp107, %tmp135
%tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
merge:
%merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ]
; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]]
-; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]]
-; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET]]
+; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET]]
+; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]]
%tmp138 = icmp eq i32 %tmp122, %merge_value
%tmp139 = sext i1 %tmp138 to i32
%tmp140 = shl nsw i32 %tmp139, 1
; GFX9-LABEL: {{^}}call_i64:
define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
-; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}}
+; GFX9: s_load_dwordx2 s[[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]]
-; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}}
+; GFX9-O0: s_mov_b64 s[[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]], 0{{$}}
; GFX9-O0-DAG: v_mov_b32_e32 v9, s[[ARG_HI]]
; GFX9-O0-DAG: v_mov_b32_e32 v8, s[[ARG_LO]]
%tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0)
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0)
-; GFX9: s_or_saveexec_b64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, -1
+; GFX9: s_or_saveexec_b64 s[{{[0-9]+}}:{{[0-9]+}}], -1
; GFX9-DAG: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
; GFX9-O3-DAG: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
%tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136)
; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]]
%tmp138 = icmp eq i32 %tmp122, %tmp137
%tmp139 = sext i1 %tmp138 to i32
%tmp140 = shl nsw i32 %tmp139, 1
; GFX9-O3: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
; GFX9-O0: v_add_u32_e64 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]]
-; GFX9-O0: buffer_store_dword v[[FIRST]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]]
+; GFX9-O0: buffer_store_dword v[[FIRST]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET:[0-9]+]]
%tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false)
%tmp121 = add i32 %tmp105, %tmp120
%tmp122 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp121)
; GFX9-O3: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
; GFX9-O0: v_add_u32_e64 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]]
-; GFX9-O0: buffer_store_dword v[[SECOND]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]]
+; GFX9-O0: buffer_store_dword v[[SECOND]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET:[0-9]+]]
%tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false)
%tmp136 = add i32 %tmp107, %tmp135
%tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136)
merge:
%merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ]
; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]]
-; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]]
-; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[FIRST_IMM_OFFSET]]
+; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:[[SECOND_IMM_OFFSET]]
+; GFX9-O0: v_cmp_eq_u32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[FIRST]], v[[SECOND]]
%tmp138 = icmp eq i32 %tmp122, %merge_value
%tmp139 = sext i1 %tmp138 to i32
%tmp140 = shl nsw i32 %tmp139, 1
; GFX9-LABEL: {{^}}strict_wwm_call_i64:
define amdgpu_kernel void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
-; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}}
+; GFX9: s_load_dwordx2 s[[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]]
-; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}}
+; GFX9-O0: s_mov_b64 s[[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]], 0{{$}}
; GFX9-O0-DAG: v_mov_b32_e32 v9, s[[ARG_HI]]
; GFX9-O0-DAG: v_mov_b32_e32 v8, s[[ARG_LO]]
}
; FUNC-LABEL: {{^}}scalar_xor_literal_i64:
-; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
+; SI: s_load_dwordx2 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
; SI-DAG: s_xor_b32 s[[RES_HI:[0-9]+]], s{{[0-9]+}}, 0xf237b
; SI-DAG: s_xor_b32 s[[RES_LO:[0-9]+]], s{{[0-9]+}}, 0x3039
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
}
; FUNC-LABEL: {{^}}scalar_xor_literal_multi_use_i64:
-; SI: s_load_dwordx4 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; SI: s_load_dwordx4 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
-; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
}
; FUNC-LABEL: {{^}}scalar_xor_inline_imm_i64:
-; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; SI: s_load_dwordx2 s[[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; SI-NOT: xor_b32
; SI: s_xor_b32 s[[VAL_LO]], s{{[0-9]+}}, 63
; SI-NOT: xor_b32
; SI-NOT: xor_b32
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s{{[0-9]+}}
; SI-NOT: xor_b32
-; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
define amdgpu_kernel void @scalar_xor_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
%or = xor i64 %a, 63
store i64 %or, i64 addrspace(1)* %out
}
; FUNC-LABEL: {{^}}vector_xor_i64_neg_inline_imm:
-; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI: v_xor_b32_e32 {{v[0-9]+}}, -8, v[[LO_VREG]]
; SI: v_xor_b32_e32 {{v[0-9]+}}, -1, {{.*}}
; SI: s_endpgm
}
; FUNC-LABEL: {{^}}vector_xor_literal_i64:
-; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: buffer_load_dwordx2 v[[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]],
; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
; GCN: {{^}}s_mad_zext_i32_to_i64:
; GCN: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}
-; GCN: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
+; GCN: buffer_store_dwordx2 v[0:[[V_ZERO]]]
define amdgpu_kernel void @s_mad_zext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) #0 {
entry:
%tmp0 = mul i32 %a, %b
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}zext_or_operand_i64:
-; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: buffer_load_dword v[[LD32:[0-9]+]]
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @zext_or_operand_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%ld.64 = load volatile i64, i64 addrspace(1)* %in0
%ld.32 = load volatile i32, i32 addrspace(1)* %in1
}
; GCN-LABEL: {{^}}zext_or_operand_commute_i64:
-; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: buffer_load_dword v[[LD32:[0-9]+]]
; GCN-NOT: _or_
; GCN-NOT: v[[HI]]
; GCN-NOT: v[[HI]]
; GCN-NOT: _or_
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
define amdgpu_kernel void @zext_or_operand_commute_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%ld.64 = load volatile i64, i64 addrspace(1)* %in0
%ld.32 = load volatile i32, i32 addrspace(1)* %in1
; CHECK: movw [[BASE:r[0-9]+]], :lower16:static_val
; CHECK: movt [[BASE]], :upper16:static_val
; ldm is not formed when the coalescer failed to coalesce everything.
-; CHECK: ldrd r2, [[TMP:r[0-9]+]], {{\[}}[[BASE]]{{\]}}
+; CHECK: ldrd r2, [[TMP:r[0-9]+]], [[[BASE]]]
; CHECK: movw r0, #555
define i32 @main() {
entry:
; CHECK: movw [[BASE:r[0-9]+]], :lower16:static_val
; CHECK: movt [[BASE]], :upper16:static_val
; ldm is not formed when the coalescer failed to coalesce everything.
-; CHECK: ldrd r2, [[TMP:r[0-9]+]], {{\[}}[[BASE]]{{\]}}
+; CHECK: ldrd r2, [[TMP:r[0-9]+]], [[[BASE]]]
; CHECK: movw r0, #555
define i32 @main_fixed_arg() {
entry:
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldr r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldr r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK-NEXT: .long i(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldr r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldr r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK-NEXT: .long j(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldr r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldr r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK-NEXT: .long k(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldr r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldr r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK-NEXT: .long l(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldr r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldr r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK: .long m(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldrh r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldrh r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK: .long n(SECREL32)
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
-; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
-; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
+; CHECK-NEXT: ldr [[INDEX:r[0-9]]], [[[TLS_INDEX]]]
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], [[[TEB]], #44]
+; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], [[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
-; CHECK-NEXT: ldrb r0, {{\[}}[[TLS]], [[SLOT]]]
+; CHECK-NEXT: ldrb r0, [[[TLS]], [[SLOT]]]
; CHECK: [[CPI]]:
; CHECK-NEXT: .long o(SECREL32)
@z = global i64 20, align 8
; CHECK-LABEL: main:
-; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}}
-; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4]
+; CHECK: ldr [[R2:r[0-9]+]], [[[R1:r[0-9]+]]]
+; CHECK-NEXT: ldr [[R1]], [[[R1]], #4]
; CHECK: mov [[R4:r[0-9]+]], [[R1]]
-; CHECK: ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}}
-; CHECK-NEXT: ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4]
+; CHECK: ldr [[R5:r[0-9]+]], [[[R1]]]
+; CHECK-NEXT: ldr [[R6:r[0-9]+]], [[[R1]], #4]
; CHECK: mov [[R7:r[0-9]+]], [[R6]]
define arm_aapcs_vfpcc i32 @main() #0 {
; CHECK: dmb ish
; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexb [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexb [[OLD:[lr0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], [[[ADDR]]]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: dmb ish
; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexh [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexh [[OLD:[lr0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], [[[ADDR]]]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrex [[OLD:r[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], [[[ADDR]]]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLDLO]], r6
; CHECK: cmpeq [[OLDHI]], r7
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [[[ADDR]]]
; CHECK: cmp [[OLDLO]], {{r[0-9]+}}
; CHECK: cmpeq [[OLDHI]], {{r[0-9]+}}
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [[[ADDR]]]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK-LABEL: fn1:
; CHECK: adr [[base:r[0-9]+]], .LCPI0_0
; CHECK-NOT: ldrh {{r[0-9]+}}, .LCPI0_0
-; CHECK: ldrh r{{[0-9]+}}, {{\[}}[[base]]]
+; CHECK: ldrh r{{[0-9]+}}, [[[base]]]
define hidden i32 @fn1() #0 {
entry:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 undef, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false)
; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
-; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]]
+; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], [[[R1]]]
; ARM-LONG-ELF: movw [[R1:r[0-9]*]], :lower16:bar
; ARM-LONG-ELF: movt [[R1]], :upper16:bar
-; ARM-LONG-ELF: ldr [[R:r[0-9]+]], {{\[}}[[R1]]]
+; ARM-LONG-ELF: ldr [[R:r[0-9]+]], [[[R1]]]
; ARM-LONG: blx [[R]]
; THUMB-LABEL: @t10
; THUMB-LONG-LABEL: @t10
; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
-; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}}
+; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], [[[R1]]]
; THUMB-LONG: blx [[R]]
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
-; ARM-MACHO-NEXT: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM-MACHO-NEXT: str [[REG1]], {{\[}}[[REG0]], #4]
-; ARM-MACHO-NEXT: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM-MACHO-NEXT: str [[REG2]], {{\[}}[[REG0]], #8]
-; ARM-MACHO-NEXT: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM-MACHO-NEXT: strh [[REG3]], {{\[}}[[REG0]], #12]
+; ARM-MACHO-NEXT: ldr [[REG1:r[0-9]+]], [[[REG0]], #16]
+; ARM-MACHO-NEXT: str [[REG1]], [[[REG0]], #4]
+; ARM-MACHO-NEXT: ldr [[REG2:r[0-9]+]], [[[REG0]], #20]
+; ARM-MACHO-NEXT: str [[REG2]], [[[REG0]], #8]
+; ARM-MACHO-NEXT: ldrh [[REG3:r[0-9]+]], [[[REG0]], #24]
+; ARM-MACHO-NEXT: strh [[REG3]], [[[REG0]], #12]
; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
; ARM-ELF: movt [[REG0]], :upper16:temp
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
-; THUMB: str [[REG2]], {{\[}}[[REG1]], #4]
-; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20]
-; THUMB: str [[REG3]], {{\[}}[[REG1]], #8]
-; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24]
-; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12]
+; THUMB: ldr [[REG2:r[0-9]+]], [[[REG1]], #16]
+; THUMB: str [[REG2]], [[[REG1]], #4]
+; THUMB: ldr [[REG3:r[0-9]+]], [[[REG1]], #20]
+; THUMB: str [[REG3]], [[[REG1]], #8]
+; THUMB: ldrh [[REG4:r[0-9]+]], [[[REG1]], #24]
+; THUMB: strh [[REG4]], [[[REG1]], #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
-; ARM-MACHO: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM-MACHO-NEXT: strh [[REG1]], {{\[}}[[REG0]], #4]
-; ARM-MACHO-NEXT: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18]
-; ARM-MACHO-NEXT: strh [[REG2]], {{\[}}[[REG0]], #6]
-; ARM-MACHO-NEXT: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM-MACHO-NEXT: strh [[REG3]], {{\[}}[[REG0]], #8]
-; ARM-MACHO-NEXT: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22]
-; ARM-MACHO-NEXT: strh [[REG4]], {{\[}}[[REG0]], #10]
-; ARM-MACHO-NEXT: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM-MACHO-NEXT: strh [[REG5]], {{\[}}[[REG0]], #12]
+; ARM-MACHO: ldrh [[REG1:r[0-9]+]], [[[REG0]], #16]
+; ARM-MACHO-NEXT: strh [[REG1]], [[[REG0]], #4]
+; ARM-MACHO-NEXT: ldrh [[REG2:r[0-9]+]], [[[REG0]], #18]
+; ARM-MACHO-NEXT: strh [[REG2]], [[[REG0]], #6]
+; ARM-MACHO-NEXT: ldrh [[REG3:r[0-9]+]], [[[REG0]], #20]
+; ARM-MACHO-NEXT: strh [[REG3]], [[[REG0]], #8]
+; ARM-MACHO-NEXT: ldrh [[REG4:r[0-9]+]], [[[REG0]], #22]
+; ARM-MACHO-NEXT: strh [[REG4]], [[[REG0]], #10]
+; ARM-MACHO-NEXT: ldrh [[REG5:r[0-9]+]], [[[REG0]], #24]
+; ARM-MACHO-NEXT: strh [[REG5]], [[[REG0]], #12]
; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
; ARM-ELF: movt [[REG0]], :upper16:temp
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr [[REG1:r[0-9]+]], [r0]
-; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
-; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4]
-; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18]
-; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6]
-; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20]
-; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8]
-; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22]
-; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10]
-; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24]
-; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12]
+; THUMB: ldrh [[REG2:r[0-9]+]], [[[REG1]], #16]
+; THUMB: strh [[REG2]], [[[REG1]], #4]
+; THUMB: ldrh [[REG3:r[0-9]+]], [[[REG1]], #18]
+; THUMB: strh [[REG3]], [[[REG1]], #6]
+; THUMB: ldrh [[REG4:r[0-9]+]], [[[REG1]], #20]
+; THUMB: strh [[REG4]], [[[REG1]], #8]
+; THUMB: ldrh [[REG5:r[0-9]+]], [[[REG1]], #22]
+; THUMB: strh [[REG5]], [[[REG1]], #10]
+; THUMB: ldrh [[REG6:r[0-9]+]], [[[REG1]], #24]
+; THUMB: strh [[REG6]], [[[REG1]], #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
-; ARM-MACHO: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
-; ARM-MACHO-NEXT: strb [[REG1]], {{\[}}[[REG0]], #4]
-; ARM-MACHO-NEXT: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17]
-; ARM-MACHO-NEXT: strb [[REG2]], {{\[}}[[REG0]], #5]
-; ARM-MACHO-NEXT: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18]
-; ARM-MACHO-NEXT: strb [[REG3]], {{\[}}[[REG0]], #6]
-; ARM-MACHO-NEXT: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19]
-; ARM-MACHO-NEXT: strb [[REG4]], {{\[}}[[REG0]], #7]
-; ARM-MACHO-NEXT: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20]
-; ARM-MACHO-NEXT: strb [[REG5]], {{\[}}[[REG0]], #8]
-; ARM-MACHO-NEXT: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21]
-; ARM-MACHO-NEXT: strb [[REG6]], {{\[}}[[REG0]], #9]
-; ARM-MACHO-NEXT: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22]
-; ARM-MACHO-NEXT: strb [[REG7]], {{\[}}[[REG0]], #10]
-; ARM-MACHO-NEXT: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23]
-; ARM-MACHO-NEXT: strb [[REG8]], {{\[}}[[REG0]], #11]
-; ARM-MACHO-NEXT: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24]
-; ARM-MACHO-NEXT: strb [[REG9]], {{\[}}[[REG0]], #12]
-; ARM-MACHO-NEXT: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #25]
-; ARM-MACHO-NEXT: strb [[REG10]], {{\[}}[[REG0]], #13]
+; ARM-MACHO: ldrb [[REG1:r[0-9]+]], [[[REG0]], #16]
+; ARM-MACHO-NEXT: strb [[REG1]], [[[REG0]], #4]
+; ARM-MACHO-NEXT: ldrb [[REG2:r[0-9]+]], [[[REG0]], #17]
+; ARM-MACHO-NEXT: strb [[REG2]], [[[REG0]], #5]
+; ARM-MACHO-NEXT: ldrb [[REG3:r[0-9]+]], [[[REG0]], #18]
+; ARM-MACHO-NEXT: strb [[REG3]], [[[REG0]], #6]
+; ARM-MACHO-NEXT: ldrb [[REG4:r[0-9]+]], [[[REG0]], #19]
+; ARM-MACHO-NEXT: strb [[REG4]], [[[REG0]], #7]
+; ARM-MACHO-NEXT: ldrb [[REG5:r[0-9]+]], [[[REG0]], #20]
+; ARM-MACHO-NEXT: strb [[REG5]], [[[REG0]], #8]
+; ARM-MACHO-NEXT: ldrb [[REG6:r[0-9]+]], [[[REG0]], #21]
+; ARM-MACHO-NEXT: strb [[REG6]], [[[REG0]], #9]
+; ARM-MACHO-NEXT: ldrb [[REG7:r[0-9]+]], [[[REG0]], #22]
+; ARM-MACHO-NEXT: strb [[REG7]], [[[REG0]], #10]
+; ARM-MACHO-NEXT: ldrb [[REG8:r[0-9]+]], [[[REG0]], #23]
+; ARM-MACHO-NEXT: strb [[REG8]], [[[REG0]], #11]
+; ARM-MACHO-NEXT: ldrb [[REG9:r[0-9]+]], [[[REG0]], #24]
+; ARM-MACHO-NEXT: strb [[REG9]], [[[REG0]], #12]
+; ARM-MACHO-NEXT: ldrb [[REG10:r[0-9]+]], [[[REG0]], #25]
+; ARM-MACHO-NEXT: strb [[REG10]], [[[REG0]], #13]
; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
; ARM-ELF: movt [[REG0]], :upper16:temp
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr [[REG0:r[0-9]+]], [r0]
-; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16]
-; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4]
-; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17]
-; THUMB: strb [[REG3]], {{\[}}[[REG0]], #5]
-; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18]
-; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6]
-; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19]
-; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7]
-; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20]
-; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8]
-; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21]
-; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9]
-; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22]
-; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10]
-; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23]
-; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11]
-; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24]
-; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12]
-; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25]
-; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13]
+; THUMB: ldrb [[REG2:r[0-9]+]], [[[REG0]], #16]
+; THUMB: strb [[REG2]], [[[REG0]], #4]
+; THUMB: ldrb [[REG3:r[0-9]+]], [[[REG0]], #17]
+; THUMB: strb [[REG3]], [[[REG0]], #5]
+; THUMB: ldrb [[REG4:r[0-9]+]], [[[REG0]], #18]
+; THUMB: strb [[REG4]], [[[REG0]], #6]
+; THUMB: ldrb [[REG5:r[0-9]+]], [[[REG0]], #19]
+; THUMB: strb [[REG5]], [[[REG0]], #7]
+; THUMB: ldrb [[REG6:r[0-9]+]], [[[REG0]], #20]
+; THUMB: strb [[REG6]], [[[REG0]], #8]
+; THUMB: ldrb [[REG7:r[0-9]+]], [[[REG0]], #21]
+; THUMB: strb [[REG7]], [[[REG0]], #9]
+; THUMB: ldrb [[REG8:r[0-9]+]], [[[REG0]], #22]
+; THUMB: strb [[REG8]], [[[REG0]], #10]
+; THUMB: ldrb [[REG9:r[0-9]+]], [[[REG0]], #23]
+; THUMB: strb [[REG9]], [[[REG0]], #11]
+; THUMB: ldrb [[REG10:r[0-9]+]], [[[REG0]], #24]
+; THUMB: strb [[REG10]], [[[REG0]], #12]
+; THUMB: ldrb [[REG11:r[0-9]+]], [[[REG0]], #25]
+; THUMB: strb [[REG11]], [[[REG0]], #13]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
ret void
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
store i32 0, i32* %add.ptr, align 4
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4]
+; THUMB: str r{{[0-9]}}, [[[REG]], #-4]
ret void
}
%add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
store i32 0, i32* %add.ptr, align 4
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252]
+; THUMB: str r{{[0-9]}}, [[[REG]], #-252]
ret void
}
; THUMB: movw [[REG:r[0-9]+]], #65280
; THUMB: movt [[REG]], #65535
; THUMB: add [[PTR]], [[REG]]
-; THUMB: str [[VAL]], {{\[}}[[PTR]]]
+; THUMB: str [[VAL]], [[[PTR]]]
ret void
}
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
store i16 0, i16* %add.ptr, align 2
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2]
+; THUMB: strh r{{[0-9]}}, [[[REG]], #-2]
ret void
}
%add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
store i16 0, i16* %add.ptr, align 2
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254]
+; THUMB: strh r{{[0-9]}}, [[[REG]], #-254]
ret void
}
; THUMB: movw [[REG:r[0-9]+]], #65280
; THUMB: movt [[REG]], #65535
; THUMB: add [[PTR]], [[REG]]
-; THUMB: strh [[VAL]], {{\[}}[[PTR]]]
+; THUMB: strh [[VAL]], [[[PTR]]]
ret void
}
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
store i8 0, i8* %add.ptr, align 1
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1]
+; THUMB: strb r{{[0-9]}}, [[[REG]], #-1]
ret void
}
%add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
store i8 0, i8* %add.ptr, align 1
; THUMB: mov [[REG:r[0-9]+]], r0
-; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255]
+; THUMB: strb r{{[0-9]}}, [[[REG]], #-255]
ret void
}
; THUMB: movw [[REG:r[0-9]+]], #65280
; THUMB: movt [[REG]], #65535
; THUMB: add [[PTR]], [[REG]]
-; THUMB: strb [[VAL]], {{\[}}[[PTR]]]
+; THUMB: strb [[VAL]], [[[PTR]]]
ret void
}
; ARM: movw [[REG1:r[0-9]+]], #0
; ARM: mvn [[REG2:r[0-9]+]], #255
; ARM: add [[REG0:r[0-9]+]], r1, [[REG2]]
-; ARM: strh [[REG1]], {{\[}}[[REG0]]]
+; ARM: strh [[REG1]], [[[REG0]]]
ret void
}
; ARM: mov r1, r0
; ARM: movw [[REG1:r[0-9]+]], #0
; ARM: add [[REG0:r[0-9]+]], r1, #256
-; ARM: strh [[REG1]], {{\[}}[[REG0]]]
+; ARM: strh [[REG1]], [[[REG0]]]
ret void
}
; ARM: VarArg
; ARM: mov [[FP:r[0-9]+]], sp
; ARM: sub sp, sp, #32
-; ARM: ldr r1, {{\[}}[[FP]], #-4]
-; ARM: ldr r2, {{\[}}[[FP]], #-8]
-; ARM: ldr r3, {{\[}}[[FP]], #-12]
+; ARM: ldr r1, [[[FP]], #-4]
+; ARM: ldr r2, [[[FP]], #-8]
+; ARM: ldr r3, [[[FP]], #-12]
; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16]
; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12]
; ARM: movw r0, #5
; THUMB: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
; THUMB: ldr [[REG:r[0-9]+]], [r0]
-; THUMB: ldr [[REG1:r[0-9]+]], {{\[}}[[REG]]]
+; THUMB: ldr [[REG1:r[0-9]+]], [[[REG]]]
; THUMB: adds [[REG1]], #1
; THUMB: {{(movw r1, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
; THUMB: {{(movt r1, :upper16:L_test4g\$non_lazy_ptr)?}}
; THUMB: ldr [[REG2:r[0-9]+]], [r1]
-; THUMB: str [[REG1]], {{\[}}[[REG2]]]
+; THUMB: str [[REG1]], [[[REG2]]]
; ARM-MACHO: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-MACHO: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
; ARM-ELF: movw [[REG:r[0-9]+]], :lower16:test4g
; ARM-ELF: movt [[REG]], :upper16:test4g
-; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG]]]
+; ARM: ldr [[REG1:r[0-9]+]], [[[REG]]]
; ARM: add [[REG2:r[0-9]+]], [[REG1]], #1
; ARM-MACHO: {{(movw r1, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}}
; ARM-ELF: movw [[REG3:r[0-9]+]], :lower16:test4g
; ARM-ELF: movt [[REG3]], :upper16:test4g
-; ARM: str [[REG2]], {{\[}}[[REG3]]]
+; ARM: str [[REG2]], [[[REG3]]]
}
; ARM: @urem_fold
entry:
; CHECK-LABEL: load_256:
; CHECK: add [[ADDR:r[0-9]+]], r0, #512
-; CHECK: vldr.16 {{s[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: vldr.16 {{s[0-9]+}}, [[[ADDR]]]
%arrayidx = getelementptr inbounds half, half* %in, i32 256
%load = load half, half* %arrayidx, align 2
store half %load, half* %out
entry:
; CHECK-LABEL: load_neg_256:
; CHECK: sub [[ADDR:r[0-9]+]], r0, #512
-; CHECK: vldr.16 {{s[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: vldr.16 {{s[0-9]+}}, [[[ADDR]]]
%arrayidx = getelementptr inbounds half, half* %in, i32 -256
%load = load half, half* %arrayidx, align 2
store half %load, half* %out
; CHECK-LABEL: store_256:
%load = load half, half* %in, align 2
; CHECK: add [[ADDR:r[0-9]+]], r1, #512
-; CHECK: vstr.16 {{s[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: vstr.16 {{s[0-9]+}}, [[[ADDR]]]
%arrayidx = getelementptr inbounds half, half* %out, i32 256
store half %load, half* %arrayidx
ret void
; CHECK-LABEL: store_neg_256:
%load = load half, half* %in, align 2
; CHECK: sub [[ADDR:r[0-9]+]], r1, #512
-; CHECK: vstr.16 {{s[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
+; CHECK: vstr.16 {{s[0-9]+}}, [[[ADDR]]]
%arrayidx = getelementptr inbounds half, half* %out, i32 -256
store half %load, half* %arrayidx
ret void
; CHECK-LABEL: test:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #4]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #4]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #4]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]]]
%0 = load volatile i64, i64* @x, align 8
store volatile i64 %0, i64* @y, align 8
ret void
; CHECK-LABEL: test_offset:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #-4]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #-4]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #-4]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #-4]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #-4]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #-4]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]]]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #-4]
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 -4) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 -4) to i64*), align 8
ret void
; CHECK-LABEL: test_offset_1:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #255]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #255]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #255]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #255]
; CHECK-T2: adds [[ADDR0:r[0-9]+]], #255
; CHECK-T2-NEXT: adds [[ADDR1:r[0-9]+]], #255
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #255]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #259]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #259]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #255]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #255]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]], #259]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]], #259]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #255]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 255) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 255) to i64*), align 8
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #256
; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #256
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #256]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #256]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #256]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #256]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #256]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #260]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #260]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #256]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #256]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]], #260]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]], #260]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #256]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 256) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 256) to i64*), align 8
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1020
; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1020
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #1020]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #1020]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #1020]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1024]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1020]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #1020]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #1020]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1020) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1020) to i64*), align 8
; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1024
; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1024
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: add.w [[ADDR0]], [[ADDR0]], #1024
; CHECK-T2-NEXT: add.w [[ADDR1]], [[ADDR1]], #1024
-; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
-; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
-; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1028]
-; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1028]
-; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1024]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], [[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], [[[ADDR0]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R1]], [[[ADDR1]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R0]], [[[ADDR1]], #1024]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1024) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1024) to i64*), align 8
; ARM: ldr [[R1:r[0-9]+]], LCPI
; ARM: add [[R_NEXTADDR_b:r[0-9]+]], pc, [[R_NEXTADDR]]
; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
-; ARM: str [[R1b]], {{\[}}[[R_NEXTADDR_b]]]
+; ARM: str [[R1b]], [[[R_NEXTADDR_b]]]
; THUMB-LABEL: %L1
; THUMB: ldr [[R2:r[0-9]+]], LCPI
; T1-LABEL: test_tbh:
; T1: lsls [[x:r[0-9]+]], r4, #1
; T1: add [[x]], pc
-; T1: ldrh [[x]], {{\[}}[[x]], #4]
+; T1: ldrh [[x]], [[[x]], #4]
; T1: lsls [[x]], [[x]], #1
; T1: [[ANCHOR:.LCPI[0-9_]+]]:
; T1: add pc, [[x]]
entry:
; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}}
-; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]]
-; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
+; A8: ldr [[BASE:r[0-9]+]], [[[BASER]]]
+; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], [[[BASE]], #4]
; A8-NEXT: add [[FIELD2]], [[FIELD1]]
-; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}}
+; A8-NEXT: str [[FIELD2]], [[[BASE]]]
; CONSERVATIVE-NOT: ldrd
%orig_blocks = alloca [256 x i16], align 2
%0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind
; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
-; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces.
-; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
-; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+; Read '[[[LB]]]' as '[ [[LB]] ]' without the spaces: a literal '[', then the variable [[LB]], then a literal ']'.
+; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, [[[LB]]]
+; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, [[[SB]]]
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 17, i1 false)
ret void
}
; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
; CHECKV6-NEXT: ldm{{(\.w)?}} [[LB]]!,
; CHECKV6-NEXT: stm{{(\.w)?}} [[SB]]!,
-; CHECKV6-DAG: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
-; CHECKV6-DAG: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
-; CHECKV6-DAG: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
-; CHECKV6-DAG: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+; CHECKV6-DAG: ldrh{{(\.w)?}} {{.*}}, [[[LB]]]
+; CHECKV6-DAG: ldrb{{(\.w)?}} {{.*}}, [[[LB]], #2]
+; CHECKV6-DAG: strb{{(\.w)?}} {{.*}}, [[[SB]], #2]
+; CHECKV6-DAG: strh{{(\.w)?}} {{.*}}, [[[SB]]]
; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
-; CHECKV7: ldr{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #11]
-; CHECKV7-NEXT: str{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #11]
+; CHECKV7: ldr{{(\.w)?}} {{.*}}, [[[LB]], #11]
+; CHECKV7-NEXT: str{{(\.w)?}} {{.*}}, [[[SB]], #11]
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false)
ret void
}
;
; setjmp sequence:
; CHECK: add [[PCREG:r[0-9]+]], pc, #8
-; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: str [[PCREG]], [[[BUFREG:r[0-9]+]], #4]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: add pc, pc, #0
; CHECK-NEXT: mov r0, #1
;
; longjmp sequence:
; CHECK: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8]
-; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
-; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], [[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, [[[BUFREG]]]
; CHECK-NEXT: bx [[DESTREG]]
; CHECK-LINUX: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8]
-; CHECK-LINUX-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
-; CHECK-LINUX-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
-; CHECK-LINUX-NEXT: ldr r11, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-LINUX-NEXT: ldr [[DESTREG:r[0-9]+]], [[[BUFREG]], #4]
+; CHECK-LINUX-NEXT: ldr r7, [[[BUFREG]]]
+; CHECK-LINUX-NEXT: ldr r11, [[[BUFREG]]]
; CHECK-LINUX-NEXT: bx [[DESTREG]]
; CHECK-WIN32: ldr.w r11, [{{\s*}}[[BUFREG:r[0-9]+]]]
-; CHECK-WIN32-NEXT: ldr.w sp, {{\[}}[[BUFREG]], #8]
-; CHECK-WIN32-NEXT: ldr.w pc, {{\[}}[[BUFREG]], #4]
+; CHECK-WIN32-NEXT: ldr.w sp, [[[BUFREG]], #8]
+; CHECK-WIN32-NEXT: ldr.w pc, [[[BUFREG]], #4]
define void @foobar() {
entry:
%buf = alloca [5 x i8*], align 4
;
; setjmp sequence:
; CHECK: add [[PCREG:r[0-9]+]], pc, #8
-; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: str [[PCREG]], [[[BUFREG:r[0-9]+]], #4]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: add pc, pc, #0
; CHECK-NEXT: mov r0, #1
;
; longjmp sequence:
; CHECK: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8]
-; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
-; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], [[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, [[[BUFREG]]]
; CHECK-NEXT: bx [[DESTREG]]
define void @combine_sjlj_eh_and_setjmp_longjmp() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
entry:
!2 = !{i32 2, !"stack-protector-guard-offset", i32 4296}
; CHECK: mrc p15, #0, [[REG1:r[0-9]+]], c13, c0, #3
-; CHECK-SMALL-NEXT: ldr{{(\.w)?}} [[REG1]], {{\[}}[[REG1]], #1296]
+; CHECK-SMALL-NEXT: ldr{{(\.w)?}} [[REG1]], [[[REG1]], #1296]
; CHECK-LARGE-NEXT: add{{(\.w)?}} [[REG1]], [[REG1]], #4096
-; CHECK-LARGE-NEXT: ldr{{(\.w)?}} [[REG1]], {{\[}}[[REG1]], #200]
+; CHECK-LARGE-NEXT: ldr{{(\.w)?}} [[REG1]], [[[REG1]], #200]
; CHECK: bl baz
; CHECK: mrc p15, #0, [[REG2:r[0-9]+]], c13, c0, #3
-; CHECK-SMALL-NEXT: ldr{{(\.w)?}} [[REG2]], {{\[}}[[REG2]], #1296]
+; CHECK-SMALL-NEXT: ldr{{(\.w)?}} [[REG2]], [[[REG2]], #1296]
; CHECK-LARGE-NEXT: add{{(\.w)?}} [[REG2]], [[REG2]], #4096
-; CHECK-LARGE-NEXT: ldr{{(\.w)?}} [[REG2]], {{\[}}[[REG2]], #200]
+; CHECK-LARGE-NEXT: ldr{{(\.w)?}} [[REG2]], [[[REG2]], #200]
;PIC: ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
;PIC: [[LABEL1:LPC0_1]]:
;PIC: add [[R1:r[0-9]+]], pc, [[R0]]
-;PIC: ldr [[R2:r[0-9]+]], {{\[}}[[R1]]{{\]}}
-;PIC: ldr {{r[0-9]+}}, {{\[}}[[R2]]{{\]}}
+;PIC: ldr [[R2:r[0-9]+]], [[[R1]]]
+;PIC: ldr {{r[0-9]+}}, [[[R2]]]
;PIC: [[LABEL0]]:
;PIC-NEXT: .long L___stack_chk_guard$non_lazy_ptr-([[LABEL1]]+8)
;NO-PIC: foo2
;NO-PIC: ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
;NO-PIC-NOT: LPC
-;NO-PIC: ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+;NO-PIC: ldr {{r[0-9]+}}, [[[R0]]]
;STATIC: [[LABEL0]]:
;STATIC-NEXT: .long ___stack_chk_guard
;PIC-V7: movw [[R0:r[0-9]+]], :lower16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0:LPC[0-9_]+]]+8))
;PIC-V7: movt [[R0]], :upper16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0]]+8))
;PIC-V7: [[LABEL0]]:
-;PIC-V7: ldr [[R0]], {{\[}}pc, [[R0]]{{\]}}
-;PIC-V7: ldr [[R0]], {{\[}}[[R0]]{{\]}}
+;PIC-V7: ldr [[R0]], [pc, [[R0]]]
+;PIC-V7: ldr [[R0]], [[[R0]]]
;PIC-V7: L___stack_chk_guard$non_lazy_ptr:
;PIC-V7: .indirect_symbol ___stack_chk_guard
;STATIC-V7: movw [[R0:r[0-9]+]], :lower16:___stack_chk_guard
;STATIC-V7: movt [[R0]], :upper16:___stack_chk_guard
-;STATIC-V7: ldr [[R0]], {{\[}}[[R0]]{{\]}}
+;STATIC-V7: ldr [[R0]], [[[R0]]]
;DYNAMIC-NO-PIC-V7: movw [[R0:r[0-9]+]], :lower16:L___stack_chk_guard$non_lazy_ptr
;DYNAMIC-NO-PIC-V7: movt [[R0]], :upper16:L___stack_chk_guard$non_lazy_ptr
-;DYNAMIC-NO-PIC-V7: ldr [[R0]], {{\[}}[[R0]]{{\]}}
-;DYNAMIC-NO-PIC-V7: ldr [[R0]], {{\[}}[[R0]]{{\]}}
+;DYNAMIC-NO-PIC-V7: ldr [[R0]], [[[R0]]]
+;DYNAMIC-NO-PIC-V7: ldr [[R0]], [[[R0]]]
;DYNAMIC-NO-PIC-V7: L___stack_chk_guard$non_lazy_ptr:
;DYNAMIC-NO-PIC-V7: .indirect_symbol ___stack_chk_guard
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
entry:
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
entry:
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: bne
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: bne
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
entry:
;NO_NEON: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
entry:
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
entry:
;NO_NEON: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: bne
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: bne
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: bne
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: bne
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1
;NO_NEON: bne
-;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #1
;THUMB1: bne
;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2
;NO_NEON: bne
-;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldrh r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #2
;THUMB1: bne
;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4
;NO_NEON: bne
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;NO_NEON: bne
;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [{{.*}}]!
-;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1: ldr r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;THUMB1: adds [[BASE]], #4
;THUMB1: bne
;V8MBASE-LABEL: <test_M>:
define void @test_M() {
-;V8MBASE: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;V8MBASE: ldrb r{{[0-9]+}}, [[[BASE:r[0-9]+]]]
;V8MBASE: adds [[BASE]], #1
;V8MBASE-NOT: movw
entry:
; OPT-DAG: mov [[CSREG:r[1-9].*]], r0
; OPT-DAG: ldr r10, [r10]
; OPT: bl {{_?}}thisreturn_attribute
-; OPT: str r0, {{\[}}[[CSREG]]
+; OPT: str r0, [[[CSREG]]
define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret(i8**), i8** noalias nocapture readonly swiftself) {
entry:
%2 = load i8*, i8** %1, align 8
; CHECK-LABEL: f:
; CHECK: movw [[ADDR:(r[0-9]+|lr)]], #
; CHECK-NEXT: add [[ADDR]], sp
-; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128]
+; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [[[ADDR]]:128]
define <4 x float> @f(<4 x float> %x) {
entry:
%.compoundliteral7837 = alloca <4 x float>, align 16
ret void
; CHECK: ldr [[TMP:r[0-3]]], [[F:\.[A-Z0-9_]+]]
-; CHECK: ldr [[CALLEE:r[0-3]]], {{\[}}[[TMP]]{{\]}}
+; CHECK: ldr [[CALLEE:r[0-3]]], [[[TMP]]]
; CHECK-V4T-NOT: blx
; CHECK-V4T: bl [[INDIRECT_PAD:\.Ltmp[0-9]+]]
; CHECK-LABEL: test_truncate:
; CHECK: vmov.32 [[REG:d[0-9]+]][0], r0
; CHECK-NEXT: mov [[BASE:r[0-9]+]], sp
-; CHECK-NEXT: vld1.32 {[[REG]][1]}, {{\[}}[[BASE]]:32]
+; CHECK-NEXT: vld1.32 {[[REG]][1]}, [[[BASE]]:32]
; CHECK-NEXT: vmov r0, r1, [[REG]]
entry:
%res = trunc <2 x i128> %in to <2 x i8>
;Check for a post-increment updating load.
define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK-LABEL: vld3Qi32_update:
-;CHECK: vld3.32 {d16, d18, d20}, {{\[}}[[R:r[0-9]+|lr]]]!
-;CHECK: vld3.32 {d17, d19, d21}, {{\[}}[[R]]]!
+;CHECK: vld3.32 {d16, d18, d20}, [[[R:r[0-9]+|lr]]]!
+;CHECK: vld3.32 {d17, d19, d21}, [[[R]]]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1)
; MINGW-LABEL: func:
; MINGW: movw [[REG:r[0-9]+]], :lower16:.refptr.__stack_chk_guard
; MINGW: movt [[REG]], :upper16:.refptr.__stack_chk_guard
-; MINGW: ldr [[REG2:r[0-9]+]], {{\[}}[[REG]]]
-; MINGW: ldr {{r[0-9]+}}, {{\[}}[[REG2]]]
+; MINGW: ldr [[REG2:r[0-9]+]], [[[REG]]]
+; MINGW: ldr {{r[0-9]+}}, [[[REG2]]]
; MINGW: bl other
; MINGW: movw [[REG3:r[0-9]+]], :lower16:.refptr.__stack_chk_guard
; MINGW: movt [[REG3]], :upper16:.refptr.__stack_chk_guard
-; MINGW: ldr [[REG4:r[0-9]+]], {{\[}}[[REG3]]]
-; MINGW: ldr {{r[0-9]+}}, {{\[}}[[REG4]]]
+; MINGW: ldr [[REG4:r[0-9]+]], [[[REG3]]]
+; MINGW: ldr {{r[0-9]+}}, [[[REG4]]]
; MINGW: bl __stack_chk_fail
%c = alloca i8, align 1
;PIC: foo2
;PIC: ldr [[SAVED_GUARD:r[0-9]+]], [[GUARD_STACK_OFFSET:LCPI[0-9_]+]]
;PIC-NEXT: add [[SAVED_GUARD]], sp
-;PIC-NEXT: ldr [[SAVED_GUARD]], {{\[}}[[SAVED_GUARD]]{{\]}}
+;PIC-NEXT: ldr [[SAVED_GUARD]], [[[SAVED_GUARD]]]
;PIC-NEXT: ldr [[ORIGINAL_GUARD:r[0-9]+]], [[ORIGINAL_GUARD_LABEL:LCPI[0-9_]+]]
;PIC-NEXT: [[LABEL1:LPC[0-9_]+]]:
;PIC-NEXT: add [[ORIGINAL_GUARD]], pc
-;PIC-NEXT: ldr [[ORIGINAL_GUARD]], {{\[}}[[ORIGINAL_GUARD]]{{\]}}
-;PIC-NEXT: ldr [[ORIGINAL_GUARD]], {{\[}}[[ORIGINAL_GUARD]]{{\]}}
+;PIC-NEXT: ldr [[ORIGINAL_GUARD]], [[[ORIGINAL_GUARD]]]
+;PIC-NEXT: ldr [[ORIGINAL_GUARD]], [[[ORIGINAL_GUARD]]]
;PIC-NEXT: cmp [[ORIGINAL_GUARD]], [[SAVED_GUARD]]
;PIC: [[GUARD_STACK_OFFSET]]:
;NO-PIC: foo2
;NO-PIC: ldr [[SAVED_GUARD:r[0-9]+]], [[GUARD_STACK_OFFSET:LCPI[0-9_]+]]
;NO-PIC-NEXT: add [[SAVED_GUARD]], sp
-;NO-PIC-NEXT: ldr [[SAVED_GUARD]], {{\[}}[[SAVED_GUARD]]{{\]}}
+;NO-PIC-NEXT: ldr [[SAVED_GUARD]], [[[SAVED_GUARD]]]
;NO-PIC-NEXT: ldr [[ORIGINAL_GUARD:r[0-9]+]], [[ORIGINAL_GUARD_LABEL:LCPI[0-9_]+]]
;NO-PIC-NOT: LPC
-;NO-PIC-NEXT: ldr [[ORIGINAL_GUARD]], {{\[}}[[ORIGINAL_GUARD]]{{\]}}
-;DYNAMIC-NO-PIC-NEXT: ldr [[ORIGINAL_GUARD]], {{\[}}[[ORIGINAL_GUARD]]{{\]}}
+;NO-PIC-NEXT: ldr [[ORIGINAL_GUARD]], [[[ORIGINAL_GUARD]]]
+;DYNAMIC-NO-PIC-NEXT: ldr [[ORIGINAL_GUARD]], [[[ORIGINAL_GUARD]]]
;NO-PIC-NEXT: cmp [[ORIGINAL_GUARD]], [[SAVED_GUARD]]
;STATIC: [[GUARD_STACK_OFFSET]]:
; CHECK: Perl_ck_sort
; CHECK: ldr
; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]]
-; CHECK: str {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
+; CHECK: str {{(r[0-9])|(lr)}}, [[[REGISTER]]], #24
define void @Perl_ck_sort() nounwind optsize {
entry:
;PIC: movt [[R0]], :upper16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0]]+4))
;PIC: [[LABEL0]]:
;PIC: add [[R0]], pc
-;PIC: ldr [[R1:r[0-9]+]], {{\[}}[[R0]]{{\]}}
-;PIC: ldr {{r[0-9]+}}, {{\[}}[[R1]]{{\]}}
+;PIC: ldr [[R1:r[0-9]+]], [[[R0]]]
+;PIC: ldr {{r[0-9]+}}, [[[R1]]]
;STATIC: foo2
;STATIC: movw [[R0:r[0-9]+]], :lower16:___stack_chk_guard
;STATIC: movt [[R0]], :upper16:___stack_chk_guard
-;STATIC: ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+;STATIC: ldr {{r[0-9]+}}, [[[R0]]]
;DYNAMIC-NO-PIC: foo2
;DYNAMIC-NO-PIC: movw [[R0:r[0-9]+]], :lower16:L___stack_chk_guard$non_lazy_ptr
;DYNAMIC-NO-PIC: movt [[R0]], :upper16:L___stack_chk_guard$non_lazy_ptr
-;DYNAMIC-NO-PIC: ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+;DYNAMIC-NO-PIC: ldr {{r[0-9]+}}, [[[R0]]]
; Function Attrs: nounwind ssp
define i32 @test_stack_guard_remat() #0 {
; CHECKFP-NEXT: stw r10, sp[1]
; CHECKFP-NEXT: ldaw r10, sp[0]
; CHECKFP-NEXT: mkmsk [[REG:r[0-9]+]], 8
-; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
+; CHECKFP-NEXT: ldaw r0, r10[[[REG]]]
; CHECKFP-NEXT: extsp 1
; CHECKFP-NEXT: bl f5
; CHECKFP-NEXT: ldaw sp, sp[1]
; CHECKFP-NEXT: stw r10, sp[1]
; CHECKFP-NEXT: ldaw r10, sp[0]
; CHECKFP-NEXT: ldc [[REG:r[0-9]+]], 32767
-; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
+; CHECKFP-NEXT: ldaw r0, r10[[[REG]]]
; CHECKFP-NEXT: extsp 1
; CHECKFP-NEXT: bl f5
; CHECKFP-NEXT: ldaw sp, sp[1]
; CHECK: ldaw r11, sp[0]
; scavenge r4 using SR spill slot
; CHECK: stw r4, sp[1]
-; CHECK: ldw r4, cp{{\[}}[[ARG5]]{{\]}}
+; CHECK: ldw r4, cp[[[ARG5]]]
; r11 used to load 5th argument
; CHECK: ldw r11, r11[r4]
; CHECK: ldaw r4, sp[0]
; scavenge r5 using SR spill slot
; CHECK: stw r5, sp[0]
-; CHECK: ldw r5, cp{{\[}}[[INDEX0]]{{\]}}
+; CHECK: ldw r5, cp[[[INDEX0]]]
; r4 & r5 used by InsertSPConstInst() to emit STW_l3r instruction.
; CHECK: stw r0, r4[r5]
; CHECK: ldaw r0, sp[0]
-; CHECK: ldw r5, cp{{\[}}[[INDEX1]]{{\]}}
+; CHECK: ldw r5, cp[[[INDEX1]]]
; CHECK: stw r1, r0[r5]
; CHECK: ldaw r0, sp[0]
-; CHECK: ldw r1, cp{{\[}}[[INDEX2]]{{\]}}
+; CHECK: ldw r1, cp[[[INDEX2]]]
; CHECK: stw r2, r0[r1]
; CHECK: ldaw r0, sp[0]
-; CHECK: ldw r1, cp{{\[}}[[INDEX3]]{{\]}}
+; CHECK: ldw r1, cp[[[INDEX3]]]
; CHECK: stw r3, r0[r1]
; CHECK: ldaw r0, sp[0]
-; CHECK: ldw r1, cp{{\[}}[[INDEX4]]{{\]}}
+; CHECK: ldw r1, cp[[[INDEX4]]]
; CHECK: stw r11, r0[r1]
; CHECK: ldaw sp, sp[65535]
; CHECK: ldw r4, sp[1]
; CHECK-LABEL: _Z1fz:
; CHECK: extsp 3
; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: , sp[[[REG]]]
; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: , sp[[[REG]]]
; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: , sp[[[REG]]]
; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: , sp[[[REG]]]
; CHECK: ldaw sp, sp[3]
; CHECK: retsp 0
ret void
ret void
}
; PTX-LABEL: sum_of_array(
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
; IR-LABEL: @sum_of_array(
; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
ret void
}
; PTX-LABEL: sum_of_array2(
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
ret void
}
; PTX-LABEL: sum_of_array3(
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
ret void
}
; PTX-LABEL: sum_of_array4(
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
%0 = sext i32 %xy to i64
%p0 = getelementptr inbounds float, float* %input, i64 %0
%v0 = load float, float* %p0, align 4
-; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
+; PTX: ld.f32 %f{{[0-9]+}}, [[[p0:%rd[0-9]+]]]
call void @use(float %v0)
%y5 = add nsw i32 %y, 5
%p1 = getelementptr inbounds float, float* %input, i64 %1
; IR: getelementptr inbounds float, float* %p0, i64 5
%v1 = load float, float* %p1, align 4
-; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
+; PTX: ld.f32 %f{{[0-9]+}}, [[[p0]]+20]
call void @use(float %v1)
ret void
; PTX: mul.wide.s32 [[i4:%rd[0-9]+]], [[i]], 4;
; PTX: add.s64 [[base1:%rd[0-9]+]], [[arr]], [[i4]];
%v1 = load float, float* %p1, align 4
-; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base1]]+20];
+; PTX: ld.f32 {{%f[0-9]+}}, [[[base1]]+20];
call void @foo(float %v1)
%j2 = add nsw i32 %i2, 5
; CHECK: [[b2:%[0-9]+]] = getelementptr float, float* [[b1]], i64 [[bump]]
; PTX: add.s64 [[base2:%rd[0-9]+]], [[base1]], [[i4]];
%v2 = load float, float* %p2, align 4
-; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base2]]+20];
+; PTX: ld.f32 {{%f[0-9]+}}, [[[base2]]+20];
call void @foo(float %v2)
%j3 = add nsw i32 %i3, 5
; CHECK: [[b3:%[0-9]+]] = getelementptr float, float* [[b2]], i64 [[bump]]
; PTX: add.s64 [[base3:%rd[0-9]+]], [[base2]], [[i4]];
%v3 = load float, float* %p3, align 4
-; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base3]]+20];
+; PTX: ld.f32 {{%f[0-9]+}}, [[[base3]]+20];
call void @foo(float %v3)
%j4 = add nsw i32 %i4, 5
; CHECK: [[b4:%[0-9]+]] = getelementptr float, float* [[b3]], i64 [[bump]]
; PTX: add.s64 [[base4:%rd[0-9]+]], [[base3]], [[i4]];
%v4 = load float, float* %p4, align 4
-; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base4]]+20];
+; PTX: ld.f32 {{%f[0-9]+}}, [[[base4]]+20];
call void @foo(float %v4)
ret void