From 3b09d279be164f3e9a16b18eced38ee75a00e6fe Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Mon, 22 Feb 2016 20:04:02 +0000
Subject: [PATCH] [WebAssembly] Teach address folding to fold bitwise-or nodes.

LLVM converts adds into ors when it can prove that the operands don't
share any non-zero bits. Teach address folding to recognize 'or'
instructions with constant operands that have this property, and fold
them into addresses as if they were adds.

llvm-svn: 261562
---
 .../Target/WebAssembly/WebAssemblyInstrMemory.td   | 68 ++++++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/offset.ll            | 21 +++++++
 2 files changed, 89 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 096d187..a72da44 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -28,6 +28,18 @@ def regPlusImm : PatFrag<(ops node:$addr, node:$off),
                          (add node:$addr, node:$off),
                          [{ return N->getFlags()->hasNoUnsignedWrap(); }]>;

+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  APInt KnownZero0, KnownOne0;
+  CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
+  APInt KnownZero1, KnownOne1;
+  CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
+  return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
 // GlobalAddresses are conceptually unsigned values, so we can also fold them
 // into immediate values as long as their offsets are non-negative.
 def regPlusGA : PatFrag<(ops node:$addr, node:$off),
@@ -76,6 +88,14 @@ def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
           (LOAD_F32 imm:$off, $addr, 0)>;
 def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
           (LOAD_F64 imm:$off, $addr, 0)>;
+def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))),
+          (LOAD_I32 imm:$off, $addr, 0)>;
+def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))),
+          (LOAD_I64 imm:$off, $addr, 0)>;
+def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))),
+          (LOAD_F32 imm:$off, $addr, 0)>;
+def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))),
+          (LOAD_F64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
           (LOAD_I32 tglobaladdr:$off, $addr, 0)>;
@@ -188,6 +208,26 @@ def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
           (LOAD32_S_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
           (LOAD32_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_S_I32 imm:$off, $addr, 0)>;
+def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_S_I32 imm:$off, $addr, 0)>;
+def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_S_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_S_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))),
+          (LOAD32_S_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))),
+          (LOAD32_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
           (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>;
@@ -332,6 +372,16 @@ def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
           (LOAD16_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
           (LOAD32_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))),
+          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))),
+          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+def : Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))),
+          (LOAD32_U_I64 imm:$off, $addr, 0)>;
 def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
                                      (WebAssemblywrapper tglobaladdr:$off)))),
           (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
@@ -433,6 +483,14 @@ def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
           (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
 def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
           (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
+def : Pat<(store I32:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+def : Pat<(store I64:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+def : Pat<(store F32:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
+def : Pat<(store F64:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
 def : Pat<(store I32:$val, (regPlusGA I32:$addr,
                             (WebAssemblywrapper tglobaladdr:$off))),
           (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
@@ -528,6 +586,16 @@ def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
           (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
           (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (or_is_add I32:$addr, imm:$off)),
+          (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val,
           (regPlusGA I32:$addr,
                      (WebAssemblywrapper tglobaladdr:$off))),
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
index 9cf04c0..59f26f3 100644
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -125,6 +125,17 @@ define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
   ret i64 %t
 }

+; CHECK-LABEL: load_i32_with_folded_or_offset:
+; CHECK: i32.load8_s $push{{[0-9]+}}=, 2($pop{{[0-9]+}}){{$}}
+define i32 @load_i32_with_folded_or_offset(i32 %x) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  %t1 = load i8, i8* %arrayidx, align 1
+  %conv = sext i8 %t1 to i32
+  ret i32 %conv
+}
+
 ; Same as above but with store.

 ; CHECK-LABEL: store_i32_with_folded_offset:
@@ -245,6 +256,16 @@ define void @store_i64_with_unfolded_gep_offset(i64* %p) {
   ret void
 }

+; CHECK-LABEL: store_i32_with_folded_or_offset:
+; CHECK: i32.store8 $discard=, 2($pop{{[0-9]+}}), $pop{{[0-9]+}}{{$}}
+define void @store_i32_with_folded_or_offset(i32 %x) {
+  %and = and i32 %x, -4
+  %t0 = inttoptr i32 %and to i8*
+  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
+  store i8 0, i8* %arrayidx, align 1
+  ret void
+}
+
 ; When loading from a fixed address, materialize a zero.

 ; CHECK-LABEL: load_i32_from_numeric_address
-- 
2.7.4
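
Editor's note (illustration, not part of the patch): the tests above show
why an 'or' appears where an 'add' is expected. Masking with -4 clears the
two low bits of the address, so adding a small offset like 2 can never
carry into the nonzero bits, and LLVM canonicalizes the add into an or;
or_is_add's MaskedValueIsZero check proves exactly this disjointness. The
LLVM IR sketch below (hypothetical function name, written for this note)
demonstrates the equivalence being relied on:

  ; %base has its low two bits known zero, so 'add %base, 2' and
  ; 'or %base, 2' set disjoint bits and compute the same value;
  ; the canonicalized 'or' is what the new patterns fold as an offset.
  define i1 @or_equals_add(i32 %x) {
    %base = and i32 %x, -4     ; low 2 bits known zero
    %a = add i32 %base, 2      ; no carry possible out of disjoint bits
    %o = or i32 %base, 2       ; same value as %a
    %eq = icmp eq i32 %a, %o   ; always true
    ret i1 %eq
  }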