ret i8 %r
}
-; Negative test - extracts are free and vector op has same cost as scalar.
+; Extracts are free and vector op has same cost as scalar, but we
+; speculatively transform to vector to create more optimization
+; opportunities.
define double @ext0_ext0_fadd(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext0_ext0_fadd(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
-; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 0
-; CHECK-NEXT: [[R:%.*]] = fadd double [[E0]], [[E1]]
-; CHECK-NEXT: ret double [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT: ret double [[TMP2]]
;
%e0 = extractelement <2 x double> %x, i32 0
%e1 = extractelement <2 x double> %y, i32 0
%r = fadd double %e0, %e1
ret double %r
}
-; Negative test - disguised same vector operand; scalar code is cheaper than general case.
+; Disguised same vector operand; scalar code is not cheaper (with default
+; x86 target), so aggressively form vector binop.
define i32 @ext1_ext1_add_same_vec(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 1
-; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT: [[R:%.*]] = add i32 [[E0]], [[E1]]
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%e0 = extractelement <4 x i32> %x, i32 1
%e1 = extractelement <4 x i32> %x, i32 1
%r = add i32 %e0, %e1
ret i32 %r
}
-; Negative test - same vector operand; scalar code is cheaper than general case.
+; Functionally equivalent to above test; should transform as above.
define i32 @ext1_ext1_add_same_vec_cse(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_cse(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 1
-; CHECK-NEXT: [[R:%.*]] = add i32 [[E0]], [[E0]]
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%e0 = extractelement <4 x i32> %x, i32 1
%r = add i32 %e0, %e0
ret i32 %r
}
-; Negative test - vector code would not be cheaper.
+; Vector code costs the same as scalar, so aggressively form vector op.
define i8 @ext1_ext1_add_uses1(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses1(
; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: call void @use_i8(i8 [[E0]])
-; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
-; CHECK-NEXT: [[R:%.*]] = add i8 [[E0]], [[E1]]
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; CHECK-NEXT: ret i8 [[TMP2]]
;
%e0 = extractelement <16 x i8> %x, i32 0
call void @use_i8(i8 %e0)
%e1 = extractelement <16 x i8> %y, i32 0
%r = add i8 %e0, %e1
ret i8 %r
}
-; Negative test - vector code would not be cheaper.
+; Vector code costs the same as scalar, so aggressively form vector op.
define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses2(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT: call void @use_i8(i8 [[E1]])
-; CHECK-NEXT: [[R:%.*]] = add i8 [[E0]], [[E1]]
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; CHECK-NEXT: ret i8 [[TMP2]]
;
%e0 = extractelement <16 x i8> %x, i32 0
%e1 = extractelement <16 x i8> %y, i32 0