--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [1024 x float] zeroinitializer, align 128
+@B = global [1024 x i8] zeroinitializer, align 128
+
+define void @test() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+
+ %iv.0 = add nuw nsw i64 %iv, 0
+ %iv.1 = add nuw nsw i64 %iv, 1
+
+ %in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
+ %in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
+
+ %v0 = load float, float* %in0
+ %v1 = load float, float* %in1
+
+ %reduce.add.0 = fadd float %v0, %v1
+
+ %reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
+
+ %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.0.narrow, i8* %out
+
+ %iv.next = add nuw nsw i64 %iv.0, 2
+ %cmp = icmp ult i64 %iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [1024 x float] zeroinitializer, align 128
+@B = global [1024 x i8] zeroinitializer, align 128
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, float* %in0, align 4
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+
+ %iv.0 = add nuw nsw i64 %iv, 0
+ %iv.1 = add nuw nsw i64 %iv, 1
+
+ %in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
+ %in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
+
+ %v0 = load float, float* %in0
+ %v1 = load float, float* %in1
+
+ %reduce.add.0 = fadd float %v0, %v1
+
+ %reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
+
+ %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.0.narrow, i8* %out
+
+ %iv.next = add nuw nsw i64 %iv.0, 2
+ %cmp = icmp ult i64 %iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
--- /dev/null
+# REQUIRES: x86-registered-target
+
+## Check that --filter works properly.
+# RUN: cp -f %S/Inputs/x86-loopvectorize-costmodel.ll %t.ll && %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
+
+## Check that running the script again does not change the result:
+# RUN: %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
+
+## Check that running the script again, without arguments, does not change the result:
+# RUN: %update_analyze_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
r'\s*\n(?P<body>.*)$',
flags=(re.X | re.S))
+LV_DEBUG_RE = re.compile(  # Parses one piece of debug output split on "LV: Checking a loop in ".
+ r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*'  # 'func': the single-quoted name opening the piece; the rest of that line is skipped.
+ r'\s*\n(?P<body>.*)$',  # 'body': the remaining text (re.S lets '.' span newlines).
+ flags=(re.X | re.S))
+
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
raw_tool_outputs = common.invoke_tool(ti.args.opt_binary, opt_args, ti.path)
- # Split analysis outputs by "Printing analysis " declarations.
- for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
- builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
- raw_tool_output, prefixes, False)
+ if re.search(r'Printing analysis ', raw_tool_outputs) is not None:
+ # Split analysis outputs by "Printing analysis " declarations.
+ for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
+ builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
+ raw_tool_output, prefixes, False)
+ elif re.search(r'LV: Checking a loop in ', raw_tool_outputs) is not None:
+ # Split loop-vectorizer debug output by "LV: Checking a loop in " declarations.
+ for raw_tool_output in re.split(r'LV: Checking a loop in ', raw_tool_outputs):
+ builder.process_run_line(common.LV_DEBUG_RE, common.scrub_body,
+ raw_tool_output, prefixes, False)
+ else:
+ common.warn('Don\'t know how to deal with this output')
+ continue
func_dict = builder.finish_and_get_func_dict()
is_in_function = False