functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
+* The ``readnone`` calls which are crossing suspend points in coroutines will
+ not be merged. Since ``readnone`` calls may access thread id and thread id
+ is not a constant in coroutines. This decision may cause unnecessary
+ performance regressions and we plan to fix it in later versions.
+
* ...
Update on required toolchains to build LLVM
}
}
}
- return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ !CI->getFunction()->isPresplitCoroutine();
}
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
return false;
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (!CI || !CI->onlyReadsMemory())
+ if (!CI || !CI->onlyReadsMemory() ||
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ CI->getFunction()->isPresplitCoroutine())
return false;
return true;
}
}
uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
- if (AA->doesNotAccessMemory(C)) {
+ if (AA->doesNotAccessMemory(C) &&
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ !C->getFunction()->isPresplitCoroutine()) {
Expression exp = createExpr(C);
uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
- } else if (MD && AA->onlyReadsMemory(C)) {
+ } else if (MD && AA->onlyReadsMemory(C) &&
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ !C->getFunction()->isPresplitCoroutine()) {
Expression exp = createExpr(C);
auto ValNum = assignExpNewValueNum(exp);
if (ValNum.second) {
return ExprResult::some(createVariableOrConstant(ReturnedValue));
}
}
+
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ if (CI->getFunction()->isPresplitCoroutine())
+ return ExprResult::none();
+
if (AA->doesNotAccessMemory(CI)) {
return ExprResult::some(
createCallExpression(CI, TOPClass->getMemoryLeader()));
--- /dev/null
+; Tests that the readnone function which don't cross suspend points could be optimized expectly after split.
+;
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+ %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call ptr @malloc(i32 %size)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+ %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %sus_result, label %suspend [i8 0, label %resume
+ i8 1, label %cleanup]
+resume:
+ %i = call i32 @readnone_func() readnone
+ ; noop call to break optimization to combine two consecutive readonly calls.
+ call void @nop()
+ %j = call i32 @readnone_func() readnone
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %same, label %diff
+
+same:
+ call void @print_same()
+ br label %cleanup
+
+diff:
+ call void @print_diff()
+ br label %cleanup
+
+cleanup:
+ %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+ call void @free(ptr %mem)
+ br label %suspend
+
+suspend:
+ call i1 @llvm.coro.end(ptr %hdl, i1 0)
+ ret ptr %hdl
+}
+
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_SPLITTED-NEXT: :
+; CHECK_SPLITTED-NEXT: call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]]
+; CHECK_SPLITTED-NEXT: call void @nop()
+; CHECK_SPLITTED-NEXT: call void @print_same()
+;
+; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone }
+;
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: same:
+; CHECK_UNSPLITTED-NEXT: call void @print_same()
+; CHECK_UNSPLITTED-NEXT: br label
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: diff:
+; CHECK_UNSPLITTED-NEXT: call void @print_diff()
+; CHECK_UNSPLITTED-NEXT: br label
+
+declare i32 @readnone_func() readnone
+declare void @nop()
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
--- /dev/null
+; Tests that the readnone function which cross suspend points wouldn't be misoptimized.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+ %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %size = call i32 @llvm.coro.size.i32()
+ %alloc = call ptr @malloc(i32 %size)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+ %j = call i32 @readnone_func() readnone
+ %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %sus_result, label %suspend [i8 0, label %resume
+ i8 1, label %cleanup]
+resume:
+ %i = call i32 @readnone_func() readnone
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %same, label %diff
+
+same:
+ call void @print_same()
+ br label %cleanup
+
+diff:
+ call void @print_diff()
+ br label %cleanup
+
+cleanup:
+ %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+ call void @free(ptr %mem)
+ br label %suspend
+
+suspend:
+ call i1 @llvm.coro.end(ptr %hdl, i1 0)
+ ret ptr %hdl
+}
+
+; Tests that normal functions wouldn't be affected.
+define i1 @normal_function() {
+entry:
+ %i = call i32 @readnone_func() readnone
+ %j = call i32 @readnone_func() readnone
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %same, label %diff
+
+same:
+ call void @print_same()
+ ret i1 true
+
+diff:
+ call void @print_diff()
+ ret i1 false
+}
+
+; CHECK_SPLITTED-LABEL: normal_function(
+; CHECK_SPLITTED-NEXT: entry
+; CHECK_SPLITTED-NEXT: call i32 @readnone_func()
+; CHECK_SPLITTED-NEXT: call void @print_same()
+; CHECK_SPLITTED-NEXT: ret i1 true
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK: br i1 %cmp, label %same, label %diff
+; CHECK-EMPTY:
+; CHECK-NEXT: same:
+; CHECK-NEXT: call void @print_same()
+; CHECK-NEXT: br label
+; CHECK-EMPTY:
+; CHECK-NEXT: diff:
+; CHECK-NEXT: call void @print_diff()
+; CHECK-NEXT: br label
+
+declare i32 @readnone_func() readnone
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)