[Coroutines] Don't merge readnone calls in presplit coroutines

author Chuanqi Xu <yedeng.yd@linux.alibaba.com>

Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)

committer Chuanqi Xu <yedeng.yd@linux.alibaba.com>

Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)
author Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)
committer Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst

index c4fdabc..6b4711f 100644 (file)
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -42,6 +42,11 @@ Non-comprehensive list of changes in this release
     functionality, or simply have a lot to talk about), see the `NOTE` below
     for adding a new subsection.
  
+*  The ``readnone`` calls which are crossing suspend points in coroutines will
+   not be merged. Since ``readnone`` calls may access thread id and thread id
+   is not a constant in coroutines. This decision may cause unnecessary
+   performance regressions and we plan to fix it in later versions.
+
  * ...
  
  Update on required toolchains to build LLVM
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp

index abc1ded..429a98b 100644 (file)
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -132,7 +132,15 @@ struct SimpleValue {
          }
          }
        }
-      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
+             // FIXME: Currently the calls which may access the thread id may
+             // be considered as not accessing the memory. But this is
+             // problematic for coroutines, since coroutines may resume in a
+             // different thread. So we disable the optimization here for the
+             // correctness. However, it may block many other correct
+             // optimizations. Revert this one when we detect the memory
+             // accessing kind more precisely.
+             !CI->getFunction()->isPresplitCoroutine();
      }
      return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
             isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
@@ -463,7 +471,15 @@ struct CallValue {
        return false;
  
      CallInst *CI = dyn_cast<CallInst>(Inst);
-    if (!CI || !CI->onlyReadsMemory())
+    if (!CI || !CI->onlyReadsMemory() ||
+        // FIXME: Currently the calls which may access the thread id may
+        // be considered as not accessing the memory. But this is
+        // problematic for coroutines, since coroutines may resume in a
+        // different thread. So we disable the optimization here for the
+        // correctness. However, it may block many other correct
+        // optimizations. Revert this one when we detect the memory
+        // accessing kind more precisely.
+        CI->getFunction()->isPresplitCoroutine())
        return false;
      return true;
    }
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp

index c62aaae..a489a89 100644 (file)
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -450,12 +450,28 @@ void GVNPass::ValueTable::add(Value *V, uint32_t num) {
  }
  
  uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
-  if (AA->doesNotAccessMemory(C)) {
+  if (AA->doesNotAccessMemory(C) &&
+      // FIXME: Currently the calls which may access the thread id may
+      // be considered as not accessing the memory. But this is
+      // problematic for coroutines, since coroutines may resume in a
+      // different thread. So we disable the optimization here for the
+      // correctness. However, it may block many other correct
+      // optimizations. Revert this one when we detect the memory
+      // accessing kind more precisely.
+      !C->getFunction()->isPresplitCoroutine()) {
      Expression exp = createExpr(C);
      uint32_t e = assignExpNewValueNum(exp).first;
      valueNumbering[C] = e;
      return e;
-  } else if (MD && AA->onlyReadsMemory(C)) {
+  } else if (MD && AA->onlyReadsMemory(C) &&
+             // FIXME: Currently the calls which may access the thread id may
+             // be considered as not accessing the memory. But this is
+             // problematic for coroutines, since coroutines may resume in a
+             // different thread. So we disable the optimization here for the
+             // correctness. However, it may block many other correct
+             // optimizations. Revert this one when we detect the memory
+             // accessing kind more precisely.
+             !C->getFunction()->isPresplitCoroutine()) {
      Expression exp = createExpr(C);
      auto ValNum = assignExpNewValueNum(exp);
      if (ValNum.second) {
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp

index d139739..afa346e 100644 (file)
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1607,6 +1607,17 @@ NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
        return ExprResult::some(createVariableOrConstant(ReturnedValue));
      }
    }
+
+  // FIXME: Currently the calls which may access the thread id may
+  // be considered as not accessing the memory. But this is
+  // problematic for coroutines, since coroutines may resume in a
+  // different thread. So we disable the optimization here for the
+  // correctness. However, it may block many other correct
+  // optimizations. Revert this one when we detect the memory
+  // accessing kind more precisely.
+  if (CI->getFunction()->isPresplitCoroutine())
+    return ExprResult::none();
+
    if (AA->doesNotAccessMemory(CI)) {
      return ExprResult::some(
          createCallExpression(CI, TOPClass->getMemoryLeader()));
diff --git a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll

new file mode 100644 (file)

index 0000000..eede209
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll
@@ -0,0 +1,81 @@
+; Tests that the readnone function which don't cross suspend points could be optimized expectly after split.
+;
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  ; noop call to break optimization to combine two consecutive readonly calls.
+  call void @nop()
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_SPLITTED-NEXT:  :
+; CHECK_SPLITTED-NEXT:    call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]]
+; CHECK_SPLITTED-NEXT:    call void @nop()
+; CHECK_SPLITTED-NEXT:    call void @print_same()
+;
+; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone }
+;
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: same:
+; CHECK_UNSPLITTED-NEXT:   call void @print_same()
+; CHECK_UNSPLITTED-NEXT:   br label
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: diff:
+; CHECK_UNSPLITTED-NEXT:   call void @print_diff()
+; CHECK_UNSPLITTED-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+declare void @nop()
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-readnone.ll b/llvm/test/Transforms/Coroutines/coro-readnone.ll

new file mode 100644 (file)

index 0000000..b7bcff4
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-readnone.ll
@@ -0,0 +1,89 @@
+; Tests that the readnone function which cross suspend points wouldn't be misoptimized.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; Tests that normal functions wouldn't be affected.
+define i1 @normal_function() {
+entry:
+  %i = call i32 @readnone_func() readnone
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  ret i1 true
+
+diff:
+  call void @print_diff()
+  ret i1 false
+}
+
+; CHECK_SPLITTED-LABEL: normal_function(
+; CHECK_SPLITTED-NEXT: entry
+; CHECK_SPLITTED-NEXT:   call i32 @readnone_func()
+; CHECK_SPLITTED-NEXT:   call void @print_same()
+; CHECK_SPLITTED-NEXT:   ret i1 true
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK: br i1 %cmp, label %same, label %diff
+; CHECK-EMPTY:
+; CHECK-NEXT: same:
+; CHECK-NEXT:   call void @print_same()
+; CHECK-NEXT:   br label
+; CHECK-EMPTY:
+; CHECK-NEXT: diff:
+; CHECK-NEXT:   call void @print_diff()
+; CHECK-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
author	Chuanqi Xu <yedeng.yd@linux.alibaba.com>
	Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)
committer	Chuanqi Xu <yedeng.yd@linux.alibaba.com>
	Mon, 17 Oct 2022 02:22:43 +0000 (10:22 +0800)
llvm/docs/ReleaseNotes.rst		patch \| blob \| history
llvm/lib/Transforms/Scalar/EarlyCSE.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/GVN.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/NewGVN.cpp		patch \| blob \| history
llvm/test/Transforms/Coroutines/coro-readnone-02.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/Coroutines/coro-readnone.ll	[new file with mode: 0644]	patch \| blob