[clang][Interp] Implement __builtin_strcmp
author Timm Bäder <tbaeder@redhat.com>
Thu, 4 May 2023 05:29:57 +0000 (07:29 +0200)
committer Timm Bäder <tbaeder@redhat.com>
Thu, 20 Jul 2023 13:46:04 +0000 (15:46 +0200)
Make our Function class keep a list of parameter offsets so we can
simply get a parameter by index when evaluating builtin functions.

Differential Revision: https://reviews.llvm.org/D149816

clang/lib/AST/Interp/ByteCodeEmitter.cpp
clang/lib/AST/Interp/Function.cpp
clang/lib/AST/Interp/Function.h
clang/lib/AST/Interp/Interp.h
clang/lib/AST/Interp/InterpBuiltin.cpp
clang/lib/AST/Interp/Pointer.h
clang/test/AST/Interp/builtin-functions.cpp [new file with mode: 0644]

index 3248877..f2072f9 100644 (file)
@@ -26,6 +26,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   // Set up argument indices.
   unsigned ParamOffset = 0;
   SmallVector<PrimType, 8> ParamTypes;
+  SmallVector<unsigned, 8> ParamOffsets;
   llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;
 
   // If the return is not a primitive, a pointer to the storage where the
@@ -36,6 +37,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
     HasRVO = true;
     ParamTypes.push_back(PT_Ptr);
+    ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(PT_Ptr));
   }
 
@@ -47,6 +49,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
     if (MD->isInstance()) {
       HasThisPointer = true;
       ParamTypes.push_back(PT_Ptr);
+      ParamOffsets.push_back(ParamOffset);
       ParamOffset += align(primSize(PT_Ptr));
     }
 
@@ -75,6 +78,7 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
     Descriptor *Desc = P.createDescriptor(PD, Ty);
     ParamDescriptors.insert({ParamOffset, {Ty, Desc}});
     Params.insert({PD, ParamOffset});
+    ParamOffsets.push_back(ParamOffset);
     ParamOffset += align(primSize(Ty));
     ParamTypes.push_back(Ty);
   }
@@ -82,9 +86,9 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
   // Create a handle over the emitted code.
   Function *Func = P.getFunction(FuncDecl);
   if (!Func)
-    Func =
-        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
-                         std::move(ParamDescriptors), HasThisPointer, HasRVO);
+    Func = P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
+                            std::move(ParamDescriptors),
+                            std::move(ParamOffsets), HasThisPointer, HasRVO);
 
   assert(Func);
   // For not-yet-defined functions, we only create a Function instance and
index 4e6d175..7531299 100644 (file)
@@ -16,12 +16,14 @@ using namespace clang;
 using namespace clang::interp;
 
+/// Builds a Function for the declaration \p F. The parameter type list, the
+/// offset-to-descriptor map and the list of parameter offsets are moved into
+/// the corresponding members; the source location is taken from
+/// F->getBeginLoc().
 Function::Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
-                   llvm::SmallVector<PrimType, 8> &&ParamTypes,
+                   llvm::SmallVectorImpl<PrimType> &&ParamTypes,
                    llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
+                   llvm::SmallVectorImpl<unsigned> &&ParamOffsets,
                    bool HasThisPointer, bool HasRVO)
     : P(P), Loc(F->getBeginLoc()), F(F), ArgSize(ArgSize),
       ParamTypes(std::move(ParamTypes)), Params(std::move(Params)),
-      HasThisPointer(HasThisPointer), HasRVO(HasRVO) {}
+      ParamOffsets(std::move(ParamOffsets)), HasThisPointer(HasThisPointer),
+      HasRVO(HasRVO) {}
 
 Function::ParamDescriptor Function::getParamDescriptor(unsigned Offset) const {
   auto It = Params.find(Offset);
index 357e6e2..55a23ff 100644 (file)
@@ -156,12 +156,17 @@ public:
 
   unsigned getNumParams() const { return ParamTypes.size(); }
 
+  /// Returns the offset recorded for the parameter at position \p ParamIndex.
+  /// Positions follow the order in which offsets were appended to
+  /// ParamOffsets; compileFunc also appends entries for the RVO pointer and
+  /// the instance pointer, so those slots are counted as well.
+  unsigned getParamOffset(unsigned ParamIndex) const {
+    const unsigned Offset = ParamOffsets[ParamIndex];
+    return Offset;
+  }
+
 private:
   /// Construct a function representing an actual function.
   Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
-           llvm::SmallVector<PrimType, 8> &&ParamTypes,
+           llvm::SmallVectorImpl<PrimType> &&ParamTypes,
            llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
-           bool HasThisPointer, bool HasRVO);
+           llvm::SmallVectorImpl<unsigned> &&ParamOffsets, bool HasThisPointer,
+           bool HasRVO);
 
   /// Sets the code of a function.
   void setCode(unsigned NewFrameSize, std::vector<std::byte> &&NewCode,
@@ -201,6 +206,8 @@ private:
   llvm::SmallVector<PrimType, 8> ParamTypes;
   /// Map from byte offset to parameter descriptor.
   llvm::DenseMap<unsigned, ParamDescriptor> Params;
+  /// List of parameter offsets.
+  llvm::SmallVector<unsigned, 8> ParamOffsets;
   /// Flag to indicate if the function is valid.
   bool IsValid = false;
   /// Flag to indicate if the function is done being
index 1511200..ff67e87 100644 (file)
@@ -169,7 +169,7 @@ bool CheckFloatResult(InterpState &S, CodePtr OpPC, APFloat::opStatus Status);
 bool Interpret(InterpState &S, APValue &Result);
 
 /// Interpret a builtin function.
-bool InterpretBuiltin(InterpState &S, CodePtr &PC, unsigned BuiltinID);
+bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F);
 
 enum class ArithOp { Add, Sub };
 
@@ -1701,7 +1701,7 @@ inline bool CallBI(InterpState &S, CodePtr &PC, const Function *Func) {
   InterpFrame *FrameBefore = S.Current;
   S.Current = NewFrame.get();
 
-  if (InterpretBuiltin(S, PC, Func->getBuiltinID())) {
+  if (InterpretBuiltin(S, PC, Func)) {
     NewFrame.release();
     return true;
   }
index c929ad6..c11f22a 100644 (file)
 namespace clang {
 namespace interp {
 
-bool InterpretBuiltin(InterpState &S, CodePtr &PC, unsigned BuiltinID) {
+/// Reads the parameter at position \p Index of the function executing in
+/// \p Frame. The index is translated to the parameter's offset via
+/// Function::getParamOffset() and the value is then fetched from the frame.
+///
+/// Declared static because it is a helper for the builtin implementations in
+/// this file only; this keeps it from clashing with an identically named
+/// template in another translation unit.
+template <typename T> static T getParam(InterpFrame *Frame, unsigned Index) {
+  const Function *Func = Frame->getFunction();
+  // A builtin asking for a parameter the callee does not have is a bug in the
+  // builtin's implementation, not in the evaluated program.
+  assert(Index < Func->getNumParams() && "parameter index out of range");
+  return Frame->getParam<T>(Func->getParamOffset(Index));
+}
+
+/// Evaluates __builtin_strcmp on the two pointer parameters of \p Frame.
+///
+/// Elements are read as uint8_t values, starting at each pointer's current
+/// index, until they differ or a zero element is reached. On success the
+/// result (-1, 0 or 1) is pushed onto the stack as a signed 32-bit integral
+/// and true is returned. Returns false if either pointer fails CheckLive or
+/// any element access fails CheckRange.
+static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
+                                   InterpFrame *Frame) {
+  const Pointer &LHS = getParam<Pointer>(Frame, 0);
+  const Pointer &RHS = getParam<Pointer>(Frame, 1);
+
+  // Check the left operand first so it is the one reported when both fail.
+  if (!CheckLive(S, OpPC, LHS, AK_Read))
+    return false;
+  if (!CheckLive(S, OpPC, RHS, AK_Read))
+    return false;
+
+  assert(LHS.getFieldDesc()->isPrimitiveArray());
+  assert(RHS.getFieldDesc()->isPrimitiveArray());
+
+  int32_t Result = 0;
+  unsigned LHSIndex = LHS.getIndex();
+  unsigned RHSIndex = RHS.getIndex();
+  while (true) {
+    const Pointer &LHSElem = LHS.atIndex(LHSIndex);
+    const Pointer &RHSElem = RHS.atIndex(RHSIndex);
+    if (!CheckRange(S, OpPC, LHSElem, AK_Read))
+      return false;
+    if (!CheckRange(S, OpPC, RHSElem, AK_Read))
+      return false;
+
+    const uint8_t LHSChar = LHSElem.deref<uint8_t>();
+    const uint8_t RHSChar = RHSElem.deref<uint8_t>();
+    if (LHSChar != RHSChar) {
+      Result = LHSChar > RHSChar ? 1 : -1;
+      break;
+    }
+
+    // Both elements are equal here, so testing one of them for the
+    // terminator covers both.
+    if (LHSChar == 0)
+      break;
+
+    ++LHSIndex;
+    ++RHSIndex;
+  }
+
+  S.Stk.push<Integral<32, true>>(Integral<32, true>::from(Result));
+  return true;
+}
+
+bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F) {
+  InterpFrame *Frame = S.Current;
   APValue Dummy;
 
-  switch (BuiltinID) {
+  switch (F->getBuiltinID()) {
   case Builtin::BI__builtin_is_constant_evaluated:
     S.Stk.push<Boolean>(Boolean::from(S.inConstantContext()));
-    return Ret<PT_Bool, true>(S, PC, Dummy);
+    return Ret<PT_Bool, true>(S, OpPC, Dummy);
   case Builtin::BI__builtin_assume:
-    return RetVoid<true>(S, PC, Dummy);
+    return RetVoid<true>(S, OpPC, Dummy);
+  case Builtin::BI__builtin_strcmp:
+    if (interp__builtin_strcmp(S, OpPC, Frame))
+      return Ret<PT_Sint32, true>(S, OpPC, Dummy);
+    return false;
   default:
     return false;
   }
index 7d9e45a..f795466 100644 (file)
@@ -325,7 +325,8 @@ public:
 
   /// Dereferences a primitive element.
   template <typename T> T &elem(unsigned I) const {
-    return reinterpret_cast<T *>(Pointee->rawData())[I];
+    // Indexing at or beyond the element count is a caller bug; catch it in
+    // assertion-enabled builds.
+    assert(I < getNumElems());
+    // Elements are addressed sizeof(InitMap *) bytes into the block's data.
+    // NOTE(review): presumably this skips an InitMap pointer stored ahead of
+    // the elements -- confirm against the primitive-array layout.
+    return reinterpret_cast<T *>(Pointee->data() + sizeof(InitMap *))[I];
   }
 
   /// Initializes a field.
diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp
new file mode 100644 (file)
index 0000000..e5141b0
--- /dev/null
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter %s -verify
+// RUN: %clang_cc1 -verify=ref %s -Wno-constant-evaluated
+
+namespace strcmp {
+  // Character arrays with no terminating NUL, used by the out-of-bounds
+  // cases at the end of this namespace.
+  constexpr char kFoobar[6] = {'f','o','o','b','a','r'};
+  constexpr char kFoobazfoobar[12] = {'f','o','o','b','a','z','f','o','o','b','a','r'};
+
+  // Equal strings yield 0; otherwise the result is exactly -1 or 1, decided
+  // by the first differing character (a shorter prefix sorts first).
+  static_assert(__builtin_strcmp("", "") == 0);
+  static_assert(__builtin_strcmp("abab", "abab") == 0);
+  static_assert(__builtin_strcmp("abab", "abba") == -1);
+  static_assert(__builtin_strcmp("abab", "abaa") == 1);
+  static_assert(__builtin_strcmp("ababa", "abab") == 1);
+  static_assert(__builtin_strcmp("abab", "ababa") == -1);
+  // '\203' (octal) is above 0x7f: characters must compare as unsigned values.
+  static_assert(__builtin_strcmp("a\203", "a") == 1);
+  static_assert(__builtin_strcmp("a\203", "a\003") == 1);
+  // Comparison stops at the first NUL; anything after it is ignored.
+  static_assert(__builtin_strcmp("abab\0banana", "abab") == 0);
+  static_assert(__builtin_strcmp("abab", "abab\0banana") == 0);
+  static_assert(__builtin_strcmp("abab\0banana", "abab\0canada") == 0);
+  // A null argument on either side must be diagnosed, not evaluated.
+  static_assert(__builtin_strcmp(0, "abab") == 0); // expected-error {{not an integral constant}} \
+                                                   // expected-note {{dereferenced null}} \
+                                                   // expected-note {{in call to}} \
+                                                   // ref-error {{not an integral constant}} \
+                                                   // ref-note {{dereferenced null}}
+  static_assert(__builtin_strcmp("abab", 0) == 0); // expected-error {{not an integral constant}} \
+                                                   // expected-note {{dereferenced null}} \
+                                                   // expected-note {{in call to}} \
+                                                   // ref-error {{not an integral constant}} \
+                                                   // ref-note {{dereferenced null}}
+
+  // The first call differs at index 5 ('r' vs 'z') before either array is
+  // exhausted. The second matches all six characters and then reads one past
+  // the end of both unterminated arrays, which must be diagnosed.
+  static_assert(__builtin_strcmp(kFoobar, kFoobazfoobar) == -1);
+  static_assert(__builtin_strcmp(kFoobar, kFoobazfoobar + 6) == 0); // expected-error {{not an integral constant}} \
+                                                                    // expected-note {{dereferenced one-past-the-end}} \
+                                                                    // expected-note {{in call to}} \
+                                                                    // ref-error {{not an integral constant}} \
+                                                                    // ref-note {{dereferenced one-past-the-end}}
+}