cc: Rewrite probe functions that refer to tracepoint structures
author Sasha Goldshtein <goldshtn@gmail.com>
Tue, 5 Jul 2016 16:34:56 +0000 (09:34 -0700)
committer Sasha Goldshtein <goldshtn@gmail.com>
Sat, 9 Jul 2016 12:19:12 +0000 (05:19 -0700)
When a probe function refers to a tracepoint argument structure,
such as `struct tracepoint__irq__irq_handler_entry`, generate that
structure on the fly using a Clang frontend action that runs before
any other compilation step takes place.

Typically, the user will create tracepoint probe functions using
the TRACEPOINT_PROBE macro, which avoids the need for specifying
the tracepoint category and event twice in the signature of the
probe function.

src/cc/export/helpers.h
src/cc/frontends/clang/CMakeLists.txt
src/cc/frontends/clang/b_frontend_action.cc
src/cc/frontends/clang/loader.cc
src/cc/frontends/clang/tp_frontend_action.cc [new file with mode: 0644]
src/cc/frontends/clang/tp_frontend_action.h [new file with mode: 0644]
src/python/bcc/__init__.py

index dbc797b..7fd63e2 100644 (file)
@@ -451,5 +451,8 @@ int bpf_usdt_readarg_p(int argc, struct pt_regs *ctx, void *buf, u64 len) asm("l
 
 #define lock_xadd(ptr, val) ((void)__sync_fetch_and_add(ptr, val))
 
+#define TRACEPOINT_PROBE(category, event) /* expands to the signature of tracepoint__<category>__<event>(); its only parameter, args, points to the struct of the same name */ \
+int tracepoint__##category##__##event(struct tracepoint__##category##__##event *args)
+
 #endif
 )********"
index 43fc7c1..70aaa00 100644 (file)
@@ -4,4 +4,4 @@
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKERNEL_MODULES_DIR='\"${BCC_KERNEL_MODULES_DIR}\"'")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKERNEL_MODULES_SUFFIX='\"${BCC_KERNEL_MODULES_SUFFIX}\"'")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKERNEL_HAS_SOURCE_DIR=${BCC_KERNEL_HAS_SOURCE_DIR}")
-add_library(clang_frontend loader.cc b_frontend_action.cc kbuild_helper.cc)
+add_library(clang_frontend loader.cc b_frontend_action.cc tp_frontend_action.cc kbuild_helper.cc)
index 73e888b..d4734c7 100644 (file)
@@ -253,10 +253,11 @@ BTypeVisitor::BTypeVisitor(ASTContext &C, Rewriter &rewriter, vector<TableDesc>
 bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
   // put each non-static non-inline function decl in its own section, to be
   // extracted by the MemoryManager
+  auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(D->getLocStart());
   if (D->isExternallyVisible() && D->hasBody()) {
     current_fn_ = D->getName();
     string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX + D->getName().str() + "\")))\n";
-    rewriter_.InsertText(D->getLocStart(), attr);
+    rewriter_.InsertText(real_start_loc, attr);
     if (D->param_size() > MAX_CALLING_CONV_REGS + 1) {
       error(D->getParamDecl(MAX_CALLING_CONV_REGS + 1)->getLocStart(),
             "too many arguments, bcc only supports in-register parameters");
@@ -295,10 +296,10 @@ bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
     if (CompoundStmt *S = dyn_cast<CompoundStmt>(D->getBody()))
       rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble);
   } else if (D->hasBody() &&
-             rewriter_.getSourceMgr().getFileID(D->getLocStart())
+             rewriter_.getSourceMgr().getFileID(real_start_loc)
                == rewriter_.getSourceMgr().getMainFileID()) {
     // rewritable functions that are static should be always treated as helper
-    rewriter_.InsertText(D->getLocStart(), "__attribute__((always_inline))\n");
+    rewriter_.InsertText(real_start_loc, "__attribute__((always_inline))\n");
   }
   return true;
 }
index aa929b8..4b0729d 100644 (file)
@@ -50,6 +50,7 @@
 #include "exported_files.h"
 #include "kbuild_helper.h"
 #include "b_frontend_action.h"
+#include "tp_frontend_action.h"
 #include "loader.h"
 
 using std::map;
@@ -166,6 +167,34 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, unique_ptr<vector<TableDes
     llvm::errs() << "\n";
   }
 
+  // pre-compilation pass for generating tracepoint structures
+  auto invocation0 = make_unique<CompilerInvocation>();
+  if (!CompilerInvocation::CreateFromArgs(*invocation0, const_cast<const char **>(ccargs.data()),
+                                          const_cast<const char **>(ccargs.data()) + ccargs.size(), diags))
+    return -1;
+
+  invocation0->getPreprocessorOpts().RetainRemappedFileBuffers = true;
+  for (const auto &f : remapped_files_)
+    invocation0->getPreprocessorOpts().addRemappedFile(f.first, &*f.second);
+
+  if (in_memory) {
+    invocation0->getPreprocessorOpts().addRemappedFile(main_path, &*main_buf);
+    invocation0->getFrontendOpts().Inputs.clear();
+    invocation0->getFrontendOpts().Inputs.push_back(FrontendInputFile(main_path, IK_C));
+  }
+  invocation0->getFrontendOpts().DisableFree = false;
+
+  CompilerInstance compiler0;
+  compiler0.setInvocation(invocation0.release());
+  compiler0.createDiagnostics(new IgnoringDiagConsumer());
+
+  // capture the rewritten c file
+  string out_str;
+  llvm::raw_string_ostream os(out_str);
+  TracepointFrontendAction tpact(os);
+  compiler0.ExecuteAction(tpact); // ignore errors, they will be reported later
+  unique_ptr<llvm::MemoryBuffer> out_buf = llvm::MemoryBuffer::getMemBuffer(out_str);
+
   // first pass
   auto invocation1 = make_unique<CompilerInvocation>();
   if (!CompilerInvocation::CreateFromArgs(*invocation1, const_cast<const char **>(ccargs.data()),
@@ -178,12 +207,9 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, unique_ptr<vector<TableDes
   invocation1->getPreprocessorOpts().RetainRemappedFileBuffers = true;
   for (const auto &f : remapped_files_)
     invocation1->getPreprocessorOpts().addRemappedFile(f.first, &*f.second);
-
-  if (in_memory) {
-    invocation1->getPreprocessorOpts().addRemappedFile(main_path, &*main_buf);
-    invocation1->getFrontendOpts().Inputs.clear();
-    invocation1->getFrontendOpts().Inputs.push_back(FrontendInputFile(main_path, IK_C));
-  }
+  invocation1->getPreprocessorOpts().addRemappedFile(main_path, &*out_buf);
+  invocation1->getFrontendOpts().Inputs.clear();
+  invocation1->getFrontendOpts().Inputs.push_back(FrontendInputFile(main_path, IK_C));
   invocation1->getFrontendOpts().DisableFree = false;
 
   CompilerInstance compiler1;
@@ -191,12 +217,12 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, unique_ptr<vector<TableDes
   compiler1.createDiagnostics();
 
   // capture the rewritten c file
-  string out_str;
-  llvm::raw_string_ostream os(out_str);
-  BFrontendAction bact(os, flags_);
+  string out_str1;
+  llvm::raw_string_ostream os1(out_str1);
+  BFrontendAction bact(os1, flags_);
   if (!compiler1.ExecuteAction(bact))
     return -1;
-  unique_ptr<llvm::MemoryBuffer> out_buf = llvm::MemoryBuffer::getMemBuffer(out_str);
+  unique_ptr<llvm::MemoryBuffer> out_buf1 = llvm::MemoryBuffer::getMemBuffer(out_str1);
   // this contains the open FDs
   *tables = bact.take_tables();
 
@@ -209,7 +235,7 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, unique_ptr<vector<TableDes
   invocation2->getPreprocessorOpts().RetainRemappedFileBuffers = true;
   for (const auto &f : remapped_files_)
     invocation2->getPreprocessorOpts().addRemappedFile(f.first, &*f.second);
-  invocation2->getPreprocessorOpts().addRemappedFile(main_path, &*out_buf);
+  invocation2->getPreprocessorOpts().addRemappedFile(main_path, &*out_buf1);
   invocation2->getFrontendOpts().Inputs.clear();
   invocation2->getFrontendOpts().Inputs.push_back(FrontendInputFile(main_path, IK_C));
   invocation2->getFrontendOpts().DisableFree = false;
diff --git a/src/cc/frontends/clang/tp_frontend_action.cc b/src/cc/frontends/clang/tp_frontend_action.cc
new file mode 100644 (file)
index 0000000..db84942
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Sasha Goldshtein
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include <fstream>
+#include <regex>
+
+#include <clang/AST/ASTConsumer.h>
+#include <clang/AST/ASTContext.h>
+#include <clang/AST/RecordLayout.h>
+#include <clang/Frontend/CompilerInstance.h>
+#include <clang/Frontend/MultiplexConsumer.h>
+#include <clang/Rewrite/Core/Rewriter.h>
+
+#include "tp_frontend_action.h"
+
+namespace ebpf {
+
+using std::map;
+using std::set;
+using std::string;
+using std::to_string;
+using std::unique_ptr;
+using std::vector;
+using std::regex;
+using std::smatch;
+using std::regex_search;
+using std::ifstream;
+using namespace clang;
+
+TracepointTypeVisitor::TracepointTypeVisitor(ASTContext &C, Rewriter &rewriter)  // keeps references to the AST context and the rewriter
+    : C(C), diag_(C.getDiagnostics()), rewriter_(rewriter), out_(llvm::errs()) {  // out_ is bound to llvm::errs()
+}
+
+string TracepointTypeVisitor::GenerateTracepointStruct(  // builds the source text of struct tracepoint__<category>__<event> from the event's format file
+    SourceLocation loc, string const& category, string const& event) {  // returns "" when the format file cannot be opened; loc is not used in this body
+  static regex field_regex("field:([^;]*);.*size:\\d+;");  // group 1 is the text between "field:" and the first ';'
+  string format_file = "/sys/kernel/debug/tracing/events/" +
+    category + "/" + event + "/format";
+  ifstream input(format_file.c_str());
+  if (!input)
+    return "";
+
+  string tp_struct = "struct tracepoint__" + category + "__" + event + " {\n";
+  tp_struct += "\tu64 __do_not_use__;\n";  // always emitted as the first member; presumably stands in for the skipped common_ fields (TODO confirm)
+  for (string line; getline(input, line); ) {
+    smatch field_match;
+    if (!regex_search(line, field_match, field_regex))
+      continue;  // line does not describe a field
+
+    string field = field_match[1];
+    auto pos = field.find_last_of("\t ");  // the last space or tab splits the captured text into type and name
+    if (pos == string::npos)
+      continue;  // no separator, so no type/name pair can be formed
+
+    string field_type = field.substr(0, pos);
+    string field_name = field.substr(pos + 1);
+    if (field_type.find("__data_loc") != string::npos)
+      continue;  // fields whose type mentions __data_loc are left out
+    if (field_name.find("common_") == 0)
+      continue;  // fields whose name starts with "common_" are left out
+
+    tp_struct += "\t" + field_type + " " + field_name + ";\n";  // one member per remaining field, type and name copied verbatim
+  }
+
+  tp_struct += "};\n";
+  return tp_struct;
+}
+
+bool TracepointTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {  // inserts a generated struct before functions taking a tracepoint struct pointer; always returns true
+  static regex type_regex("(?:struct|class)\\s+tracepoint__(\\S+)__(\\S+)");  // groups 1 and 2 are used below as category and event
+  if (D->isExternallyVisible() && D->hasBody()) {
+    // If this function has a tracepoint structure as an argument,
+    // add that structure declaration based on the structure name.
+    for (auto arg : D->params()) {
+      auto type = arg->getType();
+      if (type->isPointerType() &&
+          type->getPointeeType()->isStructureOrClassType()) {
+        auto type_name = QualType::getAsString(type.split());
+        smatch type_match;
+        if (regex_search(type_name, type_match, type_regex)) {
+          string tp_cat = type_match[1], tp_evt = type_match[2];  // category, event
+          string tp_struct = GenerateTracepointStruct(
+              D->getLocStart(), tp_cat, tp_evt);
+
+          // Get the actual function declaration point (the macro expansion
+          // point if using the TRACEPOINT_PROBE macro instead of the macro
+          // definition point in helpers.h).
+          auto insert_loc = D->getLocStart();
+          insert_loc = rewriter_.getSourceMgr().getFileLoc(insert_loc);
+          rewriter_.InsertText(insert_loc, tp_struct);  // NOTE: tp_struct is "" when the format file could not be opened; that case is not checked here
+        }
+      }
+    }
+  }
+  return true;
+}
+
+TracepointTypeConsumer::TracepointTypeConsumer(ASTContext &C, Rewriter &rewriter)
+    : visitor_(C, rewriter) {  // both arguments are forwarded to the visitor
+}
+
+bool TracepointTypeConsumer::HandleTopLevelDecl(DeclGroupRef Group) {  // runs the visitor over every declaration in the group
+  for (auto D : Group)
+    visitor_.TraverseDecl(D);  // the traversal result is not inspected
+  return true;  // unconditionally true
+}
+
+TracepointFrontendAction::TracepointFrontendAction(llvm::raw_ostream &os)  // os is the stream EndSourceFileAction() writes to
+    : os_(os), rewriter_(new Rewriter) {  // a fresh Rewriter is allocated and owned by rewriter_
+}
+
+void TracepointFrontendAction::EndSourceFileAction() {  // writes the rewriter's edit buffer for the main file to os_
+  rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(os_);
+  os_.flush();  // flush so the text is not left in the stream's buffer
+}
+
+unique_ptr<ASTConsumer> TracepointFrontendAction::CreateASTConsumer(
+        CompilerInstance &Compiler, llvm::StringRef InFile) {  // InFile is not used in this body
+  rewriter_->setSourceMgr(Compiler.getSourceManager(), Compiler.getLangOpts());  // attach the rewriter to this compiler's source manager and language options
+  return unique_ptr<ASTConsumer>(new TracepointTypeConsumer(
+              Compiler.getASTContext(), *rewriter_));  // the consumer's visitor edits through the same rewriter
+}
+
+}
diff --git a/src/cc/frontends/clang/tp_frontend_action.h b/src/cc/frontends/clang/tp_frontend_action.h
new file mode 100644 (file)
index 0000000..44522e0
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016 Sasha Goldshtein
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <clang/AST/RecursiveASTVisitor.h>
+#include <clang/Frontend/FrontendAction.h>
+#include <clang/Rewrite/Core/Rewriter.h>
+
+namespace clang {
+class ASTConsumer;
+class ASTContext;
+class CompilerInstance;
+}
+
+namespace llvm {
+class raw_ostream;
+class StringRef;
+}
+
+namespace ebpf {
+
+// Visit functions that have a tracepoint argument structure in their signature
+// and automatically generate the structure on-the-fly.
+class TracepointTypeVisitor :
+  public clang::RecursiveASTVisitor<TracepointTypeVisitor> {
+ public:
+  explicit TracepointTypeVisitor(clang::ASTContext &C,
+                                 clang::Rewriter &rewriter);
+  bool VisitFunctionDecl(clang::FunctionDecl *D);  // visitor hook for function declarations
+
+ private:
+  std::string GenerateTracepointStruct(clang::SourceLocation loc,
+          std::string const& category, std::string const& event);  // returns the struct definition as source text
+
+  clang::ASTContext &C;
+  clang::DiagnosticsEngine &diag_;  // taken from C.getDiagnostics() by the constructor
+  clang::Rewriter &rewriter_;  // receives the generated struct text
+  llvm::raw_ostream &out_;  // bound to llvm::errs() by the constructor
+};
+
+class TracepointTypeConsumer : public clang::ASTConsumer {  // AST consumer that hands top-level declarations to a TracepointTypeVisitor
+ public:
+  explicit TracepointTypeConsumer(clang::ASTContext &C,
+                                  clang::Rewriter &rewriter);
+  bool HandleTopLevelDecl(clang::DeclGroupRef Group) override;
+ private:
+  TracepointTypeVisitor visitor_;  // constructed from the context and rewriter passed to the constructor
+};
+
+class TracepointFrontendAction : public clang::ASTFrontendAction {  // frontend action that writes out the main file after the tracepoint rewriting pass
+ public:
+  TracepointFrontendAction(llvm::raw_ostream &os);  // os: destination for the rewritten source
+
+  void EndSourceFileAction() override;
+
+  std::unique_ptr<clang::ASTConsumer>
+      CreateASTConsumer(clang::CompilerInstance &Compiler, llvm::StringRef InFile) override;
+
+ private:
+  llvm::raw_ostream &os_;  // not owned; supplied by the caller
+  std::unique_ptr<clang::Rewriter> rewriter_;  // owned; allocated in the constructor
+};
+
+}  // namespace ebpf
index dc0104d..4ce121a 100644 (file)
@@ -182,8 +182,8 @@ class BPF(object):
         if not self.module:
             raise Exception("Failed to compile BPF module %s" % src_file)
 
-        # If any "kprobe__" prefixed functions were defined, they will be
-        # loaded and attached here.
+        # If any "kprobe__" or "tracepoint__" prefixed functions were defined,
+        # they will be loaded and attached here.
         self._trace_autoload()
 
     def load_funcs(self, prog_type=KPROBE):
@@ -620,9 +620,6 @@ class BPF(object):
                 fn = self.load_func(func_name, BPF.TRACEPOINT)
                 tp = fn.name[len("tracepoint__"):].replace("__", ":")
                 self.attach_tracepoint(tp=tp, fn_name=fn.name)
-        # It would be nice to automatically generate the tracepont
-        # structure here, but once we passed the load of the BPF program,
-        # we can't do that anymore. It will have to go in the clang rewriter.
 
     def trace_open(self, nonblocking=False):
         """trace_open(nonblocking=False)