Make FlushICache NOP for Nvidia Denver CPU's.
authorarajp <arajp@nvidia.com>
Fri, 19 Dec 2014 11:16:23 +0000 (03:16 -0800)
committerCommit bot <commit-bot@chromium.org>
Fri, 19 Dec 2014 11:16:36 +0000 (11:16 +0000)
Denver supports a coherent cache mechanism. There is no need to clean
the D cache and invalidate I cache. MTS has to check the translation
anytime there is an I cache invalidate and this time can be saved by
making FlushICache a NOP.

The patch improves Octane by roughly 3-4% on Denver.

Review URL: https://codereview.chromium.org/797233002

Cr-Commit-Position: refs/heads/master@{#25898}

src/arm/assembler-arm.cc
src/arm/cpu-arm.cc
src/arm64/assembler-arm64.cc
src/arm64/cpu-arm64.cc
src/base/cpu.cc
src/base/cpu.h
src/globals.h

index 47883e5..105d711 100644 (file)
@@ -127,6 +127,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
   }
 
   if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
+
+  if (cpu.implementer() == base::CPU::NVIDIA &&
+      cpu.variant() == base::CPU::NVIDIA_DENVER) {
+    supported_ |= 1u << COHERENT_CACHE;
+  }
 #endif
 
   DCHECK(!IsSupported(VFP3) || IsSupported(ARMv7));
@@ -188,14 +193,15 @@ void CpuFeatures::PrintTarget() {
 void CpuFeatures::PrintFeatures() {
   printf(
     "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
-    "MOVW_MOVT_IMMEDIATE_LOADS=%d",
+    "MOVW_MOVT_IMMEDIATE_LOADS=%d COHERENT_CACHE=%d",
     CpuFeatures::IsSupported(ARMv7),
     CpuFeatures::IsSupported(VFP3),
     CpuFeatures::IsSupported(VFP32DREGS),
     CpuFeatures::IsSupported(NEON),
     CpuFeatures::IsSupported(SUDIV),
     CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
-    CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
+    CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS),
+    CpuFeatures::IsSupported(COHERENT_CACHE));
 #ifdef __arm__
   bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
 #elif USE_EABI_HARDFLOAT
index 9c7104e..4a34070 100644 (file)
@@ -27,6 +27,8 @@ namespace internal {
 void CpuFeatures::FlushICache(void* start, size_t size) {
   if (size == 0) return;
 
+  if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
 #if defined(USE_SIMULATOR)
   // Not generating ARM instructions for C-code. This means that we are
   // building an ARM emulator based target.  We should notify the simulator
index 7e89d49..770d425 100644 (file)
@@ -46,11 +46,25 @@ namespace internal {
 void CpuFeatures::ProbeImpl(bool cross_compile) {
   // AArch64 has no configuration options, no further probing is required.
   supported_ = 0;
+
+  // Only use statically determined features for cross compile (snapshot).
+  if (cross_compile) return;
+
+  // Probe for runtime features
+  base::CPU cpu;
+  if (cpu.implementer() == base::CPU::NVIDIA &&
+      cpu.variant() == base::CPU::NVIDIA_DENVER) {
+    supported_ |= 1u << COHERENT_CACHE;
+  }
 }
 
 
 void CpuFeatures::PrintTarget() { }
-void CpuFeatures::PrintFeatures() { }
+
+
+void CpuFeatures::PrintFeatures() {
+  printf("COHERENT_CACHE=%d\n", CpuFeatures::IsSupported(COHERENT_CACHE));
+}
 
 
 // -----------------------------------------------------------------------------
index 39beb6d..11ba7c9 100644 (file)
@@ -43,6 +43,8 @@ class CacheLineSizes {
 void CpuFeatures::FlushICache(void* address, size_t length) {
   if (length == 0) return;
 
+  if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
 #ifdef USE_SIMULATOR
   // TODO(all): consider doing some cache simulation to ensure every address
   // run has been synced.
index e188406..56e1c46 100644 (file)
@@ -300,6 +300,7 @@ CPU::CPU()
       type_(0),
       implementer_(0),
       architecture_(0),
+      variant_(-1),
       part_(0),
       has_fpu_(false),
       has_cmov_(false),
@@ -388,7 +389,7 @@ CPU::CPU()
   // Extract implementor from the "CPU implementer" field.
   char* implementer = cpu_info.ExtractField("CPU implementer");
   if (implementer != NULL) {
-    char* end ;
+    char* end;
     implementer_ = strtol(implementer, &end, 0);
     if (end == implementer) {
       implementer_ = 0;
@@ -396,10 +397,20 @@ CPU::CPU()
     delete[] implementer;
   }
 
+  char* variant = cpu_info.ExtractField("CPU variant");
+  if (variant != NULL) {
+    char* end;
+    variant_ = strtol(variant, &end, 0);
+    if (end == variant) {
+      variant_ = -1;
+    }
+    delete[] variant;
+  }
+
   // Extract part number from the "CPU part" field.
   char* part = cpu_info.ExtractField("CPU part");
   if (part != NULL) {
-    char* end ;
+    char* end;
     part_ = strtol(part, &end, 0);
     if (end == part) {
       part_ = 0;
@@ -540,7 +551,7 @@ CPU::CPU()
   // Extract implementor from the "CPU implementer" field.
   char* implementer = cpu_info.ExtractField("CPU implementer");
   if (implementer != NULL) {
-    char* end ;
+    char* end;
     implementer_ = strtol(implementer, &end, 0);
     if (end == implementer) {
       implementer_ = 0;
@@ -548,10 +559,20 @@ CPU::CPU()
     delete[] implementer;
   }
 
+  char* variant = cpu_info.ExtractField("CPU variant");
+  if (variant != NULL) {
+    char* end;
+    variant_ = strtol(variant, &end, 0);
+    if (end == variant) {
+      variant_ = -1;
+    }
+    delete[] variant;
+  }
+
   // Extract part number from the "CPU part" field.
   char* part = cpu_info.ExtractField("CPU part");
   if (part != NULL) {
-    char* end ;
+    char* end;
     part_ = strtol(part, &end, 0);
     if (end == part) {
       part_ = 0;
index fe8e102..8c41f9d 100644 (file)
@@ -47,6 +47,8 @@ class CPU FINAL {
   static const int NVIDIA = 0x4e;
   static const int QUALCOMM = 0x51;
   int architecture() const { return architecture_; }
+  int variant() const { return variant_; }
+  static const int NVIDIA_DENVER = 0x0;
   int part() const { return part_; }
   static const int ARM_CORTEX_A5 = 0xc05;
   static const int ARM_CORTEX_A7 = 0xc07;
@@ -92,6 +94,7 @@ class CPU FINAL {
   int type_;
   int implementer_;
   int architecture_;
+  int variant_;
   int part_;
   bool has_fpu_;
   bool has_cmov_;
index 8c27b01..3262ebd 100644 (file)
@@ -609,6 +609,7 @@ enum CpuFeature {
   MIPSr6,
   // ARM64
   ALWAYS_ALIGN_CSP,
+  COHERENT_CACHE,
   NUMBER_OF_CPU_FEATURES
 };