Make FlushICache a NOP for Nvidia Denver CPUs.

author arajp <arajp@nvidia.com>

Fri, 19 Dec 2014 11:16:23 +0000 (03:16 -0800)

committer Commit bot <commit-bot@chromium.org>

Fri, 19 Dec 2014 11:16:36 +0000 (11:16 +0000)
author arajp <arajp@nvidia.com>
Fri, 19 Dec 2014 11:16:23 +0000 (03:16 -0800)
committer Commit bot <commit-bot@chromium.org>
Fri, 19 Dec 2014 11:16:36 +0000 (11:16 +0000)
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc

index 47883e5..105d711 100644 (file)
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -127,6 +127,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
    }
  
    if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
+
+  if (cpu.implementer() == base::CPU::NVIDIA &&
+      cpu.variant() == base::CPU::NVIDIA_DENVER) {
+    supported_ |= 1u << COHERENT_CACHE;
+  }
  #endif
  
    DCHECK(!IsSupported(VFP3) || IsSupported(ARMv7));
@@ -188,14 +193,15 @@ void CpuFeatures::PrintTarget() {
  void CpuFeatures::PrintFeatures() {
    printf(
      "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
-    "MOVW_MOVT_IMMEDIATE_LOADS=%d",
+    "MOVW_MOVT_IMMEDIATE_LOADS=%d COHERENT_CACHE=%d",
      CpuFeatures::IsSupported(ARMv7),
      CpuFeatures::IsSupported(VFP3),
      CpuFeatures::IsSupported(VFP32DREGS),
      CpuFeatures::IsSupported(NEON),
      CpuFeatures::IsSupported(SUDIV),
      CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
-    CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
+    CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS),
+    CpuFeatures::IsSupported(COHERENT_CACHE));
  #ifdef __arm__
    bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
  #elif USE_EABI_HARDFLOAT
diff --git a/src/arm/cpu-arm.cc b/src/arm/cpu-arm.cc

index 9c7104e..4a34070 100644 (file)
--- a/src/arm/cpu-arm.cc
+++ b/src/arm/cpu-arm.cc
@@ -27,6 +27,8 @@ namespace internal {
  void CpuFeatures::FlushICache(void* start, size_t size) {
    if (size == 0) return;
  
+  if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
  #if defined(USE_SIMULATOR)
    // Not generating ARM instructions for C-code. This means that we are
    // building an ARM emulator based target.  We should notify the simulator
diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc

index 7e89d49..770d425 100644 (file)
--- a/src/arm64/assembler-arm64.cc
+++ b/src/arm64/assembler-arm64.cc
@@ -46,11 +46,25 @@ namespace internal {
  void CpuFeatures::ProbeImpl(bool cross_compile) {
    // AArch64 has no configuration options, no further probing is required.
    supported_ = 0;
+
+  // Only use statically determined features for cross compile (snapshot).
+  if (cross_compile) return;
+
+  // Probe for runtime features
+  base::CPU cpu;
+  if (cpu.implementer() == base::CPU::NVIDIA &&
+      cpu.variant() == base::CPU::NVIDIA_DENVER) {
+    supported_ |= 1u << COHERENT_CACHE;
+  }
  }
  
  
  void CpuFeatures::PrintTarget() { }
-void CpuFeatures::PrintFeatures() { }
+
+
+void CpuFeatures::PrintFeatures() {
+  printf("COHERENT_CACHE=%d\n", CpuFeatures::IsSupported(COHERENT_CACHE));
+}
  
  
  // -----------------------------------------------------------------------------
diff --git a/src/arm64/cpu-arm64.cc b/src/arm64/cpu-arm64.cc

index 39beb6d..11ba7c9 100644 (file)
--- a/src/arm64/cpu-arm64.cc
+++ b/src/arm64/cpu-arm64.cc
@@ -43,6 +43,8 @@ class CacheLineSizes {
  void CpuFeatures::FlushICache(void* address, size_t length) {
    if (length == 0) return;
  
+  if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
  #ifdef USE_SIMULATOR
    // TODO(all): consider doing some cache simulation to ensure every address
    // run has been synced.
diff --git a/src/base/cpu.cc b/src/base/cpu.cc

index e188406..56e1c46 100644 (file)
--- a/src/base/cpu.cc
+++ b/src/base/cpu.cc
@@ -300,6 +300,7 @@ CPU::CPU()
        type_(0),
        implementer_(0),
        architecture_(0),
+      variant_(-1),
        part_(0),
        has_fpu_(false),
        has_cmov_(false),
@@ -388,7 +389,7 @@ CPU::CPU()
    // Extract implementor from the "CPU implementer" field.
    char* implementer = cpu_info.ExtractField("CPU implementer");
    if (implementer != NULL) {
-    char* end ;
+    char* end;
      implementer_ = strtol(implementer, &end, 0);
      if (end == implementer) {
        implementer_ = 0;
@@ -396,10 +397,20 @@ CPU::CPU()
      delete[] implementer;
    }
  
+  char* variant = cpu_info.ExtractField("CPU variant");
+  if (variant != NULL) {
+    char* end;
+    variant_ = strtol(variant, &end, 0);
+    if (end == variant) {
+      variant_ = -1;
+    }
+    delete[] variant;
+  }
+
    // Extract part number from the "CPU part" field.
    char* part = cpu_info.ExtractField("CPU part");
    if (part != NULL) {
-    char* end ;
+    char* end;
      part_ = strtol(part, &end, 0);
      if (end == part) {
        part_ = 0;
@@ -540,7 +551,7 @@ CPU::CPU()
    // Extract implementor from the "CPU implementer" field.
    char* implementer = cpu_info.ExtractField("CPU implementer");
    if (implementer != NULL) {
-    char* end ;
+    char* end;
      implementer_ = strtol(implementer, &end, 0);
      if (end == implementer) {
        implementer_ = 0;
@@ -548,10 +559,20 @@ CPU::CPU()
      delete[] implementer;
    }
  
+  char* variant = cpu_info.ExtractField("CPU variant");
+  if (variant != NULL) {
+    char* end;
+    variant_ = strtol(variant, &end, 0);
+    if (end == variant) {
+      variant_ = -1;
+    }
+    delete[] variant;
+  }
+
    // Extract part number from the "CPU part" field.
    char* part = cpu_info.ExtractField("CPU part");
    if (part != NULL) {
-    char* end ;
+    char* end;
      part_ = strtol(part, &end, 0);
      if (end == part) {
        part_ = 0;
diff --git a/src/base/cpu.h b/src/base/cpu.h

index fe8e102..8c41f9d 100644 (file)
--- a/src/base/cpu.h
+++ b/src/base/cpu.h
@@ -47,6 +47,8 @@ class CPU FINAL {
    static const int NVIDIA = 0x4e;
    static const int QUALCOMM = 0x51;
    int architecture() const { return architecture_; }
+  int variant() const { return variant_; }
+  static const int NVIDIA_DENVER = 0x0;
    int part() const { return part_; }
    static const int ARM_CORTEX_A5 = 0xc05;
    static const int ARM_CORTEX_A7 = 0xc07;
@@ -92,6 +94,7 @@ class CPU FINAL {
    int type_;
    int implementer_;
    int architecture_;
+  int variant_;
    int part_;
    bool has_fpu_;
    bool has_cmov_;
diff --git a/src/globals.h b/src/globals.h

index 8c27b01..3262ebd 100644 (file)
--- a/src/globals.h
+++ b/src/globals.h
@@ -609,6 +609,7 @@ enum CpuFeature {
    MIPSr6,
    // ARM64
    ALWAYS_ALIGN_CSP,
+  COHERENT_CACHE,
    NUMBER_OF_CPU_FEATURES
  };
author	arajp <arajp@nvidia.com>
	Fri, 19 Dec 2014 11:16:23 +0000 (03:16 -0800)
committer	Commit bot <commit-bot@chromium.org>
	Fri, 19 Dec 2014 11:16:36 +0000 (11:16 +0000)
src/arm/assembler-arm.cc		patch \| blob \| history
src/arm/cpu-arm.cc		patch \| blob \| history
src/arm64/assembler-arm64.cc		patch \| blob \| history
src/arm64/cpu-arm64.cc		patch \| blob \| history
src/base/cpu.cc		patch \| blob \| history
src/base/cpu.h		patch \| blob \| history
src/globals.h		patch \| blob \| history