Denver supports a coherent cache mechanism, so there is no need to
clean the D-cache and invalidate the I-cache. MTS has to re-check the
translation any time the I-cache is invalidated, and that time can be
saved by making FlushICache a NOP.
The patch improves Octane by roughly 3-4% on Denver.
Review URL: https://codereview.chromium.org/797233002
Cr-Commit-Position: refs/heads/master@{#25898}
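
For reference, the gist of the change is: parse the "CPU variant" field from
/proc/cpuinfo in base::CPU, set a COHERENT_CACHE feature bit when the
implementer/variant pair matches NVIDIA Denver (0x4e / 0x0), and have
CpuFeatures::FlushICache return early when that bit is set. The standalone
sketch below illustrates the same gate; CpuImplementer(), CpuVariant(),
HasCoherentCache() and the simplified FlushICache() here are illustrative
stand-ins, not the V8 API (the real detection lives in base::CPU and
CpuFeatures, as shown in the diff).

    // Standalone illustration only; constants mirror the values added by the patch.
    #include <cstddef>
    #include <cstdio>

    namespace {

    constexpr int kImplementerNvidia = 0x4e;  // "CPU implementer" for NVIDIA.
    constexpr int kVariantDenver = 0x0;       // "CPU variant" for Denver.

    // Hypothetical stand-ins for base::CPU::implementer()/variant(); a real
    // implementation would parse these fields from /proc/cpuinfo.
    int CpuImplementer() { return kImplementerNvidia; }
    int CpuVariant() { return kVariantDenver; }

    bool HasCoherentCache() {
      return CpuImplementer() == kImplementerNvidia &&
             CpuVariant() == kVariantDenver;
    }

    // Sketch of the FlushICache fast path: on a coherent-cache core the
    // D-cache clean / I-cache invalidate is unnecessary, so the call is a NOP.
    void FlushICache(void* start, std::size_t size) {
      if (size == 0) return;
      if (HasCoherentCache()) return;  // Denver: nothing to do.
      // ...otherwise fall through to the usual cache-maintenance path
      // (e.g. the cacheflush syscall or __builtin___clear_cache).
      (void)start;
    }

    }  // namespace

    int main() {
      char buffer[16];
      FlushICache(buffer, sizeof(buffer));
      std::printf("COHERENT_CACHE=%d\n", HasCoherentCache());
      return 0;
    }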
}
if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
+
+ if (cpu.implementer() == base::CPU::NVIDIA &&
+ cpu.variant() == base::CPU::NVIDIA_DENVER) {
+ supported_ |= 1u << COHERENT_CACHE;
+ }
#endif
DCHECK(!IsSupported(VFP3) || IsSupported(ARMv7));
void CpuFeatures::PrintFeatures() {
printf(
"ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
- "MOVW_MOVT_IMMEDIATE_LOADS=%d",
+ "MOVW_MOVT_IMMEDIATE_LOADS=%d COHERENT_CACHE=%d",
CpuFeatures::IsSupported(ARMv7),
CpuFeatures::IsSupported(VFP3),
CpuFeatures::IsSupported(VFP32DREGS),
CpuFeatures::IsSupported(NEON),
CpuFeatures::IsSupported(SUDIV),
CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
- CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
+ CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS),
+ CpuFeatures::IsSupported(COHERENT_CACHE));
#ifdef __arm__
bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
void CpuFeatures::FlushICache(void* start, size_t size) {
if (size == 0) return;
+ if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
#if defined(USE_SIMULATOR)
// Not generating ARM instructions for C-code. This means that we are
// building an ARM emulator based target. We should notify the simulator
void CpuFeatures::ProbeImpl(bool cross_compile) {
// AArch64 has no configuration options, no further probing is required.
supported_ = 0;
+
+ // Only use statically determined features for cross compile (snapshot).
+ if (cross_compile) return;
+
+ // Probe for runtime features
+ base::CPU cpu;
+ if (cpu.implementer() == base::CPU::NVIDIA &&
+ cpu.variant() == base::CPU::NVIDIA_DENVER) {
+ supported_ |= 1u << COHERENT_CACHE;
+ }
}
void CpuFeatures::PrintTarget() { }
-void CpuFeatures::PrintFeatures() { }
+
+
+void CpuFeatures::PrintFeatures() {
+ printf("COHERENT_CACHE=%d\n", CpuFeatures::IsSupported(COHERENT_CACHE));
+}
// -----------------------------------------------------------------------------
void CpuFeatures::FlushICache(void* address, size_t length) {
if (length == 0) return;
+ if (CpuFeatures::IsSupported(COHERENT_CACHE)) return;
+
#ifdef USE_SIMULATOR
// TODO(all): consider doing some cache simulation to ensure every address
// run has been synced.
type_(0),
implementer_(0),
architecture_(0),
+ variant_(-1),
part_(0),
has_fpu_(false),
has_cmov_(false),
// Extract implementor from the "CPU implementer" field.
char* implementer = cpu_info.ExtractField("CPU implementer");
if (implementer != NULL) {
- char* end ;
+ char* end;
implementer_ = strtol(implementer, &end, 0);
if (end == implementer) {
implementer_ = 0;
delete[] implementer;
}
+ char* variant = cpu_info.ExtractField("CPU variant");
+ if (variant != NULL) {
+ char* end;
+ variant_ = strtol(variant, &end, 0);
+ if (end == variant) {
+ variant_ = -1;
+ }
+ delete[] variant;
+ }
+
// Extract part number from the "CPU part" field.
char* part = cpu_info.ExtractField("CPU part");
if (part != NULL) {
- char* end ;
+ char* end;
part_ = strtol(part, &end, 0);
if (end == part) {
part_ = 0;
// Extract implementor from the "CPU implementer" field.
char* implementer = cpu_info.ExtractField("CPU implementer");
if (implementer != NULL) {
- char* end ;
+ char* end;
implementer_ = strtol(implementer, &end, 0);
if (end == implementer) {
implementer_ = 0;
delete[] implementer;
}
+ char* variant = cpu_info.ExtractField("CPU variant");
+ if (variant != NULL) {
+ char* end;
+ variant_ = strtol(variant, &end, 0);
+ if (end == variant) {
+ variant_ = -1;
+ }
+ delete[] variant;
+ }
+
// Extract part number from the "CPU part" field.
char* part = cpu_info.ExtractField("CPU part");
if (part != NULL) {
- char* end ;
+ char* end;
part_ = strtol(part, &end, 0);
if (end == part) {
part_ = 0;
static const int NVIDIA = 0x4e;
static const int QUALCOMM = 0x51;
int architecture() const { return architecture_; }
+ int variant() const { return variant_; }
+ static const int NVIDIA_DENVER = 0x0;
int part() const { return part_; }
static const int ARM_CORTEX_A5 = 0xc05;
static const int ARM_CORTEX_A7 = 0xc07;
int type_;
int implementer_;
int architecture_;
+ int variant_;
int part_;
bool has_fpu_;
bool has_cmov_;
MIPSr6,
// ARM64
ALWAYS_ALIGN_CSP,
+ COHERENT_CACHE,
NUMBER_OF_CPU_FEATURES
};