From 398a62037eb5f0aa049d3243818d16f2b3a10dec Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 11 May 2018 12:55:55 -0700
Subject: [PATCH] Reads the L2 and L3 cache sizes from the system instead of
 using hard-coded constants.

PiperOrigin-RevId: 196296096
---
 tensorflow/core/kernels/conv_grad_filter_ops.cc |  3 +--
 tensorflow/core/kernels/conv_grad_input_ops.cc  |  5 ++---
 tensorflow/core/kernels/deep_conv2d.cc          | 10 ++++------
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index aca7517..bdd0822 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -404,10 +404,9 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
     // image ('work_unit_size').
 
     // TODO(andydavis)
-    // *) Get L3 cache size from device at runtime (30MB is from ivybridge).
     // *) Consider reducing 'target_working_set_size' if L3 is shared by
     //    other concurrently running tensorflow ops.
-    const size_t target_working_set_size = (30LL << 20) / sizeof(T);
+    const size_t target_working_set_size = Eigen::l3CacheSize() / sizeof(T);
 
     const size_t size_A = output_image_size * filter_total_size;
 
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 63a775a..95301b1 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -420,9 +420,8 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
     const int output_image_size =
         dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size;
 
-    // TODO(andydavis) Get L2/L3 cache sizes from device.
-    const size_t l2_cache_size = 256LL << 10;
-    const size_t l3_cache_size = 30LL << 20;
+    const size_t l2_cache_size = Eigen::l2CacheSize();
+    const size_t l3_cache_size = Eigen::l3CacheSize();
 
     // Use L3 cache size as target working set size.
     const size_t target_working_set_size = l3_cache_size / sizeof(T);
diff --git a/tensorflow/core/kernels/deep_conv2d.cc b/tensorflow/core/kernels/deep_conv2d.cc
index 829155f..014684d 100644
--- a/tensorflow/core/kernels/deep_conv2d.cc
+++ b/tensorflow/core/kernels/deep_conv2d.cc
@@ -393,9 +393,8 @@ struct TransformFilters {
 
     // Calculate filter transform batch based on cache/filter sizes.
 
-    // Cache budget (based on L2 cache size = 256KB).
-    // TODO(andydavis) Read cache size from system.
-    const int64 cache_size = (256LL << 10) / sizeof(T);
+    // Cache budget (based on the L2 cache size reported by Eigen).
+    const int64 cache_size = Eigen::l2CacheSize() / sizeof(T);
 
     // Fixed cost.
     const int64 filter_transform_matrix_size =
@@ -1017,9 +1016,8 @@ struct DeepConv2D<CPUDevice, T> {
       const int64 filter_shard_size = filter_shards_row * filter_shards_col;
       const int64 out_tile_spatial_size = out_tile_rows * out_tile_cols;
 
-      // Cache budget (based on L2 cache size = 256KB).
-      // TODO(andydavis) Read cache size from the system.
-      const int64 cache_size = (256LL << 10) / sizeof(T);
+      // Cache budget (based on the L2 cache size reported by Eigen).
+      const int64 cache_size = Eigen::l2CacheSize() / sizeof(T);
 
       // Fixed costs.
       const int64 tile_transform_matrix_size =
-- 
2.7.4