Reads the L2 and L3 cache sizes from the system instead of using hard-coded constants.

author A. Unique TensorFlower <gardener@tensorflow.org>

Fri, 11 May 2018 19:55:55 +0000 (12:55 -0700)

committer TensorFlower Gardener <gardener@tensorflow.org>

Fri, 11 May 2018 19:58:41 +0000 (12:58 -0700)
author A. Unique TensorFlower <gardener@tensorflow.org>
Fri, 11 May 2018 19:55:55 +0000 (12:55 -0700)
committer TensorFlower Gardener <gardener@tensorflow.org>
Fri, 11 May 2018 19:58:41 +0000 (12:58 -0700)
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc

index aca7517..bdd0822 100644 (file)
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -404,10 +404,9 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
      // image ('work_unit_size').
  
      // TODO(andydavis)
-    // *) Get L3 cache size from device at runtime (30MB is from ivybridge).
      // *) Consider reducing 'target_working_set_size' if L3 is shared by
      //    other concurrently running tensorflow ops.
-    const size_t target_working_set_size = (30LL << 20) / sizeof(T);
+    const size_t target_working_set_size = Eigen::l3CacheSize() / sizeof(T);
  
      const size_t size_A = output_image_size * filter_total_size;
  
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc

index 63a775a..95301b1 100644 (file)
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -420,9 +420,8 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
      const int output_image_size =
          dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size;
  
-    // TODO(andydavis) Get L2/L3 cache sizes from device.
-    const size_t l2_cache_size = 256LL << 10;
-    const size_t l3_cache_size = 30LL << 20;
+    const size_t l2_cache_size = Eigen::l2CacheSize();
+    const size_t l3_cache_size = Eigen::l3CacheSize();
  
      // Use L3 cache size as target working set size.
      const size_t target_working_set_size = l3_cache_size / sizeof(T);
diff --git a/tensorflow/core/kernels/deep_conv2d.cc b/tensorflow/core/kernels/deep_conv2d.cc

index 829155f..014684d 100644 (file)
--- a/tensorflow/core/kernels/deep_conv2d.cc
+++ b/tensorflow/core/kernels/deep_conv2d.cc
@@ -393,9 +393,8 @@ struct TransformFilters {
  
      // Calculate filter transform batch based on cache/filter sizes.
  
-    // Cache budget (based on L2 cache size = 256KB).
-    // TODO(andydavis) Read cache size from system.
-    const int64 cache_size = (256LL << 10) / sizeof(T);
+    // Cache budget (based on L2 cache size).
+    const int64 cache_size = Eigen::l2CacheSize() / sizeof(T);
  
      // Fixed cost.
      const int64 filter_transform_matrix_size =
@@ -1017,9 +1016,8 @@ struct DeepConv2D<CPUDevice, T> {
        const int64 filter_shard_size = filter_shards_row * filter_shards_col;
        const int64 out_tile_spatial_size = out_tile_rows * out_tile_cols;
  
-      // Cache budget (based on L2 cache size = 256KB).
-      // TODO(andydavis) Read cache size from the system.
-      const int64 cache_size = (256LL << 10) / sizeof(T);
+      // Cache budget (based on L2 cache size).
+      const int64 cache_size = Eigen::l2CacheSize() / sizeof(T);
  
        // Fixed costs.
        const int64 tile_transform_matrix_size =
author	A. Unique TensorFlower <gardener@tensorflow.org>
	Fri, 11 May 2018 19:55:55 +0000 (12:55 -0700)
committer	TensorFlower Gardener <gardener@tensorflow.org>
	Fri, 11 May 2018 19:58:41 +0000 (12:58 -0700)
tensorflow/core/kernels/conv_grad_filter_ops.cc		patch | blob | history
tensorflow/core/kernels/conv_grad_input_ops.cc		patch | blob | history
tensorflow/core/kernels/deep_conv2d.cc		patch | blob | history