From 398a62037eb5f0aa049d3243818d16f2b3a10dec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 11 May 2018 12:55:55 -0700 Subject: [PATCH] Reads the L2 and L3 cache sizes from the system instead of using hard-coded constants. PiperOrigin-RevId: 196296096 --- tensorflow/core/kernels/conv_grad_filter_ops.cc | 3 +-- tensorflow/core/kernels/conv_grad_input_ops.cc | 5 ++--- tensorflow/core/kernels/deep_conv2d.cc | 10 ++++------ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index aca7517..bdd0822 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -404,10 +404,9 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { // image ('work_unit_size'). // TODO(andydavis) - // *) Get L3 cache size from device at runtime (30MB is from ivybridge). // *) Consider reducing 'target_working_set_size' if L3 is shared by // other concurrently running tensorflow ops. - const size_t target_working_set_size = (30LL << 20) / sizeof(T); + const size_t target_working_set_size = Eigen::l3CacheSize() / sizeof(T); const size_t size_A = output_image_size * filter_total_size; diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 63a775a..95301b1 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -420,9 +420,8 @@ class Conv2DCustomBackpropInputOp : public OpKernel { const int output_image_size = dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size; - // TODO(andydavis) Get L2/L3 cache sizes from device. - const size_t l2_cache_size = 256LL << 10; - const size_t l3_cache_size = 30LL << 20; + const size_t l2_cache_size = Eigen::l2CacheSize(); + const size_t l3_cache_size = Eigen::l3CacheSize(); // Use L3 cache size as target working set size. const size_t target_working_set_size = l3_cache_size / sizeof(T); diff --git a/tensorflow/core/kernels/deep_conv2d.cc b/tensorflow/core/kernels/deep_conv2d.cc index 829155f..014684d 100644 --- a/tensorflow/core/kernels/deep_conv2d.cc +++ b/tensorflow/core/kernels/deep_conv2d.cc @@ -393,9 +393,8 @@ struct TransformFilters { // Calculate filter transform batch based on cache/filter sizes. - // Cache budget (based on L2 cache size = 256KB). - // TODO(andydavis) Read cache size from system. - const int64 cache_size = (256LL << 10) / sizeof(T); + // Cache budget (based on L2 cache size). + const int64 cache_size = Eigen::l2CacheSize() / sizeof(T); // Fixed cost. const int64 filter_transform_matrix_size = @@ -1017,9 +1016,8 @@ struct DeepConv2D { const int64 filter_shard_size = filter_shards_row * filter_shards_col; const int64 out_tile_spatial_size = out_tile_rows * out_tile_cols; - // Cache budget (based on L2 cache size = 256KB). - // TODO(andydavis) Read cache size from the system. - const int64 cache_size = (256LL << 10) / sizeof(T); + // Cache budget (based on L2 cache size). + const int64 cache_size = Eigen::l2CacheSize() / sizeof(T); // Fixed costs. const int64 tile_transform_matrix_size = -- 2.7.4