From: Parichay Kapoor Date: Tue, 5 Jan 2021 15:16:03 +0000 (+0900) Subject: [dynamic-training] Add dynamic training using derivatives X-Git-Tag: accepted/tizen/unified/20210204.134412~19 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a54ef34d1ec7ce57f3aac9414d56040e153bcc73;p=platform%2Fcore%2Fml%2Fnntrainer.git [dynamic-training] Add dynamic training using derivatives Added dynamic training using derivatives where the decision to apply the gradient is calculated using the derivative received without calculating the gradient itself. Signed-off-by: Parichay Kapoor --- diff --git a/jni/Android.mk b/jni/Android.mk index 3ca004c..39d7792 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -26,7 +26,6 @@ $(info $(shell ($(LOCAL_PATH)/prepare_iniparser.sh $(NDK_LIBS_OUT)))) endif endif - include $(CLEAR_VARS) NNTRAINER_JNI_ROOT := $(NNTRAINER_ROOT)/jni @@ -57,7 +56,6 @@ include $(PREBUILT_STATIC_LIBRARY) endif #ENABLE_TFLITE_BACKBONE - ifeq ($(ENABLE_BLAS), 1) include $(CLEAR_VARS) @@ -79,11 +77,11 @@ LOCAL_EXPORT_CFLAGS += -DUSE_BLAS=1 include $(PREBUILT_STATIC_LIBRARY) endif #ENABLE_BLAS - include $(CLEAR_VARS) NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \ $(NNTRAINER_ROOT)/nntrainer/models/model_loader.cpp \ + $(NNTRAINER_ROOT)/nntrainer/models/dynamic_training_optimization.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_factory.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_func.cpp \ diff --git a/nntrainer/models/dynamic_training_optimization.cpp b/nntrainer/models/dynamic_training_optimization.cpp new file mode 100644 index 0000000..cdd6573 --- /dev/null +++ b/nntrainer/models/dynamic_training_optimization.cpp @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2020 Parichay Kapoor + * + * @file dynamic_training_optimization.cpp + * @date 5 January 2021 + * @see https://github.com/nnstreamer/nntrainer + * @author Parichay Kapoor + * @bug No known bugs except for NYI items + * @brief This is Dynamic Training Optimization for Neural Network + * + */ + +#include +#include + +#include +#include +#include +#include + +namespace nntrainer { +DynamicTrainingOptimization::DynamicTrainingOptimization(int threshold_, + int skip_n_iter) : + threshold(threshold_), + enabled(false), + epsilon(1e-7), + skip_n_iterations(skip_n_iter) { + reduce_op = reduceByNorm; + calc_ratio_op = ratioUsingDerivative; + rng.seed(getSeed()); + dist = std::uniform_real_distribution(0.0, 1.0); +} + +/** + * @brief Check if the given weights can skip updating + * @note true if should be applied, else false + */ +bool DynamicTrainingOptimization::checkIfApply( + const std::vector &weights, const std::shared_ptr &input, + const std::shared_ptr &output, + const std::shared_ptr &opt, int iteration) { + if (!enabled || iteration < skip_n_iterations) + return true; + + std::vector apply; + apply.reserve(weights.size()); + + for (auto const &weight : weights) + apply.push_back(checkIfApply(weight, input, output, opt, iteration)); + + return std::accumulate(apply.begin(), apply.end(), true, + std::logical_and()); +} + +/** + * @brief Check if the given weight can skip updating + * @note true if should be applied, else false + */ +bool DynamicTrainingOptimization::checkIfApply( + const Weight &weight, const std::shared_ptr &input, + const std::shared_ptr &output, + const std::shared_ptr &opt, int iteration) { + if (iteration < skip_n_iterations) + return true; + + if (!weight.getTrainable() || 
weight.getGradientRef().uninitialized()) + return true; + + float reduced_ratio = calc_ratio_op(weight, input, output, reduce_op); + + return checkIfApply(reduced_ratio, (float)opt->getLearningRate(iteration)); +} + +/** + * @brief Calculate the ratio of update to the weight using derivative + */ +float DynamicTrainingOptimization::ratioUsingDerivative( + const Weight &weight, const std::shared_ptr &input, + const std::shared_ptr &output, + std::function reduce_op) { + float reduced_derivative = reduce_op(output->getGradientRef()); + float reduced_input = reduce_op(input->getVariableRef()); + float reduced_weight = reduce_op(weight.getVariableRef()); + float reduced_grad = reduced_derivative * reduced_input; + + return reduced_grad / reduced_weight; +} + +/** + * @brief Calculate the ratio of update to the weight using gradient + */ +float DynamicTrainingOptimization::ratioUsingGradient( + const Weight &weight, const std::shared_ptr &input, + const std::shared_ptr &output, + std::function reduce_op) { + Tensor ratio = weight.getGradientRef().divide(weight.getVariableRef()); + return reduce_op(ratio); +} + +/** + * @brief Check if the update should be applied or skipped + * @note true if should be applied, else false + */ +bool DynamicTrainingOptimization::checkIfApply(float reduced_ratio, + float learning_rate) { + /** + * If the reduced update ratio is higher than 1, then always apply update. + * If the reduced update raito is less than 1, then apply it with + * probability = update ratio + */ + if (dist(rng) < reduced_ratio * learning_rate / threshold) + return true; + + return false; +} + +/** + * @brief Operation to decide if update should be skipped + * @note Calculate l0 norm of the tensor + */ +float DynamicTrainingOptimization::reduceByMax(Tensor const &ratio) { + return ratio.max_abs(); +} + +/** + * @brief Operation to decide if update should be skipped + * @note Calcalate l2 norm of the tensor averaged by its size + */ +float DynamicTrainingOptimization::reduceByNorm(Tensor const &ratio) { + float l2norm = ratio.l2norm(); + return l2norm / std::sqrt(ratio.length()); +} + +/**< Different types of reduce operations */ +const std::string DynamicTrainingOptimization::dft_opt_max = "max"; +const std::string DynamicTrainingOptimization::dft_opt_norm = "norm"; + +const std::string DynamicTrainingOptimization::dft_opt_mode_gradient = + "gradient"; +const std::string DynamicTrainingOptimization::dft_opt_mode_derivative = + "derivative"; + +} /* namespace nntrainer */ diff --git a/nntrainer/models/dynamic_training_optimization.h b/nntrainer/models/dynamic_training_optimization.h index cac23e2..b270d65 100644 --- a/nntrainer/models/dynamic_training_optimization.h +++ b/nntrainer/models/dynamic_training_optimization.h @@ -2,13 +2,34 @@ /** * Copyright (C) 2020 Parichay Kapoor * - * @file activation_layer.cpp + * @file dynamic_training_optimization.h * @date 4 January 2021 * @see https://github.com/nnstreamer/nntrainer * @author Parichay Kapoor * @bug No known bugs except for NYI items * @brief This is Dynamic Training Optimization for Neural Network * + * Dynamic training aims to optimize the cost of applying the gradient. + * The cost of applying the gradient includes the cost of the optimizer (adam, + * etc) where the optimizer variables are updated, and the cost of actually + * updating the weights (which can be non-trivial with bigger weights and + * distributed training). + * + * There are two supported modes: + * 1. 
Gradient Mode: The already calculated gradient is used to estimate if this + * gradient must be used to update the weight, or if this update must be + * skipped. + * + * 2. Derivative Mode: This mode tries to estimate an approximate gradient with + * low cost in order to save the cost of calculating gradient. This cost of + * calculating gradient is wasted if the gradient is not going to be applied. + * + * There are two supported reduction operations which reduce the gradient and + * the weight to a single value in order to compare it with a threshold. + * If the reduced value is less than threshold, the update is performed with + * some probabilty proportional to the value. If the reduced value is higher + * than threshold, then the update is always performed. + * */ #ifndef __DYNAMIC_TRAINING_OPT_H__ @@ -20,7 +41,6 @@ #include #include -#include namespace nntrainer { @@ -33,131 +53,181 @@ public: /** * @brief Constructor of DynamicFineTuning Optimization */ - DynamicTrainingOptimization(int threshold_ = 1, int skip_n_iter = 1) : - threshold(threshold_), - enabled(false), - epsilon(1e-7), - skip_n_iterations(skip_n_iter) { - reduce_op = reduce_by_norm; - rng.seed(getSeed()); - dist = std::uniform_real_distribution(0.0, 1.0); - } + DynamicTrainingOptimization(int threshold_ = 1, int skip_n_iter = 1); /** * @brief Set threshold for optimization */ - void setThreshold(float threshold_) { threshold = threshold_; }; + void setThreshold(float threshold_) { + if (threshold_ < epsilon) + throw std::invalid_argument("Threshold is too small or negative"); + + threshold = threshold_; + }; /** * @brief Set the reduce operation for dynamic optimization */ - void setOp(std::string op) { - enabled = true; + void setOp(const std::string &op) { if (op == dft_opt_max) - reduce_op = reduce_by_max; + reduce_op = reduceByMax; else if (op == dft_opt_norm) - reduce_op = reduce_by_norm; + reduce_op = reduceByNorm; else - enabled = false; + throw std::invalid_argument( + "Unsupported reduction op in dynamic training"); }; /** - * @brief Set initial iteraions to skip from optimization + * @brief Enable the optimization */ - void setSkipIterations(int skip_n_iter) { skip_n_iterations = skip_n_iter; } + void enable() { enabled = true; } /** - * @brief Check if the given weights can skip updating + * @brief Disable the optimization */ - std::vector checkIfApply(const std::vector &weights, - const std::shared_ptr input, - const std::shared_ptr output, - const std::shared_ptr opt, - int iteration) { - if (!enabled) - return std::vector(weights.size(), true); + void disable() { enabled = false; } - std::vector apply; - apply.reserve(weights.size()); + /** + * @brief Set the mode for optimization + */ + void setMode(const std::string &mode_) { + calc_ratio_mode = mode_; + if (mode_ == dft_opt_mode_derivative) + calc_ratio_op = ratioUsingDerivative; + else if (mode_ == dft_opt_mode_gradient) + calc_ratio_op = ratioUsingGradient; + else + throw std::invalid_argument("Unsupported mode in dynamic training"); + } - for (auto const &weight : weights) - apply.push_back(checkIfApply(weight, input, output, opt, iteration)); + /** + * @brief Check if the derivative mode is used for optimization + * @note Use the derivative to calculate an approximate gradient to estimate + * if the actual gradient needs applying + */ + bool isDerivativeMode() { + if (enabled && calc_ratio_mode == dft_opt_mode_derivative) + return true; + return false; + } - return apply; + /** + * @brief Check if the gradient mode is used for optimization + * 
@note Use the gradient to estimate if this gradient needs applying + */ + bool isGradientMode() { + if (enabled && calc_ratio_mode == dft_opt_mode_gradient) + return true; + return false; } /** + * @brief Initial iterations to not perform dynamic training optimization + * @note If the current iteration is less than skip_n_iterations, the weights + * will updated and dynamic training optimization will not be performed. + * + */ + void setSkipIterations(int skip_n_iter) { skip_n_iterations = skip_n_iter; } + + /** + * @brief Check if the given weights can skip updating + * @param[in] weights All the weight tensors for a layer + * @param[in] input Input tensor for a layer + * @param[in] output Output tensor for a layer, from forward operation + * @param[in] opt Optimizer used to update the layer weights + * @param[in] iteration Current iteration number in training + * @note true if should be applied, else false + */ + bool checkIfApply(const std::vector &weights, + const std::shared_ptr &input, + const std::shared_ptr &output, + const std::shared_ptr &opt, int iteration); + + /** * @brief Check if the given weight can skip updating + * @param[in] weight Weight tensor for a layer + * @param[in] input Input tensor for a layer + * @param[in] output Output tensor for a layer, from forward operation + * @param[in] opt Optimizer used to update the layer weights + * @param[in] iteration Current iteration number in training + * @note true if should be applied, else false */ bool checkIfApply(const Weight &weight, const std::shared_ptr &input, const std::shared_ptr &output, - const std::shared_ptr &opt, int iteration) { - // by gradient - if (iteration < skip_n_iterations) - return true; + const std::shared_ptr &opt, int iteration); - Tensor &weight_grad = weight.getGradientRef(); - Tensor &weight_var = weight.getVariableRef(); + /**< Different types of reduce operations */ + static const std::string dft_opt_max; + static const std::string dft_opt_norm; - if (!weight.getTrainable() || weight_grad.uninitialized()) - return true; + /**< Different types of optimization modes */ + static const std::string dft_opt_mode_gradient; + static const std::string dft_opt_mode_derivative; - Tensor ratio = weight_grad.divide(weight_var); +private: + std::mt19937 rng; /**< random number generator */ + std::uniform_real_distribution + dist; /**< uniform random distribution */ + float threshold; /**< threshold to decide when to skip updating */ + bool enabled; /**< if optimization is enabled */ + float epsilon; /**< epsilon to skip overflow */ + int skip_n_iterations; /**< skip initial iterations from optimization */ + std::string calc_ratio_mode; /**< the mode to calc the ratio */ - // by derivative - // Tensor ratio = output.getGradientRef().divide(weight.getVariableRef()); - // ratio.multiply_i(input.getVariableRef()); + std::function + reduce_op; /**< operation to reduce update ratio to value */ + std::function &, + const std::shared_ptr &, + std::function reduce_op)> + calc_ratio_op; /**< calculate the ratio of update to the weight */ - /** - * If the reduced update ratio is higher than 1, then always apply update. 
- * If the reduced update raito is less than 1, then apply it with - * probability = update ratio - */ - if (dist(rng) < - reduce_op(ratio) * ((float)opt->getLearningRate(iteration)) / threshold) - return false; + /** + * @brief Calculate the ratio of update to the weight using derivative + * @param[in] weight Weight tensor for a layer + * @param[in] input Input tensor for a layer + * @param[in] output Output tensor for a layer, from forward operation + * @param[in] reduce_op Operation to reduce the ratio + */ + static float + ratioUsingDerivative(const Weight &weight, + const std::shared_ptr &input, + const std::shared_ptr &output, + std::function reduce_op); - return true; - } + /** + * @brief Calculate the ratio of update to the weight using gradient + * @param[in] weight Weight tensor for a layer + * @param[in] input Input tensor for a layer + * @param[in] output Output tensor for a layer, from forward operation + * @param[in] reduce_op Operation to reduce the ratio + */ + static float + ratioUsingGradient(const Weight &weight, + const std::shared_ptr &input, + const std::shared_ptr &output, + std::function reduce_op); + + /** + * @brief Check if the update should be applied or skipped + * @note true if should be applied, else false + */ + bool checkIfApply(float reduced_ratio, float learning_rate); /** * @brief Operation to decide if update should be skipped * @note Calculate l0 norm of the tensor */ - static float reduce_by_max(Tensor const &ratio) { return ratio.max_abs(); } + static float reduceByMax(Tensor const &ratio); /** * @brief Operation to decide if update should be skipped * @note Calcalate l2 norm of the tensor averaged by its size */ - static float reduce_by_norm(Tensor const &ratio) { - float l2norm = ratio.l2norm(); - return (l2norm * l2norm) / ratio.length(); - } - - /**< Different types of reduce operations */ - static const std::string dft_opt_off; - static const std::string dft_opt_max; - static const std::string dft_opt_norm; - -private: - std::mt19937 rng; /**< random number generator */ - std::uniform_real_distribution - dist; /**< uniform random distribution */ - float threshold; /**< threshold to decide when to skip updating */ - bool enabled; /**< if optimization is enabled */ - float epsilon; /**< epsilon to skip overflow */ - int skip_n_iterations; /**< skip initial iterations from optimization */ - std::function - reduce_op; /**< operation to reduce update ratio to value */ + static float reduceByNorm(Tensor const &ratio); }; -/**< Different types of reduce operations */ -const std::string dft_opt_off = "off"; -const std::string dft_opt_max = "max"; -const std::string dft_opt_norm = "norm"; - } /* namespace nntrainer */ #endif /* __cplusplus */ diff --git a/nntrainer/models/meson.build b/nntrainer/models/meson.build index 248b82a..62e930b 100644 --- a/nntrainer/models/meson.build +++ b/nntrainer/models/meson.build @@ -1,6 +1,7 @@ model_sources = [ 'model_loader.cpp', - 'neuralnet.cpp' + 'neuralnet.cpp', + 'dynamic_training_optimization.cpp' ] model_headers = [ diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index e13ca9f..8058341 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -311,6 +311,38 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input, return forwarding(training); } +void NeuralNetwork::backwarding(std::shared_ptr layer, int iteration, + bool calc_derivative) { + /** + * Do not change this order: + * 1. calcGradient + * 2. calcDerivative + * 3. 
applyGradient + */ + bool apply_gradient; + /** If gradient optimization mode, then calculate gradient first */ + if (dynamic_training_opt.isGradientMode()) + layer->calcGradient(); + + /** + * If optimization off, or gradient must be applied, then this will be true + */ + apply_gradient = dynamic_training_opt.checkIfApply( + layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0], opt, + iteration); + + /** If gradient must be applied and its not gradient mode, calculate gradient + */ + if (!dynamic_training_opt.isGradientMode() && apply_gradient) + layer->calcGradient(); + + if (calc_derivative) + layer->calcDerivative(); + + if (apply_gradient) + opt->apply_gradients(layer->getWeightsRef(), iteration); +} + /** * @brief back propagation * Call backwarding function of layer in reverse order @@ -322,36 +354,20 @@ void NeuralNetwork::backwarding(int iteration) { */ auto iter_begin = model_graph.getBackwardingBeginIter(); auto iter_end = model_graph.getBackwardingEndIter(); - for (auto iter = iter_begin; iter != iter_end - 1; iter++) { - auto layer = iter->layer; - layer->backwarding(); - - auto apply_grad_check = - dft_opt.checkIfApply(layer->getWeightsRef(), layer->net_input[0], - layer->net_hidden[0], opt, iteration); - std::vector weights_to_update; - - for (unsigned int idx = 0; idx < apply_grad_check.size(); idx++) { - if (apply_grad_check[idx]) - weights_to_update.emplace_back(layer->getWeightsRef()[idx]); - } - opt->apply_gradients(weights_to_update, iteration); + for (auto iter = iter_begin; iter != iter_end - 1; iter++) { + backwarding(iter->layer, iteration, true); } auto last_layer = (iter_end - 1)->layer; /** * The last trainable layer need not calculate the derivatives - * Do not change this order: - * 1. calcGradient - * 2. calcDerivative - * 3. applyGradient */ - last_layer->calcGradient(); #ifdef ENABLE_TEST - last_layer->calcDerivative(); + backwarding(last_layer, iteration, true); +#else + backwarding(last_layer, iteration, false); #endif - opt->apply_gradients(last_layer->getWeightsRef(), iteration); } /** diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index eb35be5..64b67ed 100644 --- a/nntrainer/models/neuralnet.h +++ b/nntrainer/models/neuralnet.h @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -422,18 +421,18 @@ public: * "max" and "norm" for now */ void enableDynamicTraining( - float threshold, - std::string op = DynamicTrainingOptimization::dft_opt_norm) { - dft_opt.setThreshold(threshold); - dft_opt.setOp(op); + float threshold, std::string op = DynamicTrainingOptimization::dft_opt_norm, + std::string mode = DynamicTrainingOptimization::dft_opt_mode_derivative) { + dynamic_training_opt.setThreshold(threshold); + dynamic_training_opt.setOp(op); + dynamic_training_opt.setMode(mode); + dynamic_training_opt.enable(); } /** * @brief Disable dynamic fine-tuning optimization */ - void disableDynamicFineTuning() { - dft_opt.setOp(DynamicTrainingOptimization::dft_opt_off); - } + void disableDynamicFineTuning() { dynamic_training_opt.disable(); } /// @todo Make a more common class have this /// Maybe appcontext can have this? @@ -570,8 +569,8 @@ private: bool in_place_optimization; /**< Run batch normalization, activation, etc layers in-place */ - DynamicTrainingOptimization dft_opt; /**< Dynamic fine-tuning optimization - mode. supported modes are "off", "max" and "norm" */ + DynamicTrainingOptimization dynamic_training_opt; /**< Dynamic fine-tuning + optimization mode. 
supported modes are "max" and "norm" */ /** * @brief print function for neuralnet @@ -666,6 +665,16 @@ private: * @retval true if matches, false is error */ bool validateInput(sharedConstTensors X); + + /** + * @brief Backward Propagation for the layer + * @param[in] layer Layer to backpropagate + * @param[in] iteration Iteration Number for the optimizer + * @param[in] calc_derivative If the derivative for previous layer must be + * calculated + */ + void backwarding(std::shared_ptr layer, int iteration, + bool calc_derivative); }; } /* namespace nntrainer */ diff --git a/nntrainer/tensor/blas_interface.cpp b/nntrainer/tensor/blas_interface.cpp index 720e4e1..5d5dacd 100644 --- a/nntrainer/tensor/blas_interface.cpp +++ b/nntrainer/tensor/blas_interface.cpp @@ -119,6 +119,20 @@ static void sgemm_raw(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, } } +static unsigned int isamax_raw(const unsigned int N, const float *X, + const int incX) { + + unsigned int max_idx = 0; + float max_val = X[0]; + for (unsigned int n = 1; n < N; n += incX) { + float cur_val = abs(X[n]); + if (cur_val > max_val) { + max_val = cur_val; + max_idx = n; + } + } +} + #endif void saxpy(const unsigned int N, const float alpha, const float *X, @@ -217,4 +231,12 @@ void sgemv(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, const unsigned int M, #endif } +unsigned int isamax(const unsigned int N, const float *X, const int incX) { +#ifdef USE_BLAS + return cblas_isamax(N, X, incX); +#else + return isamax_raw(N, X, incX); +#endif +} + } // namespace nntrainer diff --git a/nntrainer/tensor/blas_interface.h b/nntrainer/tensor/blas_interface.h index b09d945..85b8676 100644 --- a/nntrainer/tensor/blas_interface.h +++ b/nntrainer/tensor/blas_interface.h @@ -60,6 +60,8 @@ void sgemv(CBLAS_ORDER order, CBLAS_TRANSPOSE TransA, const unsigned int M, const unsigned int lda, const float *X, const int incX, const float beta, float *Y, const int incY); +unsigned int isamax(const unsigned int N, const float *X, const int incX); + } /* namespace nntrainer */ #endif /* __cplusplus */ #endif /* __BLAS_INTERFACE_H__ */ diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp index 86b0d12..3ceedff 100644 --- a/nntrainer/tensor/tensor.cpp +++ b/nntrainer/tensor/tensor.cpp @@ -961,6 +961,14 @@ float Tensor::l2norm() const { return snrm2(len, data, 1); } +float Tensor::max_abs() const { + unsigned int len = length(); + const float *data = getData(); + + unsigned int idx = isamax(len, data, 1); + return *(data + idx); +} + Tensor &Tensor::normalization(Tensor &output) const { if (output.uninitialized()) output = Tensor(dim); diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h index 3549d4d..6bbdbca 100644 --- a/nntrainer/tensor/var_grad.h +++ b/nntrainer/tensor/var_grad.h @@ -233,14 +233,14 @@ public: * * @return Tensor Variable tensor */ - Tensor &getVariableRef() const { return *var.get(); } + const Tensor &getVariableRef() const { return *var.get(); } /** * @brief Get the Gradient tensor (by reference) * * @return Tensor Gradient tensor */ - Tensor &getGradientRef() const { return *grad.get(); } + const Tensor &getGradientRef() const { return *grad.get(); } protected: TensorDim dim; /**< dimension of the tensor */
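
For reviewers who want the decision rule in isolation: the following is a standalone sketch of the derivative-mode estimate and the probabilistic skip decision, using plain std::vector<float> buffers in place of nntrainer's Tensor and Weight classes. The function names mirror the patch (reduceByNorm, ratioUsingDerivative, checkIfApply), but this is an illustration under those simplifications, not the nntrainer implementation itself.

// Standalone sketch of the derivative-mode skip decision (not the nntrainer API).
// Plain float buffers stand in for nntrainer's Tensor/Weight classes.
#include <cmath>
#include <random>
#include <vector>

// L2 norm of a buffer divided by sqrt(size) -- mirrors reduceByNorm.
static float reduce_by_norm(const std::vector<float> &v) {
  float sum_sq = 0.0f;
  for (float x : v)
    sum_sq += x * x;
  return std::sqrt(sum_sq) / std::sqrt(static_cast<float>(v.size()));
}

// Derivative mode: approximate |gradient| / |weight| from the layer input and
// the derivative arriving from the next layer, without materializing the
// gradient itself -- mirrors ratioUsingDerivative.
static float ratio_using_derivative(const std::vector<float> &weight,
                                    const std::vector<float> &input,
                                    const std::vector<float> &derivative) {
  float reduced_grad = reduce_by_norm(derivative) * reduce_by_norm(input);
  return reduced_grad / reduce_by_norm(weight);
}

// Apply the update with probability proportional to ratio * lr / threshold;
// large ratios always trigger an update -- mirrors checkIfApply.
static bool check_if_apply(float ratio, float learning_rate, float threshold,
                           std::mt19937 &rng) {
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  return dist(rng) < ratio * learning_rate / threshold;
}

int main() {
  std::mt19937 rng(42);
  std::vector<float> weight(128, 0.5f), input(128, 0.1f), derivative(128, 0.01f);
  float ratio = ratio_using_derivative(weight, input, derivative);
  bool apply = check_if_apply(ratio, /*learning_rate=*/0.001f,
                              /*threshold=*/1.0f, rng);
  (void)apply; // small estimated ratios lead to the update being skipped most of the time
  return 0;
}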
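
The new Tensor::max_abs and the reduceByMax reduction rest on an index-of-maximum-absolute-value scan (isamax). A minimal reference version of that scan, written here independently of the patch and assuming a positive stride, follows the 0-based cblas_isamax convention of returning the logical index into the strided vector:

// Reference scan for the index of the maximum absolute value; assumes N > 0
// and incX > 0. Standalone illustration, not the patch's fallback code.
#include <cmath>
#include <cstdio>

static unsigned int isamax_ref(unsigned int N, const float *X, int incX) {
  // Returns the logical index i of the element X[i * incX] with the largest
  // absolute value, matching the 0-based cblas_isamax convention.
  unsigned int max_idx = 0;
  float max_val = std::fabs(X[0]);
  for (unsigned int i = 1; i < N; ++i) {
    float cur_val = std::fabs(X[i * incX]);
    if (cur_val > max_val) {
      max_val = cur_val;
      max_idx = i;
    }
  }
  return max_idx;
}

int main() {
  const float data[] = {0.5f, -3.0f, 2.0f, 1.0f};
  unsigned int idx = isamax_ref(4, data, 1);
  // Prints idx=1 and value=-3: the index of the largest magnitude is returned,
  // and the element keeps its sign when read back through that index.
  std::printf("idx=%u value=%f\n", idx, data[idx]);
  return 0;
}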
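
The ordering constraint called out in NeuralNetwork::backwarding (calcGradient, then calcDerivative, then applyGradient) combined with the skip decision can be summarized in a small control-flow sketch. LayerLike and OptimizerLike below are hypothetical stand-ins, not nntrainer types; only the ordering and the two mode-dependent calcGradient calls are taken from the patch.

// Control-flow sketch of the per-layer backwarding added in this patch.
#include <cstdio>
#include <functional>

struct LayerLike {
  std::function<void()> calcGradient;   // compute this layer's weight gradients
  std::function<void()> calcDerivative; // compute the derivative for the previous layer
};

struct OptimizerLike {
  std::function<void(int)> applyGradients; // update the layer weights at iteration i
};

void backward_one_layer(LayerLike &layer, OptimizerLike &opt, int iteration,
                        bool calc_derivative, bool gradient_mode,
                        const std::function<bool()> &check_if_apply) {
  // Gradient mode needs the real gradient before the skip decision is made.
  if (gradient_mode)
    layer.calcGradient();

  bool apply_gradient = check_if_apply();

  // Derivative mode pays for calcGradient only when the update will be applied.
  if (!gradient_mode && apply_gradient)
    layer.calcGradient();

  // The derivative for the previous layer does not depend on the skip decision.
  if (calc_derivative)
    layer.calcDerivative();

  if (apply_gradient)
    opt.applyGradients(iteration);
}

int main() {
  LayerLike layer{[] { std::puts("calcGradient"); },
                  [] { std::puts("calcDerivative"); }};
  OptimizerLike opt{[](int i) { std::printf("applyGradients @%d\n", i); }};
  // Derivative mode, update not skipped: calcGradient runs only after the check.
  backward_one_layer(layer, opt, /*iteration=*/10, /*calc_derivative=*/true,
                     /*gradient_mode=*/false, [] { return true; });
  return 0;
}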
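
Finally, a usage sketch of the public entry point added to NeuralNetwork. It assumes an application that already builds against nntrainer with a constructed and compiled model, and that neuralnet.h is reachable on the include path; the threshold value is an arbitrary example.

// Enabling the new optimization from application code (model setup omitted).
#include <neuralnet.h>

void configure_dynamic_training(nntrainer::NeuralNetwork &model) {
  // Norm-based reduction in derivative mode (the defaults of the new
  // signature), with a threshold of 1.0f; larger thresholds skip more updates.
  model.enableDynamicTraining(
    1.0f, nntrainer::DynamicTrainingOptimization::dft_opt_norm,
    nntrainer::DynamicTrainingOptimization::dft_opt_mode_derivative);

  // model.disableDynamicFineTuning();  // turns the optimization back off
}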