/**
 * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *   http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file   tensor.cpp
 * @date   04 December 2019
 * @brief  This is Tensor class for calculation
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug    No known bugs except for NYI items
 */
#include "include/tensor.h"
#include <assert.h>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <stdexcept>

#ifdef USE_BLAS
extern "C" {
#include <cblas.h>
}
#endif

#ifdef USE_CUBLAS
#include <cublas_v2.h>
#include <helper_cuda.h>
#include <helper_functions.h>
#endif
void TensorDim::setTensorDim(std::string input_shape) {
  std::regex words_regex("[^\\s.,:;!?]+");
  auto words_begin = std::sregex_iterator(input_shape.begin(), input_shape.end(), words_regex);
  auto words_end = std::sregex_iterator();
  int cur_dim = std::distance(words_begin, words_end);
  if (cur_dim > MAXDIM) {
    std::cout << "Tensor Dimension should be less than 4" << std::endl;
    return;
  }
  int cn = 0;
  // Right-align the parsed dimensions, e.g. "3:4" fills Dim[2] and Dim[3].
  for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
    Dim[MAXDIM - cur_dim + cn] = std::stoi((*i).str());
    cn++;
  }
}
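/**
 * Usage sketch (illustrative, not part of the original source), assuming
 * MAXDIM == 4 and Dim default-initialized to {1, 1, 1, 1}:
 * @code
 *   TensorDim dim;
 *   dim.setTensorDim("32:28:28");  // Dim becomes {1, 32, 28, 28}
 * @endcode
 * Leading dimensions that are not given stay untouched because the parsed
 * values are right-aligned into Dim.
 */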
Tensor::Tensor(int height, int width) {
  this->height = height;
  this->width = width;
  this->batch = 1;
  this->len = height * width * batch;
  this->data = std::vector<float>(len);
}

Tensor::Tensor(int batch, int height, int width) {
  this->height = height;
  this->width = width;
  this->batch = batch;
  this->len = height * width * batch;
  this->data = std::vector<float>(len);
}
float Tensor::getValue(int batch, int h, int w) {
  // Row-major layout: index = batch * (height * width) + h * width + w.
  return this->data[batch * height * width + h * width + w];
}

void Tensor::setValue(int batch, int h, int w, float value) {
  this->data[batch * height * width + h * width + w] = value;
}
Tensor::Tensor(std::vector<std::vector<float>> const &d) {
  assert(d.size() != 0);
  this->height = d.size();
  this->width = d[0].size();
  this->batch = 1;
  this->len = height * width * batch;
  this->data = std::vector<float>(len);

  for (int j = 0; j < height; ++j)
    for (int k = 0; k < width; ++k)
      this->setValue(0, j, k, d[j][k]);
}
Tensor::Tensor(std::vector<std::vector<std::vector<float>>> const &d) {
  assert(d.size() != 0 && d[0].size() != 0);
  this->batch = d.size();
  this->height = d[0].size();
  this->width = d[0][0].size();
  this->len = this->batch * this->height * this->width;
  this->data = std::vector<float>(len);

  for (int i = 0; i < this->batch; ++i)
    for (int j = 0; j < this->height; ++j)
      for (int k = 0; k < this->width; ++k)
        this->setValue(i, j, k, d[i][j][k]);
}
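/**
 * Usage sketch (illustrative, not part of the original source): the nested
 * std::vector constructors infer the dimensions from the input.
 * @code
 *   Tensor t({{1.0f, 2.0f}, {3.0f, 4.0f}});  // batch=1, height=2, width=2
 *   float v = t.getValue(0, 1, 0);           // v == 3.0f
 * @endcode
 */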
Tensor Tensor::multiply(float const &value) {
  Tensor result(batch, height, width);
#ifdef USE_BLAS
  memset(result.data.data(), 0, sizeof(float) * result.len);
  cblas_saxpy(this->len, value, this->data.data(), 1, result.data.data(), 1);
#else
  for (int k = 0; k < len; ++k) {
    result.data[k] = data[k] * value;
  }
#endif
  return result;
}
Tensor Tensor::divide(float const &value) {
  Tensor result(batch, height, width);
#ifdef USE_BLAS
  memset(result.data.data(), 0, sizeof(float) * result.len);
  cblas_saxpy(this->len, 1.0 / value, this->data.data(), 1, result.data.data(), 1);
#else
  for (int k = 0; k < len; ++k) {
    result.data[k] = data[k] / value;
  }
#endif
  return result;
}
Tensor Tensor::add(float const &value) {
  Tensor result(batch, height, width);
#ifdef USE_BLAS
  cblas_scopy(this->len, this->data.data(), 1, result.data.data(), 1);
  // saxpy has no scalar-broadcast form, so add value * (tensor of ones).
  Tensor tmp(batch, height, width);
  for (int i = 0; i < tmp.len; ++i)
    tmp.data[i] = 1.0;
  cblas_saxpy(this->len, value, tmp.data.data(), 1, result.data.data(), 1);
#else
  for (int k = 0; k < len; ++k) {
    result.data[k] = data[k] + value;
  }
#endif
  return result;
}
Tensor Tensor::add(Tensor const &m) const {
  assert(height == m.height && width == m.width);

  Tensor result(batch, height, width);
#ifdef USE_BLAS
  cblas_scopy(this->len, this->data.data(), 1, result.data.data(), 1);
  int size = this->width * this->height;
  if (m.batch == 1) {
    // Broadcast the single slice of m across every batch slice.
    for (int k = 0; k < batch; ++k) {
      cblas_saxpy(size, 1.0, m.data.data(), 1, &(result.data.data()[k * size]), 1);
    }
  } else {
    assert(batch == m.batch);
    cblas_saxpy(this->len, 1.0, m.data.data(), 1, result.data.data(), 1);
  }
#else
  int i, j, k;
  if (m.batch == 1) {
    for (k = 0; k < batch; ++k) {
      for (i = 0; i < m.len; ++i) {
        j = k * m.len;
        result.data[j + i] = data[j + i] + m.data[i];
      }
    }
  } else {
    assert(batch == m.batch);
    for (k = 0; k < len; ++k) {
      result.data[k] = data[k] + m.data[k];
    }
  }
#endif
  return result;
}
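/**
 * Usage sketch (illustrative, not part of the original source): when
 * m.batch == 1 the single slice of m is added to every batch slice, which
 * is how a shared bias is applied across a mini-batch.
 * @code
 *   Tensor x(8, 1, 10);   // activations for a batch of 8
 *   Tensor b(1, 1, 10);   // one bias row
 *   Tensor y = x.add(b);  // b is reused for all 8 slices
 * @endcode
 */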
Tensor Tensor::subtract(Tensor const &m) const {
  assert(height == m.height && width == m.width);
  Tensor result(batch, height, width);
#ifdef USE_BLAS
  cblas_scopy(this->len, this->data.data(), 1, result.data.data(), 1);
  int size = this->width * this->height;
  float alpha = -1.0;
  if (m.batch == 1) {
    for (int k = 0; k < batch; ++k) {
      cblas_saxpy(size, alpha, m.data.data(), 1, &(result.data.data()[k * size]), 1);
    }
  } else {
    assert(batch == m.batch);
    cblas_saxpy(this->len, alpha, m.data.data(), 1, result.data.data(), 1);
  }
#else
  int i, j, k;
  if (m.batch == 1) {
    for (k = 0; k < batch; ++k) {
      for (i = 0; i < m.len; ++i) {
        j = k * m.len;
        result.data[j + i] = data[j + i] - m.data[i];
      }
    }
  } else {
    assert(batch == m.batch);
    for (k = 0; k < len; ++k) {
      result.data[k] = data[k] - m.data[k];
    }
  }
#endif
  return result;
}
Tensor Tensor::subtract(float const &value) {
  Tensor result(batch, height, width);
#ifdef USE_BLAS
  cblas_scopy(this->len, this->data.data(), 1, result.data.data(), 1);
  // Subtract by adding value * (tensor of -1).
  Tensor tmp(batch, height, width);
  for (int i = 0; i < tmp.len; ++i)
    tmp.data[i] = -1.0;
  cblas_saxpy(this->len, value, tmp.data.data(), 1, result.data.data(), 1);
#else
  for (int k = 0; k < len; ++k) {
    result.data[k] = data[k] - value;
  }
#endif
  return result;
}
Tensor Tensor::multiply(Tensor const &m) const {
  assert(height == m.height && width == m.width);
  Tensor result(batch, height, width);

  // Manually unrolled by 4; the tail loops handle the len % 4 leftovers.
  int end = this->len / 4;
  int e = width * height / 4;
  int i;
  if (m.batch == 1) {
    for (int k = 0; k < batch; ++k) {
      int b = k * width * height;
      for (i = 0; i < e * 4; i += 4) {
        result.data[b + i + 0] = this->data[b + i + 0] * m.data[i + 0];
        result.data[b + i + 1] = this->data[b + i + 1] * m.data[i + 1];
        result.data[b + i + 2] = this->data[b + i + 2] * m.data[i + 2];
        result.data[b + i + 3] = this->data[b + i + 3] * m.data[i + 3];
      }
      for (int j = i; j < width * height; j++)
        result.data[b + j] = this->data[b + j] * m.data[j];
    }
  } else {
    assert(batch == m.batch);
    for (i = 0; i < end * 4; i += 4) {
      result.data[i + 0] = this->data[i + 0] * m.data[i + 0];
      result.data[i + 1] = this->data[i + 1] * m.data[i + 1];
      result.data[i + 2] = this->data[i + 2] * m.data[i + 2];
      result.data[i + 3] = this->data[i + 3] * m.data[i + 3];
    }
    for (int j = i; j < len; ++j)
      result.data[j] = this->data[j] * m.data[j];
  }
  return result;
}
Tensor Tensor::divide(Tensor const &m) const {
  assert(height == m.height && width == m.width);
  Tensor result(batch, height, width);

  int end = this->len / 4;
  int e = width * height / 4;
  int i;
  if (m.batch == 1) {
    for (int k = 0; k < batch; ++k) {
      int b = k * width * height;
      for (i = 0; i < e * 4; i += 4) {
        result.data[b + i + 0] = this->data[b + i + 0] / m.data[i + 0];
        result.data[b + i + 1] = this->data[b + i + 1] / m.data[i + 1];
        result.data[b + i + 2] = this->data[b + i + 2] / m.data[i + 2];
        result.data[b + i + 3] = this->data[b + i + 3] / m.data[i + 3];
      }
      for (int j = i; j < width * height; ++j)
        result.data[b + j] = this->data[b + j] / m.data[j];
    }
  } else {
    assert(batch == m.batch);
    for (i = 0; i < end * 4; i += 4) {
      result.data[i + 0] = this->data[i + 0] / m.data[i + 0];
      result.data[i + 1] = this->data[i + 1] / m.data[i + 1];
      result.data[i + 2] = this->data[i + 2] / m.data[i + 2];
      result.data[i + 3] = this->data[i + 3] / m.data[i + 3];
    }
    for (int j = i; j < len; ++j)
      result.data[j] = this->data[j] / m.data[j];
  }
  return result;
}
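/**
 * Usage sketch (illustrative, not part of the original source): multiply()
 * and divide() are element-wise (Hadamard) operations with the same
 * batch-broadcast rule as add():
 * @code
 *   Tensor x(4, 2, 3);
 *   Tensor mask(1, 2, 3);          // single slice
 *   Tensor y = x.multiply(mask);   // mask applied to each of the 4 slices
 * @endcode
 */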
/**
 * This sums the Tensor data along the batch axis.
 * Therefore the result has dimension (batch, 1, 1).
 */
Tensor Tensor::sum() const {
  int k;
  Tensor ret(batch, 1, 1);
#ifdef USE_BLAS
  // Note: cblas_sasum sums absolute values, which matches a plain sum only
  // for non-negative data.
  for (k = 0; k < batch; ++k)
    ret.data[k] = cblas_sasum(width * height, &(data.data()[k * width * height]), 1);
#else
  int i;
  for (k = 0; k < batch; ++k) {
    int id = k * width * height;
    ret.data[k] = 0.0;
    for (i = 0; i < height * width; ++i) {
      ret.data[k] += data[id + i];
    }
  }
#endif
  return ret;
}
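/**
 * Usage sketch (illustrative, not part of the original source):
 * @code
 *   Tensor t(2, 3, 4);   // 2 batches of 3x4, zero-initialized
 *   Tensor s = t.sum();  // s has dimension (2, 1, 1);
 *                        // s.data[k] is the sum of batch slice k
 * @endcode
 */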
Tensor Tensor::sum(int axis) const {
  Tensor ret;

  switch (axis) {
  case 0: {
    // Sum over batch: (batch, height, width) -> (1, height, width).
    ret = Tensor(1, height, width);
    for (int i = 0; i < height; ++i) {
      int I = i * width;
      for (int j = 0; j < width; ++j) {
        for (int k = 0; k < batch; ++k) {
          int K = k * width * height;
          ret.data[I + j] += data[K + I + j];
        }
      }
    }
  } break;
  case 1: {
    // Sum over height: (batch, height, width) -> (batch, 1, width).
    ret = Tensor(batch, 1, width);
    for (int k = 0; k < batch; ++k) {
      int K = k * width;
      for (int j = 0; j < width; ++j) {
        for (int i = 0; i < height; ++i) {
          ret.data[K + j] += data[k * height * width + i * width + j];
        }
      }
    }
  } break;
  case 2: {
    // Sum over width: (batch, height, width) -> (batch, height, 1).
    ret = Tensor(batch, height, 1);
    for (int k = 0; k < batch; ++k) {
      int K = k * height;
      for (int i = 0; i < height; ++i) {
        for (int j = 0; j < width; ++j) {
          ret.data[K + i] += data[k * height * width + i * width + j];
        }
      }
    }
  } break;
  default:
    throw std::runtime_error("Error: axis cannot exceed 2");
  }

  return ret;
}
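/**
 * Usage sketch (illustrative, not part of the original source):
 * @code
 *   Tensor t(2, 3, 4);
 *   Tensor s0 = t.sum(0);  // (1, 3, 4): summed across the 2 batches
 *   Tensor s1 = t.sum(1);  // (2, 1, 4): summed across the 3 rows
 *   Tensor s2 = t.sum(2);  // (2, 3, 1): summed across the 4 columns
 * @endcode
 */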
/**
 * If the batch size of m is one, then it is reused for
 * every calculation along the batch.
 */
Tensor Tensor::dot(Tensor const &m) const {
  assert(width == m.height);
  int mwidth = m.width;
  Tensor result(batch, height, mwidth);

#ifdef USE_BLAS
  float alpha_dgemm = 1.0;
  // result is zero-initialized, so beta = 1.0 accumulates into zeros.
  float beta_dgemm = 1.0;
  if (m.batch == 1) {
    for (int k = 0; k < batch; k++) {
      int i = k * width * height;
      int ii = k * height * mwidth;
      cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, height, mwidth, width, alpha_dgemm, &(data.data()[i]),
                  width, m.data.data(), mwidth, beta_dgemm, &(result.data.data()[ii]), mwidth);
    }
  } else {
    assert(batch == m.batch);
    for (int k = 0; k < batch; k++) {
      int i = k * width * height;
      int j = k * m.width * m.height;
      int ii = k * height * mwidth;
      cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, height, mwidth, width, alpha_dgemm, &(data.data()[i]),
                  width, &(m.data.data()[j]), mwidth, beta_dgemm, &(result.data.data()[ii]), mwidth);
    }
  }
#elif defined(USE_CUBLAS)
  int devID = 0;
  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, devID);
  float *d_A, *d_B, *d_C;

  unsigned int size_A = this->width * height * sizeof(float);
  unsigned int size_B = m.width * m.height * sizeof(float);
  unsigned int size_C = result.width * result.height * sizeof(float);

  if (m.batch == 1) {
    for (int k = 0; k < batch; k++) {
      int i = k * width * height;
      int ii = k * height * mwidth;

      checkCudaErrors(cudaMalloc((void **)&d_A, size_A));
      checkCudaErrors(cudaMalloc((void **)&d_B, size_B));
      checkCudaErrors(cudaMemcpy(d_A, &data.data()[i], size_A, cudaMemcpyHostToDevice));
      checkCudaErrors(cudaMemcpy(d_B, m.data.data(), size_B, cudaMemcpyHostToDevice));
      checkCudaErrors(cudaMalloc((void **)&d_C, size_C));

      const float alpha = 1.0f;
      const float beta = 0.0f;
      cublasHandle_t handle;
      checkCudaErrors(cublasCreate(&handle));
      // cuBLAS is column-major, so compute C^T = B^T * A^T.
      checkCudaErrors(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m.width, height, width, &alpha, d_B, m.width,
                                  d_A, width, &beta, d_C, m.width));
      checkCudaErrors(cudaMemcpy(&result.data.data()[ii], d_C, size_C, cudaMemcpyDeviceToHost));
      checkCudaErrors(cublasDestroy(handle));
      cudaFree(d_A);
      cudaFree(d_B);
      cudaFree(d_C);
    }
  } else {
    assert(batch == m.batch);
    for (int k = 0; k < batch; k++) {
      int i = k * width * height;
      int j = k * m.width * m.height;
      int ii = k * height * mwidth;

      checkCudaErrors(cudaMalloc((void **)&d_A, size_A));
      checkCudaErrors(cudaMalloc((void **)&d_B, size_B));
      checkCudaErrors(cudaMemcpy(d_A, &data.data()[i], size_A, cudaMemcpyHostToDevice));
      checkCudaErrors(cudaMemcpy(d_B, &m.data.data()[j], size_B, cudaMemcpyHostToDevice));
      checkCudaErrors(cudaMalloc((void **)&d_C, size_C));

      const float alpha = 1.0f;
      const float beta = 0.0f;
      cublasHandle_t handle;
      checkCudaErrors(cublasCreate(&handle));
      checkCudaErrors(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m.width, height, width, &alpha, d_B, m.width,
                                  d_A, width, &beta, d_C, m.width));
      checkCudaErrors(cudaMemcpy(&result.data.data()[ii], d_C, size_C, cudaMemcpyDeviceToHost));
      checkCudaErrors(cublasDestroy(handle));
      cudaFree(d_A);
      cudaFree(d_B);
      cudaFree(d_C);
    }
  }
#else
  float w = 0.0;
  int i, j, k, h;
  if (m.batch == 1) {
    for (k = 0; k < batch; ++k) {
      for (i = 0; i < height; ++i) {
        for (j = 0; j < mwidth; ++j) {
          for (h = 0; h < width; ++h) {
            w += data[k * height * width + i * width + h] * m.data[h * mwidth + j];
          }
          result.data[k * height * mwidth + i * mwidth + j] = w;
          w = 0.0;
        }
      }
    }
  } else {
    assert(batch == m.batch);
    for (k = 0; k < batch; k++) {
      for (i = 0; i < height; i++) {
        for (j = 0; j < mwidth; j++) {
          for (h = 0; h < width; h++) {
            w += data[k * height * width + i * width + h] * m.data[k * width * mwidth + h * mwidth + j];
          }
          result.data[k * height * mwidth + i * mwidth + j] = w;
          w = 0.0;
        }
      }
    }
  }
#endif

  return result;
}
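/**
 * Usage sketch (illustrative, not part of the original source): dot()
 * performs a per-batch matrix product, broadcasting m when m.batch == 1,
 * which is the fully-connected-layer pattern (input . weight):
 * @code
 *   Tensor input(32, 1, 128);        // batch of 32 row vectors
 *   Tensor weight(1, 128, 10);       // one shared weight matrix
 *   Tensor out = input.dot(weight);  // (32, 1, 10)
 * @endcode
 */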
Tensor Tensor::transpose() const {
  Tensor result(batch, width, height);
  int i, j, k;
  for (k = 0; k < batch; ++k) {
    int b = k * width * height;
    for (i = 0; i < width; ++i) {
      for (j = 0; j < height; ++j) {
        result.data[b + i * height + j] = data[b + j * width + i];
      }
    }
  }
  return result;
}
Tensor Tensor::apply(float (*function)(float)) const {
  Tensor result(batch, height, width);
  int i;

  for (i = 0; i < this->len; ++i)
    result.data[i] = (*function)(data[i]);

  return result;
}

Tensor Tensor::apply(Tensor (*function)(Tensor)) const { return (*function)(*this); }
void Tensor::print(std::ostream &out) const {
  int i, j, k;
  for (k = 0; k < batch; k++) {
    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j++) {
        out << data[k * width * height + i * width + j] << " ";
      }
      out << std::endl;
    }
    out << std::endl;
  }
}
std::ostream &operator<<(std::ostream &out, Tensor const &m) {
  m.print(out);
  return out;
}
Tensor &Tensor::copy(const Tensor &from) {
  if (this != &from && from.len != 0) {
    height = from.height;
    width = from.width;
    batch = from.batch;
    len = from.len;
    data.resize(len);
#ifdef USE_BLAS
    cblas_scopy(this->len, from.data.data(), 1, this->data.data(), 1);
#else
    for (int i = 0; i < len; ++i)
      data[i] = from.data[i];
#endif
  }
  return *this;
}
/**
 * This generates a one-dimensional vector containing every element
 * of the Tensor.
 */
std::vector<float> Tensor::Mat2Vec() {
  std::vector<float> ret;

  for (int i = 0; i < this->len; i++)
    ret.push_back(data[i]);

  return ret;
}
void Tensor::save(std::ofstream &file) {
  for (int i = 0; i < this->len; i++)
    file.write((char *)&data[i], sizeof(float));
}

void Tensor::read(std::ifstream &file) {
  for (int i = 0; i < this->len; i++)
    file.read((char *)&data[i], sizeof(float));
}
/**
 * This calculates the average value along the batch direction.
 * That is why the result has (1, height, width) dimension.
 */
Tensor Tensor::average() const {
  if (batch == 1)
    return *this;

  Tensor result(1, height, width);
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      result.data[i * width + j] = 0.0;
      for (int k = 0; k < batch; k++) {
        result.data[i * width + j] += data[k * width * height + i * width + j];
      }
      result.data[i * width + j] = result.data[i * width + j] / (float)batch;
    }
  }
  return result;
}
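/**
 * Usage sketch (illustrative, not part of the original source):
 * @code
 *   Tensor grad(32, 5, 5);              // per-sample values
 *   Tensor mean_grad = grad.average();  // (1, 5, 5) mean over the batch
 * @endcode
 */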
void Tensor::setZero() { memset(this->data.data(), 0, sizeof(float) * this->len); }
Tensor Tensor::softmax() const {
  Tensor result(batch, height, width);
  Tensor divisor(batch, height, 1);

  divisor.setZero();

  // Accumulate exp() per row of each batch slice.
  for (int k = 0; k < batch; k++) {
    int index = k * height;
    for (int i = 0; i < height; i++) {
      for (int j = 0; j < width; j++) {
        divisor.data[index + i] += exp(this->data[k * height * width + i * width + j]);
      }
    }
  }

  // Collapse the per-row sums so each batch normalizes over its whole slice.
  for (int k = 0; k < batch; ++k) {
    int index = k * height;
    for (int i = 1; i < height; ++i) {
      divisor.data[index] += divisor.data[index + i];
    }
  }

  for (int k = 0; k < batch; k++) {
    int index = k * height;
    for (int i = 0; i < height; i++) {
      for (int j = 0; j < width; j++) {
        int id = k * height * width + i * width + j;
        result.data[id] = exp(this->data[id]) / divisor.data[index];
      }
    }
  }

  return result;
}
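/**
 * Usage sketch (illustrative, not part of the original source). Note that
 * the divisor is collapsed per batch, so the softmax is normalized over the
 * entire height * width slice of each batch, not per row:
 * @code
 *   Tensor logits(1, 1, 10);
 *   Tensor probs = logits.softmax();  // each batch slice of probs sums to 1
 * @endcode
 */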
int Tensor::argmax() {
  int index = 0;
  float maximum = std::numeric_limits<float>::lowest();
  for (int i = 0; i < len; i++) {
    if (this->data[i] > maximum) {
      maximum = this->data[i];
      index = i;
    }
  }
  return index;
}
float Tensor::l2norm() const {
  float sum = 0.0;
  for (int i = 0; i < len; i++) {
    sum += this->data[i] * this->data[i];
  }
  return sqrt(sum);
}
Tensor Tensor::normalization() const {
  Tensor results(batch, height, width);
  float Min = std::numeric_limits<float>::max();
  float Max = std::numeric_limits<float>::lowest();

  for (int k = 0; k < batch; ++k) {
    for (int i = 0; i < height; ++i) {
      for (int j = 0; j < width; ++j) {
        int id = k * height * width + i * width + j;
        if (this->data[id] < Min)
          Min = this->data[id];
        if (this->data[id] > Max)
          Max = this->data[id];
      }
    }
  }
  float dif = Max - Min;

  for (int k = 0; k < batch; ++k) {
    for (int i = 0; i < height; ++i) {
      for (int j = 0; j < width; ++j) {
        int id = k * height * width + i * width + j;
        results.data[id] = (this->data[id] - Min) / dif;
      }
    }
  }

  return results;
}
Tensor Tensor::standardization() const {
  Tensor result(batch, height, width);

  for (int k = 0; k < batch; ++k) {
    int K = k * height * width;
    float mean;
    float mean_tmp = 0.0;
    float std_dev;
    float std_tmp = 0.0;

    for (int i = 0; i < height; ++i) {
      int I = K + i * width;
      for (int j = 0; j < width; ++j) {
        int J = I + j;
        mean_tmp += this->data[J];
      }
    }

    mean = mean_tmp / (this->width * this->height);

    for (int i = 0; i < height; ++i) {
      int I = K + i * width;
      for (int j = 0; j < width; ++j) {
        int J = I + j;
        std_tmp += (this->data[J] - mean) * (this->data[J] - mean);
      }
    }

    std_dev = sqrt(std_tmp / (this->width * this->height));

    for (int i = 0; i < height; ++i) {
      int I = K + i * width;
      for (int j = 0; j < width; ++j) {
        int J = I + j;
        result.data[J] = (this->data[J] - mean) / std_dev;
      }
    }
  }

  return result;
}
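/**
 * Usage sketch (illustrative, not part of the original source): each batch
 * slice is standardized independently to zero mean and unit variance:
 * @code
 *   Tensor x(2, 28, 28);
 *   Tensor z = x.standardization();  // per-slice (x - mean) / std_dev
 * @endcode
 */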