--- /dev/null
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
--- /dev/null
+// net/lstm-eigen-layer3.h
+//Dense matrices are temporary variables
+
+// Copyright 2015 Yajie Miao, Hang Su
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#ifndef EESEN_LSTM_LAYER_H_
+#define EESEN_LSTM_LAYER_H_
+
+#include "onDevice-net/layer.h"
+#include "onDevice-net/trainable-layer.h"
+#include "onDevice-net/utils-functions.h"
+//#include "gpucompute/cuda-math.h"
+#include "cudamatrix/cu-math.h"
+#include "matrix/matrix-lib.h"
+#include "Eigen/Eigen"
+//#include </data/local/tmp/eigen/Eigen/Eigen>
+
+#ifdef ANDROIDLIB
+#include <android/log.h>
+#endif
+
+
+// NOTE(review): a using-directive at header scope exposes every Eigen name to
+// each file that includes this header. The code below uses unqualified Eigen
+// names (MatrixXd, Ref, RowMajor), so it cannot simply be dropped.
+using namespace Eigen;
+
+// (row, column, value) entry type; the methods below collect these in a
+// std::vector and pass them to SparseMatrix::setFromTriplets().
+// NOTE(review): the one-letter global name T can clash with template
+// parameter names in code that includes this header.
+typedef Eigen::Triplet<double> T;
+
+namespace kaldi {
+
+// Android-only logging helpers. Each macro forwards a printf-style format and
+// its arguments to __android_log_print at a fixed priority (ERROR, DEBUG or
+// INFO), using ANDLOID_TAG as the log tag and prefixing the format with this
+// header's file name. The "args..." / "##args" form is the GNU named-variadic
+// macro extension.
+// NOTE(review): ANDLOID_TAG looks like a misspelling of "ANDROID"; it is left
+// untouched because code outside this view may reference it.
+#ifdef ANDROIDLIB
+#define ANDLOID_TAG "onDeviceASR"
+#define LOGE(fmt, args...) (__android_log_print(ANDROID_LOG_ERROR, ANDLOID_TAG, "lstm-eigen-layer3.h:" fmt, ##args))
+#define LOGD(fmt, args...) (__android_log_print(ANDROID_LOG_DEBUG, ANDLOID_TAG, "lstm-eigen-layer3.h:" fmt, ##args))
+#define LOGI(fmt, args...) (__android_log_print(ANDROID_LOG_INFO, ANDLOID_TAG, "lstm-eigen-layer3.h:" fmt, ##args))
+#endif
+
+class Lstm : public TrainableLayer {
+public:
+ // Rebuilds sp_mat as a row-major sparse copy of mat: sp_mat is resized to
+ // mat's dimensions and receives only the entries that compare unequal to 0.
+ void copyFromMat(Eigen::SparseMatrix<double,RowMajor> &sp_mat, const CuMatrixBase<BaseFloat> &mat) {
+   const int32 rows = mat.NumRows();
+   const int32 cols = mat.NumCols();
+   sp_mat.resize(rows, cols);
+
+   // Flattened copy of the source; element (r, c) is read from r * cols + c.
+   Vector<BaseFloat> flat(rows * cols);
+   flat.CopyRowsFromMat(mat.Mat());
+
+   std::vector<T> entries;
+   for (int r = 0; r < rows; ++r) {
+     const int base = r * cols;
+     for (int c = 0; c < cols; ++c) {
+       const BaseFloat value = flat(base + c);
+       if (value != 0)
+         entries.push_back(T(r, c, value));
+     }
+   }
+   sp_mat.setFromTriplets(entries.begin(), entries.end());
+ }
+
+ // Resizes dense_mat to mat's dimensions and copies every element of mat
+ // into it, converting to double.
+ void copyFromMat(MatrixXd &dense_mat, const CuMatrixBase<BaseFloat> &mat) {
+   const int32 rows = mat.NumRows();
+   const int32 cols = mat.NumCols();
+   dense_mat.resize(rows, cols);
+
+   // Flattened copy of the source; element (r, c) is read from r * cols + c.
+   Vector<BaseFloat> flat(rows * cols);
+   flat.CopyRowsFromMat(mat.Mat());
+
+   for (int r = 0; r < rows; ++r) {
+     const int base = r * cols;
+     for (int c = 0; c < cols; ++c)
+       dense_mat(r, c) = flat(base + c);
+   }
+ }
+
+ // Turns cuvec into a 1 x Dim dense matrix: mat is resized to a single row
+ // and filled element by element.
+ void copyFromRowVec(MatrixXd &mat, const CuVectorBase<BaseFloat> &cuvec) {
+   Vector<BaseFloat> local(cuvec.Vec());
+   const int32 length = local.Dim();
+   mat.resize(1, length);
+   int pos = 0;
+   while (pos < length) {
+     mat(0, pos) = local(pos);
+     ++pos;
+   }
+ }
+
+ // Builds a Dim x Dim sparse matrix whose diagonal holds the elements of
+ // cuvec; elements equal to zero are not stored.
+ void copyDiagVec(Eigen::SparseMatrix<double,RowMajor> &mat, const CuVectorBase<BaseFloat> &cuvec) {
+   Vector<BaseFloat> local(cuvec.Vec());
+   const int32 n = local.Dim();
+   mat.resize(n, n);
+
+   std::vector<T> diagonal;
+   for (int d = 0; d < n; ++d) {
+     const BaseFloat value = local(d);
+     if (value == 0)
+       continue;
+     diagonal.push_back(T(d, d, value));
+   }
+   mat.setFromTriplets(diagonal.begin(), diagonal.end());
+ }
+
+ // Expands sp_mat into a flat buffer (element (r, c) at r * cols + c) and
+ // copies that buffer into *mat through CuMatrixBase::CopyRowsFromVec.
+ void copyIntoMat(const Eigen::SparseMatrix<double,RowMajor> &sp_mat, CuMatrixBase<BaseFloat> *mat) {
+   typedef Eigen::SparseMatrix<double,RowMajor>::InnerIterator EntryIt;
+   const int32 rows = sp_mat.rows();
+   const int32 cols = sp_mat.cols();
+
+   // Only stored entries are written; all other slots keep the value the
+   // buffer was constructed with.
+   Vector<BaseFloat> flat(rows * cols);
+   for (int outer = 0; outer < sp_mat.outerSize(); ++outer) {
+     for (EntryIt entry(sp_mat, outer); entry; ++entry) {
+       const int r = entry.row();
+       const int c = entry.col();
+       flat(r * cols + c) = entry.value();
+     }
+   }
+
+   mat->CopyRowsFromVec(CuVector<BaseFloat>(flat));
+ }
+
+ // Stand-in for CopyRowsFromVec, used because of that routine's CUDA and
+ // Fortran dependency; it writes through mat->Data() / mat->RowData().
+ //  - rv.Dim() == NumRows * NumCols: rv is laid out row by row into *mat.
+ //  - rv.Dim() == NumCols: rv is copied into every row of *mat.
+ //  - any other size is reported through KALDI_ERR.
+ void my_CopyRowsFromVec(CuMatrixBase<BaseFloat> *mat, const VectorBase<BaseFloat> &rv){
+   const int32 rows = mat->NumRows();
+   const int32 cols = mat->NumCols();
+   const int32 row_stride = mat->Stride();
+   const bool fills_matrix = (rv.Dim() == rows * cols);
+   const bool fills_one_row = (rv.Dim() == cols);
+
+   if (fills_matrix) {
+     const BaseFloat *src = rv.Data();
+     if (row_stride == cols) {
+       // Rows are contiguous in memory, so a single copy covers everything.
+       std::memcpy(mat->Data(), src, sizeof(BaseFloat)*rows*cols);
+     } else {
+       // Rows are padded: copy one row at a time, advancing the source.
+       for (MatrixIndexT r = 0; r < rows; r++, src += cols) {
+         BaseFloat *dst = mat->RowData(r);
+         for (MatrixIndexT c = 0; c < cols; c++)
+           dst[c] = src[c];
+       }
+     }
+   } else if (fills_one_row) {
+     const BaseFloat *src = rv.Data();
+     for (MatrixIndexT r = 0; r < rows; r++)
+       std::memcpy(mat->RowData(r), src, sizeof(BaseFloat)*cols);
+   } else {
+     KALDI_ERR << "Wrong sized arguments";
+   }
+ }
+
+ // Flattens dense_mat row by row into a temporary vector and passes it to
+ // my_CopyRowsFromVec to fill *mat. *mat is not resized here.
+ void copyIntoMat(const Ref<MatrixXd> dense_mat, CuMatrixBase<BaseFloat> *mat) {
+   const int32 rows = dense_mat.rows();
+   const int32 cols = dense_mat.cols();
+
+   Vector<BaseFloat> flat(rows * cols);
+   for (int r = 0; r < rows; ++r) {
+     const int base = r * cols;
+     for (int c = 0; c < cols; ++c)
+       flat(base + c) = dense_mat(r, c);
+   }
+
+   my_CopyRowsFromVec(mat, flat);
+ }
+
+
+ /*void copyWei(CuMatrixBase<BaseFloat> *x_mat, CuMatrixBase<BaseFloat> *m_mat) {
+ if (compressed_state) {
+ copyIntoMat(wei_gifo_x_sp_, x_mat);
+ copyIntoMat(wei_gifo_m_sp_, m_mat);
+ } else {
+ copyIntoMat(wei_gifo_x_dense_, x_mat);
+ copyIntoMat(wei_gifo_m_dense_, m_mat);
+ }
+ }*/
+
+ // Copies the first dense_vec.size() elements of row 0 of dense_vec into
+ // vec via a temporary Vector. vec is not resized here.
+ void copyIntoVec(Ref<MatrixXd> dense_vec, CuVectorBase<BaseFloat> &vec) {
+   const int32 count = dense_vec.size();
+   Vector<BaseFloat> staged(count);
+
+   int pos = 0;
+   while (pos < count) {
+     staged(pos) = dense_vec(0, pos);
+     ++pos;
+   }
+
+   vec.CopyFromVec(staged);
+ }
+
+ // Overwrites row `row` of mat with the first mat.cols() elements of vec.
+ // No size check is performed on vec.
+ void copyRowFromVec(Ref<MatrixXd> mat, const CuVectorBase<BaseFloat> &vec, int32 row) {
+   const int32 width = mat.cols();
+   int col = 0;
+   while (col < width) {
+     mat(row, col) = vec(col);
+     ++col;
+   }
+ }
+
+ // Element-wise hyperbolic tangent: dest_mat receives tanh of every element
+ // of src_mat. Both arguments may refer to the same matrix.
+ void tanhMat(Ref<MatrixXd> src_mat, Ref<MatrixXd> dest_mat) {
+   const int32 rows = src_mat.rows();
+   const int32 cols = src_mat.cols();
+
+   // Copy first, then transform the copy in place.
+   dest_mat = src_mat;
+   for (int r = 0; r < rows; ++r) {
+     for (int c = 0; c < cols; ++c) {
+       double &cell = dest_mat(r, c);
+       cell = std::tanh(cell);
+     }
+   }
+ }
+
+ // Element-wise logistic sigmoid, computed as (tanh(x / 2) + 1) / 2 using
+ // tanhMat. dest_mat receives the result.
+ void sigmoidMat(Ref<MatrixXd> src_mat, Ref<MatrixXd> dest_mat) {
+   // x / 2
+   dest_mat = src_mat * 0.5;
+   // tanh(x / 2), in place
+   tanhMat(dest_mat, dest_mat);
+   // (tanh(x / 2) + 1) / 2
+   dest_mat.array() += 1.0;
+   dest_mat *= 0.5;
+ }
+
+ // For every element: dest = diff * (1 - value * value).
+ // value_mat is presumably the forward tanh output and diff_mat the incoming
+ // gradient -- confirm against the callers. All three matrices are indexed
+ // over dest_mat's dimensions; no size check is performed.
+ void difftanhMat(Ref<MatrixXd> dest_mat, Ref<MatrixXd> value_mat, Ref<MatrixXd> diff_mat) {
+   const int32 rows = dest_mat.rows();
+   const int32 cols = dest_mat.cols();
+   for (int r = 0; r < rows; ++r) {
+     for (int c = 0; c < cols; ++c) {
+       const double slope = 1.0 - value_mat(r, c) * value_mat(r, c);
+       dest_mat(r, c) = diff_mat(r, c) * slope;
+     }
+   }
+ }
+
+ // For every element: dest = diff * value * (1 - value).
+ // value_mat is presumably the forward sigmoid output and diff_mat the
+ // incoming gradient -- confirm against the callers. All three matrices are
+ // indexed over dest_mat's dimensions; no size check is performed.
+ void diffsigmoidMat(Ref<MatrixXd> dest_mat, Ref<MatrixXd> value_mat, Ref<MatrixXd> diff_mat) {
+   const int32 rows = dest_mat.rows();
+   const int32 cols = dest_mat.cols();
+   for (int r = 0; r < rows; ++r) {
+     for (int c = 0; c < cols; ++c) {
+       const double scaled = diff_mat(r, c) * value_mat(r, c);
+       dest_mat(r, c) = scaled * (1.0 - value_mat(r, c));
+     }
+   }
+ }
+
+ // Writes wei_gifo_x_dense_ to os. Asserts that the layer is not in the
+ // compressed state.
+ //   binary: int32 row count, int32 column count, then each row as NumCols
+ //           raw doubles; a stream that is not good() afterwards is reported
+ //           through KALDI_ERR.
+ //   text:   " [", one line per row of space-separated values, then "]";
+ //           a matrix with zero columns is written as " [ ]".
+ void writeDensex(std::ostream &os, bool binary) const {
+   KALDI_ASSERT(!compressed_state);
+   if (binary) {
+     int32 NumRows = wei_gifo_x_dense_.rows();
+     int32 NumCols = wei_gifo_x_dense_.cols();
+
+     os.write(reinterpret_cast<const char *>(&NumRows), sizeof(NumRows));
+     os.write(reinterpret_cast<const char *>(&NumCols), sizeof(NumCols));
+
+     // Row staging buffer owned by a std::vector so it is released on every
+     // exit path; the previous new[] buffer was never deleted.
+     std::vector<double> row(NumCols);
+     for (int i = 0; i < NumRows; i++) {
+       for (int j = 0; j < NumCols; j++)
+         row[j] = wei_gifo_x_dense_(i,j);
+       // &row[0] is only valid for a non-empty vector; an empty row
+       // contributes no bytes anyway.
+       if (NumCols > 0)
+         os.write(reinterpret_cast<const char*>(&row[0]), sizeof(double) * NumCols);
+     }
+
+     if (!os.good())
+       KALDI_ERR << "Failed to write dense matrix to stream";
+
+   } else {
+     if (wei_gifo_x_dense_.cols() == 0) {
+       os << " [ ]\n";
+     } else {
+       os << " [";
+       int32 NumRows = wei_gifo_x_dense_.rows();
+       int32 NumCols = wei_gifo_x_dense_.cols();
+       for (int32 i = 0; i < NumRows; i++) {
+         os << "\n ";
+         for (int32 j = 0; j < NumCols; j++)
+           os << wei_gifo_x_dense_(i,j) << " ";
+       }
+       os << "]\n";
+     }
+   }
+ }
+
+ // Writes wei_gifo_m_dense_ to os. Asserts that the layer is not in the
+ // compressed state.
+ //   binary: int32 row count, int32 column count, then each row as NumCols
+ //           raw doubles; a stream that is not good() afterwards is reported
+ //           through KALDI_ERR.
+ //   text:   " [", one line per row of space-separated values, then "]";
+ //           a matrix with zero columns is written as " [ ]".
+ void writeDensem(std::ostream &os, bool binary) const {
+   KALDI_ASSERT(!compressed_state);
+   if (binary) {
+     int32 NumRows = wei_gifo_m_dense_.rows();
+     int32 NumCols = wei_gifo_m_dense_.cols();
+
+     os.write(reinterpret_cast<const char *>(&NumRows), sizeof(NumRows));
+     os.write(reinterpret_cast<const char *>(&NumCols), sizeof(NumCols));
+
+     // Row staging buffer owned by a std::vector so it is released on every
+     // exit path; the previous new[] buffer was never deleted.
+     std::vector<double> row(NumCols);
+     for (int i = 0; i < NumRows; i++) {
+       for (int j = 0; j < NumCols; j++)
+         row[j] = wei_gifo_m_dense_(i,j);
+       // &row[0] is only valid for a non-empty vector; an empty row
+       // contributes no bytes anyway.
+       if (NumCols > 0)
+         os.write(reinterpret_cast<const char*>(&row[0]), sizeof(double) * NumCols);
+     }
+
+     if (!os.good())
+       KALDI_ERR << "Failed to write dense matrix to stream";
+
+   } else {
+     if (wei_gifo_m_dense_.cols() == 0) {
+       os << " [ ]\n";
+     } else {
+       os << " [";
+       int32 NumRows = wei_gifo_m_dense_.rows();
+       int32 NumCols = wei_gifo_m_dense_.cols();
+       for (int32 i = 0; i < NumRows; i++) {
+         os << "\n ";
+         for (int32 j = 0; j < NumCols; j++)
+           os << wei_gifo_m_dense_(i,j) << " ";
+       }
+       os << "]\n";
+     }
+   }
+ }
+
+ // Writes both dense weight matrices to os: wei_gifo_x_dense_ first (via
+ // writeDensex), then wei_gifo_m_dense_ (via writeDensem).
+ void writeDense(std::ostream &os, bool binary) const {
+   this->writeDensex(os, binary);
+   this->writeDensem(os, binary);
+ }
+
+ // Writes wei_gifo_x_sp_ to os. Asserts that the layer is in the compressed
+ // state.
+ //   binary: int32 rows, int32 cols, int32 record count
+ //           (nonZeros() + outerSize()), then that many records of two
+ //           doubles each. A record (-1, i) opens outer index i; a record
+ //           (col, value) is one stored entry; (-1, -1) is padding.
+ //   text:   " [", then for each outer index a line with the index followed
+ //           by one "col value" line per stored entry; the first entry line
+ //           also carries "NumRows NumCols". Closed by "]".
+ // NOTE(review): readSparsex's binary branch reads each record as a short
+ // followed by a float, not as two doubles, so binary output produced here
+ // cannot be read back by it. The on-disk format is left unchanged; decide
+ // which side is authoritative.
+ void writeSparsex(std::ostream &os, bool binary) const {
+   KALDI_ASSERT(compressed_state);
+   if (binary) {
+     int32 NumRows = wei_gifo_x_sp_.rows();
+     int32 NumCols = wei_gifo_x_sp_.cols();
+     int32 NonZeros = wei_gifo_x_sp_.nonZeros() + wei_gifo_x_sp_.outerSize();
+
+     os.write(reinterpret_cast<const char *>(&NumRows), sizeof(NumRows));
+     os.write(reinterpret_cast<const char *>(&NumCols), sizeof(NumCols));
+     os.write(reinterpret_cast<const char *>(&NonZeros), sizeof(NonZeros));
+
+     // Two-double record on the stack; the previous new double[2] buffer was
+     // never deleted and leaked on every call.
+     double record[2];
+     for (int i = 0; i < wei_gifo_x_sp_.outerSize(); i++) {
+       record[0] = -1; record[1] = i;
+       os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+       NonZeros--;
+       for (Eigen::SparseMatrix<double,RowMajor>::InnerIterator it(wei_gifo_x_sp_,i); it; ++it) {
+         record[0] = it.col(); record[1] = it.value();
+         os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+         NonZeros--;
+       }
+     }
+
+     // Pad up to the announced record count if fewer records were emitted.
+     while (NonZeros > 0) {
+       record[0] = -1; record[1] = -1;
+       os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+       NonZeros--;
+     }
+
+     if (!os.good())
+       KALDI_ERR << "Failed to write sparse matrix to stream";
+
+   } else {
+     if (wei_gifo_x_sp_.cols() == 0) {
+       os << " [ ]\n";
+     } else {
+       os << " [";
+       int32 NumRows = wei_gifo_x_sp_.rows();
+       int32 NumCols = wei_gifo_x_sp_.cols();
+       // True until the dimensions have been appended to an entry line.
+       bool flag = true;
+       for (int i = 0; i < wei_gifo_x_sp_.outerSize(); i++) {
+         os << "\n " << i << " ";
+         for (Eigen::SparseMatrix<double,RowMajor>::InnerIterator it(wei_gifo_x_sp_,i); it; ++it) {
+           os << "\n " << it.col() << " " << it.value() << " ";
+           if (flag) {
+             os << NumRows << " " << NumCols << " ";
+             flag = false;
+           }
+         }
+       }
+       os << "]\n";
+     }
+   }
+ }
+
+ // Writes wei_gifo_m_sp_ to os. Asserts that the layer is in the compressed
+ // state.
+ //   binary: int32 rows, int32 cols, int32 record count
+ //           (nonZeros() + outerSize()), then that many records of two
+ //           doubles each. A record (-1, i) opens outer index i; a record
+ //           (col, value) is one stored entry; (-1, -1) is padding.
+ //   text:   " [", then for each outer index a line with the index followed
+ //           by one "col value" line per stored entry; the first entry line
+ //           also carries "NumRows NumCols". Closed by "]". A matrix with
+ //           zero columns is written as " []".
+ // NOTE(review): readSparsem's binary branch reads each record as a short
+ // followed by a float, not as two doubles, so binary output produced here
+ // cannot be read back by it. The on-disk format is left unchanged; decide
+ // which side is authoritative.
+ void writeSparsem(std::ostream &os, bool binary) const {
+   KALDI_ASSERT(compressed_state);
+   if (binary) {
+     int32 NumRows = wei_gifo_m_sp_.rows();
+     int32 NumCols = wei_gifo_m_sp_.cols();
+     int32 NonZeros = wei_gifo_m_sp_.nonZeros() + wei_gifo_m_sp_.outerSize();
+
+     os.write(reinterpret_cast<const char *>(&NumRows), sizeof(NumRows));
+     os.write(reinterpret_cast<const char *>(&NumCols), sizeof(NumCols));
+     os.write(reinterpret_cast<const char *>(&NonZeros), sizeof(NonZeros));
+
+     // Two-double record on the stack; the previous new double[2] buffer was
+     // never deleted and leaked on every call.
+     double record[2];
+     for (int i = 0; i < wei_gifo_m_sp_.outerSize(); i++) {
+       record[0] = -1; record[1] = i;
+       os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+       NonZeros--;
+       for (Eigen::SparseMatrix<double,RowMajor>::InnerIterator it(wei_gifo_m_sp_,i); it; ++it) {
+         record[0] = it.col(); record[1] = it.value();
+         os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+         NonZeros--;
+       }
+     }
+
+     // Pad up to the announced record count if fewer records were emitted.
+     while (NonZeros > 0) {
+       record[0] = -1; record[1] = -1;
+       os.write(reinterpret_cast<const char*>(record), sizeof(double) * 2);
+       NonZeros--;
+     }
+
+     if (!os.good())
+       KALDI_ERR << "Failed to write sparse matrix to stream";
+
+   } else {
+     if (wei_gifo_m_sp_.cols() == 0) {
+       os << " []\n";
+     } else {
+       os << " [";
+       int32 NumRows = wei_gifo_m_sp_.rows();
+       int32 NumCols = wei_gifo_m_sp_.cols();
+       // True until the dimensions have been appended to an entry line.
+       bool flag = true;
+       for (int i = 0; i < wei_gifo_m_sp_.outerSize(); i++) {
+         os << "\n " << i << " ";
+         for (Eigen::SparseMatrix<double,RowMajor>::InnerIterator it(wei_gifo_m_sp_,i); it; ++it) {
+           os << "\n " << it.col() << " " << it.value() << " ";
+           if (flag) {
+             os << NumRows << " " << NumCols << " ";
+             flag = false;
+           }
+         }
+       }
+       os << "]\n";
+     }
+   }
+ }
+
+ // Writes both sparse weight matrices to os: wei_gifo_x_sp_ first (via
+ // writeSparsex), then wei_gifo_m_sp_ (via writeSparsem).
+ void writeSparse(std::ostream &os, bool binary) const {
+   this->writeSparsex(os, binary);
+   this->writeSparsem(os, binary);
+ }
+
+ // Reads wei_gifo_x_dense_ from is, in the layout produced by writeDensex.
+ //   binary: int32 rows, int32 cols, then each row as NumCols raw doubles.
+ //   text:   "[" ... "]" with rows separated by newlines or ';'. The tokens
+ //           inf / infinity / nan are accepted (case-insensitively) with a
+ //           warning; "[]" or an empty body yields a 0 x 0 matrix.
+ // Any read failure or malformed content is reported through KALDI_ERR.
+ void readDensex(std::istream & is, bool binary) {
+   if (binary) {
+     int32 NumRows = 0, NumCols = 0;
+
+     is.read(reinterpret_cast<char *>(&NumRows), sizeof(NumRows));
+     is.read(reinterpret_cast<char *>(&NumCols), sizeof(NumCols));
+     // A truncated or corrupt header must not reach resize().
+     if (is.fail() || NumRows < 0 || NumCols < 0) goto bad;
+     if (NumRows != wei_gifo_x_dense_.rows() || NumCols != wei_gifo_x_dense_.cols())
+       wei_gifo_x_dense_.resize(NumRows,NumCols);
+
+     if (NumCols > 0) {
+       // Row staging buffer owned by a std::vector so that it is released on
+       // every exit path, including "goto bad"; the previous new[] buffer
+       // was never deleted.
+       std::vector<double> row(NumCols);
+       for (int i = 0; i < NumRows; i++) {
+         is.read(reinterpret_cast<char*>(&row[0]), sizeof(double)*NumCols);
+         if (is.fail()) goto bad;
+         for (int j = 0; j < NumCols; j++)
+           wei_gifo_x_dense_(i,j) = row[j];
+       }
+     }
+
+     if (is.eof()) return;
+     if (is.fail()) goto bad;
+     return;
+
+   } else {
+     std::string str;
+     is >> str;
+     if (str == "[]") { wei_gifo_x_dense_.resize(0, 0); return; }
+     std::vector<std::vector<double> > data;
+     std::vector<double> curr_row;
+     while(1) {
+       int i = is.peek();
+       if (static_cast<char>(i) == ']') {
+         // End of matrix: consume the bracket and one trailing line break.
+         is.get();
+         i = is.peek();
+         if (static_cast<char>(i) == '\r') {
+           is.get();
+           is.get();
+         } else if (static_cast<char>(i) == '\n') { is.get(); }
+
+         if (!curr_row.empty()) data.push_back(curr_row);
+         if (data.empty()) {wei_gifo_x_dense_.resize(0,0); return;}
+         int NumRows = data.size(), NumCols = data[0].size();
+         // Every row must be as long as the first one, otherwise the copy
+         // below would index past the end of a shorter row.
+         for (int r = 0; r < NumRows; r++) {
+           if (static_cast<int>(data[r].size()) != NumCols)
+             KALDI_ERR << "Dense matrix has inconsistent row lengths: row " << r
+                       << " has " << data[r].size() << " values, expected " << NumCols;
+         }
+         wei_gifo_x_dense_.resize(NumRows, NumCols);
+         for (int r = 0; r < NumRows; r++) {
+           for (int c = 0; c < NumCols; c++) {
+             wei_gifo_x_dense_(r,c) = data[r][c];
+           }
+         }
+         // Done. Without this return the loop kept parsing whatever follows
+         // the matrix and always ended in the KALDI_ERR below.
+         return;
+       } else if (static_cast<char>(i) == '\n' || static_cast<char>(i) == ';') {
+         // Row separator.
+         is.get();
+         if (curr_row.size() != 0) {
+           data.push_back(curr_row);
+           curr_row.clear();
+           curr_row.reserve(data.back().size());
+         }
+       } else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
+         double real;
+         is >> real;
+         curr_row.push_back(real);
+       } else if (isspace(i)) {
+         is.get();
+       } else {
+         // Non-numeric token: only inf / infinity / nan are accepted. End of
+         // input also lands here and is reported as an error.
+         std::string str;
+         is >> str;
+         if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
+             !KALDI_STRCASECMP(str.c_str(), "infinity")) {
+           curr_row.push_back(std::numeric_limits<double>::infinity());
+           KALDI_WARN << "Reading infinite value into matrix.";
+         } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
+           curr_row.push_back(std::numeric_limits<double>::quiet_NaN());
+           KALDI_WARN << "Reading NaN value into matrix.";
+         } else {
+           if (str.length() > 20) str = str.substr(0, 17) + "...";
+           KALDI_ERR << "Expecting numeric matrix data, got " << str;
+         }
+       }
+     }
+   }
+ bad:
+   KALDI_ERR << "Failed to read dense matrix from stream";
+ }
+
+ // Reads wei_gifo_m_dense_ from is, in the layout produced by writeDensem.
+ //   binary: int32 rows, int32 cols, then each row as NumCols raw doubles.
+ //   text:   "[" ... "]" with rows separated by newlines or ';'. The tokens
+ //           inf / infinity / nan are accepted (case-insensitively) with a
+ //           warning; "[]" or an empty body yields a 0 x 0 matrix.
+ // Any read failure or malformed content is reported through KALDI_ERR.
+ void readDensem(std::istream & is, bool binary) {
+   if (binary) {
+     int32 NumRows = 0, NumCols = 0;
+
+     is.read(reinterpret_cast<char *>(&NumRows), sizeof(NumRows));
+     is.read(reinterpret_cast<char *>(&NumCols), sizeof(NumCols));
+     // A truncated or corrupt header must not reach resize().
+     if (is.fail() || NumRows < 0 || NumCols < 0) goto bad;
+     if (NumRows != wei_gifo_m_dense_.rows() || NumCols != wei_gifo_m_dense_.cols())
+       wei_gifo_m_dense_.resize(NumRows,NumCols);
+
+     if (NumCols > 0) {
+       // Row staging buffer owned by a std::vector so that it is released on
+       // every exit path, including "goto bad"; the previous new[] buffer
+       // was never deleted.
+       std::vector<double> row(NumCols);
+       for (int i = 0; i < NumRows; i++) {
+         is.read(reinterpret_cast<char*>(&row[0]), sizeof(double)*NumCols);
+         if (is.fail()) goto bad;
+         for (int j = 0; j < NumCols; j++)
+           wei_gifo_m_dense_(i,j) = row[j];
+       }
+     }
+
+     if (is.eof()) return;
+     if (is.fail()) goto bad;
+     return;
+
+   } else {
+     std::string str;
+     is >> str;
+     if (str == "[]") { wei_gifo_m_dense_.resize(0, 0); return; }
+     std::vector<std::vector<double> > data;
+     std::vector<double> curr_row;
+     while(1) {
+       int i = is.peek();
+       if (static_cast<char>(i) == ']') {
+         // End of matrix: consume the bracket and one trailing line break.
+         is.get();
+         i = is.peek();
+         if (static_cast<char>(i) == '\r') {
+           is.get();
+           is.get();
+         } else if (static_cast<char>(i) == '\n') { is.get(); }
+
+         if (!curr_row.empty()) data.push_back(curr_row);
+         if (data.empty()) {wei_gifo_m_dense_.resize(0,0); return;}
+         int NumRows = data.size(), NumCols = data[0].size();
+         // Every row must be as long as the first one, otherwise the copy
+         // below would index past the end of a shorter row.
+         for (int r = 0; r < NumRows; r++) {
+           if (static_cast<int>(data[r].size()) != NumCols)
+             KALDI_ERR << "Dense matrix has inconsistent row lengths: row " << r
+                       << " has " << data[r].size() << " values, expected " << NumCols;
+         }
+         wei_gifo_m_dense_.resize(NumRows, NumCols);
+         for (int r = 0; r < NumRows; r++) {
+           for (int c = 0; c < NumCols; c++) {
+             wei_gifo_m_dense_(r,c) = data[r][c];
+           }
+         }
+         // Done. Without this return the loop kept parsing whatever follows
+         // the matrix and always ended in the KALDI_ERR below.
+         return;
+       } else if (static_cast<char>(i) == '\n' || static_cast<char>(i) == ';') {
+         // Row separator.
+         is.get();
+         if (curr_row.size() != 0) {
+           data.push_back(curr_row);
+           curr_row.clear();
+           curr_row.reserve(data.back().size());
+         }
+       } else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
+         double real;
+         is >> real;
+         curr_row.push_back(real);
+       } else if (isspace(i)) {
+         is.get();
+       } else {
+         // Non-numeric token: only inf / infinity / nan are accepted. End of
+         // input also lands here and is reported as an error.
+         std::string str;
+         is >> str;
+         if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
+             !KALDI_STRCASECMP(str.c_str(), "infinity")) {
+           curr_row.push_back(std::numeric_limits<double>::infinity());
+           KALDI_WARN << "Reading infinite value into matrix.";
+         } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
+           curr_row.push_back(std::numeric_limits<double>::quiet_NaN());
+           KALDI_WARN << "Reading NaN value into matrix.";
+         } else {
+           if (str.length() > 20) str = str.substr(0, 17) + "...";
+           KALDI_ERR << "Expecting numeric matrix data, got " << str;
+         }
+       }
+     }
+   }
+ bad:
+   KALDI_ERR << "Failed to read dense matrix from stream";
+ }
+
+ // Reads both dense weight matrices from is: wei_gifo_x_dense_ first (via
+ // readDensex), then wei_gifo_m_dense_ (via readDensem).
+ void readDense(std::istream & is, bool binary) {
+   this->readDensex(is, binary);
+   this->readDensem(is, binary);
+ }
+
+ // Reads wei_gifo_x_sp_ from is.
+ //   binary: int32 rows, int32 cols, int32 record count, then that many
+ //           records of one short followed by one float. A record with
+ //           col == -1 is a row marker whose value is the row index, or
+ //           padding when the value is also -1; any other record is the
+ //           entry (current row, col, value).
+ //   text:   the layout written by writeSparsex: single-number lines are row
+ //           markers, longer lines are "col value" entries, and the first
+ //           entry line additionally carries "NumRows NumCols".
+ // Any read failure or malformed content is reported through KALDI_ERR.
+ // NOTE(review): writeSparsex's binary branch writes each record as two
+ // doubles, which does not match the short + float records read here; the
+ // record layout is left unchanged -- decide which side is authoritative.
+ void readSparsex(std::istream & is, bool binary) {
+   KALDI_LOG << "readSparsex...";
+   if (binary) {
+     int32 NumRows = 0, NumCols = 0, NonZeros = 0;
+
+     is.read(reinterpret_cast<char *>(&NumRows), sizeof(NumRows));
+     is.read(reinterpret_cast<char *>(&NumCols), sizeof(NumCols));
+     // A truncated or corrupt header must not reach resize().
+     if (is.fail() || NumRows < 0 || NumCols < 0) goto bad;
+     if (NumRows != wei_gifo_x_sp_.rows() || NumCols != wei_gifo_x_sp_.cols())
+       wei_gifo_x_sp_.resize(NumRows,NumCols);
+
+     is.read(reinterpret_cast<char *>(&NonZeros), sizeof(NonZeros));
+     if (is.fail()) goto bad;
+
+     short col;
+     float data;
+     std::vector<T> tripletList;
+     // Row of the entries being read; -1 until the first row marker is seen
+     // (previously left uninitialized).
+     int curr_i = -1;
+     for (int i = 0; i < NonZeros; i++) {
+       is.read(reinterpret_cast<char*>(&col), sizeof(short));
+       is.read(reinterpret_cast<char*>(&data), sizeof(float));
+       if (is.fail()) goto bad;
+
+       if (col == -1) {
+         if (data == -1)
+           continue;  // padding record
+         curr_i = static_cast<int>(data);
+       } else {
+         // An entry with no valid preceding row marker cannot be placed.
+         if (curr_i < 0) goto bad;
+         tripletList.push_back(T(curr_i, static_cast<int>(col), static_cast<double>(data)));
+       }
+     }
+     wei_gifo_x_sp_.setFromTriplets(tripletList.begin(), tripletList.end());
+     if (is.eof()) return;
+     if (is.fail()) goto bad;
+     return;
+
+   } else {
+     std::string str;
+     is >> str;
+     // This used to resize wei_gifo_m_sp_, leaving the x matrix untouched.
+     if (str == "[]") { wei_gifo_x_sp_.resize(0, 0); return; }
+     std::vector<std::vector<double> > data;
+     std::vector<double> curr_row;
+     while(1) {
+       int i = is.peek();
+       if (static_cast<char>(i) == ']') {
+         // End of matrix: consume the bracket and one trailing line break.
+         is.get();
+         i = is.peek();
+         if (static_cast<char>(i) == '\r') {
+           is.get();
+           is.get();
+         } else if (static_cast<char>(i) == '\n') { is.get(); }
+
+         if (!curr_row.empty()) data.push_back(curr_row);
+         if (data.empty()) {wei_gifo_x_sp_.resize(0,0); return;}
+
+         // The dimensions ride on the first entry line, i.e. the first line
+         // with at least four values. data[0] is normally a one-element row
+         // marker, so reading data[0][2] / data[0][3] ran out of bounds.
+         int NonZeros = data.size(), NumRows = -1, NumCols = -1;
+         for (int r = 0; r < NonZeros; r++) {
+           if (data[r].size() >= 4) {
+             NumRows = static_cast<int>(data[r][2]);
+             NumCols = static_cast<int>(data[r][3]);
+             break;
+           }
+         }
+         if (NumRows < 0 || NumCols < 0)
+           KALDI_ERR << "Sparse matrix text does not contain its dimensions";
+
+         wei_gifo_x_sp_.resize(NumRows, NumCols);
+         std::vector<T> tripletList;
+         // Row of the entries being read; -1 until the first row marker.
+         int curr_r = -1;
+         for (int r = 0; r < NonZeros; r++) {
+           if (data[r].size() != 1) {
+             if (curr_r < 0)
+               KALDI_ERR << "Sparse matrix entry found before any row index";
+             tripletList.push_back(T(curr_r, static_cast<int>(data[r][0]), data[r][1]));
+           } else {
+             curr_r = static_cast<int>(data[r][0]);
+           }
+         }
+         wei_gifo_x_sp_.setFromTriplets(tripletList.begin(), tripletList.end());
+         // Done. Without this return the loop kept parsing whatever follows
+         // the matrix.
+         return;
+       } else if (static_cast<char>(i) == '\n' || static_cast<char>(i) == ';') {
+         // Line separator.
+         is.get();
+         if (curr_row.size() != 0) {
+           data.push_back(curr_row);
+           curr_row.clear();
+           curr_row.reserve(data.back().size());
+         }
+       } else if ( (i >= '0' && i <= '9') || i == '-' ) { // A number...
+         double real;
+         is >> real;
+         curr_row.push_back(real);
+       } else if (isspace(i)) {
+         is.get();
+       } else {
+         // Non-numeric token: only inf / infinity / nan are accepted. End of
+         // input also lands here and is reported as an error.
+         std::string str;
+         is >> str;
+         if (!KALDI_STRCASECMP(str.c_str(), "inf") ||
+             !KALDI_STRCASECMP(str.c_str(), "infinity")) {
+           curr_row.push_back(std::numeric_limits<double>::infinity());
+           KALDI_WARN << "Reading infinite value into matrix.";
+         } else if (!KALDI_STRCASECMP(str.c_str(), "nan")) {
+           curr_row.push_back(std::numeric_limits<double>::quiet_NaN());
+           KALDI_WARN << "Reading NaN value into matrix.";
+         } else {
+           if (str.length() > 20) str = str.substr(0, 17) + "...";
+           KALDI_ERR << "Expecting numeric matrix data, got " << str;
+         }
+       }
+     }
+   }
+ bad:
+   KALDI_ERR << "Failed to read sparse matrix from stream";
+ }
+
+ /// Reads the sparse recurrent weight matrix wei_gifo_m_sp_ from a stream.
+ ///
+ /// Binary layout (as consumed below): int32 rows, int32 cols, int32 record
+ /// count, then that many (short col, float value) records. A record with
+ /// col == -1 is a control record: value == -1 is skipped, any other value
+ /// selects the row that the following records belong to.
+ ///
+ /// Text layout: one leading token ("[]" means an empty matrix), then rows of
+ /// numbers separated by newline or ';' and terminated by ']'. The first row
+ /// is a header whose 4th and 5th values are the row and column counts; a
+ /// row holding a single value selects the current matrix row; any other row
+ /// contributes (col, value) from its first two entries.
+ ///
+ /// @param is      stream positioned at the start of the matrix
+ /// @param binary  true for the binary layout, false for text
+ /// Raises KALDI_ERR on malformed or truncated input.
+ void readSparsem(std::istream & is, bool binary) {
+   KALDI_LOG << "readSparsem...";
+   if (binary) {
+     int32 NumRows = 0, NumCols = 0, NonZeros = 0;
+
+     is.read(reinterpret_cast<char *>(&NumRows), sizeof(NumRows));
+     is.read(reinterpret_cast<char *>(&NumCols), sizeof(NumCols));
+     is.read(reinterpret_cast<char *>(&NonZeros), sizeof(NonZeros));
+     // A truncated header would otherwise leave the sizes indeterminate.
+     if (is.fail() || NumRows < 0 || NumCols < 0) goto bad;
+     if (NumRows != wei_gifo_m_sp_.rows() || NumCols != wei_gifo_m_sp_.cols())
+       wei_gifo_m_sp_.resize(NumRows,NumCols);
+
+     short col;
+     float data;
+     std::vector<T> tripletList;
+     int curr_i = -1;  // no row selected until the first row-marker record
+     for (int i = 0; i < NonZeros; i++) {
+       is.read(reinterpret_cast<char*>(&col), sizeof(short));
+       is.read(reinterpret_cast<char*>(&data), sizeof(float));
+       if (is.fail()) goto bad;
+       if (col == -1) {
+         if (data == -1)
+           continue;
+         curr_i = static_cast<int>(data);
+       } else {
+         // A value record before any row marker has no row to go to.
+         if (curr_i < 0) goto bad;
+         tripletList.push_back(T(curr_i, static_cast<double>(col), static_cast<double>(data)));
+       }
+     }
+     wei_gifo_m_sp_.setFromTriplets(tripletList.begin(), tripletList.end());
+     return;
+
+   } else {
+     std::string str;
+     is >> str;
+     if (str == "[]") { wei_gifo_m_sp_.resize(0, 0); return; }
+     std::vector<std::vector<double> > data;
+     std::vector<double> curr_row;
+     while (1) {
+       int i = is.peek();
+       if (static_cast<char>(i) == ']') {
+         // End of matrix: swallow the bracket and the trailing line ending.
+         is.get();
+         i = is.peek();
+         if (static_cast<char>(i) == '\r') {
+           is.get();
+           is.get();
+         } else if (static_cast<char>(i) == '\n') { is.get(); }
+
+         if (!curr_row.empty()) data.push_back(curr_row);
+         if (data.empty()) { wei_gifo_m_sp_.resize(0,0); return; }
+         if (data[0].size() < 5)
+           KALDI_ERR << "Sparse matrix header row too short: expected at least 5 values, got "
+                     << data[0].size();
+         int NonZeros = data.size(), NumRows = data[0][3], NumCols = data[0][4];
+         wei_gifo_m_sp_.resize(NumRows, NumCols);
+         std::vector<T> tripletList;
+         int curr_r = -1;  // no row selected yet
+         for (int r = 0; r < NonZeros; r++) {
+           if (data[r].size() == 1) {
+             curr_r = data[r][0];
+           } else if (curr_r >= 0) {
+             tripletList.push_back(T(curr_r, data[r][0], data[r][1]));
+           }
+           // Rows seen before the first row marker (this includes the header
+           // row) have no valid row index and are not turned into entries.
+         }
+         wei_gifo_m_sp_.setFromTriplets(tripletList.begin(), tripletList.end());
+         // Stop here so that data following the matrix is left in the stream.
+         return;
+       } else if (static_cast<char>(i) == '\n' || static_cast<char>(i) == ';') {
+         is.get();
+         if (curr_row.size() != 0) {
+           data.push_back(curr_row);
+           curr_row.clear();
+           curr_row.reserve(data.back().size());
+         }
+       } else if ( (i >= '0' && i <= '9') || i == '-' ) {  // A number...
+         double real;
+         is >> real;
+         curr_row.push_back(real);
+       } else if (isspace(i)) {
+         is.get();
+       } else {
+         // Not a plain number: accept inf/nan spellings, reject anything else.
+         std::string tok;
+         is >> tok;
+         if (!KALDI_STRCASECMP(tok.c_str(), "inf") ||
+             !KALDI_STRCASECMP(tok.c_str(), "infinity")) {
+           curr_row.push_back(std::numeric_limits<double>::infinity());
+           KALDI_WARN << "Reading infinite value into matrix.";
+         } else if (!KALDI_STRCASECMP(tok.c_str(), "nan")) {
+           curr_row.push_back(std::numeric_limits<double>::quiet_NaN());
+           KALDI_WARN << "Reading NaN value into matrix.";
+         } else {
+           if (tok.length() > 20) tok = tok.substr(0, 17) + "...";
+           KALDI_ERR << "Expecting numeric matrix data, got " << tok;
+         }
+       }
+     }
+   }
+ bad:
+   KALDI_ERR << "Failed to read sparse matrix from stream";
+ }
+
+ /// Reads both sparse weight matrices from the stream: the input weights
+ /// via readSparsex() first, then the recurrent weights via readSparsem().
+ void readSparse(std::istream & is, bool binary)
+ {
+   KALDI_LOG << "readSparse...";
+   readSparsex(is, binary);  // fills wei_gifo_x_sp_
+   readSparsem(is, binary);  // fills wei_gifo_m_sp_
+ }
+
+ /// Constructs an LSTM layer whose number of memory cells equals output_dim.
+ /// Weights are not allocated here; InitData() or ReadData() fills them in.
+ /// compressed_state starts as false so that Scale()/Add()/Update() never
+ /// branch on an indeterminate value before one of those calls has run.
+ Lstm(int32 input_dim, int32 output_dim) :
+   TrainableLayer(input_dim, output_dim),
+   compressed_state(false),
+   cell_dim_(output_dim),
+   learn_rate_coef_(1.0), max_grad_(0.0)
+ { }
+
+ /// Nothing to release explicitly; members clean up after themselves.
+ ~Lstm() {}
+
+ //Layer* Copy() const { return new Lstm(*this); }
+ /// Returns a heap-allocated copy of this layer made with the copy
+ /// constructor; the caller receives the raw pointer.
+ Layer* Copy() const {
+   Lstm *clone = new Lstm(*this);
+   return clone;
+ }
+ /// Layer type tag of this class.
+ LayerType GetType() const {
+   return l_Lstm;
+ }
+ /// Returns the same tag as GetType().
+ LayerType GetTypeNonParal() const {
+   return l_Lstm;
+ }
+
+ /// Initialises the layer from a text configuration stream.
+ ///
+ /// Recognised tokens, each followed by a float value:
+ ///   <ParamRange>    scale applied to the random initial weights (default 0.02)
+ ///   <LearnRateCoef> per-layer learning-rate multiplier (default 1.0)
+ ///   <MaxGrad>       gradient clipping bound, used by Update() when > 0 (default 0.0)
+ ///   <FgateBias>     initial forget-gate bias; 0.0 keeps the random values
+ /// Any other token raises KALDI_ERR.
+ ///
+ /// Allocates dense weights with random values and empty sparse weights of
+ /// matching shape, and leaves the layer in the dense (uncompressed) state.
+ void InitData(std::istream &is) {
+   KALDI_LOG << "InitData()";
+   // define options
+   float param_range = 0.02, max_grad = 0.0;
+   float learn_rate_coef = 1.0;
+   float fgate_bias_init = 0.0;  // the initial value for the bias of the forget gates
+   // parse config
+   std::string token;
+   while (!is.eof()) {
+     ReadToken(is, false, &token);
+     if (token == "<ParamRange>") ReadBasicType(is, false, &param_range);
+     else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef);
+     else if (token == "<MaxGrad>") ReadBasicType(is, false, &max_grad);
+     else if (token == "<FgateBias>") ReadBasicType(is, false, &fgate_bias_init);
+     else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
+                    << " (ParamRange|LearnRateCoef|MaxGrad|FgateBias)";
+     is >> std::ws;  // eat-up whitespace
+   }
+
+   // weights connecting the layer input with the units/gates
+   wei_gifo_x_dense_ = MatrixXd::Random(4 * cell_dim_, input_dim_)*param_range;
+   wei_gifo_x_sp_.resize(4 * cell_dim_, input_dim_);
+
+   // weights connecting memory cell outputs with the units/gates
+   wei_gifo_m_dense_ = MatrixXd::Random(4 * cell_dim_, cell_dim_)*param_range;
+   wei_gifo_m_sp_.resize(4 * cell_dim_, cell_dim_);
+
+   // the bias for the units/gates
+   bias_.Resize(4 * cell_dim_); bias_.InitRandUniform(param_range);
+   if (fgate_bias_init != 0.0) {  // reset the bias of the forget gates
+     // the forget gate is the third cell_dim_-sized slice (order g, i, f, o)
+     bias_.Range(2 * cell_dim_, cell_dim_).Set(fgate_bias_init);
+   }
+   // peephole connections for i, f, and o, with diagonal matrices (vectors)
+   phole_i_c_.Resize(cell_dim_); phole_i_c_.InitRandUniform(param_range);
+   phole_f_c_.Resize(cell_dim_); phole_f_c_.InitRandUniform(param_range);
+   phole_o_c_.Resize(cell_dim_); phole_o_c_.InitRandUniform(param_range);
+
+   learn_rate_coef_ = learn_rate_coef;
+   max_grad_ = max_grad;
+
+   compressed_state = false;
+ }
+
+ // Loads the layer parameters from a model stream.
+ // Reads, in this order: optional <LearnRateCoef> and <MaxGrad> values, the
+ // bias vector, the three peephole vectors, and the two sparse weight matrices
+ // (via readSparse). Afterwards the gradient buffers are sized to match the
+ // loaded parameters and zeroed, and the two state backup buffers are reset.
+ //   is         - stream to read from
+ //   binary     - stream format flag passed to every reader
+ //   decompress - stored into compressed_state
+ // NOTE(review): compressed_state follows `decompress`, but the weights are
+ // always loaded into the *_sp_ matrices; PropagateFnc uses the *_dense_
+ // matrices when compressed_state is false. Confirm callers always pass true.
+ void ReadData(std::istream &is, bool binary, bool decompress) {
+#ifdef ANDROIDLIB
+ LOGD("ReadData()");
+#endif
+ KALDI_LOG << "ReadData()";
+ compressed_state = decompress;
+ // optional learning-rate coefs
+ if ('<' == Peek(is, binary)) {
+ ExpectToken(is, binary, "<LearnRateCoef>");
+ ReadBasicType(is, binary, &learn_rate_coef_);
+ }
+ if ('<' == Peek(is, binary)) {
+ ExpectToken(is, binary, "<MaxGrad>");
+ ReadBasicType(is, binary, &max_grad_);
+ }
+// CuMatrix<BaseFloat> wei_gifo_x_, wei_gifo_m_;
+ int32 wei_x_rows, wei_x_cols, wei_m_rows, wei_m_cols;
+ // The block below is compiled out; it is the earlier dense/sparse dispatch.
+#if 0
+ if (!compressed_state) {
+ // read parameters
+ //wei_gifo_x_.Read(is, binary);
+ //wei_gifo_m_.Read(is, binary);
+ readDense(is, binary);
+ wei_x_rows = wei_gifo_x_dense_.rows(); wei_x_cols = wei_gifo_x_dense_.cols();
+ wei_m_rows = wei_gifo_m_dense_.rows(); wei_m_cols = wei_gifo_m_dense_.cols();
+ bias_.Read(is, binary);
+ phole_i_c_.Read(is, binary);
+ phole_f_c_.Read(is, binary);
+ phole_o_c_.Read(is, binary);
+// copyFromMat(wei_gifo_x_dense_, wei_gifo_x_);
+// copyFromMat(wei_gifo_m_dense_, wei_gifo_m_);
+
+
+ /*// yyLee: Check Quantized Values
+ KALDI_LOG << "Xr:" << wei_gifo_x_.NumRows() <<"Xc:"<< wei_gifo_x_.NumCols();
+ Vector<BaseFloat> weight_row_x(wei_gifo_x_.NumCols());
+ for(MatrixIndexT idx=0; idx<wei_gifo_x_.NumRows(); idx++) {
+ int nq=0, iq;
+ BaseFloat val[20];
+ weight_row_x.CopyRowFromMat(wei_gifo_x_.Mat(), idx);
+ for(MatrixIndexT i=0; i<wei_gifo_x_.NumCols(); i++) {
+ if (weight_row_x(i) != 0.0) {
+ iq = 0;
+ while(iq<nq && val[iq]!=weight_row_x(i)) iq++;
+ if (iq==nq) {
+ val[iq] = weight_row_x(i);
+ nq++;
+ }
+ }
+ }
+ if (nq != 16) KALDI_WARN << "exceed Q:" << nq << " @row " << idx;
+ } //end of yyLee */
+ }
+ else {
+ bias_.Read(is, binary);
+ phole_i_c_.Read(is, binary);
+ phole_f_c_.Read(is, binary);
+ phole_o_c_.Read(is, binary);
+// mask_x.Read(is, binary);
+// mask_m.Read(is, binary);
+// alphas_x.Read(is, binary);
+// alphas_m.Read(is, binary);
+// int32 quant_bits;
+// ReadBasicType(is, binary, &quant_bits);
+// for(int i = 0; i < quant_bits; i++){
+// tmp_arr_x.Read(is, binary);
+// tmp_arr_m.Read(is, binary);
+// betas_x.push_back(tmp_arr_x);
+// betas_m.push_back(tmp_arr_m);
+// }
+// wei_gifo_x_.Resize(4 * cell_dim_, input_dim_);
+// wei_gifo_m_.Resize(4 * cell_dim_, cell_dim_);
+// wei_gifo_x_.Decompress(mask_x, alphas_x, betas_x);
+// wei_gifo_m_.Decompress(mask_m, alphas_m, betas_m);
+// copyFromMat(wei_gifo_x_sp_, wei_gifo_x_);
+// copyFromMat(wei_gifo_m_sp_, wei_gifo_m_);
+ readSparse(is, binary);
+ wei_x_rows = wei_gifo_x_sp_.rows(); wei_x_cols = wei_gifo_x_sp_.cols();
+ wei_m_rows = wei_gifo_m_sp_.rows(); wei_m_cols = wei_gifo_m_sp_.cols();
+ }
+#endif
+ // hojun.jin add
+ // Active path: vectors first, then the sparse matrices, regardless of
+ // the value of compressed_state.
+ bias_.Read(is, binary);
+ phole_i_c_.Read(is, binary);
+ phole_f_c_.Read(is, binary);
+ phole_o_c_.Read(is, binary);
+ readSparse(is, binary);
+ wei_x_rows = wei_gifo_x_sp_.rows(); wei_x_cols = wei_gifo_x_sp_.cols();
+ wei_m_rows = wei_gifo_m_sp_.rows(); wei_m_cols = wei_gifo_m_sp_.cols();
+
+
+ // initialize the buffer for gradients updates
+ wei_gifo_x_corr_.Resize(wei_x_rows, wei_x_cols);
+ wei_gifo_m_corr_.Resize(wei_m_rows, wei_m_cols);
+ // copy to get the right dimension, then clear
+ bias_corr_ = bias_; bias_corr_.SetZero();
+ phole_i_c_corr_ = phole_i_c_; phole_i_c_corr_.SetZero();
+ phole_f_c_corr_ = phole_f_c_; phole_f_c_corr_.SetZero();
+ phole_o_c_corr_ = phole_o_c_; phole_o_c_corr_.SetZero();
+ //KALDI_WARN<<"Reset";
+ // state carried between PropagateFnc calls starts at zero
+ backup_buf_.Resize(cell_dim_); backup_buf_.SetZero();
+ backup_buf2_.Resize(cell_dim_); backup_buf2_.SetZero();
+
+ //cpubackup_buf_.Resize(cell_dim_); //cpubackup_buf_.SetZero();
+ }
+#if 0
+ /// Prunes both dense weight matrices at the given rate, stores the results
+ /// in the sparse matrices, and switches the layer to the compressed state.
+ /// Requires an uncompressed layer and 0 <= prune_rate <= 100.
+ void Pruning(const int prune_rate) {
+   KALDI_ASSERT(!compressed_state);
+   KALDI_LOG << "Pruning()";
+   KALDI_ASSERT(prune_rate <= 100 && prune_rate >= 0); // 100: uncompressed model has already pruned weights
+
+   // input weights: dense -> CuMatrix -> prune -> sparse
+   CuMatrix<BaseFloat> x_weights;
+   copyIntoMat(wei_gifo_x_dense_, &x_weights);
+   x_weights.Prune(prune_rate, mask_x);
+   copyFromMat(wei_gifo_x_sp_, x_weights);
+
+   // recurrent weights: same steps
+   CuMatrix<BaseFloat> m_weights;
+   copyIntoMat(wei_gifo_m_dense_, &m_weights);
+   m_weights.Prune(prune_rate, mask_m);
+   copyFromMat(wei_gifo_m_sp_, m_weights);
+
+   compressed_state = true;
+ }
+ /// Runs AlterQuant on both dense weight matrices, stores the results in the
+ /// sparse matrices, and switches the layer to the compressed state.
+ /// Requires an uncompressed layer and quant_bits >= 1.
+ void AlterQuantization(const int quant_bits, const bool do_prune) {
+   KALDI_ASSERT(!compressed_state);
+   KALDI_LOG << "AlterQuantization()";
+   KALDI_ASSERT(quant_bits >= 1);
+
+   // input weights
+   CuMatrix<BaseFloat> x_weights;
+   copyIntoMat(wei_gifo_x_dense_, &x_weights);
+   x_weights.AlterQuant(quant_bits, do_prune, alphas_x, betas_x);
+   copyFromMat(wei_gifo_x_sp_, x_weights);
+
+   // recurrent weights
+   CuMatrix<BaseFloat> m_weights;
+   copyIntoMat(wei_gifo_m_dense_, &m_weights);
+   m_weights.AlterQuant(quant_bits, do_prune, alphas_m, betas_m);
+   copyFromMat(wei_gifo_m_sp_, m_weights);
+
+   compressed_state = true;
+ }
+#endif
+
+#if 0
+ /// Writes the layer parameters to a model stream.
+ /// Output order: <LearnRateCoef>, <MaxGrad>, then for a dense layer the dense
+ /// weights followed by bias and peepholes, or for a compressed layer bias and
+ /// peepholes followed by the sparse weights.
+ /// `compress` must equal the current compressed_state.
+ void WriteData(std::ostream &os, bool binary, bool compress) const {
+   KALDI_LOG << "WriteData()";
+   KALDI_ASSERT(compressed_state == compress);
+
+   WriteToken(os, binary, "<LearnRateCoef>");
+   WriteBasicType(os, binary, learn_rate_coef_);
+   WriteToken(os, binary, "<MaxGrad>");
+   WriteBasicType(os, binary, max_grad_);
+
+   // dense weights go before the vectors
+   if (!compressed_state)
+     writeDense(os, binary);
+
+   bias_.Write(os, binary);
+   phole_i_c_.Write(os, binary);
+   phole_f_c_.Write(os, binary);
+   phole_o_c_.Write(os, binary);
+
+   // sparse weights go after the vectors
+   if (compressed_state)
+     writeSparse(os, binary);
+ }
+ // print statistics of the parameters
+ /// Returns a multi-line string with MomentStatistics of the bias and the
+ /// three peephole vectors (the weight matrices are not included).
+ std::string Info() const {
+   KALDI_LOG << "Info()";
+   std::string report(" ");
+   report += "\n bias_ " + MomentStatistics(bias_);
+   report += "\n phole_i_c_ " + MomentStatistics(phole_i_c_);
+   report += "\n phole_f_c_ " + MomentStatistics(phole_f_c_);
+   report += "\n phole_o_c_ " + MomentStatistics(phole_o_c_);
+   return report;
+ }
+
+ // print statistics of the gradients buffer
+ /// Returns a multi-line string with MomentStatistics of every gradient
+ /// buffer: both weight matrices, the bias, and the three peepholes.
+ std::string InfoGradient() const {
+   KALDI_LOG << "InfoGradient()";
+   std::string report(" ");
+   report += "\n wei_gifo_x_corr_ " + MomentStatistics(wei_gifo_x_corr_);
+   report += "\n wei_gifo_m_corr_ " + MomentStatistics(wei_gifo_m_corr_);
+   report += "\n bias_corr_ " + MomentStatistics(bias_corr_);
+   report += "\n phole_i_c_corr_ " + MomentStatistics(phole_i_c_corr_);
+   report += "\n phole_f_c_corr_ " + MomentStatistics(phole_f_c_corr_);
+   report += "\n phole_o_c_corr_ " + MomentStatistics(phole_o_c_corr_);
+   return report;
+ }
+#endif
+//yhohan add
+ /// Resets the state carried between PropagateFnc() calls: both backup
+ /// buffers are resized to cell_dim_ and filled with zeros.
+ void SetBackupBuf() {
+   backup_buf_.Resize(cell_dim_);
+   backup_buf_.SetZero();
+
+   backup_buf2_.Resize(cell_dim_);
+   backup_buf2_.SetZero();
+ }
+ // the feedforward pass
+ /// The feed-forward pass.
+ ///
+ /// @param in   input frames, one row per frame
+ /// @param out  receives the cell outputs (rows 1..T of the M slice)
+ ///
+ /// Works on a double-precision Eigen copy of propagate_buf_, which has T + 2
+ /// rows and seven cell_dim_-wide column slices in the order g, i, f, o, c,
+ /// h, m. Row 0 is seeded from backup_buf_ (m) and backup_buf2_ (c), i.e. the
+ /// state left by the previous call; after the pass the last frame's m and c
+ /// are stored back so the next call continues from them.
+ /// Uses the sparse weights when compressed_state is true, dense otherwise.
+ void PropagateFnc(const CuMatrixBase<BaseFloat> &in, CuMatrixBase<BaseFloat> *out) {
+ //#define IN_MAT
+#ifdef IN_MAT
+   // debug aid: dump the input and stop
+   Matrix<BaseFloat> temp_mat;
+   temp_mat.Resize(in.NumRows(), in.NumCols());
+   temp_mat.CopyFromMat(in);
+   std::cout<<"in "<<temp_mat;
+   exit(1);
+#endif
+   MatrixXd in_mat; copyFromMat(in_mat, in);
+   MatrixXd bias_mat; copyFromRowVec(bias_mat, bias_);
+
+   int32 T = in.NumRows();  // total number of frames
+   // resize & clear propagation buffers. [0] - the initial states with all the values to be 0
+   // [1, T] - correspond to the inputs [T+1] - not used; for alignment with the backward layer
+   propagate_buf_.Resize(T + 2, 7 * cell_dim_, kSetZero);
+   MatrixXd propagate_buf_mat; copyFromMat(propagate_buf_mat, propagate_buf_);
+
+   Ref<MatrixXd> YG = propagate_buf_mat.middleCols(0, cell_dim_);
+   Ref<MatrixXd> YI = propagate_buf_mat.middleCols(1 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YF = propagate_buf_mat.middleCols(2 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YO = propagate_buf_mat.middleCols(3 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YC = propagate_buf_mat.middleCols(4 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YH = propagate_buf_mat.middleCols(5 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YM = propagate_buf_mat.middleCols(6 * cell_dim_, cell_dim_);
+
+   Ref<MatrixXd> YGIFO = propagate_buf_mat.middleCols(0, 4 * cell_dim_);
+
+   // kykim: restore the state saved by the previous call into row 0
+   copyRowFromVec(YM,backup_buf_,0);
+   copyRowFromVec(YC,backup_buf2_,0);
+
+   // no recurrence involved in the inputs
+   if (compressed_state) {
+     YGIFO.middleRows(1,T) = (in_mat * wei_gifo_x_sp_.transpose());
+   } else {
+     YGIFO.middleRows(1,T) = (in_mat * wei_gifo_x_dense_.transpose());
+   }
+
+   // add the bias row to every frame
+   YGIFO.middleRows(1,T) += MatrixXd::Constant(T,1,1) * bias_mat;
+
+   // The peephole weights do not change during the pass, so copy them to
+   // host vectors once instead of once per frame.
+   const Vector<BaseFloat> phole_i_c_vec(phole_i_c_.Vec());
+   const Vector<BaseFloat> phole_f_c_vec(phole_f_c_.Vec());
+   const Vector<BaseFloat> phole_o_c_vec(phole_o_c_.Vec());
+
+   for (int t = 1; t <= T; t++) {
+     // single-row views of the individual units/gates at frame t
+     Ref<MatrixXd> YG_t = YG.middleRows(t,1);
+     Ref<MatrixXd> YI_t = YI.middleRows(t,1);
+     Ref<MatrixXd> YF_t = YF.middleRows(t,1);
+     Ref<MatrixXd> YO_t = YO.middleRows(t,1);
+     Ref<MatrixXd> YC_t = YC.middleRows(t,1);
+     Ref<MatrixXd> YH_t = YH.middleRows(t,1);
+     Ref<MatrixXd> YM_t = YM.middleRows(t,1);
+     Ref<MatrixXd> YGIFO_t = YGIFO.middleRows(t,1);
+     // add the recurrence of the previous memory cell to various gates/units
+     if (compressed_state) {
+       YGIFO_t += YM.middleRows(t-1,1) * wei_gifo_m_sp_.transpose();
+     } else {
+       YGIFO_t += YM.middleRows(t-1,1) * wei_gifo_m_dense_.transpose();
+     }
+
+     // cell state of the previous frame
+     Ref<MatrixXd> YC_t1 = YC.middleRows(t-1,1);
+
+     // input gate: peephole from the previous cell state
+     for (int i = 0; i < YI_t.cols(); i++)
+       YI_t(0,i) += phole_i_c_vec(i) * YC_t1(0,i);
+     // forget gate: peephole from the previous cell state
+     for (int i = 0; i < YF_t.cols(); i++)
+       YF_t(0,i) += phole_f_c_vec(i) * YC_t1(0,i);
+
+     // apply sigmoid/tanh functions to squash the outputs
+     sigmoidMat(YI_t,YI_t);
+     sigmoidMat(YF_t,YF_t);
+     tanhMat(YG_t,YG_t);
+     // memory cell: c_t = i_t * g_t + f_t * c_{t-1}
+     for (int i = 0; i < YC_t.size(); i++)
+       YC_t(0,i) = (YI_t(0,i) * YG_t(0,i)) + (YF_t(0,i) * YC_t1(0,i));
+
+     // h - the tanh-squashed version of c
+     tanhMat(YC_t,YH_t);
+
+     // output gate: peephole from the current cell state
+     for (int i = 0; i < YO_t.size(); i++)
+       YO_t(0,i) += phole_o_c_vec(i) * YC_t(0,i);
+     sigmoidMat(YO_t,YO_t);
+
+     // finally the outputs: m_t = o_t * h_t
+     for (int i = 0; i < YM_t.size(); i++)
+       YM_t(0,i) = (YO_t(0,i) * YH_t(0,i));
+   }  // end of loop t
+   copyIntoMat(YM.middleRows(1,T),out);
+   copyIntoMat(propagate_buf_mat, &propagate_buf_);
+ //#define OUT_MAT
+#ifdef OUT_MAT
+   // debug aid: dump the propagation buffer and stop
+   Matrix<BaseFloat> temp_mat;
+   temp_mat.Resize(propagate_buf_.NumRows(), propagate_buf_.NumCols());
+   temp_mat.CopyFromMat(propagate_buf_);
+   std::cout<<"out "<<temp_mat;
+   exit(1);
+#endif
+
+   // kykim: remember the last frame's output and cell state for the next call
+   backup_buf_.Resize(cell_dim_);
+   copyIntoVec(YM.middleRows(T,1), backup_buf_);
+
+   backup_buf2_.Resize(cell_dim_);
+   copyIntoVec(YC.middleRows(T,1), backup_buf2_);
+ }
+#if 0
+ // the back-propagation pass
+ /// The back-propagation pass.
+ ///
+ /// @param in        inputs that were given to PropagateFnc
+ /// @param out       outputs produced by PropagateFnc (not read here)
+ /// @param out_diff  error signal w.r.t. the outputs, one row per frame
+ /// @param in_diff   receives the error signal w.r.t. the inputs
+ ///
+ /// Reads the activations stored in propagate_buf_, fills backpropagate_buf_
+ /// (same layout: T + 2 rows, slices g, i, f, o, c, h, m) walking the frames
+ /// backwards, and accumulates the parameter gradients into the *_corr_
+ /// buffers using opts_.momentum.
+ void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in, const CuMatrixBase<BaseFloat> &out,
+                       const CuMatrixBase<BaseFloat> &out_diff, CuMatrixBase<BaseFloat> *in_diff) {
+   KALDI_LOG << "Backpropagate in eigen-layer";
+
+   int32 T = in.NumRows();
+   // initialize the back-propagation buffer
+   backpropagate_buf_.Resize(T + 2, 7 * cell_dim_, kSetZero);
+   MatrixXd propagate_buf_mat; copyFromMat(propagate_buf_mat, propagate_buf_);
+   MatrixXd backpropagate_buf_mat; copyFromMat(backpropagate_buf_mat, backpropagate_buf_);
+
+   // get the activations of the gates/units from the feedforward buffer; these variables will be used
+   // in gradients computation
+   Ref<MatrixXd> YG = propagate_buf_mat.middleCols(0, cell_dim_);
+   Ref<MatrixXd> YI = propagate_buf_mat.middleCols(1 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YF = propagate_buf_mat.middleCols(2 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YO = propagate_buf_mat.middleCols(3 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YC = propagate_buf_mat.middleCols(4 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YH = propagate_buf_mat.middleCols(5 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> YM = propagate_buf_mat.middleCols(6 * cell_dim_, cell_dim_);
+
+   // errors back-propagated to individual gates/units
+   Ref<MatrixXd> DG = backpropagate_buf_mat.middleCols(0, cell_dim_);
+   Ref<MatrixXd> DI = backpropagate_buf_mat.middleCols(1 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DF = backpropagate_buf_mat.middleCols(2 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DO = backpropagate_buf_mat.middleCols(3 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DC = backpropagate_buf_mat.middleCols(4 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DH = backpropagate_buf_mat.middleCols(5 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DM = backpropagate_buf_mat.middleCols(6 * cell_dim_, cell_dim_);
+   Ref<MatrixXd> DGIFO = backpropagate_buf_mat.middleCols(0, 4 * cell_dim_);
+
+   MatrixXd tmp_mat;
+   copyFromMat(tmp_mat, out_diff);
+   DM.middleRows(1,T) = tmp_mat;
+
+   for (int t = T; t >= 1; t--) {
+     // variables representing activations of individual units/gates
+     Ref<MatrixXd> YG_t = YG.middleRows(t,1);
+     Ref<MatrixXd> YI_t = YI.middleRows(t,1);
+     Ref<MatrixXd> YF_t = YF.middleRows(t,1);
+     Ref<MatrixXd> YO_t = YO.middleRows(t,1);
+     Ref<MatrixXd> YC_t = YC.middleRows(t,1);
+     Ref<MatrixXd> YH_t = YH.middleRows(t,1);
+     Ref<MatrixXd> YM_t = YM.middleRows(t,1);
+
+     // variables representing errors of individual units/gates
+     Ref<MatrixXd> DG_t = DG.middleRows(t,1);
+     Ref<MatrixXd> DI_t = DI.middleRows(t,1);
+     Ref<MatrixXd> DF_t = DF.middleRows(t,1);
+     Ref<MatrixXd> DO_t = DO.middleRows(t,1);
+     Ref<MatrixXd> DC_t = DC.middleRows(t,1);
+     Ref<MatrixXd> DH_t = DH.middleRows(t,1);
+     Ref<MatrixXd> DM_t = DM.middleRows(t,1);
+
+     // d_m comes from two parts: errors from the upper layer and errors from the following frame (t+1).
+     // DM_t is a 1 x cell_dim_ row, so the recurrent part is the 1 x 4*cell_dim_
+     // error row of frame t+1 times the 4*cell_dim_ x cell_dim_ weight matrix.
+     if (compressed_state) {
+       KALDI_ASSERT(wei_gifo_m_sp_.rows() == DGIFO.cols() && DM_t.size() == wei_gifo_m_sp_.cols());
+       DM_t += DGIFO.middleRows(t+1,1) * wei_gifo_m_sp_;
+     } else {
+       KALDI_ASSERT(wei_gifo_m_dense_.rows() == DGIFO.cols() && DM_t.size() == wei_gifo_m_dense_.cols());
+       DM_t += DGIFO.middleRows(t+1,1) * wei_gifo_m_dense_;
+     }
+
+     // d_h
+     KALDI_ASSERT(YO_t.size() == DM_t.size() && DM_t.size() == DH_t.size());
+     for (int i = 0; i < DH_t.size(); i++)
+       DH_t(0,i) = (YO_t(0,i) * DM_t(0,i));
+     difftanhMat(DH_t, YH_t, DH_t);
+
+     // d_o - output gate
+     KALDI_ASSERT(YH_t.size() == DM_t.size() && DM_t.size() == DO_t.size());
+     for (int i = 0; i < DO_t.size(); i++)
+       DO_t(0,i) = (YH_t(0,i) * DM_t(0,i));
+     diffsigmoidMat(DO_t, YO_t, DO_t);
+
+     // d_c - memory cell
+     Vector<BaseFloat> phole_o_c_vec(phole_o_c_.Vec());
+     Vector<BaseFloat> phole_f_c_vec(phole_f_c_.Vec());
+     Vector<BaseFloat> phole_i_c_vec(phole_i_c_.Vec());
+     KALDI_ASSERT(DC_t.size() == DH_t.size()
+                  && phole_o_c_vec.Dim() == DO_t.size() && DC_t.size() == phole_o_c_vec.Dim()
+                  && DC.cols() == YF.cols() && DC_t.size() == YF.cols()
+                  && phole_f_c_vec.Dim() == DF.cols() && DC_t.size() == phole_f_c_vec.Dim()
+                  && phole_i_c_vec.Dim() == DI.cols() && DC_t.size() == phole_i_c_vec.Dim());
+     // contributions from h and o of this frame, and from c, f, i of frame t+1
+     for (int i = 0; i < DC_t.size(); i++)
+       DC_t(0,i) += (DH_t(0,i))
+                    + (phole_o_c_vec(i) * DO_t(0,i))
+                    + (YF(t+1,i) * DC(t+1,i))
+                    + (phole_f_c_vec(i) * DF(t+1,i))
+                    + (phole_i_c_vec(i) * DI(t+1,i));
+
+     // d_f - forget gate
+     KALDI_ASSERT(YC.cols() == DC_t.size() && DC_t.size() == DF_t.size());
+     for (int i = 0; i < DF_t.size(); i++)
+       DF_t(0,i) = (YC(t-1,i) * DC_t(0,i));
+     diffsigmoidMat(DF_t, YF_t, DF_t);
+
+     // d_i - input gate
+     KALDI_ASSERT(YG_t.size() == DC_t.size() && DI_t.size() == DC_t.size());
+     for (int i = 0; i < DI_t.size(); i++)
+       DI_t(0,i) = (YG_t(0,i) * DC_t(0,i));
+     diffsigmoidMat(DI_t, YI_t, DI_t);
+
+     // d_g
+     KALDI_ASSERT(YI_t.size() == DC_t.size() && DC_t.size() == DG_t.size());
+     for (int i = 0; i < DG_t.size(); i++)
+       DG_t(0,i) = (YI_t(0,i) * DC_t(0,i));
+     difftanhMat(DG_t, YG_t, DG_t);
+   }  // end of t
+   copyIntoMat(propagate_buf_mat, &propagate_buf_);
+   copyIntoMat(backpropagate_buf_mat, &backpropagate_buf_);
+   // errors back-propagated to the inputs
+   MatrixXd in_diff_mat;
+   if (compressed_state) {
+     KALDI_ASSERT(DGIFO.cols() == wei_gifo_x_sp_.rows());
+     in_diff_mat = DGIFO.middleRows(1,T) * wei_gifo_x_sp_;
+   } else {
+     KALDI_ASSERT(DGIFO.cols() == wei_gifo_x_dense_.rows());
+     in_diff_mat = DGIFO.middleRows(1,T) * wei_gifo_x_dense_;
+   }
+   copyIntoMat(in_diff_mat, in_diff);
+   // updates to the model parameters
+   const BaseFloat mmt = opts_.momentum;
+   CuMatrix<BaseFloat> DGIFO_T; copyIntoMat(DGIFO.middleRows(1,T),&DGIFO_T);
+   CuMatrix<BaseFloat> YM_T0; copyIntoMat(YM.middleRows(0,T),&YM_T0);
+   CuMatrix<BaseFloat> YC_T0; copyIntoMat(YC.middleRows(0,T),&YC_T0);
+   CuMatrix<BaseFloat> YC_T1; copyIntoMat(YC.middleRows(1,T),&YC_T1);
+   CuMatrix<BaseFloat> DI_T; copyIntoMat(DI.middleRows(1,T),&DI_T);
+   CuMatrix<BaseFloat> DF_T; copyIntoMat(DF.middleRows(1,T),&DF_T);
+   CuMatrix<BaseFloat> DO_T; copyIntoMat(DO.middleRows(1,T),&DO_T);
+
+   wei_gifo_x_corr_.AddMatMat(1.0, DGIFO_T, kTrans, in, kNoTrans, mmt);
+   wei_gifo_m_corr_.AddMatMat(1.0, DGIFO_T, kTrans, YM_T0, kNoTrans, mmt);
+   bias_corr_.AddRowSumMat(1.0, DGIFO_T, mmt);
+   phole_i_c_corr_.AddDiagMatMat(1.0, DI_T, kTrans, YC_T0, kNoTrans, mmt);
+   phole_f_c_corr_.AddDiagMatMat(1.0, DF_T, kTrans, YC_T0, kNoTrans, mmt);
+   phole_o_c_corr_.AddDiagMatMat(1.0, DO_T, kTrans, YC_T1, kNoTrans, mmt);
+ }
+#endif
+ /// Applies the accumulated gradients to the parameters.
+ ///
+ /// When max_grad_ > 0 every gradient buffer is first clipped to
+ /// [-max_grad_, max_grad_]. The step size is opts_.learn_rate scaled by
+ /// learn_rate_coef_. Sparse weights are updated in the compressed state,
+ /// dense weights otherwise. Neither `input` nor `diff` is read here.
+ void Update(const CuMatrixBase<BaseFloat> &input, const CuMatrixBase<BaseFloat> &diff) {
+   KALDI_LOG << "Update()";
+   // clip gradients
+   if (max_grad_ > 0) {
+     wei_gifo_x_corr_.ApplyFloor(-max_grad_); wei_gifo_x_corr_.ApplyCeiling(max_grad_);
+     wei_gifo_m_corr_.ApplyFloor(-max_grad_); wei_gifo_m_corr_.ApplyCeiling(max_grad_);
+     bias_corr_.ApplyFloor(-max_grad_); bias_corr_.ApplyCeiling(max_grad_);
+     phole_i_c_corr_.ApplyFloor(-max_grad_); phole_i_c_corr_.ApplyCeiling(max_grad_);
+     phole_f_c_corr_.ApplyFloor(-max_grad_); phole_f_c_corr_.ApplyCeiling(max_grad_);
+     phole_o_c_corr_.ApplyFloor(-max_grad_); phole_o_c_corr_.ApplyCeiling(max_grad_);
+   }
+
+   // update parameters
+   const BaseFloat lr = opts_.learn_rate * learn_rate_coef_;
+
+   // Convert the gradients only into the representation that is updated;
+   // each branch builds its own copies, so no shared dense copy is needed.
+   if (compressed_state) {
+     Eigen::SparseMatrix<double, RowMajor> wei_gifo_x_corr_mat;
+     copyFromMat(wei_gifo_x_corr_mat, wei_gifo_x_corr_);
+     Eigen::SparseMatrix<double, RowMajor> wei_gifo_m_corr_mat;
+     copyFromMat(wei_gifo_m_corr_mat, wei_gifo_m_corr_);
+     wei_gifo_x_sp_ += -lr*wei_gifo_x_corr_mat;
+     wei_gifo_m_sp_ += -lr*wei_gifo_m_corr_mat;
+   } else {
+     MatrixXd wei_gifo_x_corr_mat; copyFromMat(wei_gifo_x_corr_mat, wei_gifo_x_corr_);
+     MatrixXd wei_gifo_m_corr_mat; copyFromMat(wei_gifo_m_corr_mat, wei_gifo_m_corr_);
+     wei_gifo_x_dense_ += -lr*wei_gifo_x_corr_mat;
+     wei_gifo_m_dense_ += -lr*wei_gifo_m_corr_mat;
+   }
+   bias_.AddVec(-lr, bias_corr_, 1.0);
+   phole_i_c_.AddVec(-lr, phole_i_c_corr_, 1.0);
+   phole_f_c_.AddVec(-lr, phole_f_c_corr_, 1.0);
+   phole_o_c_.AddVec(-lr, phole_o_c_corr_, 1.0);
+ }
+
+ /// Multiplies the active weight matrices (sparse when compressed, dense
+ /// otherwise), the bias and the three peephole vectors by `scale`.
+ void Scale(BaseFloat scale) {
+   KALDI_LOG << "Scale()";
+   if (!compressed_state) {
+     wei_gifo_x_dense_ *= scale;
+     wei_gifo_m_dense_ *= scale;
+   } else {
+     wei_gifo_x_sp_ *= scale;
+     wei_gifo_m_sp_ *= scale;
+   }
+
+   bias_.Scale(scale);
+   phole_i_c_.Scale(scale);
+   phole_f_c_.Scale(scale);
+   phole_o_c_.Scale(scale);
+ }
+
+ /// Adds `scale` times the parameters of another Lstm layer to this one.
+ ///
+ /// @param scale        factor applied to the other layer's parameters
+ /// @param layer_other  must be an Lstm; anything else raises KALDI_ERR
+ ///
+ /// Weight handling depends on the two layers' states:
+ ///   sparse += sparse   both compressed: stays compressed
+ ///   sparse +  dense    this layer becomes dense (compressed_state = false),
+ ///                      with dense = scale * other.dense + this.sparse
+ ///   dense  += sparse / dense += dense   stays dense
+ void Add(BaseFloat scale, const TrainableLayer & layer_other) {
+   KALDI_LOG << "Add()";
+   const Lstm *other = dynamic_cast<const Lstm*>(&layer_other);
+   // dynamic_cast yields a null pointer for a non-Lstm layer; fail loudly
+   // instead of dereferencing it.
+   if (other == NULL)
+     KALDI_ERR << "Add() expects another Lstm layer";
+   if (compressed_state) {
+     if (other->compressed_state) {
+       wei_gifo_x_sp_ += scale*other->wei_gifo_x_sp_;
+       wei_gifo_m_sp_ += scale*other->wei_gifo_m_sp_;
+     } else {
+       wei_gifo_x_dense_ = scale*other->wei_gifo_x_dense_;
+       wei_gifo_x_dense_ += wei_gifo_x_sp_;
+
+       wei_gifo_m_dense_ = scale*other->wei_gifo_m_dense_;
+       wei_gifo_m_dense_ += wei_gifo_m_sp_;
+
+       compressed_state = false;
+     }
+   } else {
+     if (other->compressed_state) {
+       wei_gifo_x_dense_ += scale*other->wei_gifo_x_sp_;
+       wei_gifo_m_dense_ += scale*other->wei_gifo_m_sp_;
+     } else {
+       wei_gifo_x_dense_ += scale*other->wei_gifo_x_dense_;
+       wei_gifo_m_dense_ += scale*other->wei_gifo_m_dense_;
+     }
+   }
+   bias_.AddVec(scale, other->bias_);
+   phole_i_c_.AddVec(scale, other->phole_i_c_);
+   phole_f_c_.AddVec(scale, other->phole_f_c_);
+   phole_o_c_.AddVec(scale, other->phole_o_c_);
+ }
+
+ /// Number of parameters reported by this layer: the sizes of the bias and
+ /// the three peephole vectors. The weight matrices currently contribute 0
+ /// (the code that counted them is disabled), which matches GetParams().
+ int32 NumParams() const {
+   const int32 weight_count = 0;  // x and m weight matrices are not counted
+   const int32 vector_count = bias_.Dim() +
+                              phole_i_c_.Dim() +
+                              phole_f_c_.Dim() +
+                              phole_o_c_.Dim();
+   return weight_count + vector_count;
+ }
+
+ /// Copies the parameters into one flat vector of NumParams() elements, in
+ /// the order bias, input peephole, forget peephole, output peephole.
+ /// The weight matrices are not exported.
+ void GetParams(Vector<BaseFloat>* wei_copy) const {
+   wei_copy->Resize(NumParams());
+
+   const CuVector<BaseFloat> *parts[] = { &bias_, &phole_i_c_, &phole_f_c_, &phole_o_c_ };
+   int32 offset = 0;
+   for (int k = 0; k < 4; k++) {
+     const int32 size = parts[k]->Dim();
+     wei_copy->Range(offset, size).CopyFromVec(*parts[k]);
+     offset += size;
+   }
+ }
+
+ //CuVector<BaseFloat> backup_buf_;
+//private:
+protected:
+ // true: the *_sp_ (sparse) weight matrices are the active ones;
+ // false: the *_dense_ matrices are. Checked by PropagateFnc, Update,
+ // Scale and Add.
+ bool compressed_state;
+
+ // number of memory cells; set from output_dim in the constructor
+ int32 cell_dim_;
+ // multiplier applied to opts_.learn_rate in Update()
+ BaseFloat learn_rate_coef_;
+ // gradient clipping bound; clipping is applied in Update() only when > 0
+ BaseFloat max_grad_;
+
+ // parameters of the forward layer
+ //CuMatrix<BaseFloat> wei_gifo_x_;
+ //CuMatrix<BaseFloat> wei_gifo_m_;
+ // bias of the four units/gates, 4 * cell_dim_ long
+ CuVector<BaseFloat> bias_;
+ // peephole weights for the input, forget and output gates
+ CuVector<BaseFloat> phole_i_c_;
+ CuVector<BaseFloat> phole_f_c_;
+ CuVector<BaseFloat> phole_o_c_;
+ // parameters for compression
+ // NOTE(review): in the visible code these are referenced only from
+ // functions inside "#if 0" blocks - confirm whether they are still needed.
+ CuArray<uint32> mask_x;
+ CuArray<uint32> mask_m;
+ // Assume 6bit quantization
+ CuVector<BaseFloat> alphas_x;
+ CuVector<BaseFloat> alphas_m;
+ CuArray<uint32> tmp_arr_m;
+ CuArray<uint32> tmp_arr_x;
+ std::vector<CuArray<uint32>> betas_x;
+ std::vector<CuArray<uint32>> betas_m;
+ // the corresponding parameter updates
+ CuMatrix<BaseFloat> wei_gifo_x_corr_;
+ CuMatrix<BaseFloat> wei_gifo_m_corr_;
+ CuVector<BaseFloat> bias_corr_;
+ CuVector<BaseFloat> phole_i_c_corr_;
+ CuVector<BaseFloat> phole_f_c_corr_;
+ CuVector<BaseFloat> phole_o_c_corr_;
+
+ // input-to-gate weights (4 * cell_dim_ x input_dim_), dense and sparse form
+ MatrixXd wei_gifo_x_dense_;
+ Eigen::SparseMatrix<double,RowMajor> wei_gifo_x_sp_;
+ // cell-output-to-gate weights (4 * cell_dim_ x cell_dim_), dense and sparse form
+ MatrixXd wei_gifo_m_dense_;
+ Eigen::SparseMatrix<double,RowMajor> wei_gifo_m_sp_;
+
+
+ // propagation buffer
+ CuMatrix<BaseFloat> propagate_buf_;
+
+ // back-propagation buffer
+ CuMatrix<BaseFloat> backpropagate_buf_;
+
+ // back up for online
+ // backup_buf_ holds the last frame's cell output (m) and backup_buf2_ the
+ // last frame's cell state (c); PropagateFnc restores both into row 0.
+ CuVector<BaseFloat> backup_buf_;
+ CuVector<BaseFloat> backup_buf2_;
+
+};
+} // namespace kaldi
+
+#endif