TestGradientBasedSolver: replace dummy data with hdf5
author Evan Shelhamer <shelhamer@imaginarynumber.net>
Sat, 8 Aug 2015 20:07:13 +0000 (13:07 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
Sat, 8 Aug 2015 21:23:31 +0000 (14:23 -0700)
Rely on fixed hdf5 data for determinism of solver tests.

- draw random Gaussian data and targets for test and save to hdf5
- use the same data for all tests, dropping the constant / gaussian switch
  previously needed for the accumulation test
- avoid test artifacts due to order of random draws in dummy data

src/caffe/test/test_data/generate_sample_data.py
src/caffe/test/test_data/solver_data.h5 [new file with mode: 0644]
src/caffe/test/test_data/solver_data_list.txt [new file with mode: 0644]
src/caffe/test/test_gradient_based_solver.cpp

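diff --git a/src/caffe/test/test_data/generate_sample_data.py b/src/caffe/test/test_data/generate_sample_data.py
index ab55726..3703b41 100644 (file)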
@@ -1,5 +1,5 @@
 """
-Generate data used in the HDF5DataLayer test.
+Generate data used in the HDF5DataLayer and GradientBasedSolver tests.
 """
 import os
 import numpy as np
@@ -7,6 +7,8 @@ import h5py
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 
+# Generate HDF5DataLayer sample_data.h5
+
 num_cols = 8
 num_rows = 10
 height = 6
@@ -51,3 +53,27 @@ with h5py.File(script_dir + '/sample_data_2_gzip.h5', 'w') as f:
 with open(script_dir + '/sample_data_list.txt', 'w') as f:
     f.write(script_dir + '/sample_data.h5\n')
     f.write(script_dir + '/sample_data_2_gzip.h5\n')
+
+# Generate GradientBasedSolver solver_data.h5
+
+num_cols = 3
+num_rows = 8
+height = 10
+width = 10
+
+data = np.random.randn(num_rows, num_cols, height, width)
+data = data.reshape(num_rows, num_cols, height, width)
+data = data.astype('float32')
+
+targets = np.random.randn(num_rows, 1)
+targets = targets.astype('float32')
+
+print data
+print targets
+
+with h5py.File(script_dir + '/solver_data.h5', 'w') as f:
+    f['data'] = data
+    f['targets'] = targets
+
+with open(script_dir + '/solver_data_list.txt', 'w') as f:
+    f.write(script_dir + '/solver_data.h5\n')
diff --git a/src/caffe/test/test_data/solver_data.h5 b/src/caffe/test/test_data/solver_data.h5
new file mode 100644 (file)
index 0000000..7ee05ea
Binary files /dev/null and b/src/caffe/test/test_data/solver_data.h5 differ
diff --git a/src/caffe/test/test_data/solver_data_list.txt b/src/caffe/test/test_data/solver_data_list.txt
new file mode 100644 (file)
index 0000000..a6552f5
--- /dev/null
@@ -0,0 +1 @@
+src/caffe/test/test_data/solver_data.h5
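diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp
index 30b041f..a8b211b 100644 (file)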
@@ -25,15 +25,26 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
  protected:
   GradientBasedSolverTest() :
       seed_(1701), num_(4), channels_(3), height_(10), width_(10),
-      constant_data_(false), share_(false) {}
+      share_(false) {
+        input_file_ = new string(
+        CMAKE_SOURCE_DIR "caffe/test/test_data/solver_data_list.txt" CMAKE_EXT);
+      }
+  ~GradientBasedSolverTest() {
+    delete input_file_;
+  }
 
   string snapshot_prefix_;
   shared_ptr<SGDSolver<Dtype> > solver_;
   int seed_;
+  // Dimensions are determined by generate_sample_data.py
+  // TODO this is brittle and the hdf5 file should be checked instead.
   int num_, channels_, height_, width_;
-  bool constant_data_, share_;
+  bool share_;
   Dtype delta_;  // Stability constant for AdaGrad.
 
+  // Test data: check out generate_sample_data.py in the same directory.
+  string* input_file_;
+
   virtual SolverParameter_SolverType solver_type() = 0;
   virtual void InitSolver(const SolverParameter& param) = 0;
 
@@ -71,25 +82,10 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
        "  name: 'TestNetwork' "
        "  layer { "
        "    name: 'data' "
-       "    type: 'DummyData' "
-       "    dummy_data_param { "
-       "      num: " << num_ / iter_size << " "
-       "      channels: " << channels_ << " "
-       "      height: " << height_ << " "
-       "      width: " << width_ << " "
-       "      channels: 1 "
-       "      height: 1 "
-       "      width: 1 "
-       "      data_filler { "
-       "        type: '" << string(constant_data_ ? "constant" : "gaussian")
-                         << "' "
-       "        std: 1.0 "
-       "        value: 1.0 "
-       "      } "
-       "      data_filler { "
-       "        type: 'gaussian' "
-       "        std: 1.0 "
-       "      } "
+       "    type: 'HDF5Data' "
+       "    hdf5_data_param { "
+       "      source: '" << *(this->input_file_) << "' "
+       "      batch_size: " << num_ / iter_size << " "
        "    } "
        "    top: 'data' "
        "    top: 'targets' "
@@ -354,7 +350,6 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
       const Dtype kMomentum, const int kNumIters, const int kIterSize) {
     const double kPrecision = 1e-2;
     const double kMinPrecision = 1e-7;
-    constant_data_ = true;
     // Solve without accumulation and save parameters.
     this->RunLeastSquaresSolver(kLearningRate, kWeightDecay, kMomentum,
         kNumIters);