add dropout during eval (#17549)
author Huan Gui <huangui@fb.com>
Fri, 1 Mar 2019 07:17:35 +0000 (23:17 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 1 Mar 2019 07:21:29 +0000 (23:21 -0800)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17549

Currently, Dropout is only enabled during training. This change adds a `dropout_for_eval` option (default: False) that keeps dropout enabled in eval as well.

This follows [1]. The functionality will be used for uncertainty estimation in the exploration project.

[1] Gal, Yarin, and Zoubin Ghahramani. "Dropout as a Bayesian approximation: Representing model uncertainty in deep learning." International Conference on Machine Learning. 2016.

Reviewed By: Wakeupbuddy

Differential Revision: D14216216

fbshipit-source-id: 87c8c9cc522a82df467b685805f0775c86923d8b

caffe2/python/layers/dropout.py
caffe2/python/layers_test.py

index f2a8873..a5d3f01 100644 (file)
@@ -16,6 +16,7 @@ class Dropout(ModelLayer):
             input_record,
             name='dropout',
             ratio=0.5,
+            dropout_for_eval=False,
             **kwargs):
 
         super(Dropout, self).__init__(model, name, input_record, **kwargs)
@@ -25,6 +26,7 @@ class Dropout(ModelLayer):
 
         self.output_schema = input_record.clone_schema()
         self.output_schema.set_value(self.get_next_blob_reference('output'))
+        self.dropout_for_eval = dropout_for_eval
 
         self.ratio = ratio
 
@@ -42,7 +44,7 @@ class Dropout(ModelLayer):
         self._add_ops(net, is_test=False)
 
     def add_eval_ops(self, net):
-        self._add_ops(net, is_test=True)
+        self._add_ops(net, is_test=(not self.dropout_for_eval))
 
     def add_ops(self, net):
         self.add_eval_ops(net)
index 813eebd..d01b267 100644 (file)
@@ -1376,11 +1376,15 @@ class TestLayers(LayersTestCase):
 
     @given(
         X=hu.arrays(dims=[5, 5]),  # Shape of X is irrelevant
+        dropout_for_eval=st.booleans(),
     )
-    def testDropout(self, X):
+    def testDropout(self, X, dropout_for_eval):
         input_record = self.new_record(schema.Scalar((np.float32, (1,))))
         schema.FeedRecord(input_record, [X])
-        d_output = self.model.Dropout(input_record)
+        d_output = self.model.Dropout(
+            input_record,
+            dropout_for_eval=dropout_for_eval
+        )
         self.assertEqual(schema.Scalar((np.float32, (1,))), d_output)
         self.model.output_schema = schema.Struct()
 
@@ -1389,14 +1393,14 @@ class TestLayers(LayersTestCase):
         input_blob = input_record.field_blobs()[0]
         output_blob = d_output.field_blobs()[0]
 
-        train_d_spec = OpSpec(
+        with_d_spec = OpSpec(
             "Dropout",
             [input_blob],
             [output_blob, None],
             {'is_test': 0, 'ratio': 0.5}
         )
 
-        test_d_spec = OpSpec(
+        without_d_spec = OpSpec(
             "Dropout",
             [input_blob],
             [output_blob, None],
@@ -1405,22 +1409,30 @@ class TestLayers(LayersTestCase):
 
         self.assertNetContainOps(
             train_net,
-            [train_d_spec]
+            [with_d_spec]
         )
 
         eval_net = self.get_eval_net()
-
-        self.assertNetContainOps(
-            eval_net,
-            [test_d_spec]
-        )
-
         predict_net = self.get_predict_net()
 
-        self.assertNetContainOps(
-            predict_net,
-            [test_d_spec]
-        )
+        if dropout_for_eval:
+            self.assertNetContainOps(
+                eval_net,
+                [with_d_spec]
+            )
+            self.assertNetContainOps(
+                predict_net,
+                [with_d_spec]
+            )
+        else:
+            self.assertNetContainOps(
+                eval_net,
+                [without_d_spec]
+            )
+            self.assertNetContainOps(
+                predict_net,
+                [without_d_spec]
+            )
 
         workspace.RunNetOnce(train_init_net)
         workspace.RunNetOnce(train_net)