)
from caffe2.python.modeling.net_modifier import NetModifier
-from caffe2.python.optimizer import get_param_device
+from caffe2.python.optimizer import get_param_device, Optimizer
from caffe2.python.regularizer import Regularizer, RegularizationBy
from caffe2.python.layers import layers
from caffe2.proto import caffe2_pb2
scope.CurrentNameScope(), param_name, ref_shape, shape)
)
+ def _validate_param_optim(self, param_name, optim):
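+        """Check that a shared parameter is created with a consistent optimizer.
+
+        If param_name is already registered in self.param_to_optim, the blob is
+        shared with a previously created parameter, and the optimizer requested
+        now must match the one recorded earlier; otherwise an AssertionError is
+        raised.
+        """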
+ # there are three possible values for optim:
+ # 1) None (which will use self._default_optimizer after this layer is instantiated)
+ # 2) self.NoOptim
+ # 3) an instance of Optimizer class such as AdagradOptimizer
+
+        # if param_name has not been registered yet, this parameter is not
+        # shared with any other parameter so far, so there is nothing to validate
+ if param_name not in self.param_to_optim:
+ return
+
+        logger.info("{} is shared with another parameter. "
+            "Validating that the same optimizer has been specified for both.".format(
+ param_name,
+ ))
+
+ ref_optim = self.param_to_optim[param_name]
+
+ if optim is None:
+ assert ref_optim == self._default_optimizer, (
+                "Optim for {} is None, which falls back to the default_optimizer. "
+                "However, the optimizer that has been specified for this shared parameter "
+                "is {}, which is different from the default_optimizer {}. "
+                "Please check the optimizers specified for parameters shared "
+                "with {} and the default_optimizer to ensure consistency.".format(
+ param_name, ref_optim, self._default_optimizer, param_name
+ )
+ )
+ elif optim == self.NoOptim:
+ assert ref_optim == self.NoOptim, (
+ "Optim for {} is NoOptim. However, the optimizer for the parameters "
+                "shared with {} is {}, which is different from NoOptim. "
+ "Please check the optimizer specified for other parameters in the "
+ "shared group to ensure consistency.".format(
+ param_name, param_name, ref_optim
+ )
+ )
+ elif isinstance(optim, Optimizer):
+ assert isinstance(ref_optim, Optimizer), (
+ "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {}, which is not an instance "
+                "of Optimizer. Please check the optimizer specified for other "
+                "parameters in the shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim
+ )
+ )
+
+            assert (
+                type(optim) is type(ref_optim) and
+                optim.attributes == ref_optim.attributes
+            ), (
+                "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {}. "
+                "That optimizer either does not have the same type as the current "
+                "optimizer ({} vs {}), or its attributes, such as the learning rate, "
+                "differ from those of the current optimizer ({} vs {}). "
+                "Please check the optimizer specified for other parameters in the "
+                "shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim, type(optim), type(ref_optim),
+                    optim.attributes, ref_optim.attributes
+                )
+            )
+ else:
+            raise ValueError(
+                "optim should be either None, NoOptim, or an instance of "
+                "Optimizer; got {}".format(optim)
+            )
+
def create_param(self, param_name, shape, initializer, optimizer=None,
ps_param=None, regularizer=None):
if isinstance(param_name, core.BlobReference):
self._validate_param_shape(param_name, shape)
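+        # when the blob already exists (parameter sharing), the shape and the
+        # optimizer requested here must agree with what was registered before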
+ self._validate_param_optim(param_name, optimizer)
+
self._param_to_shape[param_name] = shape
return param
from caffe2.python.modeling.parameter_sharing import (
ParameterSharing,
)
+from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer
from caffe2.python.layer_test_util import LayersTestCase
import six
sorted(op_outputs),
['global_scope/shared_fc/b', 'global_scope/shared_fc/w']
)
+
+ def test_layer_shared_parameter_optim_validator(self):
+        """
+        This test covers the _validate_param_optim function in the
+        layer_model_helper class.
+        """
+
+ output_dims = 2
+
+ adagrad_optim = AdagradOptimizer(
+ alpha=0.004,
+ epsilon=0.02,
+ )
+
+ self.model.default_optimizer = adagrad_optim
+
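+        # ParameterSharing({'scope_1': 'scope_0'}) resolves parameters created
+        # under scope_1 to scope_0, so the FC layers below share the same weight
+        # and bias blobs and must therefore agree on their optimizers.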
+ # the following covers the branch -- optim is None
+ with scope.NameScope('global_scope_0'):
+ with ParameterSharing({'scope_1': 'scope_0'}):
+ with scope.NameScope('scope_0'):
+ fc1_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=self.model.NoOptim,
+ )
+
+ with scope.NameScope('scope_1'), self.assertRaises(Exception):
+ fc2_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims
+ )
+
+ # the following covers the branch -- optim is NoOptim
+ with scope.NameScope('global_scope_1'):
+ with ParameterSharing({'scope_1': 'scope_0'}):
+ with scope.NameScope('scope_0'):
+ fc1_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=None,
+ )
+
+ with scope.NameScope('scope_1'), self.assertRaises(Exception):
+ fc2_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=self.model.NoOptim,
+ )
+
+ # the following covers the branch -- optim is an instance of Optimizer
+ adagrad_optim_2 = AdagradOptimizer(
+ alpha=0.005,
+ epsilon=0.02,
+ )
+
+ adam_optim = AdamOptimizer()
+
+ self.model.default_optimizer = adagrad_optim_2
+
+ with scope.NameScope('global_scope_2'):
+ with ParameterSharing({'scope_1': 'scope_0', 'scope_2': 'scope_0'}):
+ with scope.NameScope('scope_0'):
+ fc1_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=None, # it will use adagrad_optim_2
+ )
+
+ with scope.NameScope('scope_1'), self.assertRaises(Exception):
+ fc2_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=adagrad_optim,
+ )
+
+ with scope.NameScope('scope_2'), self.assertRaises(Exception):
+ fc2_output = self.model.FC(
+ self.model.input_feature_schema.float_features,
+ output_dims,
+ weight_optim=adam_optim,
+ )