From c48e1679f9fe758abe8e3dd126ae87d4daef3af0 Mon Sep 17 00:00:00 2001
From: Jiyan Yang
Date: Wed, 17 Apr 2019 21:07:42 -0700
Subject: [PATCH] Add validator for optimizers when parameters are shared

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18497

Reviewed By: kennyhorror

Differential Revision: D14614738

fbshipit-source-id: beddd8349827dcc8ccae36f21e5d29627056afcd
---
 caffe2/python/layer_model_helper.py           | 64 ++++++++++++++++++++-
 caffe2/python/layer_parameter_sharing_test.py | 82 +++++++++++++++++++++++++++
 caffe2/python/optimizer.py                    |  8 +++
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/caffe2/python/layer_model_helper.py b/caffe2/python/layer_model_helper.py
index e4da1f3..3807877 100644
--- a/caffe2/python/layer_model_helper.py
+++ b/caffe2/python/layer_model_helper.py
@@ -14,7 +14,7 @@ from caffe2.python.modeling.parameter_sharing import (
 )
 from caffe2.python.modeling.net_modifier import NetModifier
 
-from caffe2.python.optimizer import get_param_device
+from caffe2.python.optimizer import get_param_device, Optimizer
 from caffe2.python.regularizer import Regularizer, RegularizationBy
 from caffe2.python.layers import layers
 from caffe2.proto import caffe2_pb2
@@ -228,6 +228,66 @@ class LayerModelHelper(model_helper.ModelHelper):
                     scope.CurrentNameScope(), param_name, ref_shape, shape)
             )
 
+    def _validate_param_optim(self, param_name, optim):
+        # there are three possible values for optim:
+        # 1) None (which will use self._default_optimizer after this layer is instantiated)
+        # 2) self.NoOptim
+        # 3) an instance of the Optimizer class, such as AdagradOptimizer
+
+        # the parameter is not shared with any other parameter so far, so there is nothing to validate
+        if param_name not in self.param_to_optim:
+            return
+
+        logger.info("{} is shared with at least one other parameter. "
+                    "Validating that the same optimizer has been specified for all of them.".format(
+                        param_name,
+                    ))
+
+        ref_optim = self.param_to_optim[param_name]
+
+        if optim is None:
+            assert ref_optim == self._default_optimizer, (
+                "Optim for {} is None, which will fall back to the default_optimizer. "
+                "However, the optimizer that has been specified for this shared parameter "
+                "is {}, which is different from the default_optimizer {}. "
+                "Please check the optimizers specified for the parameters shared "
+                "with {} and the default_optimizer to ensure consistency.".format(
+                    param_name, ref_optim, self._default_optimizer, param_name
+                )
+            )
+        elif optim == self.NoOptim:
+            assert ref_optim == self.NoOptim, (
+                "Optim for {} is NoOptim. However, the optimizer for the parameters "
+                "shared with {} is {}, which is different from NoOptim. "
+                "Please check the optimizer specified for the other parameters in the "
+                "shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim
+                )
+            )
+        elif isinstance(optim, Optimizer):
+            assert isinstance(ref_optim, Optimizer), (
+                "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {}, which is not an instance "
+                "of Optimizer. Please check the optimizer specified for the other "
+                "parameters in the shared group to ensure consistency.".format(
+                    param_name, param_name, ref_optim
+                )
+            )
+
+            assert type(optim) is type(ref_optim) and optim.attributes == ref_optim.attributes, (
+                "Optim for {} is an instance of Optimizer. However, the optimizer "
+                "for the parameters shared with {} is {}. "
" + "This optimizer either doesn't have the same type as the current optimizer: " + "{} vs {}, or its attributes such as learning rate are different from " + "that of current optimizer which is {} vs {}. " + "Please check the optimizer specified for other parameters in the " + "shared group to ensure consistency.".format( + param_name, param_name, ref_optim, type(optim), type(ref_optim), optim.attributes, ref_optim.attributes + ) + ) + else: + raise ValueError("optim should be either None, NoOptim, or an instance of Optimizer, Got {} ".format(optim)) + def create_param(self, param_name, shape, initializer, optimizer=None, ps_param=None, regularizer=None): if isinstance(param_name, core.BlobReference): @@ -270,6 +330,8 @@ class LayerModelHelper(model_helper.ModelHelper): self._validate_param_shape(param_name, shape) + self._validate_param_optim(param_name, optimizer) + self._param_to_shape[param_name] = shape return param diff --git a/caffe2/python/layer_parameter_sharing_test.py b/caffe2/python/layer_parameter_sharing_test.py index 65c583c..5d87dbd 100644 --- a/caffe2/python/layer_parameter_sharing_test.py +++ b/caffe2/python/layer_parameter_sharing_test.py @@ -7,6 +7,7 @@ from caffe2.python import core, scope from caffe2.python.modeling.parameter_sharing import ( ParameterSharing, ) +from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer from caffe2.python.layer_test_util import LayersTestCase import six @@ -149,3 +150,84 @@ class ParameterSharingTest(LayersTestCase): sorted(op_outputs), ['global_scope/shared_fc/b', 'global_scope/shared_fc/w'] ) + + def test_layer_shared_parameter_optim_validator(self): + """ + This test is to cover the _validate_param_optim function in + layer_model_helper class. + """ + + output_dims = 2 + + adagrad_optim = AdagradOptimizer( + alpha=0.004, + epsilon=0.02, + ) + + self.model.default_optimizer = adagrad_optim + + # the following covers the branch -- optim is None + with scope.NameScope('global_scope_0'): + with ParameterSharing({'scope_1': 'scope_0'}): + with scope.NameScope('scope_0'): + fc1_output = self.model.FC( + self.model.input_feature_schema.float_features, + output_dims, + weight_optim=self.model.NoOptim, + ) + + with scope.NameScope('scope_1'), self.assertRaises(Exception): + fc2_output = self.model.FC( + self.model.input_feature_schema.float_features, + output_dims + ) + + # the following covers the branch -- optim is NoOptim + with scope.NameScope('global_scope_1'): + with ParameterSharing({'scope_1': 'scope_0'}): + with scope.NameScope('scope_0'): + fc1_output = self.model.FC( + self.model.input_feature_schema.float_features, + output_dims, + weight_optim=None, + ) + + with scope.NameScope('scope_1'), self.assertRaises(Exception): + fc2_output = self.model.FC( + self.model.input_feature_schema.float_features, + output_dims, + weight_optim=self.model.NoOptim, + ) + + # the following covers the branch -- optim is an instance of Optimizer + adagrad_optim_2 = AdagradOptimizer( + alpha=0.005, + epsilon=0.02, + ) + + adam_optim = AdamOptimizer() + + self.model.default_optimizer = adagrad_optim_2 + + with scope.NameScope('global_scope_2'): + with ParameterSharing({'scope_1': 'scope_0', 'scope_2': 'scope_0'}): + with scope.NameScope('scope_0'): + fc1_output = self.model.FC( + self.model.input_feature_schema.float_features, + output_dims, + weight_optim=None, # it will use adagrad_optim_2 + ) + + with scope.NameScope('scope_1'), self.assertRaises(Exception): + fc2_output = self.model.FC( + self.model.input_feature_schema.float_features, + 
+                        output_dims,
+                        weight_optim=adagrad_optim,
+                    )
+
+                with scope.NameScope('scope_2'), self.assertRaises(Exception):
+                    fc2_output = self.model.FC(
+                        self.model.input_feature_schema.float_features,
+                        output_dims,
+                        weight_optim=adam_optim,
+                    )
diff --git a/caffe2/python/optimizer.py b/caffe2/python/optimizer.py
index 0aa0201..8a7540f 100644
--- a/caffe2/python/optimizer.py
+++ b/caffe2/python/optimizer.py
@@ -9,6 +9,7 @@ from collections import namedtuple, defaultdict
 from past.builtins import basestring
 
 import logging
+import copy
 
 import numpy as np
 
@@ -72,6 +73,13 @@ class Optimizer(object):
             classname, self._instance_num, base_str, node_name, gpu_id,
         )
 
+    @property
+    def attributes(self):
+        # return a dict that contains attributes related to init args only
+        attr = copy.deepcopy(self.__dict__)
+        del attr['_instance_num']
+        return attr
+
     def make_unique_blob_name(self, base_str):
         """
         Returns a blob name that will be unique to the current device
-- 
2.7.4
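
Illustration (not part of the patch): a minimal sketch of the comparison that _validate_param_optim performs through the new Optimizer.attributes property. It assumes caffe2 is importable and uses only the optimizers exercised in the test above; the variable names are made up for the example.

from caffe2.python.optimizer import AdagradOptimizer, AdamOptimizer

opt_a = AdagradOptimizer(alpha=0.004, epsilon=0.02)
opt_b = AdagradOptimizer(alpha=0.004, epsilon=0.02)  # same init args, new instance
opt_c = AdagradOptimizer(alpha=0.005, epsilon=0.02)  # different learning rate

# _instance_num differs between opt_a and opt_b, but it is excluded from
# `attributes`, so optimizers built with identical init args compare equal.
assert type(opt_a) is type(opt_b) and opt_a.attributes == opt_b.attributes

# A different learning rate (or a different optimizer class) fails the check,
# which is what the validator asserts for a shared parameter.
assert opt_a.attributes != opt_c.attributes
assert type(opt_a) is not type(AdamOptimizer())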