From c3e3c5cc39165470ddab5afb6373399fdbd6598e Mon Sep 17 00:00:00 2001 From: Min Ni Date: Wed, 27 Mar 2019 11:14:32 -0700 Subject: [PATCH] Skip tests if C2/ONNX models cannot be read (#18494) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/18494 Today some C2 end-to-end tests require reading model data from external filesystems (for example, Gluster and AWS). This can be a source of flaky tests when the external filesystems are not reachable during the tests. In this diff, we add try/except logic around the places where we download models and open model files from external systems. If such an attempt fails, we catch the exception and let unittest skip the current test instead of failing it. I also refactor the code a little bit by removing some duplicated logic for downloading and building the C2 model data. It had been duplicated in two classes and a few functions... Reviewed By: yinghai Differential Revision: D14442241 fbshipit-source-id: da8bf56c8d096efa34ca2070de5cd10a18aad70c --- caffe2/python/models/download.py | 28 +++++++++-------- caffe2/python/onnx/test_onnxifi.py | 54 +++------------------------------ caffe2/python/onnx/tests/c2_ref_test.py | 44 +++++++++++---------------- caffe2/python/onnx/tests/test_utils.py | 26 ---------------- caffe2/python/trt/test_trt.py | 41 ++++++------------------- 5 files changed, 46 insertions(+), 147 deletions(-) diff --git a/caffe2/python/models/download.py b/caffe2/python/models/download.py index 5b38efa..37dfd52 100644 --- a/caffe2/python/models/download.py +++ b/caffe2/python/models/download.py @@ -147,37 +147,39 @@ def validModelName(name): return True class ModelDownloader: + + def __init__(self, model_env_name='CAFFE2_MODELS'): + self.model_env_name = model_env_name + def _model_dir(self, model): caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2')) - models_dir = os.getenv('CAFFE2_MODELS', os.path.join(caffe2_home, 'models')) + models_dir = os.getenv(self.model_env_name, 
os.path.join(caffe2_home, 'models')) return os.path.join(models_dir, model) def _download(self, model): model_dir = self._model_dir(model) assert not os.path.exists(model_dir) os.makedirs(model_dir) + for f in ['predict_net.pb', 'init_net.pb', 'value_info.json']: url = getURLFromName(model, f) dest = os.path.join(model_dir, f) try: - try: - downloadFromURLToFile(url, dest, - show_progress=False) - except TypeError: - # show_progress not supported prior to - # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1 - # (Sep 17, 2017) - downloadFromURLToFile(url, dest) - except Exception as e: - print("Abort: {reason}".format(reason=e)) - print("Cleaning up...") + downloadFromURLToFile(url, dest, show_progress=False) + except TypeError: + # show_progress not supported prior to + # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1 + # (Sep 17, 2017) + downloadFromURLToFile(url, dest) + except Exception: deleteDirectory(model_dir) - exit(1) + raise def get_c2_model(self, model_name): model_dir = self._model_dir(model_name) if not os.path.exists(model_dir): self._download(model_name) + c2_predict_pb = os.path.join(model_dir, 'predict_net.pb') c2_predict_net = caffe2_pb2.NetDef() with open(c2_predict_pb, 'rb') as f: diff --git a/caffe2/python/onnx/test_onnxifi.py b/caffe2/python/onnx/test_onnxifi.py index 5e96d64..a859b57 100644 --- a/caffe2/python/onnx/test_onnxifi.py +++ b/caffe2/python/onnx/test_onnxifi.py @@ -15,7 +15,7 @@ from onnx.backend.base import namedtupledict from onnx.helper import make_node, make_graph, make_tensor, make_tensor_value_info, make_model from caffe2.proto import caffe2_pb2 from caffe2.python import core, workspace -from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory +from caffe2.python.models.download import ModelDownloader from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net from caffe2.python.onnx.tests.test_utils import TestCase @@ -109,54 +109,8 @@ class OnnxifiTest(TestCase): class 
OnnxifiTransformTest(TestCase): - def _model_dir(self, model): - caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2')) - models_dir = os.getenv('CAFFE2_MODELS', os.path.join(caffe2_home, 'models')) - return os.path.join(models_dir, model) - - def _download(self, model): - model_dir = self._model_dir(model) - assert not os.path.exists(model_dir) - os.makedirs(model_dir) - for f in ['predict_net.pb', 'init_net.pb', 'value_info.json']: - url = getURLFromName(model, f) - dest = os.path.join(model_dir, f) - try: - try: - downloadFromURLToFile(url, dest, - show_progress=False) - except TypeError: - # show_progress not supported prior to - # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1 - # (Sep 17, 2017) - downloadFromURLToFile(url, dest) - except Exception as e: - print("Abort: {reason}".format(reason=e)) - print("Cleaning up...") - deleteDirectory(model_dir) - exit(1) - - # TODO: we need to modulize this function - def _get_c2_model(self, model_name): - model_dir = self._model_dir(model_name) - if not os.path.exists(model_dir): - self._download(model_name) - c2_predict_pb = os.path.join(model_dir, 'predict_net.pb') - c2_predict_net = caffe2_pb2.NetDef() - with open(c2_predict_pb, 'rb') as f: - c2_predict_net.ParseFromString(f.read()) - c2_predict_net.name = model_name - - c2_init_pb = os.path.join(model_dir, 'init_net.pb') - c2_init_net = caffe2_pb2.NetDef() - with open(c2_init_pb, 'rb') as f: - c2_init_net.ParseFromString(f.read()) - c2_init_net.name = model_name + '_init' - - with open(os.path.join(model_dir, 'value_info.json')) as f: - value_info = json.load(f) - - return c2_init_net, c2_predict_net, value_info + def setUp(self): + self.model_downloader = ModelDownloader() def _add_head_tail(self, pred_net, new_head, new_tail): orig_head = pred_net.external_input[0] @@ -187,7 +141,7 @@ class OnnxifiTransformTest(TestCase): N = 1 repeat = 1 print("Batch size: {}, repeat inference {} times".format(N, repeat)) - init_net, pred_net, _ = 
self._get_c2_model('resnet50') + init_net, pred_net, _ = self.model_downloader.get_c2_model('resnet50') self._add_head_tail(pred_net, 'real_data', 'real_softmax') input_blob_dims = (N, 3, 224, 224) input_name = "real_data" diff --git a/caffe2/python/onnx/tests/c2_ref_test.py b/caffe2/python/onnx/tests/c2_ref_test.py index fdab720..40cb5a3 100644 --- a/caffe2/python/onnx/tests/c2_ref_test.py +++ b/caffe2/python/onnx/tests/c2_ref_test.py @@ -23,14 +23,14 @@ import caffe2.python.onnx.frontend as c2_onnx import caffe2.python.onnx.backend as c2 import numpy as np -from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory +from caffe2.python.models.download import ModelDownloader -from caffe2.python.onnx.tests.test_utils import DownloadingTestCase +from caffe2.python.onnx.tests.test_utils import TestCase import caffe2.python._import_c_extension as C -class TestCaffe2Basic(DownloadingTestCase): +class TestCaffe2Basic(TestCase): def test_dummy_name(self): g = C.DummyName() n1 = g.new_dummy_name() @@ -726,43 +726,33 @@ class TestCaffe2Basic(DownloadingTestCase): self.assertSameOutputs(c2_outputs, onnx_outputs) -class TestCaffe2End2End(DownloadingTestCase): - def _model_dir(self, model): - caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2')) - models_dir = os.getenv('ONNX_MODELS', os.path.join(caffe2_home, 'models')) - return os.path.join(models_dir, model) +class TestCaffe2End2End(TestCase): + def setUp(self): + self.model_downloader = ModelDownloader('ONNX_MODELS') def _test_net(self, net_name, input_blob_dims=(1, 3, 224, 224), decimal=7): np.random.seed(seed=0) - model_dir = self._model_dir(net_name) - if not os.path.exists(model_dir): - self._download(net_name) - c2_predict_pb = os.path.join(model_dir, 'predict_net.pb') - c2_predict_net = caffe2_pb2.NetDef() - with open(c2_predict_pb, 'rb') as f: - c2_predict_net.ParseFromString(f.read()) - c2_predict_net.name = net_name - - c2_init_pb = os.path.join(model_dir, 
'init_net.pb') - c2_init_net = caffe2_pb2.NetDef() - with open(c2_init_pb, 'rb') as f: - c2_init_net.ParseFromString(f.read()) - c2_init_net.name = net_name + '_init' + try: + c2_init_net, c2_predict_net, value_info = self.model_downloader.get_c2_model(net_name) + except (OSError, IOError) as e: + # catch IOError/OSError that is caused by FileNotFoundError and PermissionError + self.skipTest(str(e)) + # start to run the model and compare outputs n, c, h, w = input_blob_dims data = np.random.randn(n, c, h, w).astype(np.float32) inputs = [data] _, c2_outputs = c2_native_run_net(c2_init_net, c2_predict_net, inputs) del _ - with open(os.path.join(model_dir, 'value_info.json'), 'r') as value_info_conf: - model = c2_onnx.caffe2_net_to_onnx_model( - predict_net=c2_predict_net, - init_net=c2_init_net, - value_info=json.load(value_info_conf)) + model = c2_onnx.caffe2_net_to_onnx_model( + predict_net=c2_predict_net, + init_net=c2_init_net, + value_info=value_info, + ) c2_ir = c2.prepare(model) onnx_outputs = c2_ir.run(inputs) self.assertSameOutputs(c2_outputs, onnx_outputs, decimal=decimal) diff --git a/caffe2/python/onnx/tests/test_utils.py b/caffe2/python/onnx/tests/test_utils.py index aba500c..1fec938 100644 --- a/caffe2/python/onnx/tests/test_utils.py +++ b/caffe2/python/onnx/tests/test_utils.py @@ -10,8 +10,6 @@ import os import unittest import numpy as np -from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory - class TestCase(unittest.TestCase): @@ -31,27 +29,3 @@ class TestCase(unittest.TestCase): raise ValueError('Duplicated test name: {}'.format(name)) setattr(self, name, test_func) - -class DownloadingTestCase(TestCase): - - def _download(self, model): - model_dir = self._model_dir(model) - assert not os.path.exists(model_dir) - os.makedirs(model_dir) - for f in ['predict_net.pb', 'init_net.pb', 'value_info.json']: - url = getURLFromName(model, f) - dest = os.path.join(model_dir, f) - try: - try: - 
downloadFromURLToFile(url, dest, - show_progress=False) - except TypeError: - # show_progress not supported prior to - # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1 - # (Sep 17, 2017) - downloadFromURLToFile(url, dest) - except Exception as e: - print("Abort: {reason}".format(reason=e)) - print("Cleaning up...") - deleteDirectory(model_dir) - raise AssertionError("Test model downloading failed") diff --git a/caffe2/python/trt/test_trt.py b/caffe2/python/trt/test_trt.py index aa88967..e95cb4b 100644 --- a/caffe2/python/trt/test_trt.py +++ b/caffe2/python/trt/test_trt.py @@ -9,11 +9,11 @@ import onnx import onnx.defs from onnx.helper import make_node, make_graph, make_tensor, make_tensor_value_info, make_model from onnx.backend.base import namedtupledict -from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory +from caffe2.python.models.download import ModelDownloader import caffe2.python.onnx.backend as c2 from caffe2.python.onnx.workspace import Workspace from caffe2.python.trt.transform import convert_onnx_model_to_trt_op, transform_caffe2_net -from caffe2.python.onnx.tests.test_utils import TestCase, DownloadingTestCase +from caffe2.python.onnx.tests.test_utils import TestCase import numpy as np import os.path import json @@ -80,6 +80,9 @@ def _download_onnx_model(model_name, opset_version): return model_dir class TensorRTOpTest(TestCase): + def setUp(self): + self.opset_version = onnx.defs.onnx_opset_version() + def _test_relu_graph(self, X, batch_size, trt_max_batch_size): node_def = make_node("Relu", ["X"], ["Y"]) Y_c2 = c2.run_node(node_def, {"X": X}) @@ -114,8 +117,7 @@ class TensorRTOpTest(TestCase): X = np.random.randn(52, 1, 3, 2).astype(np.float32) self._test_relu_graph(X, 52, 50) - def _test_onnx_importer(self, model_name, data_input_index, - opset_version = onnx.defs.onnx_opset_version()): + def _test_onnx_importer(self, model_name, data_input_index, opset_version=onnx.defs.onnx_opset_version()): model_dir = 
_download_onnx_model(model_name, opset_version) model_def = onnx.load(os.path.join(model_dir, 'model.onnx')) input_blob_dims = [int(x.dim_value) for x in model_def.graph.input[data_input_index].type.tensor_type.shape.dim] @@ -176,31 +178,9 @@ class TensorRTOpTest(TestCase): self._test_onnx_importer('vgg19', -2, 9) -class TensorRTTransformTest(DownloadingTestCase): - def _model_dir(self, model): - caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2')) - models_dir = os.getenv('CAFFE2_MODELS', os.path.join(caffe2_home, 'models')) - return os.path.join(models_dir, model) - - def _get_c2_model(self, model_name): - model_dir = self._model_dir(model_name) - if not os.path.exists(model_dir): - self._download(model_name) - c2_predict_pb = os.path.join(model_dir, 'predict_net.pb') - c2_predict_net = caffe2_pb2.NetDef() - with open(c2_predict_pb, 'rb') as f: - c2_predict_net.ParseFromString(f.read()) - c2_predict_net.name = model_name - - c2_init_pb = os.path.join(model_dir, 'init_net.pb') - c2_init_net = caffe2_pb2.NetDef() - with open(c2_init_pb, 'rb') as f: - c2_init_net.ParseFromString(f.read()) - c2_init_net.name = model_name + '_init' - - with open(os.path.join(model_dir, 'value_info.json')) as f: - value_info = json.load(f) - return c2_init_net, c2_predict_net, value_info +class TensorRTTransformTest(TestCase): + def setUp(self): + self.model_downloader = ModelDownloader() def _add_head_tail(self, pred_net, new_head, new_tail): orig_head = pred_net.external_input[0] @@ -226,14 +206,13 @@ class TensorRTTransformTest(DownloadingTestCase): pred_net.op.extend([tail]) pred_net.external_output[0] = new_tail - @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support") def test_resnet50_core(self): N = 2 warmup = 20 repeat = 100 print("Batch size: {}, repeat inference {} times, warmup {} times".format(N, repeat, warmup)) - init_net, pred_net, _ = self._get_c2_model('resnet50') + init_net, pred_net, _ = self.model_downloader.get_c2_model('resnet50') 
self._add_head_tail(pred_net, 'real_data', 'real_softmax') input_blob_dims = (N, 3, 224, 224) input_name = "real_data" -- 2.7.4