[CoreML][fbcode] Add the `preprocess` python APIs (#64521)
author Tao Xu <taox@fb.com>
Fri, 17 Sep 2021 07:19:36 +0000 (00:19 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 17 Sep 2021 07:25:14 +0000 (00:25 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/64521

Add the `preprocess` Python API for the Core ML delegate. Check out `example.py` for usage; a sketch of the expected flow is below.
ghstack-source-id: 138324214
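
A minimal sketch of the expected lowering flow (the torchvision model, the `torch._C._jit_to_backend` entry point, and the save call are illustrative assumptions here; `example.py` is the authoritative usage):

```
import torch
import torchvision

from torch.backends._coreml.preprocess import (
    CompileSpec,
    CoreMLComputeUnit,
    TensorSpec,
)

# Script the model, then lower it to the coreml backend. preprocess()
# runs during lowering and embeds the serialized MLModel in the module.
model = torchvision.models.mobilenet_v2(pretrained=True).eval()
scripted = torch.jit.script(model)

compile_spec = {
    "forward": CompileSpec(
        inputs=(TensorSpec(shape=[1, 3, 224, 224]),),
        outputs=(TensorSpec(shape=[1, 1000]),),
        backend=CoreMLComputeUnit.CPU,
        allow_low_precision=True,
    )
}
lowered = torch._C._jit_to_backend("coreml", scripted, compile_spec)
torch.jit.save(lowered, "mobilenetv2_coreml.pt")  # hypothetical output path
```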

Test Plan:
```
(base) [taox@devvm2780.vll0 ~/fbsource/fbcode/caffe2/fb]  buck run coreml:example -- --model="/home/taox/mobilenetv2/mobilenetv2.pt" --out="/home/taox/mobilenetv2/mobilenetv2_coreml.pt"
Parsing buck files: finished in 0.5 sec
Downloaded 0/1 artifacts, 0.00 bytes, 100.0% cache miss (for updated rules)
Building: finished in 10.6 sec (100%) 12611/57623 jobs, 1/57623 updated
  Total time: 11.1 sec
Converting Frontend ==> MIL Ops: 100%|██████████████████████████████████████████▉| 382/383 [00:00<00:00, 692.58 ops/s]
Running MIL optimization passes: 100%|███████████████████████████████████████████| 18/18 [00:00<00:00, 45.55 passes/s]
Translating MIL ==> MLModel Ops: 100%|███████████████████████████████████████████| 704/704 [00:01<00:00, 468.56 ops/s]
input {
  name: "input_0"
  type {
    multiArrayType {
      shape: 1
      shape: 3
      shape: 224
      shape: 224
      dataType: FLOAT32
    }
  }
}
output {
  name: "645"
  type {
    multiArrayType {
      dataType: FLOAT32
    }
  }
}
metadata {
  userDefined {
    key: "com.github.apple.coremltools.source"
    value: "torch==1.10.0a0+fb"
  }
  userDefined {
    key: "com.github.apple.coremltools.version"
    value: "4.1"
  }
}

{'inputs': '[["input_0", "0", "[1, 3, 224, 224]"]]', 'outputs': '[["645", "0", "[1, 1000]"]]', 'config': '{"spec_ver": "4", "backend": "cpu", "allow_low_precision": "True"}', 'metadata': '{"coremltool_ver": "4.1", "torch_ver": "torch==1.10.0a0+fb"}'}
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0826 13:27:12.690302 2477051 backend_detail.cpp:376] Warning: Backend [coreml] is not available. Execution of this Module is still possible by saving and loading on a device where the backend is available. (function codegen_backend_module)
graph(%self.1 : torch.jit.LoweredModule.coreml.__torch__.torchvision.models.mobilenetv2.MobileNetV2,
      %x.1 : Tensor):
  %51 : str = prim::Constant[value="Exception: Backend is not available."]()
  %50 : str = prim::Constant[value="AssertionError: "]()
  %14 : str = prim::Constant[value="forward"]() # <string>:5:62
  %48 : Tensor = prim::Uninitialized()
  %44 : Tensor = prim::Uninitialized()
  %typed_inputs.1 : Any[] = prim::ListConstruct(%x.1)
  %__backend.3 : __torch__.torch.classes.__backends__.coreml = prim::GetAttr[name="__backend"](%self.1)
  %8 : bool = prim::CallMethod[name="is_available"](%__backend.3) # <string>:4:19
  %49 : Tensor = prim::If(%8) # <string>:4:16
    block0():
      %__backend : __torch__.torch.classes.__backends__.coreml = prim::GetAttr[name="__backend"](%self.1)
      %__handles : Dict(str, Any) = prim::GetAttr[name="__handles"](%self.1)
      %15 : Any = aten::__getitem__(%__handles, %14) # <string>:5:47
      %17 : Any[] = prim::CallMethod[name="execute"](%__backend, %15, %typed_inputs.1) # <string>:5:24
      %18 : Any = prim::ListUnpack(%17)
      %20 : bool = prim::isinstance[types=[Tensor]](%18)
      %39 : Tensor = prim::If(%20) # <string>:6:18
        block0():
          %22 : Tensor = prim::unchecked_cast(%18)
          -> (%22)
        block1():
           = prim::RaiseException(%50) # <string>:6:18
          -> (%44)
      -> (%39)
    block1():
       = prim::RaiseException(%51) # <string>:9:18
      -> (%48)
  return (%49)

```

Reviewed By: raziel

Differential Revision: D30585154

fbshipit-source-id: 66c7d2e931be6eaa3c43a0ee131ea8046452449d

torch/backends/_coreml/__init__.py [new file with mode: 0644]
torch/backends/_coreml/preprocess.py [new file with mode: 0644]
torch/csrc/jit/backends/coreml/cpp/backend.cpp [new file with mode: 0644]
torch/csrc/jit/backends/coreml/cpp/preprocess.cpp [new file with mode: 0644]

diff --git a/torch/backends/_coreml/__init__.py b/torch/backends/_coreml/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/torch/backends/_coreml/preprocess.py b/torch/backends/_coreml/preprocess.py
new file mode 100644 (file)
index 0000000..7f27e60
--- /dev/null
@@ -0,0 +1,137 @@
+import hashlib
+import json
+from dataclasses import dataclass, astuple, field
+from typing import Dict, Tuple, List
+
+import coremltools as ct  # type: ignore[import]
+import torch
+from coremltools.converters.mil.input_types import TensorType  # type: ignore[import]
+from coremltools.converters.mil.mil import types  # type: ignore[import]
+
+CT_METADATA_VERSION = "com.github.apple.coremltools.version"
+CT_METADATA_SOURCE = "com.github.apple.coremltools.source"
+
+
+class ScalarType:
+    Float = 0
+    Double = 1
+    Int = 2
+    Long = 3
+    Undefined = 4
+
+# Supported Tensor types in coremltools:
+# https://github.com/apple/coremltools/blob/main/coremltools/converters/mil/frontend/torch/converter.py#L28
+torch_to_mil_types = {
+    ScalarType.Float: types.fp32,
+    ScalarType.Double: types.fp64,
+    ScalarType.Int: types.int32,
+    ScalarType.Long: types.int64,
+}
+
+
+class CoreMLComputeUnit:
+    CPU = "cpuOnly"
+    CPUAndGPU = "cpuAndGPU"
+    ALL = "all"
+
+
+@dataclass
+class _TensorSpec:
+    shape: List[int] = field(default_factory=list)
+    dtype: int = ScalarType.Float
+
+
+def TensorSpec(*args, **kwargs):
+    """
+    TensorSpec specifies a tensor's shape and dtype. The default dtype is float32.
+    Example:
+    ts = TensorSpec(
+        shape = [1, 3, 224, 224],
+        dtype = ScalarType.Float
+    )
+    """
+    return astuple(_TensorSpec(*args, **kwargs))
+
+
+@dataclass
+class _CompileSpec:
+    inputs: Tuple[_TensorSpec, ...] = ()
+    outputs: Tuple[_TensorSpec, ...] = ()
+    backend: str = CoreMLComputeUnit.CPU
+    allow_low_precision: bool = True
+
+
+def CompileSpec(*args, **kwargs):
+    """
+    CompileSpec specifies a method's input/output TensorSpecs and compilation options.
+    Example:
+    cs = CompileSpec(
+            inputs=(
+                TensorSpec(
+                    shape=[1, 3, 224, 224],
+                ),
+            ),
+            outputs=(
+                TensorSpec(
+                    shape=[1, 1000],
+                ),
+            ),
+            backend=CoreMLComputeUnit.CPU,
+            allow_low_precision=True,
+    )
+    """
+    return astuple(_CompileSpec(*args, **kwargs))
+
+
+def _convert_to_mil_type(spec: _TensorSpec, name: str):
+    ml_type = TensorType(shape=spec.shape, dtype=torch_to_mil_types[spec.dtype])
+    ml_type.name = name
+    return ml_type
+
+
+def preprocess(script_module: torch._C.ScriptObject, compile_spec: Dict[str, Tuple]):
+    spec = compile_spec["forward"]
+    forward_spec = _CompileSpec(*spec)
+    mil_inputs = []
+    inputs = []
+    for index, input_spec in enumerate(forward_spec.inputs):
+        input_spec = _TensorSpec(*input_spec)  # type: ignore[misc]
+        name = "input_" + str(index)
+        inputs.append([name, str(input_spec.dtype), str(input_spec.shape)])
+        ml_type = _convert_to_mil_type(input_spec, name)
+        mil_inputs.append(ml_type)
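+    # Wrap the raw ScriptObject so coremltools can consume it as a ScriptModule.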
+    model = torch.jit.RecursiveScriptModule._construct(script_module, lambda x: None)
+    mlmodel = ct.convert(model, inputs=mil_inputs)
+    spec = mlmodel.get_spec()
+    output_specs = forward_spec.outputs
+    assert len(spec.description.output) == len(output_specs)  # type: ignore[attr-defined]
+    outputs = []
+    for index, output_spec in enumerate(output_specs):
+        output_spec = _TensorSpec(*output_spec)  # type: ignore[misc]
+        name = spec.description.output[index].name  # type: ignore[attr-defined]
+        outputs.append([name, str(output_spec.dtype), str(output_spec.shape)])
+    mlmodel = ct.models.model.MLModel(spec)
+    config = {
+        "spec_ver": str(spec.specificationVersion),  # type: ignore[attr-defined]
+        "backend": forward_spec.backend,
+        "allow_low_precision": str(forward_spec.allow_low_precision),
+    }
+    metadata = {
+        "coremltool_ver": mlmodel.user_defined_metadata[CT_METADATA_VERSION],
+        "torch_ver": mlmodel.user_defined_metadata[CT_METADATA_SOURCE],
+    }
+    coreml_compile_spec = {
+        "inputs": inputs,
+        "outputs": outputs,
+        "config": config,
+        "metadata": metadata,
+    }
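+    # Serialize the protobuf spec; the raw bytes become the payload stored in the lowered module.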
+    model_bytes = spec.SerializeToString()  # type: ignore[attr-defined]
+
+    return {
+        "model": model_bytes,
+        "hash": hashlib.sha256(model_bytes).hexdigest(),
+        "extra": json.dumps(coreml_compile_spec),
+    }
diff --git a/torch/csrc/jit/backends/coreml/cpp/backend.cpp b/torch/csrc/jit/backends/coreml/cpp/backend.cpp
new file mode 100644 (file)
index 0000000..6b9a18a
--- /dev/null
@@ -0,0 +1,32 @@
+#include <torch/csrc/jit/backends/backend.h>
+#include <torch/script.h>
+
+namespace {
+
+class CoreMLBackend : public torch::jit::PyTorchBackendInterface {
+ public:
+  c10::impl::GenericDict compile(
+      c10::IValue processed,
+      c10::impl::GenericDict method_compile_spec) override {
+    TORCH_CHECK(false, "The CoreML backend is not supported on server side!");
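+    // Unreachable: the empty dict below only satisfies the return type.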
+    auto handles = c10::Dict<std::string, std::string>();
+    return c10::impl::toGenericDict(handles);
+  }
+
+  c10::impl::GenericList execute(
+      c10::IValue handle,
+      c10::impl::GenericList inputs) override {
+    TORCH_CHECK(false, "The CoreML backend is not supported on server side!");
+    c10::List<at::Tensor> output_list;
+    return c10::impl::toList(output_list);
+  }
+
+  bool is_available() override {
+    return false;
+  }
+};
+
+static auto cls = torch::jit::backend<CoreMLBackend>("coreml");
+
+} // namespace
diff --git a/torch/csrc/jit/backends/coreml/cpp/preprocess.cpp b/torch/csrc/jit/backends/coreml/cpp/preprocess.cpp
new file mode 100644 (file)
index 0000000..39d86b5
--- /dev/null
@@ -0,0 +1,35 @@
+// (c) Facebook, Inc. and its affiliates. Confidential and proprietary.
+#include <pybind11/pybind11.h>
+#include <torch/csrc/jit/backends/backend.h>
+#include <torch/csrc/jit/backends/backend_preprocess.h>
+#include <torch/csrc/jit/python/pybind_utils.h>
+#include <torch/script.h>
+
+namespace py = pybind11;
+
+namespace {
+
+c10::IValue preprocess(
+    const torch::jit::Module& mod,
+    const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec,
+    const torch::jit::BackendDebugHandleGenerator& generate_debug_handles) {
+  py::object pyModule =
+      py::module_::import("torch.backends._coreml.preprocess");
+  py::object pyMethod = pyModule.attr("preprocess");
+
+  py::dict modelDict =
+      pyMethod(mod, torch::jit::toPyObject(method_compile_spec));
+
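+  // Copy the returned py::dict into a typed c10::Dict so it can be returned as an IValue.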
+  c10::Dict<std::string, std::string> modelData;
+  for (auto item : modelDict) {
+    modelData.insert(
+        item.first.cast<std::string>(), item.second.cast<std::string>());
+  }
+  return modelData;
+}
+
+static auto pre_reg =
+    torch::jit::backend_preprocess_register("coreml", preprocess);
+
+} // namespace