endif(MSVC)
# add source group
-FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "nnvm/src/*.cc")
-FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h"
- "nnvm/src/*.h" "nnvm/include/*.h")
+FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc")
+FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h")
assign_source_group("Source" ${GROUP_SOURCE})
assign_source_group("Include" ${GROUP_INCLUDE})
list(APPEND COMPILER_SRCS ${COMPILER_VERILOG_SRCS})
endif()
-file(GLOB_RECURSE NNVM_COMPILER_SRCS
- nnvm/src/c_api/*.cc
- nnvm/src/core/*.cc
- nnvm/src/pass/*.cc
- nnvm/src/compiler/*.cc
- nnvm/src/top/*.cc
- )
file(GLOB TOPI_SRCS
topi/src/*.cc
add_dependencies(tvm_runtime sgx_edl tvm_t)
install(TARGETS tvm_t ARCHIVE DESTINATION lib${LIB_SUFFIX})
endif()
-add_library(nnvm_compiler SHARED ${NNVM_COMPILER_SRCS})
if(USE_THREADS)
message(STATUS "Build with thread support...")
target_link_libraries(tvm Threads::Threads)
target_link_libraries(tvm_topi Threads::Threads)
target_link_libraries(tvm_runtime Threads::Threads)
- target_link_libraries(nnvm_compiler Threads::Threads)
endif(USE_THREADS)
target_link_libraries(tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
target_link_libraries(tvm_topi tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
target_link_libraries(tvm_runtime ${TVM_RUNTIME_LINKER_LIBS})
-target_link_libraries(nnvm_compiler tvm)
if (HIDE_PRIVATE_SYMBOLS AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(HIDE_SYMBOLS_LINKER_FLAGS "-Wl,--exclude-libs,ALL")
target_link_libraries(tvm ${HIDE_SYMBOLS_LINKER_FLAGS})
target_link_libraries(tvm_topi ${HIDE_SYMBOLS_LINKER_FLAGS})
target_link_libraries(tvm_runtime ${HIDE_SYMBOLS_LINKER_FLAGS})
- target_link_libraries(nnvm_compiler ${HIDE_SYMBOLS_LINKER_FLAGS})
endif()
# Related headers
target_include_directories(
tvm_topi
PUBLIC "topi/include")
-target_include_directories(
- nnvm_compiler
- PUBLIC "nnvm/include"
- PUBLIC "topi/include")
+
# Tests
set(TEST_EXECS "")
install(TARGETS tvm DESTINATION lib${LIB_SUFFIX})
install(TARGETS tvm_topi DESTINATION lib${LIB_SUFFIX})
install(TARGETS tvm_runtime DESTINATION lib${LIB_SUFFIX})
-install(TARGETS nnvm_compiler DESTINATION lib${LIB_SUFFIX})
if (INSTALL_DEV)
install(
FILES_MATCHING
PATTERN "*.h"
)
- install(
- DIRECTORY "nnvm/include/." DESTINATION "include"
- FILES_MATCHING
- PATTERN "*.h"
- )
else(INSTALL_DEV)
install(
DIRECTORY "include/tvm/runtime/." DESTINATION "include/tvm/runtime"
if(MSVC)
target_compile_definitions(tvm PRIVATE -DTVM_EXPORTS)
target_compile_definitions(tvm_runtime PRIVATE -DTVM_EXPORTS)
- target_compile_definitions(nnvm_compiler PRIVATE -DNNVM_EXPORTS)
endif()
cpplint:
python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include;
- python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src;
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src \
examples/extension/src examples/graph_executor/src
pylint:
python3 -m pylint python/tvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc
python3 -m pylint topi/python/topi --rcfile=$(ROOTDIR)/tests/lint/pylintrc
- python3 -m pylint nnvm/python/nnvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc
python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc
jnilint:
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
cd topi/python
$PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt
cd ../..
-
-cd nnvm/python
-$PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt
-cd ../..
imports:
- tvm
- topi
- - nnvm
requires:
- pytest
- scipy
make -j10
# Environment variables
-ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH}
+ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH}
ENV ANDROID_HOME=/opt/android-sdk-linux/
RUN bash /install/install_tvm_cpu.sh
# Environment variables
-ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH}
+ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH}
RUN bash /install/install_tvm_gpu.sh
# Environment variables
-ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH}
+ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH}
ENV PATH=/usr/local/nvidia/bin:${PATH}
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
make -j6
RUN echo "Building Python package"
-ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${TVM_HOME}/nnvm/python:${PYTHONPATH}
+ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${PYTHONPATH}
RUN cd ${TVM_HOME}/python && python3 setup.py install --user
RUN cd ${TVM_HOME}/topi/python && python3 setup.py install --user
-RUN cd ${TVM_HOME}/nnvm/python && python3 setup.py install --user
public class GraphRuntime {
/**
* Create a runtime executor module given a graph and module.
- * @param graphJson The graph deployed in json format output by nnvm graph.
+ * @param graphJson The graph deployed in json format, as output by the graph compiler.
* @param libmod The module of the corresponding function.
* @param ctx The local or remote context to deploy the module.
* @return Runtime graph module that can be used to execute the graph.
export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC
-CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include
ifdef DMLC_CORE_PATH
CFLAGS += -I$(DMLC_CORE_PATH)/include
NO_WHOLE_ARCH= --no-whole-archive
endif
-all: lib/libnnvm.a lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX)
+all: lib/libnnvm.a lib/libnnvm.$(SHARED_LIBRARY_SUFFIX)
SRC = $(wildcard src/*.cc src/c_api/*.cc src/core/*.cc src/pass/*.cc)
SRC_COMPILER = $(wildcard src/top/*/*.cc src/top/vision/*/*.cc src/compiler/*.cc src/compiler/*/*.cc)
@mkdir -p $(@D)
$(AR) crv $@ $(filter %.o, $?)
-lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ}
+lib/libnnvm.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ}
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o, $^) $(LDFLAGS) -Wl,${WHOLE_ARCH} lib/libnnvm.a -Wl,${NO_WHOLE_ARCH}
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
-# NNVM Compiler Module of TVM Stack
+# NNVM
-```python
-import tvm
-from tvm.contrib import graph_runtime, rpc
-import nnvm.frontend
-import nnvm.compiler
-
-# GET model from frameworks
-# change xyz to supported framework name.
-graph, params = nnvm.frontend.from_xyz(...)
-
-# OPTIMIZE and COMPILE the graph to get a deployable module
-# target can be "opencl", "llvm", "metal" or any target supported by tvm
-target = "cuda"
-graph, lib, params = nnvm.compiler.build(graph, target, {"data", data_shape}, params=params)
-
-# DEPLOY and run on gpu(0)
-module = graph_runtime.create(graph, lib, tvm.gpu(0))
-module.set_input(**params)
-module.run(data=data_array)
-output = tvm.nd.empty(out_shape, ctx=tvm.gpu(0))
-module.get_output(0, output)
-
-# DEPLOY to REMOTE mobile/rasp/browser with minimum tvm rpc runtime
-# useful for quick experiments on mobile devices
-remote = rpc.connect(remote_host, remote_port)
-lib.export_library("mylib.so")
-remote.upload("mylib.so")
-rlib = rpc.load_module("mylib.so")
-# run on remote device
-rmodule = graph_runtime.create(graph, rlib, remote.gpu(0))
-rmodule.set_input(**params)
-rmodule.run()
-```
+NNVM is a graph-level IR for neural networks.
+We are moving towards the Relay IR, a better unified IR that supports a wider range of programs.
+Please use Relay instead.
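+
+As a rough guide for migration, below is a minimal sketch of the Relay workflow that replaces the
+removed NNVM example. It assumes the MXNet frontend and the older tuple-returning `relay.build`
+signature; names such as `mx_sym`, `data_shape`, and `data_array` are placeholders for your own
+model and inputs, and the exact API may differ across TVM versions.
+
+```python
+import tvm
+from tvm import relay
+from tvm.contrib import graph_runtime
+
+# Import a model from a framework frontend (MXNet shown as an example).
+mod, params = relay.frontend.from_mxnet(mx_sym, shape={"data": data_shape})
+
+# Optimize and compile for a TVM target such as "llvm", "cuda", or "opencl".
+with relay.build_config(opt_level=3):
+    graph, lib, params = relay.build(mod, target="llvm", params=params)
+
+# Deploy and run with the graph runtime on the local CPU.
+module = graph_runtime.create(graph, lib, tvm.cpu(0))
+module.set_input(**params)
+module.run(data=data_array)
+output = module.get_output(0)
+```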
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
/*!\brief "unsafe" getter function of any type */
using dmlc::unsafe_get;
+enum TypeFlag {
+ kFloat32 = 0,
+ kFloat64 = 1,
+ kFloat16 = 2,
+ kUint8 = 3,
+ kInt32 = 4,
+ kInt8 = 5,
+ kInt64 = 6,
+ // kBool = 7,
+ // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in
+ // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314
+ kInt16 = 8,
+ kUint16 = 9,
+ kUint32 = 10,
+ kUint64 = 11,
+ kBfloat16 = 12,
+};
+
} // namespace nnvm
// describe op registration point
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nnvm/compiler/op_attr_types.h
- * \brief The Expr and related elements in DataFlow construction.
- */
-#ifndef NNVM_COMPILER_OP_ATTR_TYPES_H_
-#define NNVM_COMPILER_OP_ATTR_TYPES_H_
-
-#include <tvm/expr.h>
-#include <tvm/tensor.h>
-#include <tvm/schedule.h>
-#include <tvm/packed_func_ext.h>
-#include <tvm/runtime/registry.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/graph.h>
-#include <vector>
-#include <string>
-#include "packed_func_ext.h"
-
-namespace nnvm {
-namespace compiler {
-
-using ::tvm::Array;
-using ::tvm::Tensor;
-using ::tvm::Schedule;
-
-/*! \brief operator pattern used in graph fusion */
-enum OpPatternKind {
- // Elementwise operation
- kElemWise = 0,
- // Broadcasting operator, can always map output axis to the input in order.
- // for example :code:`out[i, ax1, j, ax2] = input[i, j]`.
- // Note that the axis need to be in order so transpose is not a bcast operator.
- kBroadcast = 1,
- // Injective operator, can always injectively map output axis to a single input axis.
- // All injective operator can still be safely fused to injective and reduction.
- kInjective = 2,
- // Communicative reduction operator.
- kCommReduce = 3,
- // Complex operation, can still fuse elemwise operations into its output.
- // but cannot chain another complex op
- kOutEWiseFusable = 4,
- // Opaque operation, cannot fuse anything.
- kOpaque = 8
-};
-
-/*! \brief the operator pattern */
-using TOpPattern = int;
-
-/*!
- * \brief Computation description interface
- * \param attrs The attribute of the node.
- * \param inputs The input tensors(placeholders)
- * \param out_info Tensors holding shape/type information about output,
- & these are always placeholders.
- * \return The output description of the tensor.
- */
-using FTVMCompute = std::function<
- Array<Tensor>(const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info)>;
-
-/*!
- * \brief Build the computation schedule for
- * op whose root is at current op.
- * \param attrs The attribute of the node.
- * \param outs The output tensors.
- * \param target The build target.
- * \return schedule The computation schedule.
- */
-using FTVMSchedule = std::function<
- Schedule(const NodeAttrs& attrs,
- const Array<Tensor>& outs,
- const std::string& target)>;
-
-/*!
- * \brief Modify the op node to alter its input layout.
- * it is invoked in AlterOpLayout pass.
- * \param attrs The attribute of the original node.
- * \param inputs The input symbols of the original node.
- * \param tinfos The inferred shape and dtype of the inputs.
- * \param ret The replaced operator.
- * \return Whether to replace current operator.
- */
-using FTVMAlterOpLayout = std::function<
- bool(const NodeAttrs& attrs,
- const Symbol& inputs,
- const Array<Tensor>& tinfos,
- Symbol* ret)>;
-
-/*!
- * \brief Transform from normal operator to vectorized operator
- * \param node The source node.
- * \return Transformed vectorized op.
- */
-using FTVMVectorizedOp = std::function<nnvm::NodePtr (const nnvm::Node* node)>;
-
-} // namespace compiler
-} // namespace nnvm
-#endif // NNVM_COMPILER_OP_ATTR_TYPES_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nnvm/compiler/packed_func_ext.h
- * \brief Extension to enable packed functionn for nnvm types
- */
-#ifndef NNVM_COMPILER_PACKED_FUNC_EXT_H_
-#define NNVM_COMPILER_PACKED_FUNC_EXT_H_
-
-#include <tvm/runtime/packed_func.h>
-#include <tvm/runtime/registry.h>
-#include <nnvm/graph.h>
-#include <nnvm/symbolic.h>
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-namespace nnvm {
-namespace compiler {
-
-using tvm::runtime::PackedFunc;
-
-using AttrDict = std::unordered_map<std::string, std::string>;
-
-/*!
- * \brief Get PackedFunction from global registry and
- * report error if it does not exist
- * \param name The name of the function.
- * \return The created PackedFunc.
- */
-inline const PackedFunc& GetPackedFunc(const std::string& name) {
- const PackedFunc* pf = tvm::runtime::Registry::Get(name);
- CHECK(pf != nullptr) << "Cannot find function " << name << " in registry";
- return *pf;
-}
-} // namespace compiler
-} // namespace nnvm
-
-// Enable the graph and symbol object exchange.
-namespace tvm {
-namespace runtime {
-
-template<>
-struct extension_type_info<nnvm::Symbol> {
- static const int code = 16;
-};
-
-template<>
-struct extension_type_info<nnvm::Graph> {
- static const int code = 17;
-};
-
-template<>
-struct extension_type_info<nnvm::compiler::AttrDict> {
- static const int code = 18;
-};
-
-} // namespace runtime
-} // namespace tvm
-#endif // NNVM_COMPILER_PACKED_FUNC_EXT_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
-* \file nnvm/compiler/util.h
-* \brief Utility functions for nnvm compiler
-*/
-#ifndef NNVM_COMPILER_UTIL_H_
-#define NNVM_COMPILER_UTIL_H_
-
-#include <tvm/expr.h>
-#include <nnvm/tuple.h>
-
-namespace nnvm {
-namespace compiler {
-
-/*
- * \brief Helper function to convert TShape to TVM array. Useful for
- * passing data from NNVM param structures to TOPI ops.
- *
- * \param shape The shape to convert
- *
- * \return An Array of Expr, where each element is a constant int32
- */
-inline tvm::Array<tvm::Expr> ShapeToArray(TShape shape) {
- tvm::Array<tvm::Expr> result;
- for (auto i : shape) {
- result.push_back(tvm::make_const(tvm::DataType::Int(32), i));
- }
- return result;
-}
-
-/*
- * \brief Helper function to convert TShape to TVM array. Useful for
- * passing data from NNVM param structures to TOPI ops.
- *
- * \param shape The shape to convert
- *
- * \return An Array of Expr, where each element is a constant int32
- */
-inline tvm::Array<tvm::Integer> ShapeToIntArray(TShape shape) {
- return tvm::Downcast<tvm::Array<tvm::Integer> >(ShapeToArray(shape));
-}
-} // namespace compiler
-} // namespace nnvm
-#endif // NNVM_COMPILER_UTIL_H_
+++ /dev/null
-NNVM Core Operator and Compiler
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nnvm/top/nn.h
- * \brief Auxiliary param for tensor primitive.
- */
-#ifndef NNVM_TOP_NN_H_
-#define NNVM_TOP_NN_H_
-
-#include <dmlc/base.h>
-#include <dmlc/parameter.h>
-#include <nnvm/tuple.h>
-#include <nnvm/layout.h>
-#include <string>
-#include "tensor.h"
-
-namespace nnvm {
-namespace top {
-
-struct DenseParam : public dmlc::Parameter<DenseParam> {
- int units;
- bool use_bias;
-
- DMLC_DECLARE_PARAMETER(DenseParam) {
- DMLC_DECLARE_FIELD(units).set_lower_bound(1)
- .describe("Number of hidden units of the dense transformation.");
- DMLC_DECLARE_FIELD(use_bias).set_default(true)
- .describe("Whether to use bias parameter");
- }
- // constants
- static const constexpr int kData = 0;
- static const constexpr int kWeight = 1;
- static const constexpr int kBias = 2;
-};
-
-struct DropoutParam : public dmlc::Parameter<DropoutParam> {
- float rate;
-
- DMLC_DECLARE_PARAMETER(DropoutParam) {
- DMLC_DECLARE_FIELD(rate).set_default(0.5)
- .set_range(0, 1)
- .describe("Fraction of the input that gets dropped out during training time.");
- }
-};
-
-struct BatchNormParam : public dmlc::Parameter<BatchNormParam> {
- int axis;
- double epsilon;
- double momentum;
- bool center;
- bool scale;
-
- DMLC_DECLARE_PARAMETER(BatchNormParam) {
- DMLC_DECLARE_FIELD(axis).set_default(1)
- .describe("Specify which shape axis the channel is specified.");
- DMLC_DECLARE_FIELD(epsilon).set_default(1e-5)
- .describe("Small float added to variance to avoid dividing by zero.");
- DMLC_DECLARE_FIELD(center).set_default(true)
- .describe("If True, add offset of `beta` to normalized tensor."
- "If False, `beta` is ignored.");
- DMLC_DECLARE_FIELD(scale).set_default(true)
- .describe("If True, multiply by `gamma`. If False, `gamma` is not used."
- "When the next layer is piecewise linear (also e.g. `nn.relu`),"
- "this can be disabled since the scaling"
- "will be done by the next layer.");
- }
- // constants
- static const constexpr int kData = 0;
- static const constexpr int kGamma = 1;
- static const constexpr int kBeta = 2;
- static const constexpr int kMovingMean = 3;
- static const constexpr int kMovingVariance = 4;
-};
-
-
-// Shared by softmax and log_softmax
-struct SoftmaxParam : public dmlc::Parameter<SoftmaxParam> {
- int axis;
-
- DMLC_DECLARE_PARAMETER(SoftmaxParam) {
- DMLC_DECLARE_FIELD(axis).set_default(-1)
- .describe("The axis to sum over when computing softmax.");
- }
-};
-
-struct LeakyReLUParam : public dmlc::Parameter<LeakyReLUParam> {
- double alpha;
-
- DMLC_DECLARE_PARAMETER(LeakyReLUParam) {
- DMLC_DECLARE_FIELD(alpha).set_lower_bound(0.0).set_default(0.25)
- .describe("slope coefficient for the negative half axis.");
- }
-};
-
-struct PReLUParam : public dmlc::Parameter<PReLUParam> {
- int axis;
- DMLC_DECLARE_PARAMETER(PReLUParam) {
- DMLC_DECLARE_FIELD(axis).set_default(1)
- .describe("Specify which shape axis the channel is specified.");
- }
-};
-
-struct PadParam : public dmlc::Parameter<PadParam> {
- float pad_value;
- Tuple<Tuple<int> > pad_width;
-
- DMLC_DECLARE_PARAMETER(PadParam) {
- DMLC_DECLARE_FIELD(pad_value).set_default(0.0)
- .describe("The value to be padded.");
- DMLC_DECLARE_FIELD(pad_width)
- .describe("Number of values padded to the edges of each axis, "
- "in the format of ((before_1, after_1), ... (before_N, after_N))");
- }
-};
-
-
-struct Conv2DParam : public dmlc::Parameter<Conv2DParam> {
- int channels;
- TShape kernel_size;
- TShape strides;
- TShape padding;
- TShape dilation;
- int groups;
- std::string layout;
- std::string kernel_layout;
- std::string out_layout;
- int out_dtype;
- bool use_bias;
-
- DMLC_DECLARE_PARAMETER(Conv2DParam) {
- DMLC_DECLARE_FIELD(channels)
- .describe("The dimensionality of the output space"
- "i.e. the number of output channels in the convolution.");
- DMLC_DECLARE_FIELD(kernel_size)
- .describe("Specifies the dimensions of the convolution window.");
- DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
- .describe("Specifies the strides of the convolution.");
- DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
- .describe("If padding is non-zero, then the input is implicitly zero-padded"
- "on both sides for padding number of points");
- DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1}))
- .describe("Specifies the dilation rate to use for dilated convolution.");
- DMLC_DECLARE_FIELD(groups).set_default(1)
- .describe("Controls the connections between inputs and outputs."
- "At groups=1, all inputs are convolved to all outputs."
- "At groups=2, the operation becomes equivalent to having two convolution"
- "layers side by side, each seeing half the input channels, and producing"
- "half the output channels, and both subsequently concatenated.");
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(out_layout).set_default("__undef__")
- .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Default to be same as input layout.");
- DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW")
- .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
- "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
- "dimensions respectively.");
- DMLC_DECLARE_DTYPE_FIELD(out_dtype)
- .add_enum("same", -1)
- .set_default(-1)
- .describe("Output data type, set to explicit type under mixed precision setting");
-
- DMLC_DECLARE_FIELD(use_bias).set_default(true)
- .describe("Whether the layer uses a bias vector.");
- }
- // constants
- static const constexpr int kData = 0;
- static const constexpr int kWeight = 1;
- static const constexpr int kBias = 2;
-};
-
-struct WinogradWeightTransformParam : public dmlc::Parameter<WinogradWeightTransformParam> {
- int tile_size;
-
- DMLC_DECLARE_PARAMETER(WinogradWeightTransformParam) {
- DMLC_DECLARE_FIELD(tile_size)
- .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)");
- }
-
- static const constexpr int kWeight = 0;
-};
-
-struct WinogradNNPACKWeightTransformParam
- : public dmlc::Parameter<WinogradNNPACKWeightTransformParam> {
- int convolution_algorithm;
- int out_dtype;
-
- DMLC_DECLARE_PARAMETER(WinogradNNPACKWeightTransformParam) {
- DMLC_DECLARE_FIELD(convolution_algorithm)
- .describe(
- "The convolution algorithm for Winograd NNPACK. "
- "E.g. tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8 for WT_8x8, "
- "tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8_FP16 for WT_8x8_FP16");
- DMLC_DECLARE_DTYPE_FIELD(out_dtype)
- .add_enum("same", -1)
- .set_default(-1)
- .describe("Output data type, set to explicit type under mixed precision setting");
- }
-
- static const constexpr int kWeight = 0;
-};
-
-struct WinogradConv2DParam : public dmlc::Parameter<WinogradConv2DParam> {
- int channels;
- TShape kernel_size;
- TShape strides;
- TShape padding;
- TShape dilation;
- int groups;
- std::string layout;
- std::string kernel_layout;
- std::string out_layout;
- int out_dtype;
- bool use_bias;
- int tile_size;
-
- DMLC_DECLARE_PARAMETER(WinogradConv2DParam) {
- DMLC_DECLARE_FIELD(channels)
- .describe("The dimensionality of the output space"
- "i.e. the number of output channels in the convolution.");
- DMLC_DECLARE_FIELD(kernel_size)
- .describe("Specifies the dimensions of the convolution window.");
- DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
- .describe("Specifies the strides of the convolution.");
- DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
- .describe("If padding is non-zero, then the input is implicitly zero-padded"
- "on both sides for padding number of points");
- DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1}))
- .describe("Specifies the dilation rate to use for dilated convolution.");
- DMLC_DECLARE_FIELD(groups).set_default(1)
- .describe("Controls the connections between inputs and outputs."
- "At groups=1, all inputs are convolved to all outputs."
- "At groups=2, the operation becomes equivalent to having two convolution"
- "layers side by side, each seeing half the input channels, and producing"
- "half the output channels, and both subsequently concatenated.");
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(out_layout).set_default("__undef__")
- .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Default to be same as input layout.");
- DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW")
- .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
- "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
- "dimensions respectively.");
- DMLC_DECLARE_DTYPE_FIELD(out_dtype)
- .add_enum("same", -1)
- .set_default(-1)
- .describe("Output data type, set to explicit type under mixed precision setting");
- DMLC_DECLARE_FIELD(use_bias).set_default(true)
- .describe("Whether the layer uses a bias vector.");
- DMLC_DECLARE_FIELD(tile_size)
- .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)");
- }
- // constants
- static const constexpr int kData = 0;
- static const constexpr int kWeight = 1;
- static const constexpr int kBias = 2;
-};
-
-struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
- int channels;
- TShape kernel_size;
- TShape strides;
- TShape padding;
- TShape output_padding;
- TShape dilation;
- int groups;
- std::string layout;
- std::string kernel_layout;
- int out_dtype;
- bool use_bias;
-
- DMLC_DECLARE_PARAMETER(Conv2DTransposeParam) {
- DMLC_DECLARE_FIELD(channels)
- .describe("The dimensionality of the output space"
- "i.e. the number of output channels in the convolution.");
- DMLC_DECLARE_FIELD(kernel_size)
- .describe("Specifies the dimensions of the convolution window.");
- DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
- .describe("Specifies the strides of the convolution.");
- DMLC_DECLARE_FIELD(output_padding).set_default(TShape({0, 0}))
- .describe("Zero-padding added to one side of the output.");
- DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
- .describe("If padding is non-zero, then the input is implicitly zero-padded"
- "on both sides for padding number of points");
- DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1}))
- .describe("Specifies the dilation rate to use for dilated convolution.");
- DMLC_DECLARE_FIELD(groups).set_default(1)
- .describe("Controls the connections between inputs and outputs."
- "At groups=1, all inputs are convolved to all outputs."
- "At groups=2, the operation becomes equivalent to having two convolution"
- "layers side by side, each seeing half the input channels, and producing"
- "half the output channels, and both subsequently concatenated.");
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW")
- .describe("Dimension ordering of data and weight. Can be 'OIHW', 'OIHW16o16i', etc."
- "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
- "dimensions respectively.");
- DMLC_DECLARE_DTYPE_FIELD(out_dtype)
- .add_enum("same", -1)
- .set_default(-1)
- .describe("Output data type, set to explicit type under mixed precision setting");
- DMLC_DECLARE_FIELD(use_bias).set_default(true)
- .describe("Whether the layer uses a bias vector.");
- }
- // constants
- static const constexpr int kData = 0;
- static const constexpr int kWeight = 1;
- static const constexpr int kBias = 2;
-};
-
-
-struct MaxPool2DParam : public dmlc::Parameter<MaxPool2DParam> {
- TShape pool_size;
- TShape strides;
- TShape padding;
- std::string layout;
- bool ceil_mode;
-
- DMLC_DECLARE_PARAMETER(MaxPool2DParam) {
- DMLC_DECLARE_FIELD(pool_size)
- .describe("Size of the pooling windows..");
- DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
- .describe("Specifies the strides of the convolution.");
- DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
- .describe("If padding is non-zero, then the input is implicitly zero-padded"
- "Padding support both symmetric and asymmetric as"
- "one int : same padding used on all sides"
- "two int : bottom, right will use same padding as top, left"
- "four int : padding width in the order of (top, left, bottom, right)");
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(ceil_mode).set_default(false)
- .describe("When true, will use ceil instead of floor to compute the output shape.");
- }
-};
-
-
-struct AvgPool2DParam : public dmlc::Parameter<AvgPool2DParam> {
- TShape pool_size;
- TShape strides;
- TShape padding;
- std::string layout;
- bool ceil_mode;
- bool count_include_pad;
-
- DMLC_DECLARE_PARAMETER(AvgPool2DParam) {
- DMLC_DECLARE_FIELD(pool_size)
- .describe("Size of the pooling windows..");
- DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
- .describe("Specifies the strides of the convolution.");
- DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0}))
- .describe("If padding is non-zero, then the input is implicitly zero-padded"
- "Padding support both symmetric and asymmetric as"
- "one int : same padding used on all sides"
- "two int : bottom, right will use same padding as top, left"
- "four int : padding width in the order of (top, left, bottom, right)");
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(ceil_mode).set_default(false)
- .describe("When true, will use ceil instead of floor to compute the output shape.");
- DMLC_DECLARE_FIELD(count_include_pad).set_default(false)
- .describe("When true, will include padding to compute the average");
- }
-};
-
-
-struct GlobalPool2DParam : public dmlc::Parameter<GlobalPool2DParam> {
- std::string layout;
-
- DMLC_DECLARE_PARAMETER(GlobalPool2DParam) {
- DMLC_DECLARE_FIELD(layout).set_default("NCHW")
- .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Convolution is applied on the 'H' and"
- "'W' dimensions.");
- }
-};
-
-struct UpSamplingParam : public dmlc::Parameter<UpSamplingParam> {
- int scale;
- std::string layout;
- std::string method;
-
- DMLC_DECLARE_PARAMETER(UpSamplingParam) {
- DMLC_DECLARE_FIELD(scale)
- .describe("upsampling scaling factor");
- DMLC_DECLARE_FIELD(layout)
- .set_default("NCHW")
- .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Upsampling is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(method)
- .set_default("NEAREST_NEIGHBOR")
- .describe("Specify the mode to use for scaling."
- "NEAREST_NEIGHBOR - Nearest Neighbor"
- "BILINEAR - Bilinear Interpolation");
- }
-};
-
-struct LayoutTransformParam : public dmlc::Parameter<LayoutTransformParam> {
- std::string src_layout;
- std::string dst_layout;
-
- DMLC_DECLARE_PARAMETER(LayoutTransformParam) {
- DMLC_DECLARE_FIELD(src_layout).set_default("__undef__")
- .describe("Dimension ordering of data");
- DMLC_DECLARE_FIELD(dst_layout).set_default("__undef__")
- .describe("Dimension ordering of data.");
- }
-};
-
-struct MultiBoxPriorParam : public dmlc::Parameter<MultiBoxPriorParam> {
- Tuple<float> sizes;
- Tuple<float> ratios;
- Tuple<float> steps;
- Tuple<float> offsets;
- bool clip;
-
- DMLC_DECLARE_PARAMETER(MultiBoxPriorParam) {
- DMLC_DECLARE_FIELD(sizes).set_default(Tuple<float>({1.0}))
- .describe("List of sizes of generated MultiBoxPriores.");
- DMLC_DECLARE_FIELD(ratios).set_default(Tuple<float>({1.0}))
- .describe("List of aspect ratios of generated MultiBoxPriores.");
- DMLC_DECLARE_FIELD(steps).set_default(Tuple<float>({-1.0, -1.0}))
- .describe("Priorbox step across y and x, -1 for auto calculation.");
- DMLC_DECLARE_FIELD(offsets).set_default(Tuple<float>({0.5, 0.5}))
- .describe("Priorbox center offsets, y and x respectively.");
- DMLC_DECLARE_FIELD(clip).set_default(false)
- .describe("Whether to clip out-of-boundary boxes.");
- }
-};
-
-struct MultiBoxTransformLocParam : public dmlc::Parameter<MultiBoxTransformLocParam> {
- bool clip;
- float threshold;
- Tuple<float> variances;
- DMLC_DECLARE_PARAMETER(MultiBoxTransformLocParam) {
- DMLC_DECLARE_FIELD(clip).set_default(true)
- .describe("Clip out-of-boundary boxes.");
- DMLC_DECLARE_FIELD(threshold).set_default(0.01)
- .describe("Threshold to be a positive prediction.");
- DMLC_DECLARE_FIELD(variances).set_default(Tuple<float>({0.1f, 0.1f, 0.2f, 0.2f}))
- .describe("Variances to be decoded from box regression output.");
- }
-};
-
-struct NonMaximumSuppressionParam : public dmlc::Parameter<NonMaximumSuppressionParam> {
- bool return_indices;
- float iou_threshold;
- bool force_suppress;
- int top_k;
- int id_index;
- int coord_start;
- int score_index;
- int max_output_size;
- bool invalid_to_bottom;
- DMLC_DECLARE_PARAMETER(NonMaximumSuppressionParam) {
- DMLC_DECLARE_FIELD(max_output_size).set_default(-1)
- .describe("Max number of output valid boxes for each instance."
- "By default all valid boxes are returned.");
- DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5)
- .describe("Non-maximum suppression threshold.");
- DMLC_DECLARE_FIELD(force_suppress).set_default(false)
- .describe("Suppress all detections regardless of class_id.");
- DMLC_DECLARE_FIELD(top_k).set_default(-1)
- .describe("Keep maximum top k detections before nms, -1 for no limit.");
- DMLC_DECLARE_FIELD(coord_start).set_default(2)
- .describe("Start index of the consecutive 4 coordinates.");
- DMLC_DECLARE_FIELD(score_index).set_default(1)
- .describe("Index of the scores/confidence of boxes.");
- DMLC_DECLARE_FIELD(id_index).set_default(0)
- .describe("Axis index of id.");
- DMLC_DECLARE_FIELD(return_indices).set_default(true)
- .describe("Whether to return box indices in input data.");
- DMLC_DECLARE_FIELD(invalid_to_bottom).set_default(false)
- .describe("Whether to move all invalid bounding boxes to the bottom.");
- }
-};
-
-struct LRNParam : public dmlc::Parameter<LRNParam> {
- int size;
- int axis;
- float alpha;
- float beta;
- float bias;
-
- DMLC_DECLARE_PARAMETER(LRNParam) {
- DMLC_DECLARE_FIELD(size)
- .describe("The size of the local region to be considered for normalization.");
- DMLC_DECLARE_FIELD(axis)
- .describe("input data layout channel axis");
- DMLC_DECLARE_FIELD(alpha)
- .describe("The scaling parameter.");
- DMLC_DECLARE_FIELD(beta)
- .describe("The exponent parameter.");
- DMLC_DECLARE_FIELD(bias)
- .describe("The offset parameter.");
- }
- // constants
- static const constexpr int kData = 0;
-};
-
-struct L2NormalizeParam : public dmlc::Parameter<L2NormalizeParam> {
- float eps;
- Tuple<int> axis;
-
- DMLC_DECLARE_PARAMETER(L2NormalizeParam) {
- DMLC_DECLARE_FIELD(eps)
- .describe("float type epsilon value.");
- DMLC_DECLARE_FIELD(axis)
- .describe("axis over the normalization applied");
- }
-};
-
-} // namespace top
-} // namespace nnvm
-
-#endif // NNVM_TOP_NN_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nnvm/top/tensor.h
- * \brief Auxiliary param for tensor primitive.
- */
-#ifndef NNVM_TOP_TENSOR_H_
-#define NNVM_TOP_TENSOR_H_
-
-#include <dmlc/base.h>
-#include <dmlc/parameter.h>
-#include <nnvm/tuple.h>
-
-namespace nnvm {
-namespace top {
-
-struct ConcatenateParam : public dmlc::Parameter<ConcatenateParam> {
- int axis;
- DMLC_DECLARE_PARAMETER(ConcatenateParam) {
- DMLC_DECLARE_FIELD(axis).set_default(1)
- .describe("the axis to be concated.");
- }
-};
-
-struct ExpandDimsParam : public dmlc::Parameter<ExpandDimsParam> {
- int axis;
- int num_newaxis;
- DMLC_DECLARE_PARAMETER(ExpandDimsParam) {
- DMLC_DECLARE_FIELD(axis)
- .describe("the axis to be expanded.");
- DMLC_DECLARE_FIELD(num_newaxis).set_lower_bound(1).set_default(1)
- .describe("Number of new axis to be inserted.");
- }
-};
-
-struct SplitParam : public dmlc::Parameter<SplitParam> {
- // numpy convention, only support indices, not support list.
- Tuple<int> indices_or_sections;
- int axis;
- // additional hint whether it is equal_split mode
- // deduced from indices_or_sections
- bool equal_split;
-
- DMLC_DECLARE_PARAMETER(SplitParam) {
- DMLC_DECLARE_FIELD(indices_or_sections)
- .describe("Number of outputs to be splitted");
- DMLC_DECLARE_FIELD(axis).set_default(1)
- .describe("the axis to be splitted.");
- }
-};
-
-
-struct TakeParam : public dmlc::Parameter<TakeParam> {
- dmlc::optional<int> axis;
-
- DMLC_DECLARE_PARAMETER(TakeParam) {
- DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional<int>())
- .describe("the axis over which to select values.");
- }
-};
-
-struct StridedSliceParam : public dmlc::Parameter<StridedSliceParam> {
- // numpy convention, only support indices, not support list.
- Tuple<int64_t> begin;
- Tuple<int64_t> end;
- Tuple<int64_t> stride;
-
- DMLC_DECLARE_PARAMETER(StridedSliceParam) {
- DMLC_DECLARE_FIELD(begin)
- .describe("Indices for begin of slice");
- DMLC_DECLARE_FIELD(end)
- .describe("Indices for end of the slice");
- DMLC_DECLARE_FIELD(stride).set_default(Tuple<int64_t>())
- .describe("Stride values of the slice");
- }
-};
-
-enum TypeFlag {
- kFloat32 = 0,
- kFloat64 = 1,
- kFloat16 = 2,
- kUint8 = 3,
- kInt32 = 4,
- kInt8 = 5,
- kInt64 = 6,
- // kBool = 7,
- // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in
- // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314
- kInt16 = 8,
- kUint16 = 9,
- kUint32 = 10,
- kUint64 = 11,
- kBfloat16 = 12,
-};
-
-enum IndicatorRuleFlag {
- kGT0 = 0,
- kLT0 = 1,
- kMax = 2,
- kMin = 3,
-};
-
-#define DMLC_DECLARE_DTYPE_FIELD(name) \
- DMLC_DECLARE_FIELD(name) \
- .add_enum("float16", kFloat16) \
- .add_enum("float32", kFloat32) \
- .add_enum("float64", kFloat64) \
- .add_enum("uint8", kUint8) \
- .add_enum("uint16", kUint16) \
- .add_enum("uint32", kUint32) \
- .add_enum("uint64", kUint64) \
- .add_enum("int8", kInt8) \
- .add_enum("int16", kInt16) \
- .add_enum("int32", kInt32) \
- .add_enum("int64", kInt64) \
- .add_enum("bfloat16", kBfloat16)
-
-struct CastParam : public dmlc::Parameter<CastParam> {
- int dtype;
- DMLC_DECLARE_PARAMETER(CastParam) {
- DMLC_DECLARE_DTYPE_FIELD(dtype)
- .describe("Output data type.");
- }
-};
-
-struct IndicatorParam : public dmlc::Parameter<IndicatorParam> {
- TShape axis;
- bool exclude;
- DMLC_DECLARE_PARAMETER(IndicatorParam) {
- DMLC_DECLARE_FIELD(axis).set_default(TShape())
- .describe(R"code(The axis or axes along which to perform the indicator rule.
-
- The default, `axis=()`, will compute over all elements into a
- scalar array with shape `(1,)`.
-
- If `axis` is int, rule is applied on a particular axis.
-
- If `axis` is a tuple of ints, rule is applied on all the axes
- specified in the tuple.
-
- If `exclude` is true, rule will be applied on the axes that are
- NOT in axis instead.)code");
- DMLC_DECLARE_FIELD(exclude).set_default(false)
- .describe("Whether to apply rule on axis that are NOT in axis instead.");
- }
-};
-
-struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
- Tuple<int64_t> shape;
-
- DMLC_DECLARE_PARAMETER(ReshapeParam) {
- DMLC_DECLARE_FIELD(shape);
- }
-};
-
-struct SqueezeParam : public dmlc::Parameter<SqueezeParam> {
- TShape axis;
-
- DMLC_DECLARE_PARAMETER(SqueezeParam) {
- DMLC_DECLARE_FIELD(axis).set_default(TShape())
- .describe("The axis to squeeze in the input tensor.");
- }
-};
-
-struct ScalarParam : public dmlc::Parameter<ScalarParam> {
- double scalar;
-
- DMLC_DECLARE_PARAMETER(ScalarParam) {
- DMLC_DECLARE_FIELD(scalar);
- }
-};
-
-struct FillValueParam : public dmlc::Parameter<FillValueParam> {
- double fill_value;
-
- DMLC_DECLARE_PARAMETER(FillValueParam) {
- DMLC_DECLARE_FIELD(fill_value)
- .describe("Scalar value to be filled");
- }
-};
-
-struct TransposeParam : public dmlc::Parameter<TransposeParam> {
- TShape axes;
-
- DMLC_DECLARE_PARAMETER(TransposeParam) {
- DMLC_DECLARE_FIELD(axes).set_default(TShape())
- .describe("Target axis order. By default the axes will be inverted.");
- }
-};
-
-struct FlipParam : public dmlc::Parameter<FlipParam> {
- int axis;
- DMLC_DECLARE_PARAMETER(FlipParam) {
- DMLC_DECLARE_FIELD(axis).set_default(0)
- .describe("the axis to be reveresed.");
- }
-};
-
-struct BroadcastToParam : public dmlc::Parameter<BroadcastToParam> {
- TShape shape;
-
- DMLC_DECLARE_PARAMETER(BroadcastToParam) {
- DMLC_DECLARE_FIELD(shape).set_default(TShape())
- .describe("The shape of the desired array."
- " We can set the dim to zero if it's same as the original."
- " E.g `A = broadcast_to(B, shape=(10, 0, 0))` ");
- }
-};
-
-struct ReduceParam : public dmlc::Parameter<ReduceParam> {
- TShape axis;
- bool keepdims;
- bool exclude;
- int dtype;
-
- DMLC_DECLARE_PARAMETER(ReduceParam) {
- DMLC_DECLARE_FIELD(axis).set_default(TShape())
- .describe(R"code(The axis or axes along which to perform the reduction.
-
- The default, `axis=()`, will compute over all elements into a
- scalar array with shape `(1,)`.
-
- If `axis` is int, a reduction is performed on a particular axis.
-
- If `axis` is a tuple of ints, a reduction is performed on all the axes
- specified in the tuple.
-
- If `exclude` is true, reduction will be performed on the axes that are
- NOT in axis instead.)code");
-
- DMLC_DECLARE_FIELD(keepdims).set_default(false)
- .describe("If this is set to `True`, the reduced axes are left "
- "in the result as dimension with size one.");
- DMLC_DECLARE_FIELD(exclude).set_default(false)
- .describe("Whether to perform reduction on axis that are NOT in axis instead.");
- DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kInt32)
- .describe("Target data type.");
- }
-};
-
-struct InitOpWithScalarParam : public dmlc::Parameter<InitOpWithScalarParam> {
- TShape shape;
- int dtype;
- double fill_value;
-
- DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) {
- DMLC_DECLARE_FIELD(shape).set_default(TShape());
- DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
- .describe("Target data type.");
- DMLC_DECLARE_FIELD(fill_value).describe("Scalar value to fill");
- }
-};
-
-struct InitOpParam : public dmlc::Parameter<InitOpParam> {
- TShape shape;
- int dtype;
-
- DMLC_DECLARE_PARAMETER(InitOpParam) {
- DMLC_DECLARE_FIELD(shape).set_default(TShape());
- DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32)
- .describe("Target data type.");
- }
-};
-
-struct ElementWiseReduceParam : public dmlc::Parameter<ElementWiseReduceParam> {
- int num_args;
- DMLC_DECLARE_PARAMETER(ElementWiseReduceParam) {
- DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
- .describe("Number of inputs to be reduced.");
- }
-};
-
-struct MatMulParam : public dmlc::Parameter<MatMulParam> {
- bool transpose_a;
- bool transpose_b;
-
- DMLC_DECLARE_PARAMETER(MatMulParam) {
- DMLC_DECLARE_FIELD(transpose_a)
- .describe("If true then transpose the first input before dot.")
- .set_default(false);
- DMLC_DECLARE_FIELD(transpose_b)
- .describe("If true then transpose the second input before dot.")
- .set_default(false);
- }
-};
-
-struct ClipParam : public dmlc::Parameter<ClipParam> {
- double a_min, a_max;
- DMLC_DECLARE_PARAMETER(ClipParam) {
- DMLC_DECLARE_FIELD(a_min)
- .describe("Minimum value such that value smaller then this will be clipped.");
- DMLC_DECLARE_FIELD(a_max)
- .describe("Maximum value such that value larger then this will be clipped.");
- }
-};
-
-struct SliceLikeParam : public dmlc::Parameter<SliceLikeParam> {
- Tuple<int> axis;
- DMLC_DECLARE_PARAMETER(SliceLikeParam) {
- DMLC_DECLARE_FIELD(axis).set_default(Tuple<int>())
- .describe("List of axes on which input data will be sliced according to the "
- "corresponding size of the second input. By default will slice "
- "on all axes. Negative axes are supported.");
- }
-};
-
-} // namespace top
-} // namespace nnvm
-
-#endif // NNVM_TOP_TENSOR_H_
+++ /dev/null
-*.c
-*.cpp
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#!/usr/bin/env python
-# coding: utf-8
-"""NNVM python API for ease of use and help new framework establish python API. """
-from __future__ import absolute_import as _abs
-import warnings
-
-from . import _base
-from . import symbol as sym
-from . import symbol
-from ._base import NNVMError
-from . import frontend
-
-__version__ = _base.__version__
-
-warnings.warn("NNVM is deprecated and will be removed in a future version. Use Relay instead.",
- FutureWarning)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-# pylint: disable=invalid-name, unused-import
-""" ctypes library of nnvm and helper functions """
-from __future__ import absolute_import
-
-import os
-import sys
-import ctypes
-import numpy as np
-from . import libinfo
-
-try:
- import tvm
-except ImportError:
- pass
-
-#----------------------------
-# library loading
-#----------------------------
-if sys.version_info[0] == 3:
- string_types = str
- numeric_types = (float, int, np.float32, np.int32)
- # this function is needed for python3
- # to convert ctypes.char_p .value back to python str
- py_str = lambda x: x.decode('utf-8')
-else:
- string_types = basestring
- numeric_types = (float, int, long, np.float32, np.int32)
- py_str = lambda x: x
-
-
-class NNVMError(Exception):
- """Error that will be throwed by all nnvm functions"""
-
-
-def _load_lib():
- """Load libary by searching possible path."""
- lib_path = libinfo.find_lib_path()
- lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL)
- # DMatrix functions
- lib.NNGetLastError.restype = ctypes.c_char_p
- return lib
-
-# version number
-__version__ = libinfo.__version__
-# library instance of nnvm
-_LIB = _load_lib()
-# The FFI mode of TVM
-_FFI_MODE = os.environ.get("TVM_FFI", "auto")
-
-# type definitions
-nn_uint = ctypes.c_uint
-OpHandle = ctypes.c_void_p
-SymbolHandle = ctypes.c_void_p
-GraphHandle = ctypes.c_void_p
-
-# Global dict of str to symbol to initialize variables
-_all_var_init = {}
-
-#----------------------------
-# helper function definition
-#----------------------------
-def check_call(ret):
- """Check the return value of C API call
-
- This function will raise exception when error occurs.
- Wrap every API call with this function
-
- Parameters
- ----------
- ret : int
- return value from API calls
- """
- if ret != 0:
- raise NNVMError(py_str(_LIB.NNGetLastError()))
-
-def c_str(string):
- """Create ctypes char * from a python string
- Parameters
- ----------
- string : string type
- python string
-
- Returns
- -------
- str : c_char_p
- A char pointer that can be passed to C API
- """
- return ctypes.c_char_p(string.encode('utf-8'))
-
-
-def c_array(ctype, values):
- """Create ctypes array from a python array
-
- Parameters
- ----------
- ctype : ctypes data type
- data type of the array we want to convert to
-
- values : tuple or list
- data content
-
- Returns
- -------
- out : ctypes array
- Created ctypes array
- """
- return (ctype * len(values))(*values)
-
-def ctypes2buffer(cptr, length):
- """Convert ctypes pointer to buffer type.
-
- Parameters
- ----------
- cptr : ctypes.POINTER(ctypes.c_char)
- pointer to the raw memory region
- length : int
- the length of the buffer
-
- Returns
- -------
- buffer : bytearray
- The raw byte memory buffer
- """
- if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)):
- raise TypeError('expected char pointer')
- res = bytearray(length)
- rptr = (ctypes.c_char * length).from_buffer(res)
- if not ctypes.memmove(rptr, cptr, length):
- raise RuntimeError('memmove failed')
- return res
-
-def ctypes2numpy_shared(cptr, shape):
- """Convert a ctypes pointer to a numpy array
-
- The result numpy array shares the memory with the pointer
-
- Parameters
- ----------
- cptr : ctypes.POINTER(mx_float)
- pointer to the memory region
-
- shape : tuple
- shape of target ndarray
-
- Returns
- -------
- out : numpy_array
- A numpy array : numpy array
- """
- if not isinstance(cptr, ctypes.POINTER(mx_float)):
- raise RuntimeError('expected float pointer')
- size = 1
- for s in shape:
- size *= s
- dbuffer = (mx_float * size).from_address(ctypes.addressof(cptr.contents))
- return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape)
-
-
-def ctypes2docstring(num_args, arg_names, arg_types, arg_descs, remove_dup=True):
- """Convert ctypes returned doc string information into parameters docstring.
-
- num_args : nn_uint
- Number of arguments.
-
- arg_names : ctypes.POINTER(ctypes.c_char_p)
- Argument names.
-
- arg_types : ctypes.POINTER(ctypes.c_char_p)
- Argument type information.
-
- arg_descs : ctypes.POINTER(ctypes.c_char_p)
- Argument description information.
-
- remove_dup : boolean, optional
- Whether remove duplication or not.
-
- Returns
- -------
- docstr : str
- Python docstring of parameter sections.
- """
- param_keys = set()
- param_str = []
- for i in range(num_args.value):
- key = py_str(arg_names[i])
- if key in param_keys and remove_dup:
- continue
- param_keys.add(key)
- type_info = py_str(arg_types[i])
- ret = '%s : %s' % (key, type_info)
- if arg_descs[i]:
- ret += '\n ' + py_str(arg_descs[i])
- param_str.append(ret)
- doc_str = ('Parameters\n' +
- '----------\n' +
- '%s\n')
- doc_str = doc_str % ('\n'.join(param_str))
- return doc_str
+++ /dev/null
-Ctypes specific implementation of certain modules
\ No newline at end of file
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-""""ctypes implementation of the Symbol"""
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines,
-# pylint: disable=len-as-condition, consider-iterating-dictionary
-"""Symbolic configuration API."""
-from __future__ import absolute_import as _abs
-
-import copy
-import ctypes
-import sys
-from .._base import _LIB
-from .._base import c_array, c_str, nn_uint, py_str
-from .._base import SymbolHandle, OpHandle
-from .._base import check_call, ctypes2docstring
-from ..name import NameManager
-from ..attribute import AttrScope
-
-class SymbolBase(object):
- """Symbol is a symbolic graph."""
- __slots__ = ["handle"]
- # pylint: disable=no-member
- def __init__(self, handle):
- """Initialize the function with handle
-
- Parameters
- ----------
- handle : SymbolHandle
- the handle to the underlying C++ Symbol
- """
- self.handle = handle
-
- def __del__(self):
- check_call(_LIB.NNSymbolFree(self.handle))
-
- def __call__(self, *args, **kwargs):
- """Invoke symbol as function on inputs.
-
- Parameters
- ----------
- args:
- provide positional arguments
-
- kwargs:
- provide keyword arguments
-
- Returns
- -------
- the resulting symbol
- """
- s = copy.deepcopy(self)
- s._compose(*args, **kwargs)
- return s
-
- def _compose(self, *args, **kwargs):
- """Compose symbol on inputs.
-
- This call mutates the current symbol.
-
- Parameters
- ----------
- args:
- provide positional arguments
-
- kwargs:
- provide keyword arguments
-
- Returns
- -------
- the resulting symbol
- """
- name = kwargs.pop('name', None)
-
- if name:
- name = c_str(name)
- if len(args) != 0 and len(kwargs) != 0:
- raise TypeError('compose only accepts input Symbols \
- either as positional or keyword arguments, not both')
-
- for arg in args:
- if not isinstance(arg, SymbolBase):
- raise TypeError('Compose expects `Symbol` arguments')
- for val in kwargs.values():
- if not isinstance(val, SymbolBase):
- raise TypeError('Compose expects `Symbol` arguments')
-
- num_args = len(args) + len(kwargs)
- if len(kwargs) != 0:
- keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()])
- args = c_array(SymbolHandle, [s.handle for s in kwargs.values()])
- else:
- keys = None
- args = c_array(SymbolHandle, [s.handle for s in args])
- check_call(_LIB.NNSymbolCompose(
- self.handle, name, num_args, keys, args))
-
- def _set_attr(self, **kwargs):
- """Set the attribute of the symbol.
-
- Parameters
- ----------
- **kwargs
- The attributes to set
- """
- keys = c_array(ctypes.c_char_p,
- [c_str(key) for key in kwargs.keys()])
- vals = c_array(ctypes.c_char_p,
- [c_str(str(val)) for val in kwargs.values()])
- num_args = nn_uint(len(kwargs))
- check_call(_LIB.NNSymbolSetAttrs(
- self.handle, num_args, keys, vals))
-
-
-_symbol_cls = SymbolBase
-
-def _set_symbol_class(cls):
- global _symbol_cls
- _symbol_cls = cls
-
-
-def _make_atomic_symbol_function(handle, name):
- """Create an atomic symbol function from a handle and function name."""
- real_name = ctypes.c_char_p()
- desc = ctypes.c_char_p()
- num_args = nn_uint()
- arg_names = ctypes.POINTER(ctypes.c_char_p)()
- arg_types = ctypes.POINTER(ctypes.c_char_p)()
- arg_descs = ctypes.POINTER(ctypes.c_char_p)()
- ret_type = ctypes.c_char_p()
-
- check_call(_LIB.NNGetOpInfo(
- handle, ctypes.byref(real_name), ctypes.byref(desc),
- ctypes.byref(num_args),
- ctypes.byref(arg_names),
- ctypes.byref(arg_types),
- ctypes.byref(arg_descs),
- ctypes.byref(ret_type)))
- param_str = ctypes2docstring(num_args, arg_names, arg_types, arg_descs)
- func_name = name
- desc = py_str(desc.value)
-
- doc_str = ('%s\n\n' +
- '%s\n' +
- 'Returns\n' +
- '-------\n' +
- 'result: Tensor\n' +
- ' The result Tensor.')
- doc_str = doc_str % (desc, param_str)
-
- def creator(*args, **kwargs):
- """Activation Operator of Neural Net.
- The parameters listed below can be passed in as keyword arguments.
-
- Parameters
- ----------
- name : string, required.
- Name of the resulting symbol.
-
- Returns
- -------
- symbol: Symbol
- the resulting symbol
- """
- param_keys = []
- param_vals = []
- symbol_kwargs = {}
- name = kwargs.pop('name', None)
- attr = kwargs.pop('attr', None)
-
- for k, v in kwargs.items():
- if isinstance(v, SymbolBase):
- symbol_kwargs[k] = v
- else:
- param_keys.append(c_str(k))
- param_vals.append(c_str(str(v)))
- # create atomic symbol
- param_keys = c_array(ctypes.c_char_p, param_keys)
- param_vals = c_array(ctypes.c_char_p, param_vals)
- sym_handle = SymbolHandle()
- check_call(_LIB.NNSymbolCreateAtomicSymbol(
- handle,
- nn_uint(len(param_keys)),
- param_keys, param_vals,
- ctypes.byref(sym_handle)))
-
- if len(args) != 0 and len(symbol_kwargs) != 0:
- raise TypeError(
- '%s can only accept input '
- 'Symbols either as positional or keyword arguments, not both' % func_name)
- s = _symbol_cls(sym_handle)
- attr = AttrScope.current.get(attr)
- if attr:
- s._set_attr(**attr)
- hint = func_name.lower()
- name = NameManager.current.get(name, hint)
- s._compose(*args, name=name, **symbol_kwargs)
- return s
-
- creator.__name__ = func_name
- creator.__doc__ = doc_str
- return creator
-
-
-def _init_symbol_module(symbol_class, root_namespace):
- """List and add all the atomic symbol functions to the current module."""
- _set_symbol_class(symbol_class)
- plist = ctypes.POINTER(ctypes.c_char_p)()
- size = ctypes.c_uint()
-
- check_call(_LIB.NNListAllOpNames(ctypes.byref(size),
- ctypes.byref(plist)))
- op_names = []
- for i in range(size.value):
- op_names.append(py_str(plist[i]))
-
- module_obj = sys.modules["%s.symbol" % root_namespace]
- module_obj_contrib = sys.modules["%s.contrib" % root_namespace]
- module_internal = sys.modules["%s._symbol_internal" % root_namespace]
- for name in op_names:
- hdl = OpHandle()
- check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
- function = _make_atomic_symbol_function(hdl, name)
- if function.__name__.startswith('_contrib_'):
- setattr(module_obj_contrib, function.__name__.split('_contrib_')[1], function)
- elif function.__name__.startswith('_'):
- setattr(module_internal, function.__name__, function)
- setattr(module_obj, function.__name__, function)
- else:
- setattr(module_obj, function.__name__, function)
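
A usage sketch of the operator wrappers this initializer installs, assuming the pre-removal nnvm package and that operators such as dense and relu are registered:

    import nnvm.symbol as sym

    data = sym.Variable("data")
    net = sym.dense(data=data, units=10, name="fc1")  # Symbol keyword arg plus scalar params
    net = sym.relu(net, name="relu1")                 # positional Symbol argument
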
+++ /dev/null
-This folder is by default empty and will hold DLLs generated by cython.
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Namespace for cython generated modules for python2"""
+++ /dev/null
-This folder is by default empty and will hold DLLs generated by cython.
\ No newline at end of file
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Cython generated modules"""
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Module space to register internal functions. Leave empty"""
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-"""Attribute scoping support for symbolic API."""
-from __future__ import absolute_import
-
-from ._base import string_types
-
-class AttrScope(object):
- """Attribute manager for scoping.
-
- Users can also inherit this object to change the attribute behavior.
-
- Parameters
- ----------
- kwargs
- The attributes to set for all symbol creations in the scope.
- """
- current = None
-
- def __init__(self, **kwargs):
- self._old_scope = None
- for value in kwargs.values():
- if not isinstance(value, string_types):
- raise ValueError("Attributes need to be strings")
- self._attr = kwargs
-
- def get(self, attr):
- """
- Get the attribute dict, merged with the attributes set in this scope.
-
- Parameters
- ----------
- attr : dict of string to string
- The attribute passed in by user during symbol creation.
-
- Returns
- -------
- attr : dict of string to string
- The attributes, updated with the scope-related attributes.
- """
- if self._attr:
- ret = self._attr.copy()
- if attr:
- ret.update(attr)
- return ret
- return attr
-
- def __enter__(self):
- # pylint: disable=protected-access
- self._old_scope = AttrScope.current
- attr = AttrScope.current._attr.copy()
- attr.update(self._attr)
- self._attr = attr
- AttrScope.current = self
- return self
-
- def __exit__(self, ptype, value, trace):
- assert self._old_scope
- AttrScope.current = self._old_scope
-
-AttrScope.current = AttrScope()
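
A small sketch of attribute scoping in use, assuming the pre-removal nnvm package (the import path follows this file's location):

    import nnvm.symbol as sym
    from nnvm.attribute import AttrScope

    with AttrScope(group="backbone"):
        x = sym.Variable("x")
        y = sym.relu(x, name="relu0")  # operators created here inherit group="backbone"
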
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""NNVM compiler toolchain.
-
-Users only need to use :any:`build` and :any:`build_config` to do the compilation,
-and :any:`save_param_dict` to save the parameters into bytes.
-The other APIs are for more advanced interaction with the compiler toolchain.
-"""
-from __future__ import absolute_import
-
-import tvm
-
-from . import build_module
-from . build_module import build, optimize, build_config
-from . compile_engine import engine, graph_key
-from . param_dict import save_param_dict, load_param_dict
-
-from .. import symbol as _symbol
-from .. import graph as _graph
-
-from .. import top as _top
-
-
-tvm.register_extension(_symbol.Symbol, _symbol.Symbol)
-tvm.register_extension(_graph.Graph, _graph.Graph)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Namespace for building operators."""
-from __future__ import absolute_import as _abs
-
-import logging
-import tvm
-
-from tvm.contrib import graph_runtime
-from tvm import autotvm
-from . import graph_attr, graph_util
-from .. import graph as _graph
-from .. import symbol as sym
-from .._base import _all_var_init
-
-OPT_PASS_LEVEL = {
- "SimplifyInference": 0,
- "PrecomputePrune": 2,
- "OpFusion": 1,
- "FoldScaleAxis": 3,
- "AlterOpLayout": 3,
-}
-
-# List of optimization passes and the opt level at which each is switched on
-class BuildConfig(object):
- """Configuration scope to set a build config option.
-
- Parameters
- ----------
- kwargs
- Keyword arguments of configurations to set.
- """
- current = None
- defaults = {
- "opt_level": 2,
- "add_pass": None,
- }
- def __init__(self, **kwargs):
- self._old_scope = None
- for k, _ in kwargs.items():
- if k not in BuildConfig.defaults:
- raise ValueError(
- "invalid argument %s, candidates are %s" % (k, BuildConfig.defaults.keys()))
- self._attr = kwargs
-
- def __getattr__(self, name):
- if name not in self._attr:
- return BuildConfig.defaults[name]
- return self._attr[name]
-
- def __enter__(self):
- # pylint: disable=protected-access
- self._old_scope = BuildConfig.current
- attr = BuildConfig.current._attr.copy()
- attr.update(self._attr)
- self._attr = attr
- BuildConfig.current = self
- return self
-
- def __exit__(self, ptype, value, trace):
- assert self._old_scope
- BuildConfig.current = self._old_scope
-
- def pass_enabled(self, pass_name):
- """Get whether pass is enabled.
-
- Parameters
- ----------
- pass_name : str
- The optimization pass name
-
- Returns
- -------
- enabled : bool
- Whether pass is enabled.
- """
- if self.add_pass and pass_name in self.add_pass:
- return True
- return self.opt_level >= OPT_PASS_LEVEL[pass_name]
-
-
-BuildConfig.current = BuildConfig()
-
-def build_config(**kwargs):
- """Configure the build behavior by setting config variables.
-
- Parameters
- ----------
- opt_level: int, default=2
- Optimization level. See OPT_PASS_LEVEL for level of each pass.
-
- add_pass: set of str
- Optimization passes to be added regardless of the optimization level.
-
- Returns
- -------
- config: BuildConfig
- The build configuration
- """
- return BuildConfig(**kwargs)
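
A sketch of raising the optimization level for a single build, assuming the pre-removal nnvm.compiler package:

    import nnvm.compiler
    import nnvm.symbol as sym

    data = sym.Variable("data")
    net = sym.dense(data=data, units=10)

    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, params = nnvm.compiler.build(
            net, target="llvm", shape={"data": (1, 16)})
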
-
-
-@tvm.register_func("nnvm.compiler.lower")
-def _lower(sch, inputs, func_name, graph):
- import traceback
- # pylint: disable=broad-except
- try:
- f = tvm.lower(sch, inputs, name=func_name)
- logging.debug("lower function %s", func_name)
- logging.debug("%s", tvm.lower(sch, inputs, simple_mode=True))
- except Exception:
- msg = traceback.format_exc()
- msg += "Error during graph compilation\n"
- msg += "--------------------------\n"
- msg += graph.ir(join_entry_attrs=["shape"])
- raise RuntimeError(msg)
- return f if isinstance(
- f, (tvm.container.Array, tuple, list)) else [f]
-
-
-@tvm.register_func("nnvm.compiler.build_target")
-def _build(funcs, target, target_host):
- if target_host == "":
- target_host = None
- return tvm.build(funcs, target=target, target_host=target_host)
-
-
-def _update_shape_dtype(shape, dtype, params):
- """Update shape and dtype given params information."""
- if not params:
- return shape, dtype
- shape = shape.copy()
- shape.update({k : v.shape for k, v in params.items()})
- if isinstance(dtype, str):
- for k, v in params.items():
- if v.dtype != dtype and v.shape:
- raise ValueError(
- "%s: dtype not expected %s vs %s" % (k, dtype, v.dtype))
- else:
- dtype = dtype.copy()
- dtype.update({k : str(v.dtype) for k, v in params.items()})
- return shape, dtype
-
-
-def optimize(graph, shape, dtype="float32", layout=None):
- """Perform target and parameter invariant graph optimization.
-
- This is an advanced function that usually does not need to be called directly.
- Call build instead.
-
- Parameters
- ----------
- graph : Graph
- The graph to be optimized.
-
- Returns
- -------
- graph : Graph
- The optimized graph.
- """
- # pylint: disable=unused-argument
- cfg = BuildConfig.current
-
- if cfg.pass_enabled("AlterOpLayout"):
- layout = layout if layout else {}
- graph = graph_attr.set_layout_inputs(graph, layout)
- graph = graph.apply(["CorrectLayout"])
-
- graph = graph_attr.set_shape_inputs(graph, shape)
- graph = graph_attr.set_dtype_inputs(graph, dtype)
- graph = graph.apply(["InferShape", "InferType", "AlterOpLayout"])
- graph = graph_attr.set_layout_inputs(graph, layout)
- graph = graph.apply(["CorrectLayout"])
-
- if cfg.pass_enabled("SimplifyInference"):
- graph = graph_attr.set_shape_inputs(graph, shape)
- graph = graph.apply(["InferShape", "SimplifyInference"])
-
- if cfg.pass_enabled("FoldScaleAxis"):
- graph = graph_attr.set_shape_inputs(graph, shape)
- graph = graph.apply(["InferShape", "FoldScaleAxis"])
- return graph
-
-
-def build(graph, target=None, shape=None, dtype="float32",
- params=None, target_host=None, layout=None):
- """Build graph into runtime library.
-
- The build function will optimize the graph and do the compilation.
-
- When params is provided, the compiler might split the graph to
- pre-compute certain values, so the final execution graph can
- be different from the original one.
-
- Parameters
- ----------
- graph : Graph
- The graph to be used in lowering
-
- target : str or :any:`tvm.target.Target`, optional
- The build target
-
- shape : dict of str to tuple, optional
- The input shape to the graph
-
- dtype : str or dict of str to str
- The input types to the graph
-
- params : dict of str to NDArray
- Input parameters to the graph that do not change
- during inference time. Used for pre-compute
- folding optimization.
-
- target_host : str or :any:`tvm.target.Target`, optional
- Host compilation target, if target is device.
- When TVM compiles device-specific programs such as CUDA,
- we also need host (CPU) side code to interact with the driver
- and set up the dimensions and parameters correctly.
- target_host is used to specify the host-side codegen target.
- By default, llvm is used if it is enabled,
- otherwise a stackvm interpreter is used.
-
- layout : dict of str to str or str, optional
- The input layout
-
- Returns
- -------
- graph : Graph
- The final execution graph.
-
- libmod : tvm.Module
- The module that comes with the execution graph
-
- params : dict of str to NDArray
- The updated parameters of graph if params is passed.
- This can be different from the params passed in.
- """
- target = target if target else tvm.target.current_target()
- if target is None:
- raise ValueError("Target is not set in env or passed as argument.")
- target = tvm.target.create(target)
-
- # If current dispatch context is fallback context (the default root context),
- # then load pre-tuned parameters from TopHub
- if isinstance(autotvm.DispatchContext.current, autotvm.FallbackContext):
- tophub_context = autotvm.tophub.context(target)
- else:
- tophub_context = autotvm.util.EmptyContext()
-
- with tophub_context:
- shape = shape if shape else {}
- if not isinstance(shape, dict):
- raise TypeError("require shape to be dict")
- for value in shape.values():
- if not all(isinstance(x, tvm._ffi.base.integer_types) for x in value):
- raise TypeError("shape values must be iterables of integers")
-
- cfg = BuildConfig.current
- graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph)
- shape, dtype = _update_shape_dtype(shape, dtype, params)
-
- # correct layout if necessary
- layout = layout if layout else {}
- graph = graph_attr.set_layout_inputs(graph, layout)
- graph = graph.apply("CorrectLayout")
- index = graph.index
- layouts = graph.json_attr("layout")
- layout = {x: layouts[index.entry_id(x)] for x in index.input_names}
-
- # Initial pass do shape type inference
- ishape, _ = graph_util.infer_shape(graph, **shape)
- shape.update(zip(graph.index.input_names, ishape))
- if not isinstance(dtype, str):
- idtype, _ = graph_util.infer_dtype(graph, **dtype)
- dtype.update(zip(graph.index.input_names, idtype))
- # Initialize all variables specified in _all_var_init
- init_var = {}
- if _all_var_init:
- init_var = initialize_variables(shape, dtype)
- # Apply optimization
- with target:
- graph = optimize(graph, shape, dtype, layout)
-
- # Clear extra params without nodes.
- _remove_noref_params(params, graph)
-
- # Precompute prune
- if params and cfg.pass_enabled("PrecomputePrune"):
- graph, params = precompute_prune(graph, params)
- shape, dtype = _update_shape_dtype(shape, dtype, params)
- # Operator Fusion and generation
- graph = graph_attr.set_shape_inputs(graph, shape)
- graph = graph.apply("InferShape")
- graph = graph_attr.set_dtype_inputs(graph, dtype)
- graph._set_json_attr("target", str(target), "str")
- if target_host is not None:
- graph._set_json_attr("target_host", str(target_host), "str")
- if cfg.pass_enabled("OpFusion"):
- graph._set_json_attr("opt_level", 1, "int")
- else:
- graph._set_json_attr("opt_level", 0, "int")
- graph = graph.apply("InferShape").apply("InferType")
- graph = graph.apply("GraphFindFusibleGroups")
- graph = graph.apply("GraphFuse")
- with target:
- graph = graph.apply("GraphCompile")
- libmod = graph_attr._move_out_module(graph, "module")
- # Write variable initial values into params
- if init_var:
- if params is None:
- params = {}
- params.update(init_var)
- return graph, libmod, params
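
A sketch of executing the artifacts returned by build with the TVM graph runtime, assuming the pre-removal packages:

    import numpy as np
    import tvm
    import nnvm.compiler
    import nnvm.symbol as sym
    from tvm.contrib import graph_runtime

    data = sym.Variable("data")
    net = sym.relu(data)
    graph, lib, params = nnvm.compiler.build(net, target="llvm", shape={"data": (1, 8)})

    module = graph_runtime.create(graph, lib, tvm.cpu(0))
    module.set_input("data", np.random.uniform(-1, 1, (1, 8)).astype("float32"))
    if params:
        module.set_input(**params)
    module.run()
    out = module.get_output(0)
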
-
-def _remove_noref_params(params, graph):
- """Helper to clear non-referenced params.
-
- Parameters
- ----------
- graph : Graph
- The input graph
-
- params: dict of str to ndarray
- The parameter dictionary
- """
- arg_list = set(graph.symbol.list_input_names())
-
- if params:
- param_keys = list(params.keys())
- for key in param_keys:
- if key not in arg_list:
- params.pop(key)
-
-def _run_graph(graph, params):
- """Helper utility to build, run, and fetch outputs, using CPU mode only.
-
- Parameters
- ----------
- graph : Graph
- The graph to be executed.
-
- params: dict of str to ndarray
- The parameter dictionary.
-
- Returns
- -------
- out_data: list of tvm.NDArray
- The output arrays, in output order.
- """
- graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph)
- shape = {k : v.shape for k, v in params.items()}
- dtype = {k : v.dtype for k, v in params.items()}
- target = "llvm"
- ctx = tvm.cpu(0)
- _, oshape = graph_util.infer_shape(graph, **shape)
- _, odtype = graph_util.infer_dtype(graph, **dtype)
- graph, libmod, _ = build(graph, target, shape, dtype)
- m = graph_runtime.create(graph, libmod, ctx)
- set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
- kset = set(graph.symbol.list_input_names())
- for k, v in params.items():
- if k in kset:
- set_input(k, tvm.nd.array(v))
- run()
- out_data = []
- for i, kv in enumerate(zip(oshape, odtype)):
- shape, dtype = kv
- arr = tvm.nd.empty(shape, dtype, ctx)
- get_output(i, arr)
- out_data.append(arr)
- return out_data
-
-
-def precompute_prune(graph, params):
- """Precompute the parts of the graph that can be computed ahead of time.
-
- This creates a new graph that only contains the ops whose results
- depend on the inputs, together with an updated version of the param
- dict that holds the pre-computed intermediate results.
-
- Parameters
- ----------
- graph : Graph
- The input graph
-
- params : dict of str -> tvm.NDArray
- The parameter dictionary of the graph
-
- Returns
- -------
- pruned_graph : Graph
- The pruned graph
-
- new_params : dict of str-> tvm.NDArray
- The updated dictionary of parameters.
- """
- graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph)
- graph._set_json_attr("param_name_list", list(params.keys()), "list_str")
- graph = graph.apply("PrecomputePrune")
- pre_graph = graph_attr._move_out_graph(graph, "precompute_graph")
- if pre_graph is None:
- return graph, params
- out_names = pre_graph.json_attr("output_names")
- if not pre_graph.symbol.list_output_names():
- return graph, params
- with tvm.build_config(auto_unroll_max_step=0):
- out_arrs = _run_graph(pre_graph, params)
- return graph, dict(zip(out_names, out_arrs))
-
-
-def initialize_variables(ishape, idtype):
- """Initialize variables stored in the _all_var_init dictionary.
-
- Parameters
- ----------
- ishape : dict of str to tuple of int
- The input shape to the graph
-
- idtype : str or dict of str to str
- The input types to the graph
-
- Returns
- -------
- init_var : dict of str to tvm.ndarray
- """
- symbol_init_dict = {}
- const_init_dict = {}
- init_var = {}
- for key, value in _all_var_init.items():
- if isinstance(value, sym.Symbol):
- symbol_init_dict[key] = value
- else:
- const_init_dict[key] = tvm.nd.array(value)
- # Make sure variables are initialized only once.
- _all_var_init.clear()
- if symbol_init_dict:
- # Create dummy params to run initialization graph
- params = {}
- for name, shape in ishape.items():
- dtype = idtype if isinstance(idtype, str) else idtype[name]
- params[name] = tvm.nd.empty(shape, dtype, ctx=tvm.cpu())
- init_group_sym = sym.Group(symbol_init_dict.values())
- graph = _graph.create(init_group_sym)
- with tvm.build_config(auto_unroll_max_step=0):
- init_values = _run_graph(graph, params)
- init_var.update(dict(zip(symbol_init_dict.keys(), init_values)))
- init_var.update(const_init_dict)
- for name, data in init_var.items():
- ishape[name] = data.shape
- return init_var
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Compiler engine interface to internal engine
-
-You can get the engine singleton at ``nnvm.compiler.engine``
-"""
-import tvm
-
-_list_cache_items = tvm.get_global_func("nnvm.compiler.ListCacheItems")
-_clear_cache = tvm.get_global_func("nnvm.compiler.ClearCache")
-_get_cache_item = tvm.get_global_func("nnvm.compiler.GetCacheItem")
-_set_cache_item = tvm.get_global_func("nnvm.compiler.SetCacheItem")
-_graph_key_get_graph = tvm.get_global_func("nnvm.compiler.GraphKeyGetGraph")
-_make_graph_key = tvm.get_global_func("nnvm.compiler.MakeGraphKey")
-
-@tvm.register_node
-class GraphKey(tvm.node.NodeBase):
- """Key of a graph compilation context"""
- @property
- def graph(self):
- return _graph_key_get_graph(self)
-
-
-@tvm.register_node
-class GraphCacheEntry(tvm.node.NodeBase):
- """CacheEntry of compilation into a TVM Function"""
-
-
-@tvm.register_node
-class GraphFunc(tvm.node.NodeBase):
- """Compiled result of a graph into a TVM Function"""
-
-
-class Engine(object):
- """Global singleton compilation engine.
-
- You can get the singleton at ``nnvm.compiler.engine``
- """
- def items(self):
- """List the available cache key value pairs.
-
- Returns
- -------
- item_list : list of (GraphKey, GraphCacheEntry)
- The existing cache items
- """
- res = _list_cache_items()
- assert len(res) % 2 == 0
- return [(res[2*i], res[2*i+1]) for i in range(len(res) // 2)]
-
- def clear_cache(self):
- """Clear the existing cached functions."""
- _clear_cache()
-
- def __setitem__(self, key, value):
- """Set the cache entry for the given graph key."""
- if isinstance(value, GraphCacheEntry):
- _set_cache_item(key, value.graph_func)
- else:
- _set_cache_item(key, value)
-
- def __getitem__(self, key):
- """Get the cached entry for the given graph key."""
- return _get_cache_item(key)
-
- def dump(self):
- """Return a string representation of engine dump
-
- Returns
- -------
- dump : str
- The dumped string representation
- """
- items = self.items()
- res = "====================================\n"
- res += "CompilerEngine dump, %d items cached\n" % len(items)
- for key, value in items:
- res += "------------------------------------\n"
- res += "target={}\n".format(key.target)
- res += "inputs={}\n".format(key.inputs)
- res += "use_count={}\n".format(value.use_count)
- res += "func_name={}\n".format(value.graph_func.func_name)
- res += key.graph.ir() + "\n"
- res += "===================================\n"
- return res
-
-engine = Engine()
-
-
-def graph_key(graph, inputs, target):
- """Construct a new graph key.
-
- Parameters
- ----------
- graph : Graph
- The computation graph structure
-
- inputs : list of Tensor(placeholder)
- The input requirement to the graph.
-
- target : str
- The target of compilation.
- """
- return _make_graph_key(graph, inputs, target)
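
A short sketch of inspecting the compilation cache, assuming the pre-removal nnvm.compiler package:

    from nnvm.compiler import engine

    for key, entry in engine.items():
        print(key.target, entry.use_count)  # one line per cached compilation
    print(engine.dump())                    # full human-readable dump
    engine.clear_cache()
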
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Utilities to access graph attributes"""
-from __future__ import absolute_import as _abs
-
-import tvm
-
-def set_shape_inputs(g, shape):
- """Set the shape of input graph nodes in the graph attribute.
-
- Parameters
- ----------
- g : Graph
- The input graph
-
- shape : dict of str to tuple
- The input shape
-
- Returns
- -------
- g : Graph
- The updated graph with updated shape.
- """
- list_shape = [
- shape.get(name, ()) for name in g.index.input_names]
- g._set_json_attr("shape_inputs", list_shape, 'list_shape')
- return g
-
-
-DTYPE_TO_TCODE = {
- "default": -1,
- "float32": 0,
- "float64": 1,
- "float16": 2,
- "uint8": 3,
- "int32": 4,
- "int8": 5,
- "int64": 6,
- "int16": 7,
- "uint16": 8,
- "uint32": 9,
- "uint64": 10,
- "bool": 11,
-}
-
-TCODE_TO_DTYPE = {
- -1: None,
- 0: "float32",
- 1: "float64",
- 2: "float16",
- 3: "uint8",
- 4: "int32",
- 5: "int8",
- 6: "int64",
- 7: "int16",
- 8: "uint16",
- 9: "uint32",
- 10: "uint64",
- 11: "bool",
-}
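
The two tables above are mutual inverses for every concrete dtype; a quick consistency check, as a sketch evaluated in this module's namespace:

    assert all(TCODE_TO_DTYPE[code] == name
               for name, code in DTYPE_TO_TCODE.items()
               if name != "default")
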
-
-def set_dtype_inputs(g, dtype):
- """Set the dtype of input graph nodes in the graph attribute.
-
- Parameters
- ----------
- g : Graph
- The input graph
-
- dtype : dict of str to str or str
- The input dtype
-
- Returns
- -------
- g : Graph
- The updated graph with updated dtype.
- """
- if isinstance(dtype, dict):
- list_dtype = [
- DTYPE_TO_TCODE[str(dtype.get(name, "default"))]
- for name in g.index.input_names]
- else:
- list_dtype = [DTYPE_TO_TCODE[dtype]] * len(g.index.input_names)
- g._set_json_attr("dtype_inputs", list_dtype, "list_int")
- return g
-
-
-def set_layout_inputs(g, layout):
- """Set the layout of input graph nodes in the graph attribute.
-
- Parameters
- ----------
- g : Graph
- The input graph
-
- layout : dict of str to str or str
- The input layout
-
- Returns
- -------
- g : Graph
- The updated graph with updated layout.
- """
- if isinstance(layout, dict):
- list_layout = [
- layout.get(name, "__undef__") for name in g.index.input_names]
- elif isinstance(layout, str):
- list_layout = ["__undef__"] * len(g.index.input_names)
- list_layout[0] = layout
- else:
- raise ValueError("Input layout must be str or dict")
- last_inferred_layouts = g.json_attr("layout")
- if last_inferred_layouts:
- input_layout = [last_inferred_layouts[g.index.entry_id(x)] for x in g.index.input_names]
- for i, layout_stored in enumerate(input_layout):
- list_layout[i] = list_layout[i] if list_layout[i] != '__undef__' else layout_stored
- g._set_json_attr("layout_inputs", list_layout, 'list_layout')
- return g
-
-_move_out_module = tvm.get_global_func("nnvm.graph._move_module")
-_move_out_graph = tvm.get_global_func("nnvm.graph._move_graph")
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Namespace of graph pass.
-
-Principle:
-- Graph in, graph out: always takes in graph as first argument and returns a graph
-- Composable API: break graph transformation pass as segments of small transformations.
-"""
-from __future__ import absolute_import as _abs
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Utility function to get information from graph."""
-from __future__ import absolute_import as _abs
-
-import tvm
-from . import graph_attr
-
-from ..graph import create
-from ..symbol import Group, ones_like
-
-def infer_shape(graph, **shape):
- """Infer the shape given the shape of inputs.
-
- Parameters
- ----------
- graph : Graph
- The graph to perform shape inference from
-
- shape : dict of str to tuple
- The specific input shape.
-
- Returns
- -------
- in_shape : list of tuple
- Shape of inputs
-
- out_shape: list of tuple
- Shape of outputs
- """
- graph = graph_attr.set_shape_inputs(graph, shape)
- graph = graph.apply("InferShape")
- shape = graph.json_attr("shape")
- index = graph.index
- input_shape = [shape[index.entry_id(x)] for x in index.input_names]
- output_shape = [shape[index.entry_id(x)] for x in index.output_entries]
- return input_shape, output_shape
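
A sketch of running shape inference on a small graph, assuming the pre-removal nnvm package:

    import nnvm.graph
    import nnvm.symbol as sym
    from nnvm.compiler import graph_util

    x = sym.Variable("x")
    y = sym.dense(data=x, units=4)
    g = nnvm.graph.create(y)
    in_shapes, out_shapes = graph_util.infer_shape(g, x=(2, 8))
    # in_shapes covers every graph input (x plus the dense weight and bias);
    # out_shapes[0] should be (2, 4).
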
-
-
-def infer_dtype(graph, **dtype):
- """Infer the dtype given the dtypes of inputs.
-
- Parameters
- ----------
- graph : Graph
- The graph to perform type inference from
-
- dtype : dict of str to dtype
- The specific input data type.
-
- Returns
- -------
- in_dtype : list of str
- Dtype of inputs
-
- out_dtype: list of str
- Dtype of outputs
- """
- graph = graph_attr.set_dtype_inputs(graph, dtype)
- graph = graph.apply("InferType")
- dtype = graph.json_attr("dtype")
- index = graph.index
- input_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]]
- for x in index.input_names]
- output_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]]
- for x in index.output_entries]
- return input_dtype, output_dtype
-
-
-_deep_compare = tvm.get_global_func("nnvm.graph.DeepCompare")
-
-def check_graph_equal(grapha, graphb, compare_variable_attrs=False):
- """Check if two graphs have equal structure.
-
- Parameters
- ----------
- grapha : Graph
- The first graph
-
- graphb : Graph
- The second graph
-
- compare_variable_attrs : bool, optional
- Whether to compare attributes (names) on variables.
- Usually it is safe to skip this unless we need input names
- to match exactly.
-
- Raises
- ------
- ValueError
- ValueError is raised with an error message when the graphs are not equal.
- """
- err = _deep_compare(grapha, graphb, compare_variable_attrs)
- if err:
- raise ValueError("Graph compare error: " + err)
-
-def get_gradient_graph(ys, xs, grad_ys=None):
- """Create gradient graph of ys with respect to xs.
-
- Parameters
- ----------
- ys : Symbol or list of Symbol
- Symbols from which the gradient is calculated.
- xs : Symbol or list of Symbol
- Symbols the gradient is taken with respect to.
- For a group symbol, gradients for all outputs will be calculated.
- grad_ys : Symbol or list of Symbol
- Head gradients for ys.
-
- Returns
- -------
- ret : Graph
- Generated gradient graph.
- """
- if isinstance(ys, list):
- ys = Group(ys)
- g = create(ys)
- g._set_symbol_list_attr('grad_ys', ys)
- g._set_symbol_list_attr('grad_xs', xs)
- ny = len(ys.list_output_names())
- if grad_ys is None:
- grad_ys = [ones_like(ys[i]) for i in range(ny)]
- g._set_symbol_list_attr('grad_ys_out_grad', grad_ys)
- return g.apply('Gradient')
-
-def gradients(ys, xs, grad_ys=None):
- """Create gradient symbols of ys with respect to xs.
-
- Parameters
- ----------
- ys : Symbol or list of Symbol
- Symbols from which the gradient is calculated.
- xs : Symbol or list of Symbol
- Symbols the gradient is taken with respect to.
- For a group symbol, gradients for all outputs will be calculated.
- grad_ys : Symbol or list of Symbol
- Head gradients for ys.
-
- Returns
- -------
- ret : list of Symbol
- Generated gradient symbol. For each xs,
- all gradients from ys are merged into a single symbol.
- """
- grad_g = get_gradient_graph(ys, xs, grad_ys)
- nx = len(Group(xs).list_output_names()) \
- if isinstance(xs, list) else len(xs.list_output_names())
- ret = [grad_g.symbol[i] for i in range(nx)]
- return ret
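
A sketch of taking symbolic gradients, assuming the pre-removal nnvm package and its registered elementwise ops:

    import nnvm.symbol as sym
    from nnvm.compiler import graph_util

    x = sym.Variable("x")
    y = sym.elemwise_mul(x, x)          # y = x * x
    grads = graph_util.gradients(y, x)  # a list with one Symbol for dy/dx
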
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=too-few-public-methods, no-member
-"""API for scheduling learning rate."""
-from .. import symbol as sym
-
-class LRScheduler(object):
- """Base class of a learning rate scheduler.
-
- A scheduler returns a new learning rate based on the number of updates that have
- been performed.
-
- Parameters
- ----------
- base_lr : float, optional
- The initial learning rate.
- """
- def __init__(self, base_lr=0.01, name='LRScheduler'):
- self.name = name
- self.base_lr = base_lr
-
- def __call__(self, num_update):
- """Return a new learning rate based on number of updates.
-
- Parameters
- ----------
- num_update: nnvm Symbol
- the number of updates applied to weight.
- """
- raise NotImplementedError("__call__ method must be overridden.")
-
-class FactorScheduler(LRScheduler):
- """Reduce the learning rate by a factor for every *n* steps.
-
- It returns a new learning rate by::
-
- base_lr * pow(factor, num_update/step)
-
- Parameters
- ----------
- step : int
- Changes the learning rate for every n updates.
- factor : float, optional
- The factor to change the learning rate.
- stop_factor_lr : float, optional
- Stop updating the learning rate if it is less than this value.
- """
- def __init__(self, step, factor=1, stop_factor_lr=1e-8, name='FactorScheduler', **kwargs):
- super(FactorScheduler, self).__init__(name=name, **kwargs)
- if step < 1:
- raise ValueError("Schedule step must be greater than or equal to 1")
- if factor > 1.0:
- raise ValueError("Factor must be no more than 1 so that the lr decreases")
- self.step = step
- self.factor = factor
- self.stop_factor_lr = stop_factor_lr
-
- def __call__(self, num_update):
- updated_lr = self.base_lr * self.factor ** (num_update / self.step)
- return sym.clip(updated_lr, a_min=self.stop_factor_lr, a_max=self.base_lr)
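
A quick numeric illustration of the schedule formula documented above (plain Python, no nnvm needed):

    base_lr, factor, step = 0.01, 0.5, 100
    for num_update in (0, 100, 200, 300):
        print(num_update, base_lr * factor ** (num_update / step))
    # prints 0.01, 0.005, 0.0025, 0.00125
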
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, no-member, too-few-public-methods, too-many-arguments, too-many-locals, protected-access
-"""Optimizer API"""
-from . import graph_util
-from .. import symbol as sym
-
-class Optimizer(object):
- """Base class inherited by all optimizers.
-
- Parameters
- ----------
- learning_rate : float, optional
- The initial learning rate.
-
- lr_scheduler : LRScheduler, optional
- The learning rate scheduler.
-
- rescale_grad : float, optional
- Multiply the gradient with `rescale_grad` before updating. Often
- chosen to be ``1.0/batch_size``.
-
- clip_gradient : float, optional
- Clip the gradient by projecting onto the box ``[-clip_gradient, clip_gradient]``.
-
- wd : float, optional
- The weight decay (or L2 regularization) coefficient. Modifies objective
- by adding a penalty for having large weights.
-
- name : string, optional
- The name of optimizer.
- """
- def __init__(self, learning_rate=0.01, lr_scheduler=None,
- rescale_grad=1, clip_gradient=None, wd=0, name="Optimizer"):
- self.name = name
- self.lr = learning_rate
- self.lr_scheduler = lr_scheduler
- self.rescale_grad = rescale_grad
- self.clip_gradient = clip_gradient
- self.wd = wd
- init_update_t = sym.Variable(name+'_t', init=sym.zeros(shape=(1,), dtype="int32"))
- self.update_t = sym._assign(init_update_t, init_update_t + 1)
-
- def minimize(self, obj, var=None):
- """Minimize the given obj symbol with respect to var. If var is not set,
- all input variables of obj will be used.
-
- Parameters
- ----------
- obj : nnvm Symbol or list of nnvm Symbols
- Symbols to be minimized.
- var : nnvm Symbol or list of nnvm Symbols, optional
- Symbols the gradient respect to.
-
- Returns
- -------
- group_sym : nnvm Symbol
- A group symbol representing the update symbols.
- """
- raise NotImplementedError()
-
- def _get_lr(self):
- """Get the learning rate, applying the learning rate scheduler if one is set.
-
- Returns
- -------
- lr : float
- Learning rate.
- """
- if self.lr_scheduler is not None:
- lr = self.lr_scheduler(self.update_t)
- else:
- lr = self.lr
- return lr
-
-
-class SGD(Optimizer):
- """The SGD optimizer
- """
- def __init__(self, name='SGD', **kwargs):
- super(SGD, self).__init__(name=name, **kwargs)
-
- def minimize(self, obj, var=None):
- variables = var or obj.list_input_variables()
- if not isinstance(variables, list):
- variables = [variables]
- grads = graph_util.gradients(obj, variables)
- updates = []
- lr_t = self._get_lr()
- for v, g in zip(variables, grads):
- g = self.rescale_grad * g
- if self.clip_gradient is not None:
- g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient)
- updates.append(sym._assign(v, v - lr_t * (g + self.wd * v)))
- return sym.Group(updates)
-
-
-class Adam(Optimizer):
- """The Adam optimizer.
-
- This class implements the optimizer described in *Adam: A Method for
- Stochastic Optimization*, available at http://arxiv.org/abs/1412.6980.
- """
- def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999,
- epsilon=1e-8, name='Adam', **kwargs):
- super(Adam, self).__init__(learning_rate=learning_rate, name=name, **kwargs)
- self.beta1 = beta1
- self.beta2 = beta2
- self.epsilon = epsilon
- self.m = []
- self.v = []
-
- def minimize(self, obj, var=None):
- variables = var or obj.list_input_variables()
- if not isinstance(variables, list):
- variables = [variables]
- grads = graph_util.gradients(obj, variables)
- updates = []
- for i, v in enumerate(variables):
- self.m.append(sym.Variable(self.name + '_m' + str(i), init=sym.zeros_like(v)))
- self.v.append(sym.Variable(self.name + '_v' + str(i), init=sym.zeros_like(v)))
- rate = sym.sqrt(1 - self.beta2 ** self.update_t) / (1 - self.beta1 ** self.update_t)
- lr_t = self._get_lr() * rate
- for variable, g, m, v in zip(variables, grads, self.m, self.v):
- g = self.rescale_grad * g
- if self.clip_gradient is not None:
- g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient)
- update_m = sym._assign(m, self.beta1 * m + (1 - self.beta1) * g)
- update_v = sym._assign(v, self.beta2 * v + (1 - self.beta2) * g * g)
- update_var = sym._assign(variable, variable - lr_t * (update_m / (sym.sqrt(update_v) \
- + self.epsilon) + self.wd * variable))
- updates.append(update_var)
- return sym.Group(updates)
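
A sketch of driving one of these optimizers on a toy objective, assuming the pre-removal nnvm package (the import path for this module is assumed):

    import nnvm.symbol as sym
    from nnvm.compiler.optimizer import SGD  # assumed module path

    w = sym.Variable("w")
    loss = sym.sum(sym.elemwise_mul(w, w))   # a scalar objective in w
    opt = SGD(learning_rate=0.1)
    update = opt.minimize(loss, var=w)       # Group of _assign update symbols
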
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Helper utility to save parameter dict"""
-import tvm
-
-_save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict")
-_load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict")
-
-def save_param_dict(params):
- """Save parameter dictionary to binary bytes.
-
- The resulting binary bytes can be loaded by the
- GraphModule with the "load_params" API.
-
- Parameters
- ----------
- params : dict of str to NDArray
- The parameter dictionary.
-
- Returns
- -------
- param_bytes: bytearray
- Serialized parameters.
-
- Examples
- --------
- .. code-block:: python
-
- # compile and save the modules to file.
- graph, lib, params = nnvm.compiler.build(
- graph, target, shape={"data": data_shape}, params=params)
- module = graph_runtime.create(graph, lib, tvm.gpu(0))
- # save the parameters as byte array
- param_bytes = nnvm.compiler.save_param_dict(params)
- # We can serialize the param_bytes and load it back later.
- # Pass in byte array to module to directly set parameters
- module["load_params"](param_bytes)
- """
- args = []
- for k, v in params.items():
- args.append(k)
- args.append(tvm.nd.array(v))
- return _save_param_dict(*args)
-
-
-def load_param_dict(param_bytes):
- """Load parameter dictionary from binary bytes.
-
- Parameters
- ----------
- param_bytes: bytearray
- Serialized parameters.
-
- Returns
- -------
- params : dict of str to NDArray
- The parameter dictionary.
- """
- if isinstance(param_bytes, (bytes, str)):
- param_bytes = bytearray(param_bytes)
- load_arr = _load_param_dict(param_bytes)
- return {v.name : v.array for v in load_arr}
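
A sketch of a save/load round trip, assuming the pre-removal nnvm.compiler package:

    import numpy as np
    import tvm
    import nnvm.compiler

    params = {"w": tvm.nd.array(np.zeros((2, 2), dtype="float32"))}
    blob = nnvm.compiler.save_param_dict(params)
    restored = nnvm.compiler.load_param_dict(blob)
    assert restored["w"].shape == (2, 2)
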
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Module space to register contrib functions. Leave empty"""
+++ /dev/null
-Cython specific implementation of certain modules
\ No newline at end of file
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-ctypedef void* SymbolHandle
-ctypedef void* OpHandle
-ctypedef unsigned nn_uint
-
-cdef py_str(const char* x):
- if PY_MAJOR_VERSION < 3:
- return x
- else:
- return x.decode("utf-8")
-
-
-cdef c_str(pystr):
- """Create a C string from a Python string.
-
- Parameters
- ----------
- pystr : string
- Python string.
-
- Returns
- -------
- out : bytes
- UTF-8 encoded bytes that can be passed as a C char pointer.
- """
- return pystr.encode("utf-8")
-
-
-cdef CALL(int ret):
- if ret != 0:
- raise NNVMError(NNGetLastError())
-
-
-cdef const char** CBeginPtr(vector[const char*]& vec):
- if (vec.size() != 0):
- return &vec[0]
- else:
- return NULL
-
-cdef vector[const char*] SVec2Ptr(vector[string]& vec):
- cdef vector[const char*] svec
- svec.resize(vec.size())
- for i in range(vec.size()):
- svec[i] = vec[i].c_str()
- return svec
-
-
-cdef BuildDoc(nn_uint num_args,
- const char** arg_names,
- const char** arg_types,
- const char** arg_descs,
- remove_dup=True):
- """Convert ctypes-returned doc string information into a parameter docstring.
-
- Parameters
- ----------
- num_args : nn_uint
- Number of arguments.
-
- arg_names : ctypes.POINTER(ctypes.c_char_p)
- Argument names.
-
- arg_types : ctypes.POINTER(ctypes.c_char_p)
- Argument type information.
-
- arg_descs : ctypes.POINTER(ctypes.c_char_p)
- Argument description information.
-
- remove_dup : boolean, optional
- Whether to remove duplicate entries.
-
- Returns
- -------
- docstr : str
- Python docstring of parameter sections.
- """
- param_keys = set()
- param_str = []
- for i in range(num_args):
- key = arg_names[i]
- if key in param_keys and remove_dup:
- continue
- param_keys.add(key)
- type_info = arg_types[i]
- ret = '%s : %s' % (key, type_info)
- if len(arg_descs[i]) != 0:
- ret += '\n ' + py_str(arg_descs[i])
- param_str.append(ret)
- doc_str = ('Parameters\n' +
- '----------\n' +
- '%s\n')
- doc_str = doc_str % ('\n'.join(param_str))
- return doc_str
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import absolute_import as _abs
-
-import sys as _sys
-import ctypes as _ctypes
-from numbers import Number as _Number
-from .._base import NNVMError
-from ..name import NameManager
-from ..attribute import AttrScope
-from libcpp.vector cimport vector
-from libcpp.string cimport string
-from cpython.version cimport PY_MAJOR_VERSION
-
-include "./base.pyi"
-
-cdef extern from "nnvm/c_api.h":
- const char* NNGetLastError();
- int NNListAllOpNames(nn_uint *out_size,
- const char ***out_array);
- int NNGetOpHandle(const char *op_name,
- OpHandle *handle);
- int NNGetOpInfo(OpHandle op,
- const char **name,
- const char **description,
- nn_uint *num_doc_args,
- const char ***arg_names,
- const char ***arg_type_infos,
- const char ***arg_descriptions,
- const char **return_type);
- int NNListOpNames(nn_uint *out_size,
- const char ***out_array);
- int NNSymbolCreateAtomicSymbol(OpHandle op,
- nn_uint num_param,
- const char **keys,
- const char **vals,
- SymbolHandle *out);
- int NNSymbolFree(SymbolHandle symbol);
- int NNSymbolSetAttrs(SymbolHandle symbol,
- nn_uint num_param,
- const char** keys,
- const char** values);
- int NNSymbolCompose(SymbolHandle sym,
- const char* name,
- nn_uint num_args,
- const char** keys,
- SymbolHandle* args);
-
-cdef class SymbolBase:
- """Symbol is a symbolic graph."""
- # handle for symbolic operator.
- cdef SymbolHandle handle
-
- def __init__(self, handle):
- cdef unsigned long ptr
- if handle is None:
- self.handle = NULL
- else:
- ptr = handle.value
- self.handle = <SymbolHandle>(ptr)
-
- def __dealloc__(self):
- CALL(NNSymbolFree(self.handle))
-
- @property
- def handle(self):
- return _ctypes.cast(<unsigned long>self.handle, _ctypes.c_void_p)
-
- def _set_attr(self, **kwargs):
- """Set the attribute of the symbol.
-
- Parameters
- ----------
- **kwargs
- The attributes to set
- """
- SymbolSetAttr(self.handle, kwargs)
-
-
-cdef SymbolSetAttr(SymbolHandle handle, dict kwargs):
- cdef vector[string] sparam_keys
- cdef vector[string] sparam_vals
- cdef nn_uint num_args
- for k, v in kwargs.items():
- sparam_keys.push_back(c_str(k))
- sparam_vals.push_back(c_str(str(v)))
- # keep strings in vector
- cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys)
- cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals)
- num_args = param_keys.size()
- CALL(NNSymbolSetAttrs(
- handle, num_args, CBeginPtr(param_keys), CBeginPtr(param_vals)))
-
-
-_symbol_cls = SymbolBase
-
-cdef _set_symbol_class(cls):
- global _symbol_cls
- _symbol_cls = cls
-
-cdef NewSymbol(SymbolHandle handle):
- """Create a new symbol given a handle."""
- sym = _symbol_cls(None)
- (<SymbolBase>sym).handle = handle
- return sym
-
-cdef _make_atomic_symbol_function(OpHandle handle, string name):
- """Create an atomic symbol function from a handle and function name."""
- cdef const char *real_name
- cdef const char *desc
- cdef nn_uint num_args
- cdef const char** arg_names
- cdef const char** arg_types
- cdef const char** arg_descs
- cdef const char* return_type
-
- CALL(NNGetOpInfo(
- handle, &real_name, &desc,
- &num_args, &arg_names,
- &arg_types, &arg_descs,
- &return_type))
-
- param_str = BuildDoc(num_args, arg_names, arg_types, arg_descs)
- func_name = py_str(name.c_str())
- doc_str = ('%s\n\n' +
- '%s\n' +
- 'Returns\n' +
- '-------\n' +
- 'result: Tensor\n' +
- ' The result Tensor.')
- doc_str = doc_str % (desc, param_str)
- func_hint = func_name.lower()
-
- def creator(*args, **kwargs):
- cdef vector[string] sparam_keys
- cdef vector[string] sparam_vals
- cdef vector[SymbolHandle] symbol_args
- cdef vector[string] ssymbol_keys
- cdef SymbolHandle ret_handle
-
- name = kwargs.pop("name", None)
- attr = kwargs.pop("attr", None)
-
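-        # split the remaining kwargs into symbol inputs and plain string parameters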
- if len(kwargs) != 0:
- for k, v in kwargs.items():
- if isinstance(v, SymbolBase):
- ssymbol_keys.push_back(c_str(k))
- symbol_args.push_back((<SymbolBase>v).handle)
- else:
- sparam_keys.push_back(c_str(k))
- sparam_vals.push_back(c_str(str(v)))
-
- if len(args) != 0:
- if symbol_args.size() != 0:
-                raise TypeError("compose only accepts input Symbols\
-                    either as positional or keyword arguments, not both")
- for v in args:
- if not isinstance(v, SymbolBase):
- raise TypeError('Compose expect `Symbol` as arguments')
- symbol_args.push_back((<SymbolBase>v).handle)
-
- cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys)
- cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals)
- cdef vector[const char*] symbol_keys = SVec2Ptr(ssymbol_keys)
-
- CALL(NNSymbolCreateAtomicSymbol(
- handle,
- <nn_uint>param_keys.size(),
- CBeginPtr(param_keys),
- CBeginPtr(param_vals),
- &ret_handle))
- num_args = <nn_uint>(symbol_args.size())
-
- attr = AttrScope.current.get(attr)
- if attr:
- SymbolSetAttr(ret_handle, attr)
- name = NameManager.current.get(name, func_hint)
-
- cdef const char* c_name = NULL
-
- if name:
- name = c_str(name)
- c_name = name
-
- CALL(NNSymbolCompose(
- ret_handle,
- c_name,
- num_args,
- &symbol_keys[0] if symbol_keys.size() != 0 else NULL,
- &symbol_args[0] if symbol_args.size() != 0 else NULL))
- return NewSymbol(ret_handle)
-
- creator.__name__ = func_name
- creator.__doc__ = doc_str
- return creator
-
-
-def _init_symbol_module(symbol_class, root_namespace):
- """List and add all the atomic symbol functions to current module."""
- cdef const char** op_name_ptrs
- cdef nn_uint size
- cdef vector[string] op_names
- cdef OpHandle handle
-
- _set_symbol_class(symbol_class)
- CALL(NNListAllOpNames(&size, &op_name_ptrs))
- for i in range(size):
- op_names.push_back(string(op_name_ptrs[i]));
- module_obj = _sys.modules["%s.symbol" % root_namespace]
- module_internal = _sys.modules["%s._symbol_internal" % root_namespace]
- for i in range(op_names.size()):
- CALL(NNGetOpHandle(op_names[i].c_str(), &handle))
- function = _make_atomic_symbol_function(handle, op_names[i])
- if function.__name__.startswith('_'):
- setattr(module_internal, function.__name__, function)
- setattr(module_obj, function.__name__, function)
- else:
- setattr(module_obj, function.__name__, function)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""NNVM frontends."""
-from __future__ import absolute_import
-from .mxnet import from_mxnet
-from .onnx import from_onnx
-from .coreml import from_coreml
-from .keras import from_keras
-from .darknet import from_darknet
-from .tensorflow import from_tensorflow
-from .caffe2 import from_caffe2
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, line-too-long, unused-argument
-"""Caffe2 frontend"""
-from __future__ import absolute_import as _abs
-import tvm
-from nnvm import symbol as _sym
-from .common import get_nnvm_op, Renamer, AttrConverter as AttrCvt
-from .onnx_caffe2_utils import dimension_picker, dimension_constraint, infer_channels, revert_caffe2_pad
-from . import onnx
-
-__all__ = ['from_caffe2']
-
-
-def _clean_up_pool_args(args):
- """ A helper function to clean up common arguments in conv and pooling ops.
- """
- assert isinstance(args, dict)
-
- if 'stride_h' in args and 'stride_w' in args:
- assert 'stride' not in args and 'strides' not in args
- args['strides'] = [args['stride_h'], args['stride_w']]
- args.pop('stride_h')
- args.pop('stride_w')
- elif 'stride' in args:
- args['strides'] = [args['stride'], args['stride']]
- args.pop('stride')
-
- # rename 'kernel', 'kernels', to 'kernel_shape'
- if 'kernel_h' in args and 'kernel_w' in args:
- assert 'kernel' not in args and 'kernels' not in args
- args['kernel_shape'] = [args['kernel_h'], args['kernel_w']]
- args.pop('kernel_h')
- args.pop('kernel_w')
- elif 'kernel' in args:
- args['kernel_shape'] = [args['kernel'], args['kernel']]
- args.pop('kernel')
- elif 'kernels' in args:
- args['kernel_shape'] = args['kernels']
- args.pop('kernels')
-
- if 'pad_t' in args and 'pad_l' in args and 'pad_b' in args and 'pad_r' in args:
- assert 'pad' not in args and 'pads' not in args
- args['pads'] = [
- args['pad_t'], args['pad_l'], args['pad_b'], args['pad_r']
- ]
- for pad in ['pad_t', 'pad_l', 'pad_b', 'pad_r']:
- args.pop(pad)
- elif 'pad' in args:
- args['pads'] = [args['pad'], args['pad']]
- args.pop('pad')
-
- if 'dilation_h' in args and 'dilation_w' in args:
- assert 'dilation' not in args and 'dilations' not in args
- args['dilations'] = [args['dilation_h'], args['dilation_w']]
- args.pop('dilation_h')
- args.pop('dilation_w')
- elif 'dilation' in args:
- args['dilations'] = [args['dilation'], args['dilation']]
- args.pop('dilation')
-
- return args
-
-
-class Caffe2OpConverter(object):
- """ A helper class for holding Caffe2 op converters.
- """
-
- @classmethod
- def get_converter(cls):
- """ Get converter.
-
- :return: converter, which should be `_impl`.
- """
-
- if hasattr(cls, '_impl'):
- return getattr(cls, '_impl')
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not implemented in frontend Caffe2.'.format(cls.__name__))
-
-
-_caffe2_internal_args = {
- # nnpack args
- 'algo',
- 'convolution_transform_strategy',
- 'float16_compute',
- 'shared_buffer',
-
- # training args
- 'init_params',
- 'cudnn_exhaustive_search',
- 'exhaustive_search',
-
- # training args
- 'adj',
- 'hwgq',
-
-    # args that we don't care about
- 'legacy_pad',
-}
-
-
-class Pool(Caffe2OpConverter):
- """ A helper class for pool op converters.
- """
-
- name = ''
-
- @classmethod
- def _impl(cls, inputs, args, params):
- _clean_up_pool_args(args)
- if 'global_pooling' in args and args['global_pooling'] == 1:
- op_name = dimension_picker('global_' + cls.name)
- return get_nnvm_op(op_name(args))(*inputs)
-
- return AttrCvt(
- op_name=dimension_picker(cls.name),
- transforms={
- 'kernel_shape': 'pool_size',
- 'pads': ('padding', (0, 0), revert_caffe2_pad),
- 'strides': 'strides',
- },
- excludes={
- # TVM poolop does not support dilation
- 'dilations',
- },
- ignores=_caffe2_internal_args | {'global_pooling', 'order'},
- custom_check=dimension_constraint())(inputs, args, params)
-
-
-class AveragePool(Pool):
- name = 'avg_pool'
-
-
-class MaxPool(Pool):
- name = 'max_pool'
-
-
-class Conv(Caffe2OpConverter):
- """ Operator converter for Conv.
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- # get number of channels
- channels = infer_channels(inputs[1], params)
- args['channels'] = channels
- _clean_up_pool_args(args)
- return AttrCvt(
- op_name=dimension_picker('conv'),
- transforms={
- 'group': ('groups', 1),
- 'kernel_shape':
- 'kernel_size',
- 'pads': ('padding', (0, 0), revert_caffe2_pad),
- 'strides':
- 'strides',
- 'dilations': ('dilation', (1, 1)),
- 'order':
- ('layout', ("NCHW"),
- lambda x: x if isinstance(x, str) else x.decode('UTF-8')),
- },
- excludes={},
- ignores=_caffe2_internal_args,
- extras={'use_bias': len(inputs) == 3},
- custom_check=dimension_constraint())(inputs, args, params)
-
-
-class Concat(Caffe2OpConverter):
- """ Operator converter for Concat.
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- def _get_axis_from_order_str(order):
- order = order if isinstance(order, str) else order.decode('UTF-8')
- if order == 'NCHW':
- return 1
- if order == 'NHWC':
- return 3
- raise tvm.error.OpAttributeInvalid('Value {} in attribute {} of operator {} is not valid.'.format(order, 'order', 'Concat'))
-
- return AttrCvt(
- op_name='concatenate',
- transforms={
- 'order': ('axis', (1), _get_axis_from_order_str),
- },
- excludes={
- 'add_axis',
- })(inputs, args, params)
-
-
-class NormalizePlanarYUV(Caffe2OpConverter):
- """ Operator converter for NormalizePlanarYUV.
- caffe2 definition: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/norm_planar_yuv_op.cc
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- assert len(inputs) == 3
- mean = _sym.expand_dims(inputs[1], axis=2, num_newaxis=2)
- std = _sym.expand_dims(inputs[2], axis=2, num_newaxis=2)
-
- return _sym.broadcast_div(_sym.broadcast_sub(inputs[0], mean), std)
-
-
-class ResizeNearest(Caffe2OpConverter):
- """ Operator converter for Upsample (nearest mode).
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- width_scale = args['width_scale'] if 'width_scale' in args else 1
- height_scale = args['height_scale'] if 'height_scale' in args else 1
- assert width_scale == height_scale
-
- return _sym.upsampling(
- inputs[0], scale=int(width_scale), method="NEAREST_NEIGHBOR")
-
-
-class FC(Caffe2OpConverter):
- """ Operator converter for FC.
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- inputs[0] = _sym.flatten(inputs[0])
- args['units'] = infer_channels(inputs[1], params)
- return AttrCvt(
- 'dense',
- ignores=['axis', 'axis_w'],
- extras={'use_bias': len(inputs) == 3},
- )(inputs, args, params)
-
-
-class SpatialBN(Caffe2OpConverter):
- """ Operator converter for SpatialBN.
- """
-
- @classmethod
- def _impl(cls, inputs, args, params):
- return AttrCvt(
- op_name='batch_norm',
- disables=['momentum'],
- ignores=[
- 'order', 'spatial', 'is_test', 'consumed_inputs', 'num_batches'
- ])(inputs, args, params)
-
-
-# compatible operators that do NOT require any conversion.
-_identity_list = []
-
-# _convert_map defines maps of name to converter functor(callable)
-# for 1 to 1 mapping, use Renamer if nothing but name is different
-# use AttrCvt if attributes need to be converted
-# for 1 to N mapping(composed), use custom callable functions
-# for N to 1 mapping, currently not supported(?)
-
-# Minimal set of ops for squeezenet and resnet50
-def _get_convert_map():
- return {
- # caffe2/onnx common operators
- 'Add': onnx.Add.get_converter(opset=1),
- 'Sum': onnx.Sum.get_converter(opset=1),
- 'Softmax': onnx.Softmax.get_converter(opset=1),
-
- # nn
- 'AveragePool': AveragePool.get_converter(),
- 'MaxPool': MaxPool.get_converter(),
- 'Conv': Conv.get_converter(),
- 'Concat': Concat.get_converter(),
- 'FC': FC.get_converter(),
- 'SpatialBN': SpatialBN.get_converter(),
- 'ResizeNearest': ResizeNearest.get_converter(),
- 'Relu': AttrCvt('relu', {}, ignores=['order']),
- 'Sigmoid': Renamer('sigmoid'),
- 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']),
-
- # c2 image preprocessing ops
- 'NormalizePlanarYUV': NormalizePlanarYUV.get_converter(),
- }
-
-
-class Caffe2NetDef(object):
- """A helper class for handling nnvm graph copying from pb2.GraphProto.
- Definition: https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto
- """
-
- def __init__(self):
- self._nodes = {}
- self._params = {}
- self._visited_nodes = set()
- self._ops = {}
-
- def from_caffe2(self, init_net, predict_net):
- """Construct nnvm nodes from caffe2 graph.
-
- Parameters
- ----------
-        init_net : protobuf object
-            Caffe2 NetDef containing the weights
-        predict_net : protobuf object
-            Caffe2 NetDef containing the graph
-
- Returns
- -------
- sym : nnvm.sym.Symbol
- The returned nnvm symbol
- params : dict
- A dict of name: tvm.nd.array pairs, used as pretrained weights
- """
- from caffe2.python import workspace
- workspace.RunNetOnce(init_net)
-
- # Input
- input_name = predict_net.op[0].input[0]
-
- # Params
- self._params = {}
- used_blobs = set()
- for c2_op in predict_net.op:
- for i in c2_op.input:
- used_blobs.add(i)
- for blob in workspace.Blobs():
- if blob in used_blobs and blob != input_name:
- self._params[blob] = tvm.nd.array(workspace.FetchBlob(blob))
-
- # Variables
- self._nodes = {}
- for blob in predict_net.external_input:
- self._nodes[blob] = _sym.Variable(name=blob)
-
- # Ops
- for c2_op in predict_net.op:
- for blob in c2_op.output:
- self._ops[blob] = c2_op
- for c2_op in predict_net.op:
- self._process_op(c2_op)
-
- # Outputs
- out = []
- for blob in predict_net.external_output:
- out.append(self._nodes[blob])
-
- if len(out) > 1:
- sym = _sym.Group(out)
- else:
- sym = out[0]
-
- return sym, self._params
-
- def _get_node(self, blob):
- """Get the nnvm Symbol of blob and detect cyclic dependency in the graph."""
- if blob in self._nodes:
- return self._nodes[blob]
-
- assert blob not in self._visited_nodes, 'Cyclic dependency in the graph (in {})'.format(
- blob)
- self._visited_nodes.add(blob)
-
- self._process_op(self._ops[blob])
- return self._nodes[blob]
-
- def _process_op(self, c2_op):
- op_type = c2_op.type
- args = self._parse_arg(c2_op.arg)
- inputs = [self._get_node(i) for i in c2_op.input]
- tvm_op = self._convert_operator(op_type, inputs, args)
- # Ignore all outputs except the first one
- self._nodes[c2_op.output[0]] = tvm_op[0]
-
- def _parse_arg(self, arg):
- """Convert a list of Argument to a dict, with names as keys."""
- args = {}
- for a in arg:
- for f in ['f', 'i', 's']:
- if a.HasField(f):
- args[a.name] = getattr(a, f)
- for f in ['floats', 'ints', 'strings']:
- if list(getattr(a, f)):
- assert a.name not in args, "Only one type of attr is allowed"
- args[a.name] = tuple(getattr(a, f))
- for f in ['n']:
- if a.HasField(f):
- raise NotImplementedError(
- "Field {} is not supported in nnvm.".format(f))
- for f in ['nets']:
- if list(getattr(a, f)):
- raise NotImplementedError(
- "Field {} is not supported in nnvm.".format(f))
- if a.name not in args:
- raise ValueError("Cannot parse attribute: \n{}\n.".format(a))
- return args
-
- def _convert_operator(self,
- op_type,
- inputs,
- args,
- identity_list=None,
- convert_map=None):
- """Convert from Caffe2 operator to nnvm operator.
-        The converter must specify conversions explicitly for incompatible names, and
-        apply handlers to operator attributes.
-
- Parameters
- ----------
- op_type : str
- Operator name, such as Convolution, FullyConnected
- inputs : list of nnvm.Symbol
- List of input symbols.
- args : dict
- Dict of operator attributes
- identity_list : list
- List of operators that don't require conversion
- convert_map : dict
-            Dict of name : callable, where name is the name of an op that
-            requires conversion to nnvm and callable is a function that
-            takes args and returns (new_op_type, new_args)
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
- identity_list = identity_list if identity_list else _identity_list
- convert_map = convert_map if convert_map else _get_convert_map()
- if op_type in identity_list:
- sym = get_nnvm_op(op_type)(*inputs, **args)
- elif op_type in convert_map:
- # Add a sanitizing step to convert all byte strings in args to strings
- sym = convert_map[op_type](inputs, args, self._params)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Caffe2.'.format(op_type))
- return sym
-
-
-def from_caffe2(init_net, predict_net):
- """Load caffe2 graph which contains init_net and predict_net into nnvm graph.
-
- Parameters
- ----------
- init_net : protobuf object
- Caffe2 NetDef containing the weights
-
- predict_net : protobuf object
- Caffe2 NetDef containing the graph
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.ndarray
- Dict of converted parameters stored in tvm.ndarray format
- """
-
- caffe2 = Caffe2NetDef()
- return caffe2.from_caffe2(init_net, predict_net)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Shared functions and classes for frontends."""
-from __future__ import absolute_import as _abs
-import logging
-from nnvm import sym as _sym
-from .._base import string_types
-
-def get_nnvm_op(op_name):
- op = getattr(_sym, op_name)
- if not op:
- raise OpNotImplemented(
- 'Operator {} is not supported.'.format(op))
- return op
-
-def required_attr(attr, key, op_name):
- assert isinstance(attr, dict)
- if key not in attr:
- raise OpAttributeRequired(
- 'Required attribute {} not found in operator {}'.format(key, op_name))
- return attr[key]
-
-def parse_tshape(tshape):
- """Parse tshape in string."""
- return [int(x.strip()) for x in tshape.strip('()').split(',')]
-
-def parse_bool_str(attr, key, default='False'):
- """Parse bool string to boolean."""
- return attr.get(key, default).strip().lower() in ['true', '1', 't', 'y', 'yes']
-
-class Renamer(object):
-    """A simple renamer for operators.
-
- Parameters
- ----------
- new_name : str
- The new name for the operator
- """
- def __init__(self, new_name):
- self._new_name = new_name
-
- def __call__(self, inputs, attrs, *args):
- return get_nnvm_op(self._new_name)(*inputs, **attrs)
-
-
-class AttrConverter(object):
- """Common attribute converter. An AttrConverter instance is a callable:
- ```
- attr_converter = AttrConverter(op_name, transforms={'a':'b', 'c':('d', 1)})
- new_op_name, new_attr = attr_converter(attrs)
- ```
-
- Parameters
- ----------
- op_name : str or callable
- If set as str, returned operator name is the str.
- If set as callable, returned operator is the str returned by calling:
- `op_name = func(attr)`
- transforms : dict of `new_name, or (new_name, default_value, transform function)`
-        If only a new_name is provided, it's like renaming the attribute name.
-        If default_value is provided, then the attribute is considered optional.
-        If a transform function is provided, the original attribute value is handled
-        by the transform function.
-    excludes : list
-        A list of excluded attributes that should `NOT` appear.
-        NotImplementedError is raised if one occurs.
-    disables : list
-        A list of attributes that are disabled in nnvm. Warnings are logged.
-    ignores : list
-        A list of attributes that are ignored in nnvm. Logged at debug level.
-    extras : dict
-        Additional attributes that should always be added to the returned
-        attribute dict.
-    custom_check : callable
-        A custom function that takes the attributes and returns True/False.
-        RuntimeError is raised if it does not return True.
- """
- def __init__(self, op_name, transforms=None,
- excludes=None, disables=None, ignores=None,
- extras=None, custom_check=None):
- self._op_name = op_name
- self._transforms = transforms if transforms else {}
- self._excludes = excludes if excludes else []
- self._disables = disables if disables else []
- self._ignores = ignores if ignores else []
- self._extras = extras if extras else {}
- self._custom_check = custom_check
-
- def __call__(self, inputs, attrs, *args):
- # apply custom check
- if self._custom_check:
- func, msg = self._custom_check
- if not func(attrs):
- raise RuntimeError("Check failed: {}".format(msg))
- # get new op_name
- if isinstance(self._op_name, string_types):
- op_name = self._op_name
- else:
- assert callable(self._op_name), "op_name can either be string or callable"
- op_name = self._op_name(attrs)
- # convert attributes
- new_attrs = {}
- for k in attrs.keys():
- if k in self._excludes:
- raise NotImplementedError("Attribute {} not supported yet.".format(k))
- elif k in self._disables:
- logging.warning("Attribute %s is disabled in nnvm.sym.%s", k, op_name)
- elif k in self._ignores:
- logging.debug("Attribute %s is ignored in nnvm.sym.%s", k, op_name)
- elif k in self._transforms:
- new_name, defaults, transform = self._parse_default(self._transforms[k])
- if defaults is None:
- new_attr = self._required_attr(attrs, k)
- else:
- new_attr = attrs.get(k, None)
- if new_attr is None:
- new_attrs[new_name] = defaults
- else:
- new_attrs[new_name] = transform(new_attr)
- else:
- # copy
- new_attrs[k] = attrs[k]
- # add extras
- new_attrs.update(self._extras)
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
- def _parse_default(self, target):
- """Helper function to parse default values."""
- if not isinstance(target, (list, tuple)):
- k, v, t = target, None, lambda x: x
- elif len(target) == 1:
- k, v, t = target[0], None, lambda x: x
- elif len(target) == 2:
- k, v, t = target[0], target[1], lambda x: x
- elif len(target) > 2:
- k, v, t = target[0], target[1], target[2]
- else:
- k = None # should raise
- if not isinstance(k, string_types):
- msg = "{} is not a valid target, (name, default) expected.".format(target)
- raise ValueError(msg)
- return k, v, t
-
- def _parse_bool(self, value):
- """Helper function to parse default boolean values."""
- if isinstance(value, string_types):
- return value.strip().lower() in ['true', '1', 't', 'y', 'yes']
- return bool(value)
-
- def _required_attr(self, attr, key):
- """Wrapper for getting required attributes."""
- assert isinstance(attr, dict)
- if key not in attr:
- raise AttributeError("Required attribute {} not found.".format(key))
- return attr[key]
-
-
-class SymbolTable(object):
- """Table storing symbols by names."""
- def __init__(self):
- self.vars = {}
- self.params = {}
- self.const_ctr = 1
- self.in_padding = False
- self.paddings = [0, 0]
-
- def new_const(self, value):
- name = "_param_%d" % (self.const_ctr)
- self.const_ctr += 1
- self.params[name] = value
- self.vars[name] = _sym.Variable(name=name)
- return self.vars[name]
-
- def get_var(self, name, must_contain=True):
- if must_contain:
- assert name in self.vars
- if name not in self.vars:
- self.vars[name] = _sym.Variable(name=name)
- return self.vars[name]
-
- def set_var(self, name, sym):
- assert isinstance(sym, _sym.Symbol)
- self.vars[name] = sym
-
- def set_padding(self, paddings):
- self.paddings = paddings
- self.in_padding = True
-
- def clear_padding(self):
- self.in_padding = False
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""CoreML frontend."""
-from __future__ import absolute_import as _abs
-import numpy as np
-import tvm
-from .common import SymbolTable
-from .. import symbol as _sym
-from .._base import string_types
-
-__all__ = ['from_coreml']
-
-
-def NeuralNetworkImageScaler(op, insym, symtab):
- # this changes the symbol
- biases = np.array([op.blueBias, op.greenBias, op.redBias]).reshape([3, 1, 1])
- bias = symtab.new_const(biases)
- ret = _sym.__mul_scalar__(insym, scalar=op.channelScale)
- ret = _sym.broadcast_add(ret, bias)
- return ret
-
-
-def NeuralNetworkMeanImage(op, insym, symtab):
- # this changes the symbol
- ret = _sym.elemwise_sub(insym, scalar=op.meanImage)
- return ret
-
-
-def ConvolutionLayerParams(op, insym, symtab):
- """Convolution layer params."""
- weights = symtab.new_const(np.array(list(op.weights.floatValue)).reshape(
- tuple([op.outputChannels, op.kernelChannels] + list(op.kernelSize))))
- if op.hasBias:
- biases = symtab.new_const(list(op.bias.floatValue))
- dilation = list(op.dilationFactor)
- if not dilation:
- dilation = [1, 1]
- params = {'channels':op.outputChannels,
- 'kernel_size':list(op.kernelSize),
- 'strides':list(op.stride),
- 'dilation': dilation,
- 'use_bias': op.hasBias,
- 'groups':op.nGroups}
-
- if op.WhichOneof('ConvolutionPaddingType') == 'valid':
- valid = op.valid
- padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts]
- padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts]
- for i, j in zip(padding, padding2):
- assert i == j, "Asymmetry padding not supported"
- if padding:
- params['padding'] = padding
- elif op.WhichOneof('ConvolutionPaddingType') == 'same':
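-        # derive symmetric padding from the kernel size for CoreML 'same' padding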
- kernel = params['kernel_size']
- pad_h = kernel[0] - 1
- pad_w = kernel[1] - 1
- pad_t = pad_h // 2
- pad_l = pad_w // 2
- pad_b = pad_h - pad_t
- pad_r = pad_w - pad_l
- assert pad_t == pad_r and pad_l == pad_b, "Asymmetry padding not supported"
- params['padding'] = [pad_t, pad_l]
- else:
-        raise NotImplementedError("Only valid/same convolution padding is implemented")
-
- if op.hasBias:
- pos = [insym, weights, biases]
- else:
- pos = [insym, weights]
-
- # consume padding layer
- if symtab.in_padding:
- params['padding'] = [sum(x) for x in zip(params.get('padding', [0, 0]), symtab.paddings)]
- symtab.clear_padding()
-
- if op.isDeconvolution:
- ret = _sym.conv2d_transpose(*pos, **params)
- else:
- ret = _sym.conv2d(*pos, **params)
- return ret
-
-def BatchnormLayerParams(op, insym, symtab):
- """Get layer of batchnorm parameter"""
- # this changes the symbol
- if op.instanceNormalization:
- msg = 'Operator "instance normalization" is not supported in frontend CoreML.'
- raise tvm.error.OpNotImplemented(msg)
- else:
- params = {'gamma':symtab.new_const(list(op.gamma.floatValue)),
- 'beta':symtab.new_const(list(op.beta.floatValue)),
- 'moving_mean':symtab.new_const(list(op.mean.floatValue)),
- 'moving_var': symtab.new_const(list(op.variance.floatValue)),
- 'epsilon': op.epsilon}
- return _sym.batch_norm(data=insym, **params)
-
-def ActivationParams(op, insym, symtab):
- """Get activation parameters"""
- whichActivation = op.WhichOneof('NonlinearityType')
- par = getattr(op, whichActivation)
- if whichActivation == 'linear':
- return _sym.__add_scalar__(_sym.__mul_scalar__(insym, scalar=par.alpha), scalar=par.beta)
- if whichActivation == 'ReLU':
- return _sym.relu(insym)
- if whichActivation == 'leakyReLU':
- return _sym.leaky_relu(insym, alpha=par.alpha)
- if whichActivation == 'thresholdedReLU':
- alpha_tensor = _sym.full_like(insym, fill_value=float(par.alpha))
- return _sym.elemwise_mul(insym, _sym.greater(insym, alpha_tensor))
- if whichActivation == 'PReLU':
- return _sym.prelu(insym, alpha=par.alpha)
- if whichActivation == 'tanh':
- return _sym.tanh(insym)
- if whichActivation == 'scaledTanh':
- return _sym.__mul_scalar__(_sym.tanh(_sym.__mul_scalar__(
- insym, scalar=par.beta)), scalar=par.alpha)
- if whichActivation == 'sigmoid':
- return _sym.sigmoid(insym)
- if whichActivation == 'sigmoidHard':
- transformX = (par.alpha * insym) + par.beta
- return _sym.clip(transformX, a_min=0, a_max=1)
- if whichActivation == 'ELU':
- return _sym.__mul_scalar__(_sym.__add_scalar__(
- _sym.exp(insym), scalar=-1), scalar=par.alpha)
- if whichActivation == 'softsign':
- return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym))))
- if whichActivation == 'softplus':
- return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1))
- if whichActivation == 'parametricSoftplus':
- alpha = list(par.alpha.floatValue)
-        beta = list(par.beta.floatValue)
- if len(alpha) == 1:
- return _sym.__mul_scalar__(_sym.log(_sym.__add_scalar__(
- _sym.exp(insym), scalar=beta[0])), scalar=alpha[0])
- alpha = np.array(alpha).reshape((len(alpha), 1, 1))
- beta = np.array(beta).reshape((len(beta), 1, 1))
- alphasym = symtab.new_const(alpha)
- betasym = symtab.new_const(beta)
- return _sym.broadcast_mul(_sym.log(_sym.broadcast_add(
- _sym.exp(insym), betasym)), alphasym)
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend CoreML.'.format(whichActivation))
-
-def ScaleLayerParams(op, insym, symtab):
- """Scale layer params."""
- scale = symtab.new_const(np.array(list(op.scale.floatValue)).reshape(
- tuple(list(op.shapeScale) + [1, 1])))
- # scale = _sym.reshape(scale, shape=tuple(list(op.shapeScale) + [1,1]))
- ret = _sym.broadcast_mul(insym, scale)
- if op.hasBias:
- bias = symtab.new_const(np.array(list(op.bias.floatValue)).reshape(
- tuple(list(op.shapeBias) + [1, 1])))
- # bias = _sym.reshape(bias, shape=tuple(list(op.shapeBias) + [1,1]))
- ret = _sym.broadcast_add(ret, bias)
- return ret
-
-def PoolingLayerParams(op, insym, symtab):
- """get pooling parameters"""
- if op.globalPooling:
- if op.type == 0:
- return _sym.global_max_pool2d(insym)
- if op.type == 1:
- return _sym.global_avg_pool2d(insym)
- raise tvm.error.OpNotImplemented(
- 'Operator pooling (not max or average) is not supported in frontend CoreML.')
-
- else:
- params = {'pool_size':list(op.kernelSize),
- 'strides':list(op.stride)}
-
- if op.WhichOneof('PoolingPaddingType') == 'valid':
- valid = op.valid
- padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts]
- padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts]
- for i, j in zip(padding, padding2):
- assert i == j
- params['padding'] = padding
- elif op.WhichOneof('PoolingPaddingType') == 'includeLastPixel':
- # I don't know if this is correct
- valid = op.includeLastPixel
- padding = list(valid.paddingAmounts)
- params['padding'] = padding
- params['ceil_mode'] = True
- else:
- msg = 'Value {} in attribute PoolingPaddingType of operator Pooling is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(op.WhichOneof('PoolingPaddingType')))
-
- # consume padding layer
- if symtab.in_padding:
- params['padding'] = [sum(x) for x in zip(
- params.get('padding', [0, 0]), symtab.paddings)]
- symtab.clear_padding()
-
- if op.type == 0:
- return _sym.max_pool2d(insym, **params)
- if op.type == 1:
- return _sym.avg_pool2d(insym, **params)
- msg = 'Operator pooling (not max or average) is not supported in frontend CoreML.'
- raise tvm.error.OpNotImplemented(msg)
-
-def SoftmaxLayerParams(op, insym, symtab):
- return _sym.softmax(_sym.flatten(insym))
-
-def InnerProductLayerParams(op, insym, symtab):
- weights = symtab.new_const(np.array(op.weights.floatValue).reshape(
- (op.outputChannels, op.inputChannels)))
- par = {'weight':weights, 'use_bias':False, 'units':op.outputChannels}
- if op.hasBias:
- bias = symtab.new_const(np.array(op.bias.floatValue))
- par['bias'] = bias
- par['use_bias'] = True
- return _sym.dense(data=insym, **par)
-
-def AddLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list):
- insyms = [insyms]
- ret = insyms[0]
- for i in range(1, len(insyms)):
- ret = _sym.elemwise_add(ret, insyms[i])
- if op.alpha > 0:
- ret = _sym.__add_scalar__(ret, scalar=op.alpha)
- return ret
-
-def MultiplyLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list):
- insyms = [insyms]
- ret = insyms[0]
- for i in range(1, len(insyms)):
- ret = _sym.elemwise_mul(ret, insyms[i])
- if op.alpha != 1:
- ret = _sym.__mul_scalar__(ret, scalar=op.alpha)
- return ret
-
-def ConcatLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list):
- insyms = [insyms]
- if op.sequenceConcat:
- raise tvm.error.OpNotImplemented(
- 'Operator Sequence Concat is not supported in frontend CoreML.')
- ret = _sym.concatenate(*insyms, axis=1)
- return ret
-
-def FlattenLayerParams(op, insym, symtab):
- if op.mode == 1:
- insym = _sym.transpose(_sym.reshape(insym, shape=(0, 0, -1)), axes=(0, 2, 1))
- return _sym.flatten(insym)
-
-def PaddingLayerParams(op, insym, symtab):
- """Hacking for padding layer params."""
- if op.WhichOneof('PaddingType') == 'constant':
- constant = op.constant
- if constant.value != 0:
- msg = 'Value {} in attribute "padding value" of operator Padding is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(constant.value))
- padding = [b.startEdgeSize for b in op.paddingAmounts.borderAmounts]
- padding2 = [b.endEdgeSize for b in op.paddingAmounts.borderAmounts]
- for i, j in zip(padding, padding2):
- assert i == j
- symtab.set_padding(padding)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator "non-constant padding" is not supported in frontend CoreML.')
- return insym
-
-def PermuteLayerParams(op, insym, symtab):
- axes = tuple(op.axis)
- return _sym.transpose(insym, axes=axes)
-
-def UpsampleLayerParams(op, insym, symtab):
- if op.scalingFactor[0] != op.scalingFactor[1]:
- raise tvm.error.OpAttributeInvalid(
- 'Height and width scaling factors of Upsample operator must be equal.')
- interpolationMode = 'NEAREST_NEIGHBOR' if op.mode == 0 else 'BILINEAR'
- return _sym.upsampling(insym, scale=op.scalingFactor[0], method=interpolationMode)
-
-def L2NormalizeLayerParams(op, insym, symtab):
- return _sym.l2_normalize(insym, eps=op.epsilon, axis=1)
-
-def LRNLayerParams(op, insym, symtab):
- par = {}
- par['size'] = op.localSize
- par['bias'] = op.k
- par['alpha'] = op.alpha
- par['beta'] = op.beta
- par['axis'] = 1 #default layout is nchw
- return _sym.lrn(data=insym, **par)
-
-def AverageLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list) or len(insyms) < 2:
- raise ValueError("Expect minimum 2 inputs")
- count = len(insyms)
- _sum = insyms[0]
- for i in range(1, count):
- _sum = _sym.broadcast_add(_sum, insyms[i])
- return _sum / count
-
-def MaxLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list) or len(insyms) < 2:
- raise ValueError("Expect minimum 2 inputs")
- _max = insyms[0]
- for i in range(1, len(insyms)):
- _max = _sym.broadcast_max(_max, insyms[i])
- return _max
-
-def MinLayerParams(op, insyms, symtab):
- if not isinstance(insyms, list) or len(insyms) < 2:
- raise ValueError("Expect minimum 2 inputs")
- _min = insyms[0]
- for i in range(1, len(insyms)):
- _min = _sym.broadcast_min(_min, insyms[i])
- return _min
-
-_convert_map = {
- 'NeuralNetworkMeanImage': NeuralNetworkMeanImage,
- 'NeuralNetworkImageScaler': NeuralNetworkImageScaler,
- 'ConvolutionLayerParams':ConvolutionLayerParams,
- 'BatchnormLayerParams':BatchnormLayerParams,
- 'ActivationParams':ActivationParams,
- 'ScaleLayerParams':ScaleLayerParams,
- 'PoolingLayerParams':PoolingLayerParams,
- 'SoftmaxLayerParams':SoftmaxLayerParams,
- 'InnerProductLayerParams':InnerProductLayerParams,
- 'AddLayerParams':AddLayerParams,
- 'MultiplyLayerParams':MultiplyLayerParams,
- 'FlattenLayerParams':FlattenLayerParams,
- 'ConcatLayerParams':ConcatLayerParams,
- 'PaddingLayerParams':PaddingLayerParams,
- 'PermuteLayerParams':PermuteLayerParams,
- 'UpsampleLayerParams':UpsampleLayerParams,
- 'L2NormalizeLayerParams':L2NormalizeLayerParams,
- 'LRNLayerParams':LRNLayerParams,
- 'AverageLayerParams':AverageLayerParams,
- 'MaxLayerParams':MaxLayerParams,
- 'MinLayerParams':MinLayerParams,
-}
-
-def coreml_op_to_nnvm(op, inname, outname, symtab):
-    """Convert a CoreML layer to an nnvm layer and register it in the symbol table.
-
-    Parameters
-    ----------
-    op : CoreML protobuf layer object
-        The layer to convert
-    inname : str or list of str
-        Name(s) of the input blob(s)
-    outname : str
-        Name of the output blob under which the converted symbol is stored
-    symtab : SymbolTable
-        Table keeping track of converted symbols and parameters
-    """
- classname = type(op).__name__
- if classname not in _convert_map:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend CoreML.'.format(classname))
- if isinstance(inname, string_types):
- insym = symtab.get_var(inname)
- else:
- insym = [symtab.get_var(i) for i in inname]
- ret = _convert_map[classname](op, insym, symtab)
- if outname:
- symtab.set_var(outname, ret)
- if classname != 'PaddingLayerParams':
- assert not symtab.in_padding, "Previous padding not consumed by conv/pool"
-
-def from_coreml(model):
- """Convert from coreml model into NNVM format.
-
- Parameters
- ----------
- model:
- coremltools.models.MLModel of a NeuralNetworkClassifier
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.NDArray
- The parameter dict to be used by nnvm
- """
- try:
- import coremltools as cm
- except ImportError:
- raise ImportError('The coremltools package must be installed')
-
- assert isinstance(model, cm.models.MLModel)
- spec = model.get_spec()
- modeltype = spec.WhichOneof('Type')
- assert modeltype in ['neuralNetworkClassifier', 'neuralNetwork', 'neuralNetworkRegressor']
- cc = getattr(spec, modeltype)
-
- symtab = SymbolTable()
- for i in spec.description.input:
- symtab.get_var(i.name, must_contain=False)
-
- for pp in cc.preprocessing:
- whichpp = pp.WhichOneof('preprocessor')
- ppmethod = getattr(pp, whichpp)
-        # the NeuralNetworkImageScaler doesn't seem to have a featureName?
- if whichpp == 'scaler':
- for i in spec.description.input:
- coreml_op_to_nnvm(ppmethod, i.name, i.name, symtab)
- else:
- coreml_op_to_nnvm(ppmethod, pp.featureName, pp.featureName, symtab)
-
- for l in cc.layers:
- layertype = l.WhichOneof('layer')
- layerop = getattr(l, layertype)
- assert len(l.output) == 1
- if len(l.input) == 1:
- coreml_op_to_nnvm(layerop, l.input[0], l.output[0], symtab)
- else:
- coreml_op_to_nnvm(layerop, list(l.input), l.output[0], symtab)
- returns = [symtab.get_var(i.name, must_contain=False) for i in spec.description.output]
- tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()}
- # for now return first output
- return returns[0], tvmparams
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-DarkNet symbol frontend.
-"""
-
-from __future__ import absolute_import as _abs
-import numpy as np
-import tvm
-from .. import symbol as _sym
-from .common import get_nnvm_op, required_attr, parse_tshape, parse_bool_str
-
-class LAYERTYPE(object):
- """Darknet LAYERTYPE Class constant."""
- CONVOLUTIONAL = 0
- DECONVOLUTIONAL = 1
- CONNECTED = 2
- MAXPOOL = 3
- SOFTMAX = 4
- DETECTION = 5
- DROPOUT = 6
- CROP = 7
- ROUTE = 8
- COST = 9
- NORMALIZATION = 10
- AVGPOOL = 11
- LOCAL = 12
- SHORTCUT = 13
- ACTIVE = 14
- RNN = 15
- GRU = 16
- LSTM = 17
- CRNN = 18
- BATCHNORM = 19
- NETWORK = 20
- XNOR = 21
- REGION = 22
- YOLO = 23
- REORG = 24
- UPSAMPLE = 25
- LOGXENT = 26
- L2NORM = 27
- BLANK = 28
-
-class ACTIVATION(object):
- """Darknet ACTIVATION Class constant."""
- LOGISTIC = 0
- RELU = 1
- RELIE = 2
- LINEAR = 3
- RAMP = 4
- TANH = 5
- PLSE = 6
- LEAKY = 7
- ELU = 8
- LOGGY = 9
- STAIR = 10
- HARDTAN = 11
- LHTAN = 12
-
-__all__ = ['from_darknet']
-
-def _darknet_maxpooling(inputs, attrs):
- """Process the max pool 2d operation."""
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'maxpool'))
- if len(kernel) != 1:
- raise tvm.error.OpAttributeUnImplemented(
- 'Non-2D kernels for Max Pooling are not supported in frontend Darknet.')
-
- op_name, new_attrs = 'max_pool2d', {}
- strides = int(attrs.get('stride', (1, 1)))
- pads = int(attrs.get('pad', (0, 0)))
- new_attrs['pool_size'] = [kernel[0], kernel[0]]
- new_attrs['strides'] = str((strides, strides))
- new_attrs['padding'] = str((pads, pads))
- extra_pad_size = attrs.get('extra_pad_size', 0)
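-    # darknet pads the bottom/right edges with -inf so the padded region never wins the max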
- if extra_pad_size:
- pad_width = ((0, 0), (0, 0), (0, extra_pad_size), (0, extra_pad_size))
- inputs = _sym.pad(*inputs, pad_width=pad_width, pad_value=np.finfo(np.float32).min)
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_avgpooling(inputs, attrs):
- """Process the average pool 2d operation."""
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'avgpool'))
- if len(kernel) != 1:
- raise tvm.error.OpAttributeUnimplemented(
- 'Non-2D kernels for Average Pooling are not supported in frontend Darknet.')
-
- op_name, new_attrs = 'avg_pool2d', {}
- strides = int(attrs.get('stride', (1, 1)))
- pads = int(attrs.get('pad', (0, 0)))
- new_attrs['pool_size'] = [kernel[0], kernel[0]]
- new_attrs['strides'] = str((strides, strides))
- new_attrs['padding'] = str((pads, pads))
-
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_batch_norm(inputs, attrs):
- """Process the batchnormalization operation."""
- op_name, new_attrs = 'darknet_batch_norm', {}
- new_attrs['axis'] = attrs.get('axis', 1)
- new_attrs['epsilon'] = attrs.get('eps', 0.000001)
- new_attrs['center'] = True
- new_attrs['scale'] = True
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_conv2d(inputs, attrs):
- """Process the convolution 2d operation."""
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d'))
- if len(kernel) != 1:
- raise tvm.error.OpAttributeUnimplemented('Non-2D kernels for Conv2D are unsupported '
- 'in frontend Darknet.')
- layout = attrs.get('layout', 'NCHW')
- if layout not in ['NCHW', 'NHWC']:
- raise tvm.error.OpAttributeInvalid(
- 'Value {} in attribute "layout" of operator Conv2D is not valid.'.format(layout))
- strides = int(attrs.get('stride', (1, 1)))
- pads = int(attrs.get('pad', (0, 0)))
-
- op_name, new_attrs = 'conv2d', {}
- new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d')
- new_attrs['kernel_size'] = [kernel[0], kernel[0]]
- new_attrs['strides'] = (strides, strides)
- new_attrs['padding'] = (pads, pads)
- new_attrs['dilation'] = attrs.get('dilate', (1, 1))
- new_attrs['groups'] = attrs.get('num_group', 1)
- new_attrs['layout'] = layout
- if attrs.get('use_batchNorm', False) is True:
- new_attrs['use_bias'] = False
- else:
- new_attrs['use_bias'] = True
- out_name = {}
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- out_name[0] = sym.list_output_names()[0].replace('_output', '')
-
- if attrs.get('use_batchNorm', False) is True:
- op_name, new_attrs = 'batch_norm', {}
- new_attrs['epsilon'] = 0.000001
- sym = get_nnvm_op(op_name)(*sym, **new_attrs)
- out_name[1] = sym.list_output_names()[0].replace('_output', '')
- if 'activation' in attrs:
- new_attrs = {}
- new_attrs['activation'] = attrs['activation']
- new_attrs['slope'] = 0.1
- sym, _ = _darknet_activations(sym, new_attrs)
- return sym, out_name
-
-
-def _darknet_conv2d_transpose(inputs, attrs):
- """Process the convolution 2d transpose operation."""
- if 'target_shape' in attrs:
- raise tvm.error.OpAttributeUnimplemented(
- 'Attribute "target_shape" is not supported in operator Conv2D-transpose.')
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d_transpose'))
- if len(kernel) != 2:
- raise tvm.error.OpAttributeUnimplemented(
- 'Non-2D kernels are not supported in operator Conv2D-transpose.')
- layout = attrs.get('layout', 'NCHW')
- if layout not in ['NCHW', 'NHWC']:
- msg = 'Value {} in attribute "layout" of operator Conv2D-transpose is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(layout))
- op_name, new_attrs = 'conv2d_transpose', {}
- new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d_transpose')
- new_attrs['kernel_size'] = kernel
- new_attrs['strides'] = attrs.get('stride', (1, 1))
- new_attrs['output_padding'] = attrs.get('adj', (0, 0))
- new_attrs['padding'] = attrs.get('pad', (0, 0))
- new_attrs['dilation'] = attrs.get('dilate', (1, 1))
- new_attrs['groups'] = attrs.get('num_group', 1)
- new_attrs['layout'] = layout
- new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias')
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_shortcut(inputs, attrs):
- """Process the shortcut operation."""
- op_name, new_attrs = 'elemwise_add', {}
- input_0 = inputs[0]
- input_1 = inputs[1]
- input_0_channel = int(attrs['out_channel'])
- input_1_channel = int(attrs['add_out_channel'])
- input_0_size = int(attrs['out_size'])
- input_1_size = int(attrs['add_out_size'])
-
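-    # resize the second input so both inputs share the same spatial size before the add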
- if input_0_size > input_1_size:
- scale = int(input_0_size/input_1_size)
- input_1 = _sym.upsampling(input_1, scale=scale, name="_upsampling")
- elif input_0_size < input_1_size:
- stride = int(input_1_size/input_0_size)
- input_1 = _sym.avg_pool2d(input_1, pool_size=(1, 1),
- strides=(stride, stride), padding=(0, 0), name="_downsampling")
-
- if input_0_channel != input_1_channel:
- pad_channel = input_0_channel - input_1_channel
- input_1 = _sym.pad(input_1, pad_width=((0, 0), (0, pad_channel), (0, 0), (0, 0)),
- pad_value=0.)
-
- new_inputs = _as_list([input_0, input_1])
- sym = get_nnvm_op(op_name)(*new_inputs, **new_attrs)
- out_name = sym.list_output_names()[0].replace('_output', '')
- if 'activation' in attrs:
- new_attrs['activation'] = attrs['activation']
- sym, _ = _darknet_activations(sym, new_attrs)
- return sym, out_name
-
-def _darknet_dense(inputs, attrs):
- """Process the dense operation."""
- op_name, new_attrs = 'dense', {}
- new_attrs['units'] = required_attr(attrs, 'num_hidden', 'dense')
- out_name = {}
- new_attrs['use_bias'] = attrs.get('use_bias', False)
- if attrs.get('use_flatten', False) is True:
- inputs[0] = _sym.flatten(inputs[0])
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- out_name[0] = sym.list_output_names()[0].replace('_output', '')
- if 'use_batchNorm' in attrs:
- op_name, new_attrs = 'batch_norm', {}
- new_attrs['epsilon'] = 0.000001
- sym = get_nnvm_op(op_name)(*sym, **new_attrs)
- out_name[1] = sym.list_output_names()[0].replace('_output', '')
- if 'activation' in attrs:
- new_attrs = {}
- new_attrs['activation'] = attrs['activation']
- sym, _ = _darknet_activations(sym, new_attrs)
- return sym, out_name
-
-def _darknet_dropout(inputs, attrs):
-    """Process the dropout operation; it is a blank operation."""
- op_name, new_attrs = 'dropout', {}
- new_attrs['rate'] = attrs.get('p', 0.5)
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_reshape(inputs, attrs):
- """Process the reshape operation."""
- if parse_bool_str(attrs, 'reverse'):
- raise tvm.error.OpAttributeUnimplemented(
- 'Attribute "reverse" is not supported in operator Reshape.')
- op_name, new_attrs = 'reshape', {}
- new_attrs['shape'] = required_attr(attrs, 'shape', 'reshape')
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_upsampling(inputs, attrs):
- """Process the upsampling operation."""
- op_name, new_attrs = 'upsampling', {}
- new_attrs['scale'] = attrs.get('scale', 1)
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_l2normalize(inputs, attrs):
- """Process the l2 normalization operation."""
- op_name, new_attrs = 'l2_normalize', {}
- new_attrs['eps'] = attrs.get('eps', 0)
- new_attrs['axis'] = attrs.get('axis', 1)
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_softmax_output(inputs, attrs):
- """Process the softmax operation."""
- temperature = attrs.get('temperature', 1)
- if temperature != 1:
- inputs[0] = inputs[0] / float(temperature)
- op_name, new_attrs = 'softmax', {}
- if parse_bool_str(attrs, 'multi_output'):
- new_attrs['axis'] = 1
-
- if attrs.get('use_flatten', False) is True:
- inputs[0] = _sym.flatten(inputs[0])
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_route(inputs, attrs):
- """Process the route operation, which is equivalent to concat."""
- op_name = 'concatenate'
- new_attrs = {'axis': attrs.get('dim', 1)}
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_reorg(inputs, attrs):
- """Process the reorg operation."""
- op_name, new_attrs = 'yolo_reorg', {}
- if 'stride' in attrs:
- new_attrs = {'stride': attrs.get('stride', 1)}
- return get_nnvm_op(op_name)(*inputs, **new_attrs), None
-
-def _darknet_region(inputs, attrs):
- """Process the region operation."""
- num = attrs.get('n', 1)
- classes = attrs.get('classes', 1)
- coords = attrs.get('coords', 0)
- background = attrs.get('background', 0)
- softmax = attrs.get('softmax', True)
- input_shape = attrs.get('shape')
-
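-    # reshape to (batch, anchors, box attrs, h, w), apply sigmoid/softmax per attribute group, then restore the shape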
- split_size = classes + coords + 1
- intermediate_shape = (input_shape[0], num, split_size, input_shape[2], input_shape[3])
- data_block = _sym.reshape(inputs[0], shape=intermediate_shape)
- split_indices = (2, 4, 5)
- split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2)
- split_res0 = _sym.sigmoid(split_res[0])
- if not background:
- split_res2 = _sym.sigmoid(split_res[2])
- else:
- split_res2 = split_res[2]
- if softmax:
- split_res3 = _sym.softmax(split_res[3], axis=2)
- concat_list = [split_res0, split_res[1], split_res2, split_res3]
- out = _sym.concatenate(*concat_list, axis=2)
- return _sym.reshape(out, shape=input_shape), None
-
-
-def _darknet_yolo(inputs, attrs):
- """Process the yolo operation."""
- num = attrs.get('n', 1)
- classes = attrs.get('classes', 1)
- input_shape = attrs.get('shape')
- split_size = classes + 5
- intermediate_shape = (input_shape[0], num, split_size, input_shape[2], input_shape[3])
- data_block = _sym.reshape(inputs[0], shape=intermediate_shape)
- split_indices = (2, 4)
- split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2)
- split_res0 = _sym.sigmoid(split_res[0])
- split_res2 = _sym.sigmoid(split_res[2])
- concat_list = [split_res0, split_res[1], split_res2]
- out = _sym.concatenate(*concat_list, axis=2)
- return _sym.reshape(out, shape=input_shape), None
-
-def _darknet_activations(inputs, attrs):
- """Process the activation function."""
- act = required_attr(attrs, 'activation', 'activations')
- if ACTIVATION.LOGISTIC == act:
- act_type = 'sigmoid'
- elif ACTIVATION.RELU == act:
- act_type = 'relu'
- elif ACTIVATION.TANH == act:
- act_type = 'tanh'
- elif ACTIVATION.LINEAR == act:
- return inputs, None
- elif ACTIVATION.LEAKY == act:
- act_type = 'leaky_relu'
- elif ACTIVATION.ELU == act:
- act_type = 'elu'
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator act: {} is not supported in framework Darknet.'.format(act))
-
- if act_type in ['relu', 'tanh']:
- op_name, new_attrs = act_type, {}
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- elif act_type in ['leaky_relu']:
- op_name, new_attrs = act_type, {}
- new_attrs['alpha'] = attrs.get('slope', 0.1)
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- elif act_type in ['elu']:
- sym = -1 * _sym.relu(1 - _sym.exp(*inputs)) + _sym.relu(*inputs)
- elif act_type in ['sigmoid']:
- op_name, new_attrs = act_type, {}
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator act: {} is not supported in framework Darknet.'.format(act))
- return sym, None
-
-def _darknet_op_not_support(inputs, attrs):
- """Raise exception if the operation is not supported."""
- err = "{} is not supported in {}.".format(attrs, inputs)
- raise NotImplementedError(err)
-
-_DARKNET_CONVERT_MAP = {
- LAYERTYPE.CONVOLUTIONAL : _darknet_conv2d,
- LAYERTYPE.DECONVOLUTIONAL : _darknet_conv2d_transpose,
- LAYERTYPE.CONNECTED : _darknet_dense,
- LAYERTYPE.MAXPOOL : _darknet_maxpooling,
- LAYERTYPE.SOFTMAX : _darknet_softmax_output,
- LAYERTYPE.DROPOUT : _darknet_dropout,
- LAYERTYPE.AVGPOOL : _darknet_avgpooling,
- LAYERTYPE.BATCHNORM : _darknet_batch_norm,
- LAYERTYPE.ROUTE : _darknet_route,
- LAYERTYPE.REORG : _darknet_reorg,
- LAYERTYPE.REGION : _darknet_region,
- LAYERTYPE.SHORTCUT : _darknet_shortcut,
- LAYERTYPE.UPSAMPLE : _darknet_upsampling,
- LAYERTYPE.L2NORM : _darknet_l2normalize,
- LAYERTYPE.YOLO : _darknet_yolo,
- LAYERTYPE.DETECTION : _darknet_op_not_support,
- LAYERTYPE.CROP : _darknet_op_not_support,
- LAYERTYPE.COST : _darknet_op_not_support,
- LAYERTYPE.NORMALIZATION : _darknet_op_not_support,
- LAYERTYPE.LOCAL : _darknet_op_not_support,
- LAYERTYPE.ACTIVE : _darknet_op_not_support,
- LAYERTYPE.RNN : _darknet_op_not_support,
- LAYERTYPE.GRU : _darknet_op_not_support,
- LAYERTYPE.LSTM : _darknet_op_not_support,
- LAYERTYPE.CRNN : _darknet_op_not_support,
- LAYERTYPE.NETWORK : _darknet_op_not_support,
- LAYERTYPE.XNOR : _darknet_op_not_support,
- LAYERTYPE.BLANK : _darknet_op_not_support,
-}
-
-def _darknet_convert_symbol(op_name, inputs, attrs):
- """Convert from darknet op to nnvm op.
-    The converter must specify some conversions explicitly to
-    support operators such as conv2d...
-
- Parameters
- ----------
- op_name : str
- Operator name, such as Convolution, Connected, etc
- inputs : list of nnvm.Symbol
- List of input symbols.
- attrs : dict
- Dict of operator attributes
-
- Returns
- -------
- out_name : converted out name of operation
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
-
- if op_name in _DARKNET_CONVERT_MAP:
- sym, out_name = _DARKNET_CONVERT_MAP[op_name](inputs, attrs)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Darknet.'.format(op_name))
- if out_name is None:
- out_name = sym.list_output_names()[0].replace('_output', '')
- return out_name, sym
-
-
-def _as_list(arr):
- """Force being a list, ignore if already is."""
- if isinstance(arr, list):
- return arr
- return [arr]
-
-
-class GraphProto(object):
- """A helper class for handling nnvm graph copying from darknet model.
- """
-
- def __init__(self, net, dtype='float32'):
- self.net = net
- self.dtype = dtype
- self._sym_array = {}
- self._tvmparams = {}
- self._outs = []
- self._state_ctr = {}
- self._state_ctr['rnn'] = 0
- self._state_ctr['crnn'] = 0
- self._state_ctr['lstm'] = 0
- self._state_ctr['cell_state'] = 0
- self._state_ctr['gru'] = 0
-
- def _read_memory_buffer(self, shape, data, dtype=None):
- if dtype is None:
- dtype = self.dtype
- length = 1
- for x in shape:
- length *= x
- data_np = np.zeros(length, dtype=dtype)
- for i in range(length):
- data_np[i] = data[i]
- return data_np.reshape(shape)
-
- def _get_convolution_weights(self, layer, opname):
- """Get the convolution layer weights and biases."""
- if layer.nweights == 0:
- return
-
- if layer.n * layer.c * layer.size * layer.size != layer.nweights:
- msg = 'nweights ({}) != n * c * h * w ({}) in operator {}'
- msg = msg.format(layer.nweights, layer.n * layer.c * layer.size ** 2, opname)
- raise tvm.error.OpAttributeInvalid(msg)
-
- shape = (layer.n, layer.c, layer.size, layer.size)
- weights = self._read_memory_buffer(shape, layer.weights)
-
- biases = self._read_memory_buffer((layer.n, ), layer.biases)
-
- k = self._get_tvm_params_name(opname[0], 'weight')
- self._tvmparams[k] = tvm.nd.array(weights)
-
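-        # when the layer folds in batch norm, the conv biases are loaded as the batchnorm beta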
- if layer.batch_normalize == 1 and layer.dontloadscales != 1:
- self._get_batchnorm_weights(layer, opname[1], layer.n)
- k = self._get_tvm_params_name(opname[1], 'beta')
- self._tvmparams[k] = tvm.nd.array(biases)
- else:
- k = self._get_tvm_params_name(opname[0], 'bias')
- self._tvmparams[k] = tvm.nd.array(biases)
-
- def _get_connected_weights(self, layer, opname):
- """Parse the weights and biases for fully connected or dense layer."""
- size = layer.outputs * layer.inputs
- if size == 0:
- return
-
- weights = self._read_memory_buffer((layer.outputs, layer.inputs), layer.weights)
- biases = self._read_memory_buffer((layer.outputs, ), layer.biases)
-
- k = self._get_tvm_params_name(opname[0], 'weight')
- self._tvmparams[k] = tvm.nd.array(weights)
-
- if layer.batch_normalize == 1 and layer.dontloadscales != 1:
- self._get_batchnorm_weights(layer, opname[1], layer.outputs)
- k = self._get_tvm_params_name(opname[1], 'beta')
- self._tvmparams[k] = tvm.nd.array(biases)
- else:
- k = self._get_tvm_params_name(opname[0], 'bias')
- self._tvmparams[k] = tvm.nd.array(biases)
-
- def _get_region_weights(self, layer, opname):
- """Parse the biases for region layer."""
- biases = self._read_memory_buffer((layer.n*2, ), layer.biases)
- attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w,
- layer.classes, layer.coords, layer.background],
- dtype=np.int32)
- k = self._get_tvm_params_name(opname, 'bias')
- self._tvmparams[k] = tvm.nd.array(biases)
- k = self._get_tvm_params_name(opname, 'attr')
- self._tvmparams[k] = tvm.nd.array(attributes)
-
- def _get_yolo_weights(self, layer, opname):
- """Parse the biases and mask for yolo layer."""
- biases = self._read_memory_buffer((layer.total*2, ), layer.biases)
- mask = self._read_memory_buffer((layer.n, ), layer.mask, dtype='int32')
- attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w,
- layer.classes, layer.total],
- dtype=np.int32)
- k = self._get_tvm_params_name(opname, 'bias')
- self._tvmparams[k] = tvm.nd.array(biases)
- k = self._get_tvm_params_name(opname, 'mask')
- self._tvmparams[k] = tvm.nd.array(mask)
- k = self._get_tvm_params_name(opname, 'attr')
- self._tvmparams[k] = tvm.nd.array(attributes)
-
- def _get_batchnorm_weights(self, layer, opname, size):
- """Parse the weights for batchnorm, which includes, scales, moving mean
- and moving variances."""
- scales = self._read_memory_buffer((size, ), layer.scales)
- rolling_mean = self._read_memory_buffer((size, ), layer.rolling_mean)
- rolling_variance = self._read_memory_buffer((size, ), layer.rolling_variance)
-
- k = self._get_tvm_params_name(opname, 'moving_mean')
- self._tvmparams[k] = tvm.nd.array(rolling_mean)
- k = self._get_tvm_params_name(opname, 'moving_var')
- self._tvmparams[k] = tvm.nd.array(rolling_variance)
- k = self._get_tvm_params_name(opname, 'gamma')
- self._tvmparams[k] = tvm.nd.array(scales)
-
- def _get_darknet_attrs(self, layer, layer_num):
- """Parse attributes of each layer and return."""
- attr = {}
- use_flatten = True
- if LAYERTYPE.CONVOLUTIONAL == layer.type:
- attr.update({'layout' : 'NCHW'})
- attr.update({'pad' : str(layer.pad)})
- attr.update({'num_group' : str(layer.groups)})
- attr.update({'num_filter' : str(layer.n)})
- attr.update({'stride' : str(layer.stride)})
- attr.update({'kernel' : str(layer.size)})
- attr.update({'activation' : (layer.activation)})
-
- if layer.nbiases == 0:
- attr.update({'use_bias' : False})
- else:
- attr.update({'use_bias' : True})
-
- if layer.batch_normalize == 1 and layer.dontloadscales != 1:
- attr.update({'use_batchNorm' : True})
- attr.update({'use_scales' : True})
-
- elif LAYERTYPE.CONNECTED == layer.type:
- attr.update({'num_hidden' : str(layer.outputs)})
- attr.update({'activation' : (layer.activation)})
- if layer_num != 0:
- layer_prev = self.net.layers[layer_num - 1]
- if (layer_prev.out_h == layer.h and
- layer_prev.out_w == layer.w and
- layer_prev.out_c == layer.c):
- use_flatten = False
- attr.update({'use_flatten' : use_flatten})
- attr.update({'use_bias' : True})
- if layer.batch_normalize == 1 and layer.dontloadscales != 1:
- attr.update({'use_batchNorm' : True})
- attr.update({'use_scales' : True})
- attr.update({'use_bias' : False})
-
- elif LAYERTYPE.MAXPOOL == layer.type:
- attr.update({'pad' : str(layer.pad)})
- attr.update({'stride' : str(layer.stride)})
- attr.update({'kernel' : str(layer.size)})
- max_output = (layer.w - layer.size + 2 * layer.pad)/float(layer.stride) + 1
- if max_output < layer.out_w:
- extra_pad = (layer.out_w - max_output)*layer.stride
- attr.update({'extra_pad_size' : int(extra_pad)})
- elif LAYERTYPE.AVGPOOL == layer.type:
- attr.update({'pad' : str(layer.pad)})
- if layer.stride == 0:
- attr.update({'stride' : str(1)})
- else:
- attr.update({'stride' : str(layer.stride)})
- if layer.size == 0 and layer.h == layer.w:
- attr.update({'kernel' : str(layer.h)})
- else:
- attr.update({'kernel' : str(layer.size)})
-
- elif LAYERTYPE.DROPOUT == layer.type:
- attr.update({'p' : str(layer.probability)})
-
- elif LAYERTYPE.SOFTMAX == layer.type:
- attr.update({'axis' : 1})
- attr.update({'use_flatten' : True})
- if layer.temperature:
- attr.update({'temperature' : str(layer.temperature)})
-
- elif LAYERTYPE.SHORTCUT == layer.type:
- add_layer = self.net.layers[layer.index]
- attr.update({'activation' : (layer.activation)})
- attr.update({'out_channel' : (layer.out_c)})
- attr.update({'out_size' : (layer.out_h)})
- attr.update({'add_out_channel' : (add_layer.out_c)})
- attr.update({'add_out_size' : (add_layer.out_h)})
-
- elif LAYERTYPE.ROUTE == layer.type:
- pass
-
- elif LAYERTYPE.COST == layer.type:
- pass
-
- elif LAYERTYPE.REORG == layer.type:
- attr.update({'stride' : layer.stride})
-
- elif LAYERTYPE.REGION == layer.type:
- attr.update({'n' : layer.n})
- attr.update({'classes' : layer.classes})
- attr.update({'coords' : layer.coords})
- attr.update({'background' : layer.background})
- attr.update({'softmax' : layer.softmax})
- attr.update({'shape' : (1, layer.c, layer.h, layer.w)})
-
- elif LAYERTYPE.YOLO == layer.type:
- attr.update({'n' : layer.n})
- attr.update({'classes' : layer.classes})
- attr.update({'shape' : (1, layer.c, layer.h, layer.w)})
-
- elif LAYERTYPE.UPSAMPLE == layer.type:
- attr.update({'scale' : layer.stride})
-
- elif LAYERTYPE.L2NORM == layer.type:
- pass
-
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Darknet.'.format(layer.type))
-
- return attr
-
- def _get_tvm_params_name(self, opname, arg_name):
- """Makes the params name for the k,v pair."""
- return opname + '_'+ arg_name
-
- def _get_darknet_params(self, layer, opname):
- """To parse and get the darknet params."""
- if LAYERTYPE.CONVOLUTIONAL == layer.type:
- self._get_convolution_weights(layer, opname)
-
- elif LAYERTYPE.CONNECTED == layer.type:
- self._get_connected_weights(layer, opname)
-
- elif LAYERTYPE.REGION == layer.type:
- self._get_region_weights(layer, opname)
-
- elif LAYERTYPE.YOLO == layer.type:
- self._get_yolo_weights(layer, opname)
- def _preproc_layer(self, layer, layer_num):
- """To preprocess each darknet layer, some layer doesnt need processing."""
- if layer_num == 0:
- name = 'data'
- attribute = {}
- sym = [_sym.Variable(name, **attribute)]
- else:
- sym = self._sym_array[layer_num - 1]
- skip_layer = False
-
- if LAYERTYPE.ROUTE == layer.type:
- sym = []
- for j in range(layer.n):
- sym.append(self._sym_array[layer.input_layers[j]])
- if layer.n == 1:
- skip_layer = True
-
- elif LAYERTYPE.COST == layer.type:
- skip_layer = True
-
- elif LAYERTYPE.SHORTCUT == layer.type:
- sym = [sym, self._sym_array[layer.index]]
-
- elif LAYERTYPE.BLANK == layer.type:
- skip_layer = True
-
- if skip_layer is True:
- self._sym_array[layer_num] = sym
-
- return skip_layer, sym
-
- def _get_opname(self, layer):
- """Returs the layer name."""
- return layer.type
-
- def _new_rnn_state_sym(self, state=None, name='rnn'):
- """Returs a symbol for state"""
- sym_name = name + "%d_state" % self._state_ctr[name]
- self._state_ctr[name] += 1
- return _sym.Variable(name=sym_name, init=state)
-
- def _get_rnn_state_buffer(self, layer, name):
- """Get the state buffer for rnn."""
- buffer = np.zeros((1, layer.outputs), self.dtype)
- return self._new_rnn_state_sym(buffer, name)
-
- def _get_darknet_rnn_attrs(self, layer, sym):
- """Get the rnn converted symbol from attributes."""
- attr = self._get_darknet_attrs(layer, 0)
- op_name = self._get_opname(layer)
- layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr)
- self._get_darknet_params(layer, layer_name)
- return sym
-
- def _handle_darknet_rnn_layers(self, layer_num, sym):
- """Parse attributes and handle the rnn layers."""
- attr = {}
- layer = self.net.layers[layer_num]
- processed = False
-
- if LAYERTYPE.RNN == layer.type:
- attr.update({'n' : layer.n})
- attr.update({'batch' : layer.batch})
- attr.update({'num_hidden' : str(layer.outputs)})
-
- state = self._get_rnn_state_buffer(layer, 'rnn')
-
- for _ in range(layer.steps):
- input_layer = layer.input_layer
- sym = self._get_darknet_rnn_attrs(input_layer, sym)
-
- self_layer = layer.self_layer
- state = self._get_darknet_rnn_attrs(self_layer, state)
-
- op_name, new_attrs = 'elemwise_add', {}
- new_inputs = _as_list([sym, state])
- state = get_nnvm_op(op_name)(*new_inputs, **new_attrs)
- self._outs.append(state)
-
- output_layer = layer.output_layer
- sym = self._get_darknet_rnn_attrs(output_layer, state)
-
- self._sym_array[layer_num] = sym
- processed = True
-
- elif LAYERTYPE.CRNN == layer.type:
- attr.update({'n' : layer.n})
- attr.update({'batch' : layer.batch})
- attr.update({'num_hidden' : str(layer.outputs)})
-
- state = self._get_rnn_state_buffer(layer, 'crnn')
-
- for _ in range(layer.steps):
- input_layer = layer.input_layer
- sym = self._get_darknet_rnn_attrs(input_layer, sym)
-
- self_layer = layer.self_layer
- state = self._get_darknet_rnn_attrs(self_layer, state)
-
- op_name, new_attrs = 'elemwise_add', {}
- new_inputs = _as_list([sym, state])
- state = get_nnvm_op(op_name)(*new_inputs, **new_attrs)
- self._outs.append(state)
-
- output_layer = layer.output_layer
- sym = self._get_darknet_rnn_attrs(output_layer, state)
-
- self._sym_array[layer_num] = sym
- processed = True
-
- elif LAYERTYPE.LSTM == layer.type:
- if layer.steps > 1:
- raise tvm.error.OpAttributeInvalid(
- 'Number of steps {} is not valid for the LSTM layer.'.format(layer.steps))
-
- op_name_add = 'elemwise_add'
- op_name_mul = 'elemwise_mul'
- attrs = {}
- act_attr = {}
-
- h_state = self._get_rnn_state_buffer(layer, 'lstm')
- c_state = self._get_rnn_state_buffer(layer, 'cell_state')
- for _ in range(layer.steps):
- sym_wf = self._get_darknet_rnn_attrs(layer.wf, h_state)
- sym_wi = self._get_darknet_rnn_attrs(layer.wi, h_state)
- sym_wg = self._get_darknet_rnn_attrs(layer.wg, h_state)
- sym_wo = self._get_darknet_rnn_attrs(layer.wo, h_state)
-
- input_sym = sym
- sym_uf = self._get_darknet_rnn_attrs(layer.uf, input_sym)
- sym_ui = self._get_darknet_rnn_attrs(layer.ui, input_sym)
- sym_ug = self._get_darknet_rnn_attrs(layer.ug, input_sym)
- sym_uo = self._get_darknet_rnn_attrs(layer.uo, input_sym)
-
- new_inputs = _as_list([sym_wf, sym_uf])
- add_f = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- new_inputs = _as_list([sym_wi, sym_ui])
- add_i = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- new_inputs = _as_list([sym_wg, sym_ug])
- add_g = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- new_inputs = _as_list([sym_wo, sym_uo])
- add_o = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- act_attr['activation'] = ACTIVATION.LOGISTIC
- act_f, _ = _darknet_activations(_as_list(add_f), act_attr)
-
- act_attr['activation'] = ACTIVATION.LOGISTIC
- act_i, _ = _darknet_activations(_as_list(add_i), act_attr)
-
- act_attr['activation'] = ACTIVATION.TANH
- act_g, _ = _darknet_activations(_as_list(add_g), act_attr)
-
- act_attr['activation'] = ACTIVATION.LOGISTIC
- act_o, _ = _darknet_activations(_as_list(add_o), act_attr)
-
- new_inputs = _as_list([act_i, act_g])
- mul_t = get_nnvm_op(op_name_mul)(*new_inputs, **attrs)
-
- new_inputs = _as_list([act_f, c_state])
- c_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs)
-
- new_inputs = _as_list([mul_t, c_state])
- c_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- act_attr['activation'] = ACTIVATION.TANH
- h_state, _ = _darknet_activations(_as_list(c_state), act_attr)
-
- new_inputs = _as_list([act_o, h_state])
- h_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs)
- self._outs = self._outs + [c_state, h_state]
- sym = h_state
- self._sym_array[layer_num] = sym
- processed = True
-
- elif LAYERTYPE.GRU == layer.type:
- if layer.steps > 1:
- raise tvm.error.OpAttributeInvalid(
- 'Number of steps {} is not valid for the GRU layer.'.format(layer.steps))
-
- op_name_add = 'elemwise_add'
- op_name_mul = 'elemwise_mul'
- attrs = {}
- act_attr = {}
-
- state = self._get_rnn_state_buffer(layer, "gru")
- for _ in range(layer.steps):
- sym_wz = self._get_darknet_rnn_attrs(layer.wz, state)
- sym_wr = self._get_darknet_rnn_attrs(layer.wr, state)
-
- input_sym = sym
- sym_uz = self._get_darknet_rnn_attrs(layer.uz, input_sym)
- sym_ur = self._get_darknet_rnn_attrs(layer.ur, input_sym)
- sym_uh = self._get_darknet_rnn_attrs(layer.uh, input_sym)
-
- new_inputs = _as_list([sym_uz, sym_wz])
- add_z = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- new_inputs = _as_list([sym_ur, sym_wr])
- add_r = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- act_attr['activation'] = ACTIVATION.LOGISTIC
- act_z, _ = _darknet_activations(_as_list(add_z), act_attr)
-
- act_attr['activation'] = ACTIVATION.LOGISTIC
- act_r, _ = _darknet_activations(_as_list(add_r), act_attr)
-
- new_inputs = _as_list([act_r, state])
- forgot = get_nnvm_op(op_name_mul)(*new_inputs, **attrs)
-
- sym_wh = self._get_darknet_rnn_attrs(layer.wh, forgot)
-
- new_inputs = _as_list([sym_uh, sym_wh])
- h_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs)
-
- if layer.tanh == 1:
- act_attr['activation'] = ACTIVATION.TANH
- else:
- act_attr['activation'] = ACTIVATION.LOGISTIC
- h_state, _ = _darknet_activations(_as_list(h_state), act_attr)
-
- sym = act_z * state + (1 - act_z) * h_state
-
- self._outs = self._outs + [sym]
- self._sym_array[layer_num] = sym
- processed = True
-
- return processed, sym
-
- def _make_outlist(self, sym, op_name, layer, layer_num):
- if layer.type == LAYERTYPE.REGION:
- k = self._get_tvm_params_name(op_name, 'attr')
- self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
- k = self._get_tvm_params_name(op_name, 'bias')
- self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
- if layer_num != self.net.n-1:
- self._outs.insert(0, sym)
-
- elif layer.type == LAYERTYPE.YOLO:
- k = self._get_tvm_params_name(op_name, 'attr')
- self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
- k = self._get_tvm_params_name(op_name, 'bias')
- self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
- k = self._get_tvm_params_name(op_name, 'mask')
- self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
- if layer_num != self.net.n-1:
- self._outs.insert(0, sym)
-
- def from_darknet(self):
- """To convert the darknet symbol to nnvm symbols."""
- for i in range(self.net.n):
- layer = self.net.layers[i]
- need_skip, sym = self._preproc_layer(layer, i)
- if need_skip is True:
- continue
-
- processed, sym = self._handle_darknet_rnn_layers(i, sym)
- if processed is True:
- continue
-
- attr = self._get_darknet_attrs(layer, i)
- op_name = self._get_opname(layer)
- layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr)
- self._get_darknet_params(self.net.layers[i], layer_name)
- self._sym_array[i] = sym
- self._make_outlist(sym, layer_name, layer, i)
-
- self._outs = _as_list(sym) + self._outs
- if isinstance(self._outs, list):
- sym = _sym.Group(self._outs)
- return sym, self._tvmparams
-
-def from_darknet(net, dtype='float32'):
- """Convert from darknet's model into compatible NNVM format.
- Reconstruct a nnvm symbol by traversing the darknet input.
-
- Parameters
- ----------
- net : ctype Pointer to network
- Darknet parsed symbols
-
- dtype : str
- Datatype of the input net structure, default is float32
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.NDArray
- The parameter dict to be used by nnvm
- """
-
- return GraphProto(net, dtype).from_darknet()
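A minimal usage sketch of the entry point above. Only from_darknet(net, dtype) and its (sym, params) return value are taken from the code in this file; the darknet FFI loader, the file paths, and the nnvm.compiler.build call are illustrative assumptions rather than part of this diff.

import nnvm
import nnvm.compiler
from nnvm.frontend.darknet import from_darknet
from nnvm.testing.darknet import __darknetffi__  # assumed helper location

# Load a parsed darknet network through the darknet C library (paths are placeholders).
darknet_lib = __darknetffi__.dlopen('./libdarknet.so')
net = darknet_lib.load_network(b'./yolov3.cfg', b'./yolov3.weights', 0)

# Convert to an nnvm symbol plus a params dict of tvm.NDArray, as defined above.
sym, params = from_darknet(net, dtype='float32')

# Typical follow-up (signature assumed): compile with the nnvm graph compiler.
shape_dict = {'data': (1, net.c, net.h, net.w)}
graph, lib, params = nnvm.compiler.build(sym, target='llvm', shape=shape_dict, params=params)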
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, import-self
-"""Keras frontend."""
-from __future__ import absolute_import as _abs
-import sys
-import numpy as np
-import tvm
-from .. import symbol as _sym
-from .common import SymbolTable
-
-__all__ = ['from_keras']
-
-
-def _check_data_format(keras_layer):
- if hasattr(keras_layer, 'data_format'):
- if keras_layer.data_format != 'channels_last':
- raise ValueError("Keras frontend currently supports data_format = channels_last only.")
-
-
-def _get_pad_pair(input1d, kernel1d, stride1d):
- out1d = (input1d + stride1d - 1) // stride1d
- pad = np.maximum((out1d - 1) * stride1d + kernel1d - input1d, 0)
- pad_before = pad // 2
- pad_after = pad - pad_before
- return [pad_before, pad_after]
-
-def _get_elu(insym, alpha):
- """ A helper method for elu.
- """
- return -alpha * _sym.relu(1 - _sym.exp(insym)) + _sym.relu(insym)
-
-def _convert_recurrent_activation(insym, keras_layer):
- act_type = keras_layer.recurrent_activation.__name__
- return _convert_activation(insym, act_type, None)
-
-def _convert_activation(insym, keras_layer, _):
- if isinstance(keras_layer, str):
- act_type = keras_layer
- else:
- if sys.version_info.major < 3:
- act_type = keras_layer.activation.func_name
- else:
- act_type = keras_layer.activation.__name__
- if act_type == 'linear':
- if isinstance(keras_layer, str):
- return insym
- alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1
- beta = keras_layer.beta if hasattr(keras_layer, "beta") else 0
- return _sym.__add_scalar__(_sym.__mul_scalar__(insym, \
- scalar=alpha), scalar=beta)
- if act_type == 'softmax':
- return _sym.softmax(insym, axis=1)
- if act_type == 'sigmoid':
- return _sym.sigmoid(insym)
- if act_type == 'tanh':
- return _sym.tanh(insym)
- if act_type == 'relu':
- return _sym.relu(insym)
- if act_type == 'softplus':
- return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1))
- if act_type == 'elu':
- alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1
- return _get_elu(insym, alpha)
- if act_type == 'selu':
- # Alpha, Gamma values, obtained from https://arxiv.org/abs/1706.02515
- alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") \
- else 1.6732632423543772848170429916717
- gamma = keras_layer.gamma if hasattr(keras_layer, "gamma") \
- else 1.0507009873554804934193349852946
- return gamma * _get_elu(insym, alpha)
- if act_type == 'relu6':
- return _sym.clip(insym, a_min=0, a_max=6)
- if act_type == 'softsign':
- return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym))))
- if act_type == 'hard_sigmoid':
- transformX = (0.2 * insym) + 0.5
- return _sym.clip(transformX, a_min=0, a_max=1)
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Keras.'.format(act_type))
-
-
-def _convert_advanced_activation(insym, keras_layer, symtab):
- act_type = type(keras_layer).__name__
- if act_type == 'ReLU':
- if keras_layer.max_value:
- return _sym.clip(insym, a_min=0, a_max=keras_layer.max_value)
- return _sym.relu(insym)
- if act_type == 'LeakyReLU':
- return _sym.leaky_relu(insym, alpha=keras_layer.alpha)
- if act_type == 'ELU':
- alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1
- return _get_elu(insym, alpha)
- if act_type == 'PReLU':
- assert hasattr(keras_layer, "alpha"), \
- "alpha required for PReLU."
- _check_data_format(keras_layer)
- size = len(keras_layer.alpha.shape)
- return -symtab.new_const(keras_layer.get_weights()[0] \
- .transpose(np.roll(range(size), 1))) \
- * _sym.relu(-insym) + _sym.relu(insym)
- if act_type == 'ThresholdedReLU':
- theta = keras_layer.theta if hasattr(keras_layer, "theta") else 1.0
- theta_tensor = _sym.full_like(insym[0], fill_value=float(theta))
- return _sym.elemwise_mul(insym[0], _sym.greater(insym[0], theta_tensor, out_type="float32"))
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Keras.'.format(act_type))
-
-
-def _convert_merge(insym, keras_layer, _):
- merge_type = type(keras_layer).__name__
- ret = insym[0]
- for i in range(1, len(insym)):
- if merge_type == 'Add':
- ret = _sym.elemwise_add(ret, insym[i])
- elif merge_type == 'Subtract':
- ret = _sym.elemwise_sub(ret, insym[i])
- elif merge_type == 'Multiply':
- ret = _sym.elemwise_mul(ret, insym[i])
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} Merge is not supported in frontend Keras.'.format(merge_type))
- return ret
-
-
-def _convert_dense(insym, keras_layer, symtab):
- weightList = keras_layer.get_weights()
- weight = symtab.new_const(weightList[0].transpose([1, 0]))
- params = {'weight':weight, 'use_bias':False, 'units':weightList[0].shape[1]}
- if keras_layer.use_bias:
- params['use_bias'] = True
- params['bias'] = symtab.new_const(weightList[1])
- input_shape = keras_layer.input_shape
- input_dim = len(input_shape)
- # In case of RNN dense, input shape will be (1, 1, n)
- if input_dim > 2:
- input_shape = tuple(dim if dim else 1 for dim in _as_list(input_shape)[0])
- if input_dim != 3 or input_shape[0] != 1 or input_shape[1] != 1:
- msg = 'Value {} in attribute "input_shape" of operator Dense is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(input_shape))
- insym = _sym.squeeze(insym, axis=0)
- out = _sym.dense(data=insym, **params)
- # defuse activation
- if sys.version_info.major < 3:
- act_type = keras_layer.activation.func_name
- else:
- act_type = keras_layer.activation.__name__
- if act_type != 'linear':
- out = _convert_activation(out, act_type, symtab)
- if input_dim > 2:
- out = _sym.expand_dims(out, axis=0)
- return out
-
-
-def _convert_convolution(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- is_deconv = type(keras_layer).__name__ == 'Conv2DTranspose'
- is_depthconv = type(keras_layer).__name__ == 'DepthwiseConv2D'
- weightList = keras_layer.get_weights()
- if is_deconv:
- kernel_h, kernel_w, n_filters, in_channels = weightList[0].shape
- weight = weightList[0].transpose([3, 2, 0, 1])
- elif is_depthconv:
- kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape
- weight = weightList[0].transpose([2, 3, 0, 1])
- else:
- kernel_h, kernel_w, in_channels, n_filters = weightList[0].shape
- weight = weightList[0].transpose([3, 2, 0, 1])
- if isinstance(keras_layer.dilation_rate, (list, tuple)):
- dilation = [keras_layer.dilation_rate[0], keras_layer.dilation_rate[1]]
- else:
- dilation = [keras_layer.dilation_rate, keras_layer.dilation_rate]
- dilated_kernel_h = (kernel_h - 1) * dilation[0] + 1
- dilated_kernel_w = (kernel_w - 1) * dilation[1] + 1
- stride_h, stride_w = keras_layer.strides
- params = {'weight': symtab.new_const(weight),
- 'kernel_size': [kernel_h, kernel_w],
- 'strides': [stride_h, stride_w],
- 'dilation': dilation,
- 'padding': [0, 0],
- 'use_bias': False}
- if is_depthconv:
- params['channels'] = in_channels * depth_mult
- params['groups'] = in_channels
- else:
- params['channels'] = n_filters
- if keras_layer.use_bias:
- params['use_bias'] = True
- params['bias'] = symtab.new_const(weightList[1])
- if keras_layer.padding == 'valid':
- pass
- # we insert a separate pad operator
- elif keras_layer.padding == 'same':
- in_h = keras_layer.input_shape[1]
- in_w = keras_layer.input_shape[2]
- pad_t, pad_b = _get_pad_pair(in_h, dilated_kernel_h, stride_h)
- pad_l, pad_r = _get_pad_pair(in_w, dilated_kernel_w, stride_w)
- if pad_t == pad_b and pad_l == pad_r:
- params['padding'] = (pad_t, pad_l)
- else:
- insym = _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r)))
- else:
- msg = 'Value {} in attribute "padding" of operator Convolution is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding))
- if is_deconv:
- out = _sym.conv2d_transpose(data=insym, **params)
- else:
- out = _sym.conv2d(data=insym, **params)
- # defuse activation
- if sys.version_info.major < 3:
- act_type = keras_layer.activation.func_name
- else:
- act_type = keras_layer.activation.__name__
- if act_type != 'linear':
- out = _convert_activation(out, act_type, symtab)
- return out
-
-
-def _convert_separable_convolution(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- weightList = keras_layer.get_weights()
- # depthwise conv
- kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape
- stride_h, stride_w = keras_layer.strides
- weight0 = weightList[0].transpose([2, 3, 0, 1])
- params0 = {'weight': symtab.new_const(weight0),
- 'channels': in_channels * depth_mult,
- 'groups': in_channels,
- 'kernel_size': [kernel_h, kernel_w],
- 'strides': [stride_h, stride_w],
- 'dilation': [1, 1],
- 'padding': [0, 0],
- 'use_bias': False}
- if keras_layer.padding == 'valid':
- pass
- # we insert a separate pad operator
- elif keras_layer.padding == 'same':
- in_h = keras_layer.input_shape[1]
- in_w = keras_layer.input_shape[2]
- pad_t, pad_b = _get_pad_pair(in_h, kernel_h, stride_h)
- pad_l, pad_r = _get_pad_pair(in_w, kernel_w, stride_w)
- insym = _sym.pad(data=insym, pad_width=(
- (0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r)))
- else:
- msg = 'Value {} in attribute "padding" of operator Separable Convolution is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding))
- depthconv = _sym.conv2d(data=insym, **params0)
- # pointwise conv
- weight1 = weightList[1].transpose([3, 2, 0, 1])
- params1 = {'weight': symtab.new_const(weight1),
- 'channels': weight1.shape[0],
- 'groups': 1,
- 'kernel_size': [1, 1],
- 'strides': [1, 1],
- 'dilation': [1, 1],
- 'use_bias': False}
- if keras_layer.use_bias:
- params1['use_bias'] = True
- params1['bias'] = symtab.new_const(weightList[2])
- out = _sym.conv2d(data=depthconv, **params1)
- # defuse activation
- if sys.version_info.major < 3:
- act_type = keras_layer.activation.func_name
- else:
- act_type = keras_layer.activation.__name__
- if act_type != 'linear':
- out = _convert_activation(out, act_type, symtab)
- return out
-
-
-def _convert_flatten(insym, keras_layer, _):
- _check_data_format(keras_layer)
- # NCHW -> NHWC so that dense can be correctly converted
- insym = _sym.transpose(insym, axes=[0, 2, 3, 1])
- return _sym.flatten(insym)
-
-
-def _convert_pooling(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- pool_type = type(keras_layer).__name__
- # global pool in keras = global pool + flatten in nnvm
- if pool_type == 'GlobalMaxPooling2D':
- return _convert_flatten(_sym.global_max_pool2d(insym), keras_layer, symtab)
- if pool_type == 'GlobalAveragePooling2D':
- return _convert_flatten(_sym.global_avg_pool2d(insym), keras_layer, symtab)
- pool_h, pool_w = keras_layer.pool_size
- stride_h, stride_w = keras_layer.strides
- params = {'pool_size': [pool_h, pool_w],
- 'strides': [stride_h, stride_w],
- 'padding': [0, 0]}
- if keras_layer.padding == 'valid':
- pass
- elif keras_layer.padding == 'same':
- in_h = keras_layer.input_shape[1]
- in_w = keras_layer.input_shape[2]
- pad_t, pad_b = _get_pad_pair(in_h, pool_h, stride_h)
- pad_l, pad_r = _get_pad_pair(in_w, pool_w, stride_w)
- params['padding'] = [pad_t, pad_l, pad_b, pad_r]
- else:
- msg = 'Value {} in attribute "padding" of operator Pooling is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding))
- if pool_type == 'MaxPooling2D':
- return _sym.max_pool2d(insym, **params)
- if pool_type == 'AveragePooling2D':
- # TODO: in keras, padded zeros are not calculated
- return _sym.avg_pool2d(insym, **params)
- msg = 'Operator {} is not supported in frontend Keras.'
- raise tvm.error.OpNotImplemented(msg.format(pool_type))
-
-
-def _convert_upsample(insym, keras_layer, _):
- _check_data_format(keras_layer)
- upsample_type = type(keras_layer).__name__
- if upsample_type == "UpSampling1D":
- h = keras_layer.size
- params = {'scale': h}
- elif upsample_type == "UpSampling2D":
- h, w = keras_layer.size
- if h != w:
- raise tvm.error.OpAttributeInvalid(
- 'Upsample height ({}) must equal width ({})'.format(h, w))
- params = {'scale': h}
- elif upsample_type == "UpSampling3D":
- h, w, d = keras_layer.size
- if h != w or w != d:
- raise tvm.error.OpAttributeInvalid(
- 'Upsample height ({}), width ({}), and depth ({}) must be equal.'.format(h, w, d))
- params = {'scale': h}
- else:
- msg = 'Operator {} is not supported in frontend Keras.'
- raise tvm.error.OpNotImplemented(msg.format(upsample_type))
- return _sym.upsampling(insym, **params)
-
-
-def _convert_cropping(insym, keras_layer, _):
- _check_data_format(keras_layer)
- crop_type = type(keras_layer).__name__
- if crop_type == "Cropping2D":
- (_, in_h, in_w, _) = keras_layer.input_shape
- ((crop_t, crop_b), (crop_l, crop_r)) = keras_layer.cropping
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Keras.'.format(crop_type))
- int32_max = np.iinfo(np.int32).max
- return _sym.strided_slice(insym, begin=[0, 0, crop_t, crop_l],
- end=[int32_max, int32_max, in_h-crop_b, in_w-crop_r])
-
-
-def _convert_batchnorm(insym, keras_layer, symtab):
- params = {'scale': False,
- 'center': False,
- 'epsilon': keras_layer.epsilon}
- idx = 0
- if keras_layer.scale:
- params['scale'] = True
- gamma = keras_layer.get_weights()[idx]
- params['gamma'] = symtab.new_const(gamma)
- idx += 1
- if keras_layer.center:
- params['center'] = True
- beta = keras_layer.get_weights()[idx]
- params['beta'] = symtab.new_const(beta)
- idx += 1
- moving_mean = keras_layer.get_weights()[idx]
- moving_var = keras_layer.get_weights()[idx + 1]
- params['moving_mean'] = symtab.new_const(moving_mean)
- params['moving_var'] = symtab.new_const(moving_var)
- return _sym.batch_norm(data=insym, **params)
-
-
-def _convert_padding(insym, keras_layer, _):
- _check_data_format(keras_layer)
- padding_type = type(keras_layer).__name__
- padding = keras_layer.padding
- top = left = bottom = right = 0
- if padding_type == 'ZeroPadding2D':
- if isinstance(padding, int):
- top = left = bottom = right = padding
- elif isinstance(padding, tuple):
- if isinstance(padding[0], int):
- top, left = padding
- bottom, right = padding
- elif isinstance(padding[0], tuple):
- top, bottom = padding[0]
- left, right = padding[1]
- else:
- msg = 'Value {} in attribute "padding" of operator {} is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type))
- else:
- msg = 'Value {} in attribute "padding" of operator {} is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type))
- else:
- raise tvm.error.OpNotImplemented('Operator {} is not supported in frontend Keras.'.format(padding_type))
- return _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (top, bottom), (left, right)))
-
-
-def _convert_concat(insym, keras_layer, _):
- _check_data_format(keras_layer)
- if not isinstance(insym, list):
- insym = [insym]
- return _sym.concatenate(*insym, axis=1)
-
-
-def _convert_reshape(insym, keras_layer, _):
- _check_data_format(keras_layer)
- ch = keras_layer.input_shape[-1]
- assert ch == keras_layer.target_shape[-1], \
- "Only supports last dimension in target shape being equal to " \
- "the channel number of input tensor."
- shape = (-1, ch) + keras_layer.target_shape[:-1]
- return _sym.reshape(insym, shape=shape)
-
-def _convert_lstm(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- if not isinstance(insym, list):
- buffer = np.zeros((1, keras_layer.units), 'float32')
- c_sym = symtab.new_const(buffer)
- h_sym = symtab.new_const(buffer)
- insym = [insym, h_sym, c_sym]
-
- in_data = insym[0]
- next_h = insym[1]
- next_c = insym[2]
-
- weightList = keras_layer.get_weights()
- inp_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.input_shape)[0])
-
- kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
- recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
- in_bias = symtab.new_const(weightList[2])
-
- units = list(weightList[0].shape)[1]
-
- time_steps = inp_shape[1]
- in_data = _sym.squeeze(in_data, axis=0)
- in_data = _sym.split(in_data, indices_or_sections=time_steps, axis=0)
- # Loop over the time steps.
- for data in in_data:
- ixh1 = _sym.dense(data, kernel_wt, use_bias=False, units=units)
- ixh2 = _sym.dense(next_h, recurrent_wt, in_bias, use_bias=True, units=units)
- gate = ixh1 + ixh2
- gates = _sym.split(gate, indices_or_sections=4, axis=1)
- in_gate = _convert_recurrent_activation(gates[0], keras_layer)
- in_transform = _convert_recurrent_activation(gates[1], keras_layer)
- next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None)
- out_gate = _convert_recurrent_activation(gates[3], keras_layer)
- next_h = out_gate * _convert_activation(next_c, keras_layer, None)
-
- out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
- out = _sym.reshape(next_h, shape=out_shape)
- return [out, next_h, next_c]
-
-def _convert_simple_rnn(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- if not isinstance(insym, list):
- buffer = np.zeros((1, keras_layer.units), 'float32')
- prev_sym = symtab.new_const(buffer)
- insym = [insym, prev_sym]
- in_data = insym[0]
- prev_sym = insym[1]
-
- weightList = keras_layer.get_weights()
- kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
- recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
- in_bias = symtab.new_const(weightList[2])
- units = list(weightList[0].shape)[1]
-
- in_data = _sym.flatten(in_data)
- ixh = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units)
- prev_sym = _sym.flatten(prev_sym)
- ixh2 = _sym.dense(prev_sym, recurrent_wt, use_bias=False, units=units)
- output = ixh + ixh2
- output = _convert_activation(output, keras_layer, None)
-
- out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
- output = _sym.reshape(output, shape=out_shape)
-
- return [output, output]
-
-def _convert_gru(insym, keras_layer, symtab):
- _check_data_format(keras_layer)
- if not isinstance(insym, list):
- buffer = np.zeros((1, keras_layer.units), 'float32')
- h_tm1 = symtab.new_const(buffer)
- insym = [insym, h_tm1]
- in_data = insym[0]
- h_tm1_sym = insym[1]
-
- weightList = keras_layer.get_weights()
- kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
- recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
- in_bias = symtab.new_const(weightList[2])
-
- units = list(weightList[0].shape)[1]
-
- in_data = _sym.flatten(in_data)
- matrix_x = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units)
-
- # inputs projected by all gate matrices at once
- split_indices = [keras_layer.units, 2 * keras_layer.units]
- gates = _sym.split(matrix_x, indices_or_sections=split_indices, axis=1)
- x_z = gates[0]
- x_r = gates[1]
- x_h = gates[2]
-
- # hidden state projected separately for update/reset and new
- units = 2 * keras_layer.units
- split_indices = [units]
- rec_wts = _sym.split(recurrent_wt, indices_or_sections=split_indices, axis=0)
-
- h_tm1_sym = _sym.flatten(h_tm1_sym)
- matrix_inner = _sym.dense(h_tm1_sym, rec_wts[0], use_bias=False, units=units)
-
- split_indices = [keras_layer.units]
- recurrent = _sym.split(matrix_inner, indices_or_sections=split_indices, axis=1)
- recurrent_z = recurrent[0]
- recurrent_r = recurrent[1]
-
- rec_act_z = _convert_recurrent_activation(x_z + recurrent_z, keras_layer)
- rec_act_r = _convert_recurrent_activation(x_r + recurrent_r, keras_layer)
-
- units = keras_layer.units
- recurrent_h = _sym.dense(rec_act_r * h_tm1_sym, rec_wts[1], use_bias=False, units=units)
- act_hh = _convert_activation(x_h + recurrent_h, keras_layer, None)
-
- # previous and candidate state mixed by update gate
- output = rec_act_z * h_tm1_sym + (1 - rec_act_z) * act_hh
-
- out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
- output = _sym.reshape(output, shape=out_shape)
- return [output, output]
-
-def _default_skip(insym, keras_layer, _): # pylint: disable=unused-argument
- """Layers that can be skipped because they are train time only."""
- return insym
-
-
-_convert_map = {
- 'Dense' : _convert_dense,
- 'Activation' : _convert_activation,
- 'ReLU' : _convert_advanced_activation,
- 'LeakyReLU' : _convert_advanced_activation,
- 'PReLU' : _convert_advanced_activation,
- 'ELU' : _convert_advanced_activation,
- 'ThresholdedReLU' : _convert_advanced_activation,
-
- 'AveragePooling2D' : _convert_pooling,
- 'MaxPooling2D' : _convert_pooling,
- 'GlobalAveragePooling2D' : _convert_pooling,
- 'GlobalMaxPooling2D' : _convert_pooling,
- 'Conv2D' : _convert_convolution,
- 'Conv2DTranspose' : _convert_convolution,
- 'DepthwiseConv2D' : _convert_convolution,
- 'SeparableConv2D' : _convert_separable_convolution,
-
- 'Flatten' : _convert_flatten,
- 'Reshape' : _convert_reshape,
- 'Concatenate' : _convert_concat,
- 'BatchNormalization' : _convert_batchnorm,
-
- 'Add' : _convert_merge,
- 'Subtract' : _convert_merge,
- 'Multiply' : _convert_merge,
- 'ZeroPadding2D' : _convert_padding,
- 'UpSampling2D' : _convert_upsample,
- 'Cropping2D' : _convert_cropping,
-
- # 'ZeroPadding1D' : _convert_padding,
- # 'AveragePooling1D' : _convert_pooling,
- # 'MaxPooling1D' : _convert_pooling,
- # 'GlobalAveragePooling1D' : _convert_pooling,
- # 'GlobalMaxPooling1D' : _convert_pooling,
- # 'Cropping1D' : _convert_cropping,
- # 'UpSampling1D' : _convert_upsample,
- # 'UpSampling3D' : _convert_upsample,
- # 'Conv1D' : _convert_convolution1d,
-
- 'SimpleRNN' : _convert_simple_rnn,
- 'LSTM' : _convert_lstm,
- 'GRU' : _convert_gru,
- # 'Bidirectional' : _convert_bidirectional,
- # 'TimeDistributed' : _default_skip,
-
- # 'Average' : _convert_merge,
- # 'Maximum' : _convert_merge,
- # 'Dot' : _convert_merge,
- # 'Permute' : _convert_permute,
- # 'Embedding' : _convert_embedding,
- # 'RepeatVector' : _convert_repeat_vector,
-
- 'InputLayer' : _default_skip,
- 'Dropout' : _default_skip,
- 'SpatialDropout2D' : _default_skip,
- 'SpatialDropout1D' : _default_skip,
-}
-
-
-def _check_unsupported_layers(model):
- for layer in model.layers:
- op_name = type(layer).__name__
- if op_name not in _convert_map:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Keras.'.format(op_name))
-
-def _as_list(arr):
- """Force being a list, ignore if already is."""
- if isinstance(arr, list):
- return arr
- return [arr]
-
-def keras_op_to_nnvm(insym, keras_layer, outname, symtab):
- """Convert keras layer to nnvm symbol, and update symtab.
-
- Parameters
- ----------
- insym : nnvm.symbol.Symbol or a list of it
- The input nnvm symbol(s)
-
- keras_layer : keras.layers
- The keras layer to be converted
-
- outname : str
- Name of the output nnvm symbol
-
- symtab : nnvm.frontend.common.SymbolTable
- The global symbol table to be updated
- """
- op_name = type(keras_layer).__name__
- if op_name not in _convert_map:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend Keras.'.format(op_name))
- outs = _convert_map[op_name](insym, keras_layer, symtab)
- outs = _as_list(outs)
-
- for t_idx, out in enumerate(outs):
- name = outname + ":" + str(t_idx)
- symtab.set_var(name, out)
-
-def from_keras(model):
- """Convert keras model to NNVM format.
-
- Parameters
- ----------
- model : keras.engine.training.Model
- The keras model to be converted
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.NDArray
- The parameter dict to be used by nnvm
- """
- try:
- import keras
- except ImportError:
- raise ImportError('Keras must be installed')
-
- assert isinstance(model, keras.engine.training.Model)
- if keras.backend.backend() != 'tensorflow':
- raise ValueError("Keras frontend currently supports tensorflow backend only.")
- if keras.backend.image_data_format() != 'channels_last':
- raise ValueError("Keras frontend currently supports data_format = channels_last only.")
- _check_unsupported_layers(model)
-
- symtab = SymbolTable()
- for keras_layer in model.layers:
- if isinstance(keras_layer, keras.engine.InputLayer):
- symtab.get_var(keras_layer.name, must_contain=False)
- else:
- inbound_nodes = keras_layer.inbound_nodes if hasattr(keras_layer, 'inbound_nodes') \
- else keras_layer._inbound_nodes if hasattr(keras_layer, '_inbound_nodes') \
- else None
- if inbound_nodes is None:
- raise TypeError("Unknown layer type or unsupported Keras version : {}"
- .format(keras_layer))
- for node_idx, node in enumerate(inbound_nodes):
- # If some nodes in the imported model are not relevant to the current model,
- # skip those layers. model._network_nodes contains the keys of all nodes
- # relevant to the current model.
- if not model._node_key(keras_layer, node_idx) in model._network_nodes:
- continue
-
- insym = []
-
- # Since Keras allows creating multiple nodes from the same layer instance,
- # we append the node index to the symbol name to make it unique.
- # The one exception is InputLayer. Changing input variable names after conversion
- # would confuse users, so we keep them unchanged as far as possible. Fortunately,
- # they are uniquely named input_1, input_2, input_3, ... by default.
- zip_node = zip(node.node_indices, node.tensor_indices, node.inbound_layers)
- for n_idx, t_idx, layer in zip_node:
- if isinstance(layer, keras.engine.InputLayer):
- sym = symtab.get_var(layer.name, must_contain=True)
- else:
- sym_name = layer.name + ':' + str(n_idx) + ':' + str(t_idx)
- sym = symtab.get_var(sym_name, must_contain=True)
- insym.append(sym)
-
- if len(insym) == 1:
- insym = insym[0]
- keras_op_to_nnvm(insym, keras_layer, keras_layer.name + ':' + str(node_idx), symtab)
-
- # model._output_coordinates contains out_node (oc[0]), node_index (oc[1]) and tensor_index (oc[2]).
- # Look up each output node in symtab using the name built from these values; the output
- # symbols were added to symtab in keras_op_to_nnvm under this name. For multiple outputs,
- # collect the output symbols in a list and Group them.
- outsym = [symtab.get_var(oc[0].name + ":" + str(oc[1]) + ":" + str(oc[2]))
- for oc in model._output_coordinates]
-
- tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()}
- return _sym.Group(outsym), tvmparams
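A minimal sketch of how the from_keras entry point above was typically called, assuming a TensorFlow-backed Keras installation with channels_last image data format (both of which from_keras checks). The ResNet50 model choice, the 'input_1' default input name, and the nnvm.compiler.build signature are illustrative assumptions.

import keras
import nnvm
import nnvm.compiler

# Any TF-backed Keras model; ResNet50 is just an example choice.
model = keras.applications.resnet50.ResNet50()

# Convert to an nnvm symbol group plus a dict of tvm.NDArray parameters, as defined above.
sym, params = nnvm.frontend.from_keras(model)

# The converter emits NCHW convolutions, so feed NCHW-shaped data; the default
# Keras InputLayer name is assumed to be 'input_1' (see the comment above).
shape_dict = {'input_1': (1, 3, 224, 224)}
graph, lib, params = nnvm.compiler.build(sym, target='llvm', shape=shape_dict, params=params)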
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, import-self
-"""MXNet symbol frontend."""
-from __future__ import absolute_import as _abs
-import json
-import tvm
-from .. import symbol as _sym
-from .common import get_nnvm_op, required_attr, parse_tshape, parse_bool_str
-
-__all__ = ['from_mxnet']
-
-def _rename(new_name):
- def impl(inputs, attrs):
- return get_nnvm_op(new_name)(*inputs, **attrs)
- return impl
-
-def _pooling(inputs, attrs):
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'pooling'))
- if len(kernel) != 2:
- raise tvm.error.OpAttributeUnImplemented(
- 'Non-2D kernels are not supported for Pool2D.')
- global_pool = 'global' if parse_bool_str(attrs, 'global_pool') else ''
- pool_type = required_attr(attrs, 'pool_type', 'pooling')
- if pool_type not in ['avg', 'max']:
- raise tvm.error.OpNotImplemented(
- 'Only max and average pooling are supported in frontend MXNet.')
- op_name, new_attrs = '_'.join([global_pool, pool_type, 'pool2d']).strip('_'), {}
- # new_attrs['layout'] = 'NCHW'
- if not global_pool:
- new_attrs['pool_size'] = kernel
- new_attrs['strides'] = attrs.get('stride', (1, 1))
- new_attrs['padding'] = attrs.get('pad', (0, 0))
- new_attrs['ceil_mode'] = (attrs.get('pooling_convention', 'valid') == 'full')
- if pool_type == 'avg':
- new_attrs['count_include_pad'] = attrs.get('count_include_pad', True)
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _batch_norm(inputs, attrs):
- if parse_bool_str(attrs, 'output_mean_var'):
- raise tvm.error.OpAttributeUnImplemented(
- 'Attribute "output_mean_var" is not supported in operator batch_norm.')
- # if parse_bool_str(attrs, 'fix_gamma'):
- # _warn_not_used('fix_gamma', 'batch_norm')
- if parse_bool_str(attrs, 'use_global_stats'):
- from warnings import warn
- warn(
- 'Attribute "use_global_stats" is ignored in operator batch_norm.')
- # if parse_bool_str(attrs, 'momentum'):
- # _warn_not_used('momentum', 'batch_norm')
- op_name, new_attrs = 'batch_norm', {}
- new_attrs['axis'] = attrs.get('axis', 1)
- new_attrs['epsilon'] = attrs.get('eps', 0.001)
- new_attrs['center'] = True
- new_attrs['scale'] = not parse_bool_str(attrs, 'fix_gamma', default="False")
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _concat(inputs, attrs):
- op_name = 'concatenate'
- new_attrs = {'axis': attrs.get('dim', 1)}
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _conv2d(inputs, attrs):
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d'))
- if len(kernel) != 2:
- raise tvm.error.OpAttributeUnimplemented(
- 'Non-2D kernels are not supported for operator Conv2D.')
- layout = attrs.get('layout', 'NCHW')
- if layout not in ['NCHW', 'NHWC']:
- raise tvm.error.OpAttributeUnimplemented(
- 'Layout {} is not supported in operator Conv2D.'.format(layout))
- if 'kernel_layout' in attrs:
- kernel_layout = attrs['kernel_layout']
- else:
- kernel_layout = 'HWIO' if layout == 'NHWC' else 'OIHW'
- op_name, new_attrs = 'conv2d', {}
- new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d')
- new_attrs['kernel_size'] = kernel
- new_attrs['strides'] = attrs.get('stride', (1, 1))
- new_attrs['padding'] = attrs.get('pad', (0, 0))
- new_attrs['dilation'] = attrs.get('dilate', (1, 1))
- new_attrs['groups'] = attrs.get('num_group', 1)
- new_attrs['layout'] = layout
- new_attrs['kernel_layout'] = kernel_layout
- new_attrs['use_bias'] = attrs.get('no_bias', 'False').strip() == 'False'
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _conv2d_transpose(inputs, attrs):
- if 'target_shape' in attrs:
- raise tvm.error.OpAttributeUnimplemented(
- 'Attribute "target_shape" is not supported in operator Conv2D-transpose.')
- kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d_transpose'))
- if len(kernel) != 2:
- raise tvm.error.OpAttributeInvalid(
- 'Non-2D kernels are not supported in Conv2D-transpose.')
- layout = attrs.get('layout', 'NCHW')
- if layout not in ['NCHW', 'NHWC']:
- raise tvm.error.OpAttributeUnimplemented(
- 'Layout {} is not supported in operator Conv2D-transpose.'.format(layout))
- if 'kernel_layout' in attrs:
- kernel_layout = attrs['kernel_layout']
- else:
- kernel_layout = 'HWIO' if layout == 'NHWC' else 'OIHW'
- op_name, new_attrs = 'conv2d_transpose', {}
- new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d_transpose')
- new_attrs['kernel_size'] = kernel
- new_attrs['strides'] = attrs.get('stride', (1, 1))
- new_attrs['output_padding'] = attrs.get('adj', (0, 0))
- new_attrs['padding'] = attrs.get('pad', (0, 0))
- new_attrs['dilation'] = attrs.get('dilate', (1, 1))
- new_attrs['groups'] = attrs.get('num_group', 1)
- new_attrs['layout'] = layout
- new_attrs['kernel_layout'] = kernel_layout
- new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias')
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _dense(inputs, attrs):
- import mxnet as mx
- op_name, new_attrs = 'dense', {}
- new_attrs['units'] = required_attr(attrs, 'num_hidden', 'dense')
- new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias')
- try:
- _ = mx.sym.FullyConnected(mx.sym.var('x'), num_hidden=1, flatten=True)
- has_flatten = True
- except mx.base.MXNetError:
- # no flatten attribute in old mxnet
- has_flatten = False
- use_flatten = parse_bool_str(attrs, 'flatten', 'True')
- if has_flatten and use_flatten:
- inputs[0] = _sym.flatten(inputs[0])
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _dropout(inputs, attrs):
- op_name, new_attrs = 'dropout', {}
- new_attrs['rate'] = attrs.get('p', 0.5)
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _leaky_relu(inputs, attrs):
- act_type = required_attr(attrs, 'act_type', 'leaky_relu')
- if act_type in ['leaky', 'prelu']:
- op_name, new_attrs = act_type, {}
- if act_type == 'leaky':
- new_attrs['alpha'] = attrs.get('slope', 0.25)
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- elif act_type == 'elu':
- slope = attrs.get('slope', 0.25)
- sym = -slope * _sym.relu(1 - _sym.exp(*inputs)) + _sym.relu(*inputs)
- elif act_type == 'rrelu':
- lower_bound = float(required_attr(attrs, 'lower_bound', 'leaky_relu'))
- upper_bound = float(required_attr(attrs, 'upper_bound', 'leaky_relu'))
- slope = (lower_bound + upper_bound) / 2.0
- op_name, new_attrs = 'leaky_relu', {'alpha': str(slope)}
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend MXNet.'.format(act_type))
- return sym
-
-def _activations(inputs, attrs):
- act_type = required_attr(attrs, 'act_type', 'activations')
- if act_type in ['relu', 'sigmoid', 'tanh']:
- op_name, new_attrs = act_type, {}
- sym = get_nnvm_op(op_name)(*inputs, **new_attrs)
- elif act_type == 'softrelu':
- sym = _sym.log((1 + _sym.exp(*inputs)))
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend MXNet.'.format(act_type))
- return sym
-
-def _reshape(inputs, attrs):
- if parse_bool_str(attrs, 'reverse'):
- raise tvm.error.OpAttributeUnimplemented(
- 'Attribute "reverse" is not supported in operator Reshape.')
- op_name, new_attrs = 'reshape', {}
- new_attrs['shape'] = required_attr(attrs, 'shape', 'reshape')
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _slice(inputs, attrs):
- begin = attrs.get('begin', None)
- end = attrs.get('end', None)
- stride = attrs.get('step', None)
- if begin is None or end is None:
- raise RuntimeError('begin and end are required params')
- if 'None' in begin or 'None' in end:
- raise RuntimeError('None in begin or end not supported yet...')
- new_attrs = {'begin': begin, 'end': end}
- if stride is not None:
- new_attrs['stride'] = stride
- return get_nnvm_op('strided_slice')(inputs[0], **new_attrs)
-
-def _split(inputs, attrs):
- op_name, new_attrs = 'split', {}
- axis = attrs.get('axis', 1)
- new_attrs['indices_or_sections'] = required_attr(attrs, 'num_outputs', 'split')
- new_attrs['axis'] = axis
- outputs = get_nnvm_op(op_name)(*inputs, **new_attrs)
- if parse_bool_str(attrs, 'squeeze_axis'):
- squeeze_attrs = {'axis': axis}
- outputs = _sym.Group([get_nnvm_op('squeeze')(o, **squeeze_attrs) for o in outputs])
- return outputs
-
-def _softmax_activation(inputs, attrs):
- op_name, new_attrs = 'softmax', {}
- mode = attrs.get('mode', 'instance')
- new_attrs['axis'] = 0 if mode == 'instance' else 1
- return get_nnvm_op(op_name)(inputs[0], **new_attrs)
-
-def _softmax_output(inputs, attrs):
- op_name, new_attrs = 'softmax', {}
- if parse_bool_str(attrs, 'multi_output'):
- new_attrs['axis'] = 1
- return get_nnvm_op(op_name)(inputs[0], **new_attrs)
-
-def _upsampling(inputs, attrs):
- scale = attrs.get('scale')
- new_attrs = {'scale':int(scale)}
- return get_nnvm_op('upsampling')(inputs[0], **new_attrs)
-
-def _clip(inputs, attrs):
- op_name, new_attrs = "clip", {}
- new_attrs['a_min'] = required_attr(attrs, 'a_min', 'clip')
- new_attrs['a_max'] = required_attr(attrs, 'a_max', 'clip')
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _contrib_multibox_detection(inputs, attrs):
- clip = parse_bool_str(attrs, 'clip', default='True')
- threshold = attrs.get('threshold') or 0.01
- nms_threshold = attrs.get('nms_threshold') or 0.5
- force_suppress = parse_bool_str(attrs, 'force_suppress', default='False')
- variances = tuple([float(x.strip()) for x in attrs.get('variances').strip('()').split(',')]) \
- if attrs.get('variances') is not None else (0.1, 0.1, 0.2, 0.2)
- nms_topk = attrs.get('nms_topk') or -1
- new_attrs0 = {'clip': clip, 'threshold': float(threshold), 'variances': variances}
- new_attrs1 = {'return_indices': False, 'iou_threshold': float(nms_threshold),
- 'force_suppress': force_suppress, 'top_k': int(nms_topk)}
- data, valid_count = get_nnvm_op('multibox_transform_loc')(inputs[0], inputs[1],
- inputs[2], **new_attrs0)
- return get_nnvm_op('non_max_suppression')(data, valid_count, **new_attrs1)
-
-def _elemwise_sum(inputs, _):
- new_attrs = {'num_args':len(inputs)}
- return get_nnvm_op('elemwise_sum')(*inputs, **new_attrs)
-
-def _crop_like(inputs, attrs):
- new_attrs = {}
- offsets = \
- tuple([float(x.strip()) for x in attrs.get('offsets').strip('()').split(',')]) \
- if attrs.get('offsets') is not None else (0, 0)
- if offsets != (0, 0):
- raise tvm.error.OpAttributeInvalid(
- 'crop_like offsets must equal (0,0).')
- center_crop = parse_bool_str(attrs, 'center_crop', default="False")
- if center_crop:
- raise tvm.error.OpAttributeUnimplemented(
- 'Center crop is not supported in operator crop_like.')
- if len(inputs) < 2:
- raise tvm.error.OpAttributeUnimplemented("Only support crop_like pattern.")
- new_attrs["axis"] = [2, 3]
- return get_nnvm_op('slice_like')(inputs[0], inputs[1], **new_attrs)
-
-
-def _expand_dims(inputs, attrs):
- op_name, new_attrs = 'expand_dims', {}
- new_attrs['axis'] = required_attr(attrs, 'axis', 'expand_dims')
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _lrn(inputs, attrs):
- op_name, new_attrs = 'lrn', {}
- new_attrs['alpha'] = attrs.get('alpha', 0.0001)
- new_attrs['beta'] = attrs.get('beta', 0.75)
- new_attrs['bias'] = attrs.get('knorm', 2)
- # NCHW format and normalization along channel axis
- new_attrs['axis'] = 1
- new_attrs['size'] = required_attr(attrs, 'nsize', 'lrn')
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _minimum(inputs, attrs):
- return get_nnvm_op('broadcast_min')(*inputs, **attrs)
-
-def _maximum(inputs, attrs):
- return get_nnvm_op('broadcast_max')(*inputs, **attrs)
-
-def _ones(_, attrs):
- op_name = 'ones'
- return get_nnvm_op(op_name)(**attrs)
-
-def _zeros(_, attrs):
- op_name = 'zeros'
- return get_nnvm_op(op_name)(**attrs)
-
-def _argmax(inputs, attrs):
- op_name, new_attrs = 'argmax', {}
- new_attrs['dtype'] = 'float32'
- new_attrs['axis'] = attrs.get('axis', 0)
- new_attrs['keepdims'] = parse_bool_str(attrs, 'keepdims', default="False")
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-def _argmin(inputs, attrs):
- op_name, new_attrs = 'argmin', {}
- new_attrs['dtype'] = 'float32'
- new_attrs['axis'] = attrs.get('axis', 0)
- new_attrs['keepdims'] = parse_bool_str(attrs, 'keepdims', default="False")
- return get_nnvm_op(op_name)(*inputs, **new_attrs)
-
-_identity_list = ['__add_scalar__', '__add_symbol__', '__div_scalar__',
- '__div_symbol__', '__mul_scalar__', '__mul_symbol__',
- '__pow_scalar__', '__rdiv_scalar__', '__rpow_scalar__',
- '__rsub_scalar__', '__sub_scalar__', '__sub_symbol__',
- 'broadcast_add', 'broadcast_div', 'broadcast_mul',
- 'broadcast_sub', 'broadcast_to', 'cast', 'elemwise_add',
- 'elemwise_div', 'elemwise_mul', 'elemwise_sub', 'exp',
- 'flatten', 'log', 'log_softmax', 'max', 'min', 'negative',
- 'ones_like', 'relu', 'sigmoid', 'slice_like', 'softmax',
- 'sum', 'tanh', 'transpose', 'zeros_like', 'gather_nd',
- 'reshape_like', 'where']
-
-_convert_map = {
- '_copy' : _rename('copy'),
- '_div_scalar' : _rename('__div_scalar__'),
- '_minus_scalar' : _rename('__sub_scalar__'),
- '_mul_scalar' : _rename('__mul_scalar__'),
- '_plus_scalar' : _rename('__add_scalar__'),
- '_rdiv_scalar' : _rename('__rdiv_scalar__'),
- '_rminus_scalar': _rename('__rsub_scalar__'),
- '_contrib_MultiBoxPrior' : _rename('multibox_prior'),
- '_contrib_MultiBoxDetection' : _contrib_multibox_detection,
- '_minimum' : _minimum,
- '_maximum' : _maximum,
- '_ones' : _ones,
- '_zeros' : _zeros,
- 'argmax' : _argmax,
- 'argmin' : _argmin,
- 'Activation' : _activations,
- 'BatchNorm' : _batch_norm,
- 'BatchNorm_v1' : _batch_norm,
- 'Cast' : _rename('cast'),
- 'Concat' : _concat,
- 'Convolution' : _conv2d,
- 'Convolution_v1': _conv2d,
- 'Crop' : _crop_like,
- 'Deconvolution' : _conv2d_transpose,
- 'Dropout' : _dropout,
- 'Flatten' : _rename('flatten'),
- 'FullyConnected': _dense,
- 'LeakyReLU' : _leaky_relu,
- 'Pooling' : _pooling,
- 'Pooling_v1' : _pooling,
- 'Reshape' : _reshape,
- 'slice' : _slice,
- 'SliceChannel' : _split,
- 'split' : _split,
- 'Softmax' : _rename('softmax'),
- 'SoftmaxActivation' : _softmax_activation,
- 'SoftmaxOutput' : _softmax_output,
- 'add_n' : _elemwise_sum,
- 'concat' : _concat,
- 'max_axis' : _rename('max'),
- 'min_axis' : _rename('min'),
- 'reshape' : _reshape,
- 'sum_axis' : _rename('sum'),
- 'UpSampling' : _upsampling,
- 'clip' : _clip,
- 'expand_dims' : _expand_dims,
- 'LRN' : _lrn
-}
-
-def _convert_symbol(op_name, inputs, attrs,
- identity_list=None,
- convert_map=None):
- """Convert from mxnet op to nnvm op.
- The converter must specify some conversions explicitly to
- support gluon format ops such as conv2d...
-
- Parameters
- ----------
- op_name : str
- Operator name, such as Convolution, FullyConnected
- inputs : list of nnvm.Symbol
- List of input symbols.
- attrs : dict
- Dict of operator attributes
- identity_list : list
- List of operators that don't require conversion
- convert_map : dict
-        Dict of name : callable, where name is an op name that requires
-        conversion to nnvm and callable is a function that takes
-        (inputs, attrs) and returns the converted nnvm symbol
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
- identity_list = identity_list if identity_list else _identity_list
- convert_map = convert_map if convert_map else _convert_map
- if op_name in identity_list:
- op = get_nnvm_op(op_name)
- sym = op(*inputs, **attrs)
- elif op_name in convert_map:
- sym = convert_map[op_name](inputs, attrs)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend MXNet.'.format(op_name))
- return sym
-
-def _as_list(arr):
- """Force being a list, ignore if already is."""
- if isinstance(arr, list):
- return arr
- return [arr]
-
-def _topo_sort(symbol):
- """Sort all symbols in the mxnet graph in topological order.
-
- Parameters
- ----------
- symbol : mxnet.sym.Symbol
-
-    Returns
-    -------
-    list
-        List of mxnet symbols in topological order
- """
- queue = []
- symbol_map = {}
- deps = {}
- dep_cnts = {}
- for s in symbol:
- symbol_map[s.attr('name')] = s
- queue.append(s)
- while queue:
- sym = queue.pop(0)
- name = sym.attr('name')
- childs = sym.get_children()
- if childs is None:
- dep_cnts[name] = 0
- else:
- dep_cnts[name] = len({c.attr('name') for c in childs})
- for child in childs:
- child_name = child.attr('name')
- if child_name not in deps:
- deps[child_name] = set()
- deps[child_name].add(name)
- if child_name not in symbol_map:
- symbol_map[child_name] = child
- queue.append(child)
- order = []
- while dep_cnts:
- remove = []
- for name in dep_cnts:
- if dep_cnts[name] == 0:
- order.append(symbol_map[name])
- remove.append(name)
- if name in deps:
- for other in deps[name]:
- dep_cnts[other] -= 1
- for name in remove:
- del dep_cnts[name]
- return order
-
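The loop above is essentially Kahn's algorithm driven by per-node dependency counts. As a rough standalone illustration of the same idea (plain Python, no mxnet objects; the toy graph below is purely hypothetical):

def topo_order(children):
    """children maps a node name to the names of its child (input) nodes."""
    dep_cnts = {n: len(set(cs)) for n, cs in children.items()}
    deps = {}
    for n, cs in children.items():
        for c in cs:
            deps.setdefault(c, set()).add(n)
            dep_cnts.setdefault(c, 0)
    order = []
    while dep_cnts:
        ready = [n for n, cnt in dep_cnts.items() if cnt == 0]
        for n in ready:
            order.append(n)
            for parent in deps.get(n, ()):
                dep_cnts[parent] -= 1
            del dep_cnts[n]
    return order

# 'conv' consumes 'data' and 'weight'; 'relu' consumes 'conv'
print(topo_order({'relu': ['conv'], 'conv': ['data', 'weight']}))
# ['data', 'weight', 'conv', 'relu'] -- inputs first, consumers last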
-def _from_mxnet_impl(symbol, graph):
- """Convert mxnet symbol to nnvm implementation.
- Reconstruct a nnvm symbol by traversing the mxnet symbol.
-
- Parameters
- ----------
- symbol : mxnet.sym.Symbol
- Incompatible symbol from mxnet, sharing similar graph structure.
- The op_name and attrs inside are not always compatible.
- graph : dict
- Reusable nodes are stored in graph.
-
-    Returns
- -------
- nnvm.sym.Symbol
- Converted symbol
- """
- def get_node(sym):
- name = sym.attr('name')
- if name not in graph:
- return None
- output_index = json.loads(sym.tojson())['heads'][0][1]
- return graph[name][output_index]
-
- assert symbol is not None
- # Traverse all symbols in topological order
- for sym in _topo_sort(symbol):
- name = sym.attr('name')
- attr = sym.list_attr()
- op_name = sym.attr('op_name')
- childs = sym.get_children()
- if childs is not None:
- childs = [get_node(child) for child in childs]
- childs = [x for y in childs for x in _as_list(y)]
- node = _convert_symbol(op_name, childs, attr)
- elif op_name != 'null':
- node = _convert_symbol(op_name, [], attr)
- else:
- node = _sym.Variable(name=name, **attr)
- graph[name] = node
- nodes = []
- for sym in symbol:
- node = get_node(sym)
- assert node is not None
- nodes.append(node)
- if len(nodes) > 1:
- return _sym.Group(nodes)
- return nodes[0]
-
-def from_mxnet(symbol, arg_params=None, aux_params=None):
- """Convert from MXNet's model into compatible NNVM format.
-
- Parameters
- ----------
- symbol : mxnet.Symbol or mxnet.gluon.HybridBlock
- MXNet symbol
-
- arg_params : dict of str to mx.NDArray
- The argument parameters in mxnet
-
- aux_params : dict of str to mx.NDArray
- The auxiliary parameters in mxnet
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.NDArray
- The parameter dict to be used by nnvm
- """
- try:
- import mxnet as mx
- except ImportError as e:
- raise ImportError('{}. MXNet is required to parse symbols.'.format(e))
-
- if isinstance(symbol, mx.sym.Symbol):
- sym = _from_mxnet_impl(symbol, {})
- params = {}
- arg_params = arg_params if arg_params else {}
- aux_params = aux_params if aux_params else {}
- for k, v in arg_params.items():
- params[k] = tvm.nd.array(v.asnumpy())
- for k, v in aux_params.items():
- params[k] = tvm.nd.array(v.asnumpy())
- elif isinstance(symbol, mx.gluon.HybridBlock):
- data = mx.sym.Variable('data')
- sym = symbol(data)
- sym = _from_mxnet_impl(sym, {})
- params = {}
- for k, v in symbol.collect_params().items():
- params[k] = tvm.nd.array(v.data().asnumpy())
- elif isinstance(symbol, mx.gluon.Block):
- raise NotImplementedError("Only Hybrid Blocks are supported now.")
- else:
- msg = "mxnet.Symbol or gluon.HybridBlock expected, got {}".format(type(symbol))
- raise ValueError(msg)
- if isinstance(sym, list):
- sym = _sym.Group(sym)
- return sym, params
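A typical call, sketched under the assumption that mxnet and this (since removed) nnvm package are both importable; the model choice is illustrative only:

import mxnet as mx
from nnvm.frontend import from_mxnet

block = mx.gluon.model_zoo.vision.resnet18_v1(pretrained=True)  # any HybridBlock
sym, params = from_mxnet(block)  # nnvm.Symbol plus a dict of tvm.nd.array weights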
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines
-"""ONNX: Open Neural Network Exchange frontend."""
-from __future__ import absolute_import as _abs
-import numpy as np
-import tvm
-from .. import symbol as _sym
-from .common import get_nnvm_op, Renamer, SymbolTable, AttrConverter as AttrCvt
-from .onnx_caffe2_utils import dimension_picker, dimension_constraint, \
- infer_channels, revert_caffe2_pad
-
-__all__ = ['from_onnx']
-
-
-def onnx_storage_order2layout(storage_order):
-    """Map the ONNX storage_order attribute (0 or 1) to an NNVM layout string."""
-    if storage_order not in (0, 1):
- raise tvm.error.OpAttributeInvalid('Mode of storage_order must be either 0 or 1')
-
- return 'NCHW' if storage_order == 0 else 'NHWC'
-
-
-class OnnxOpConverter(object):
- """ A helper class for holding onnx op converters.
- """
-
- @classmethod
- def get_converter(cls, opset):
- """ Get converter matches given opset.
-
- :param opset: opset from model.
-        :return: converter, which should be `_impl_vx`, where x is the largest
-            supported version that is smaller than or equal to the given opset.
- """
- versions = [
- int(d.replace('_impl_v', '')) for d in dir(cls) if '_impl_v' in d
- ]
- versions = sorted(versions + [opset])
- version = versions[
- max([i for i, v in enumerate(versions) if v == opset]) - 1]
- if hasattr(cls, '_impl_v{}'.format(version)):
- return getattr(cls, '_impl_v{}'.format(version))
- raise NotImplementedError(
- 'opset version {} of {} not implemented'.format(
- version, cls.__name__))
-
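In practice this means a converter that defines _impl_v1 and _impl_v8 resolves opset 9 (and 10, unless _impl_v10 exists) to _impl_v8. A standalone sketch of the intended selection rule (the helper name below is made up for illustration):

def pick_impl_version(implemented, opset):
    # largest implemented version that does not exceed the requested opset
    candidates = [v for v in implemented if v <= opset]
    if not candidates:
        raise NotImplementedError('opset version {} not implemented'.format(opset))
    return max(candidates)

assert pick_impl_version([1, 8], 9) == 8
assert pick_impl_version([1, 8, 10], 10) == 10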
-
-class Elemwise(OnnxOpConverter):
- """ A helper class for elemwise op converters.
- """
-
- name = ''
-
- @classmethod
- def _math_name_picker(cls, suffix):
-
- def _impl(attr):
- if attr.get('broadcast', 0):
- return 'broadcast_' + suffix
- return 'elemwise_' + suffix
-
- return _impl
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
-        assert len(inputs) == 2, "Math op takes 2 inputs, {} given".format(
- len(inputs))
- op_name = cls._math_name_picker(cls.name)(attr)
- axis = int(attr.get('axis', 0))
- conv_ops = ["conv2d", "conv2d_transpose"]
- if op_name == 'broadcast_add' and inputs[0].attr('op_name') in conv_ops:
- # TODO(zhreshold): remove hard coded infershape
- inputs[1] = _sym.expand_dims(inputs[1], axis=axis, num_newaxis=2)
- return get_nnvm_op(op_name)(*inputs)
-
-
-class Pool(OnnxOpConverter):
- """ A helper class for pool op converters.
- """
-
- name = ''
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return AttrCvt(
- op_name=dimension_picker(cls.name),
- transforms={
- 'kernel_shape': 'pool_size',
- 'pads': ('padding', (0, 0), revert_caffe2_pad)
- },
- # very weird attributes here in onnx, force check
- ignores=['dilations'],
- # TODO(zhreshold): make sure ceil_mode in onnx, and layout?
- extras={'ceil_mode': False},
- custom_check=dimension_constraint())(inputs, attr, params)
-
-
-class Absolute(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return _sym.relu(inputs[0]) + _sym.relu(_sym.negative(inputs[0]))
-
-
-class Add(Elemwise):
- name = 'add'
-
-
-class AveragePool(Pool):
- name = 'avg_pool'
-
-
-class BatchNorm(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # TODO(zhreshold): 'spatial' is not properly handled here.
- return AttrCvt(
- op_name='batch_norm',
- disables=['momentum'],
- ignores=['spatial', 'is_test', 'consumed_inputs'])(inputs, attr,
- params)
-
-
-class Conv(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # get number of channels
- channels = infer_channels(inputs[1], params)
- attr['channels'] = channels
- return AttrCvt(
- op_name=dimension_picker('conv'),
- transforms={
- 'kernel_shape': 'kernel_size',
- 'dilations': ('dilation', (0, 0)),
- 'pads': ('padding', (0, 0), revert_caffe2_pad),
- 'group': ('groups', 1)
- },
- extras={'use_bias': len(inputs) == 3},
- custom_check=dimension_constraint())(inputs, attr, params)
-
-
-class ConvTranspose(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # get number of channels
- channels = infer_channels(inputs[1], params, True)
- attr['channels'] = channels
- groups = attr.pop('group')
- attr['groups'] = groups
- return AttrCvt(
- op_name=dimension_picker('conv', '_transpose'),
- transforms={
- 'kernel_shape': 'kernel_size',
- 'dilations': ('dilation', (0, 0)),
- 'pads': ('padding', (0, 0), revert_caffe2_pad)
- },
- disables=['output_shape'],
- extras={'use_bias': len(inputs) == 3},
- custom_check=dimension_constraint())(inputs, attr, params)
-
-
-class Div(Elemwise):
- name = 'div'
-
-
-class Elu(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = float(attr.get('alpha', 1.0))
- return -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(
- inputs[0])
-
-
-class Gemm(OnnxOpConverter):
- """ Operator converter for Gemm.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
-        assert len(inputs) == 3, "Gemm op takes 3 inputs, {} given".format(
- len(inputs))
- # Y = alpha * A * B + beta * C
- alpha = float(attr.get('alpha', 1.0))
- beta = float(attr.get('beta', 1.0))
- transA = int(attr.get('transA', 0))
- transB = int(attr.get('transB', 0))
- # get number of channels
- channels = infer_channels(inputs[1], params, not transB)
- if transA:
- inputs[0] = _sym.transpose(inputs[0], axes=(1, 0))
- if not transB:
- inputs[1] = _sym.transpose(inputs[1], axes=(1, 0))
- inputs[0] = _sym.flatten(inputs[0])
- return _sym.dense(
- alpha * inputs[0], inputs[1], beta * inputs[2], units=channels)
-
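For reference, the converter maps Y = alpha * A' * B' + beta * C onto a dense layer; the same arithmetic in plain numpy (shapes here are arbitrary examples, with transB=1 so B is already laid out as (units, in_features)):

import numpy as np

A = np.random.rand(2, 3)      # (batch, in_features)
B = np.random.rand(4, 3)      # (units, in_features), i.e. transB=1
C = np.random.rand(2, 4)
alpha, beta = 1.0, 1.0
Y = alpha * A @ B.T + beta * C
print(Y.shape)                # (2, 4)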
-
-class MaxPool(Pool):
- """ Operator converter for MaxPool
- """
- name = 'max_pool'
-
- @classmethod
- def _impl_v8(cls, inputs, attr, params):
- return AttrCvt(
- op_name=dimension_picker(cls.name),
- transforms={
- 'kernel_shape': 'pool_size',
- 'pads': ('padding', (0, 0), revert_caffe2_pad),
- 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout),
- },
- # very weird attributes here in onnx, force check
- ignores=['dilations', 'auto_pad'],
- # TODO(higumachan): make sure ceil_mode in onnx, and layout?
- extras={'ceil_mode': False},
- custom_check=dimension_constraint())(inputs, attr, params)
-
- @classmethod
- def _impl_v10(cls, inputs, attr, params):
- return AttrCvt(
- op_name=dimension_picker(cls.name),
- transforms={
- 'kernel_shape': 'pool_size',
- 'pads': ('padding', (0, 0), revert_caffe2_pad),
- 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout),
- 'ceil_mode': 'ceil_mode'
- },
- # very weird attributes here in onnx, force check
- ignores=['dilations', 'auto_pad'],
- custom_check=dimension_constraint())(inputs, attr, params)
-
-class Mul(Elemwise):
- name = 'mul'
-
-
-class Pad(OnnxOpConverter):
- """ Operator converter for Pad.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- pad_width = []
- pads = attr.pop('paddings')
- dims = int(len(pads) / 2)
- for i in range(dims):
- pad_width.append((pads[i], pads[i+dims]))
- attr['pad_width'] = pad_width
-
- return AttrCvt(
- op_name='pad',
- transforms={
- 'value': 'pad_value',
- },
- ignores=['mode'],
-            custom_check=(lambda attrs: attrs.get('mode', b'constant').decode("utf-8") == 'constant',
-                          'pad mode != constant'))(inputs, attr, params)
-
- @classmethod
- def _impl_v2(cls, inputs, attr, params):
- pad_width = []
- pads = attr.pop('pads')
- dims = int(len(pads) / 2)
- for i in range(dims):
- pad_width.append((pads[i], pads[i+dims]))
- attr['pad_width'] = pad_width
-
- return AttrCvt(
- op_name='pad',
- transforms={
- 'value': 'pad_value',
- },
- ignores=['mode'],
-            custom_check=(lambda attrs: attrs.get('mode', b'constant').decode("utf-8") == 'constant',
-                          'pad mode != constant'))(inputs, attr, params)
-
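Both opset variants regroup ONNX's flat pads list (all begin values followed by all end values) into per-dimension (before, after) pairs, for example:

# ONNX 'pads': [d0_begin, d1_begin, d2_begin, d3_begin, d0_end, d1_end, d2_end, d3_end]
pads = [0, 0, 1, 2, 0, 0, 3, 4]
dims = len(pads) // 2
pad_width = [(pads[i], pads[i + dims]) for i in range(dims)]
print(pad_width)   # [(0, 0), (0, 0), (1, 3), (2, 4)]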
-
-class ParametricSoftPlus(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = float(attr.get('alpha', 1.0))
- beta = float(attr.get('beta', 1.0))
- return _sym.log(_sym.exp(beta * inputs[0]) + 1) * alpha
-
-
-class Prelu(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
-        assert len(inputs) == 2, "Prelu needs 2 inputs, {} given".format(
- len(inputs))
- return _sym.prelu(inputs[0], inputs[1])
-
-
-class Reciprocal(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return 1.0 / inputs[0]
-
-
-class Reshape(OnnxOpConverter):
- """ Operator converter for Reshape.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return _sym.reshape(inputs[0], shape=attr['shape'])
-
- @classmethod
- def _impl_v5(cls, inputs, attr, params):
- if inputs[1].list_output_names()[0] in params:
- shape = tuple(params[inputs[1].list_output_names()[0]].asnumpy())
- out = _sym.reshape(inputs[0], shape=shape)
- else:
- out = _sym.reshape_like(inputs[0], inputs[1])
-
- return out
-
-class Scale(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- scale = float(attr.get('scale', 1.0))
- return inputs[0] * scale
-
-
-class Selu(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = float(attr.get('alpha', 1.6732))
- gamma = float(attr.get('gamma', 1.0507))
- return gamma * (
- -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0]))
-
-
-class ScaledTanh(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = float(attr.get('alpha', 1.0))
- beta = float(attr.get('beta', 1.0))
- return _sym.tanh(beta * inputs[0]) * alpha
-
-
-class SoftPlus(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return _sym.log(_sym.exp(inputs[0]) + 1)
-
-
-class Softsign(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return inputs[0] / (1 + Absolute.get_converter(1)(inputs, attr, params))
-
-
-class Sub(Elemwise):
- name = 'sub'
-
-
-class Sum(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # Onnx Sum Operator
- for in_index in range(len(inputs) - 1):
- inputs[in_index + 1] = _sym.broadcast_add(inputs[in_index],
- inputs[in_index + 1])
-
- return inputs[len(inputs) - 1]
-
-
-class ThresholdedRelu(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = float(attr.get('alpha', 1.0))
- alpha_tensor = _sym.full_like(inputs[0], fill_value=float(alpha))
- return _sym.elemwise_mul(inputs[0], _sym.greater(inputs[0], alpha_tensor))
-
-class ImageScaler(OnnxOpConverter):
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- channelScale = attr['scale']
- bias_attr = attr['bias']
- bias = SymbolTable().new_const(np.array(bias_attr).reshape([3, 1, 1]))
- scaledChannel = _sym.__mul_scalar__(inputs[0], scalar=channelScale)
- ret = _sym.broadcast_add(scaledChannel, bias)
- return ret
-
-
-def _broadcast_constraint():
-
- def _broadcast_check(attrs):
- if attrs.get('axis', None):
- return False
- return True
-
- return _broadcast_check, "Specifying broadcast axis not allowed."
-
-
-def _fully_connected(opset):
-
- def _impl(inputs, attr, params):
- # get number of channels
- channels = infer_channels(inputs[1], params)
- attr['units'] = channels
- return AttrCvt('dense', ignores=['axis', 'axis_w'])(inputs, attr)
-
- return _impl
-
-
-class Upsample(OnnxOpConverter):
- """ Operator converter for Upsample (nearest mode).
- """
-
- @classmethod
- def _impl_v9(cls, inputs, attr, params):
- scales = attr.get('scales')
- if not scales:
-            # From opset 9 onward, scales comes in as a second input instead of an attribute.
-            assert len(inputs) == 2, "Upsample op takes 2 inputs, {} given".format(len(inputs))
- input_name = inputs[1].list_input_names()[0]
- scales = params[input_name].asnumpy()
- inputs = inputs[:1]
- assert len(scales) == 4 and scales[0] == 1.0 and scales[1] == 1.0 and scales[2] == scales[3]
- mode = attr.get('mode')
- if mode == b'nearest':
- method = "NEAREST_NEIGHBOR"
- elif mode == b'linear':
- method = "BILINEAR"
- else:
- raise tvm.error.OpAttributeInvalid(
- 'Value {} in attribute "mode" of operator Upsample is not valid.'.format(mode))
- return _sym.upsampling(inputs[0], scale=int(scales[-1]), method=method, layout='NCHW')
-
-
-class Shape(OnnxOpConverter):
- """ Operator converter for Shape.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # Result of this operator is prominently used by reshape operator.
- # Just pass the input as it is so that reshape_like can be used there.
- print("Shape: Differently implemented in NNVM as a bypass (dummy operator)")
- return inputs[0]
-
-class Cast(OnnxOpConverter):
- """ Operator converter for Cast.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr)
-
- @classmethod
- def _impl_v5(cls, inputs, attr, params):
- try:
- from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
- attr['to'] = TENSOR_TYPE_TO_NP_TYPE[attr['to']]
- except ImportError as e:
- raise ImportError(
- "Unable to import onnx.mapping which is required {}".format(e))
- return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr)
-
-
-class Unsqueeze(OnnxOpConverter):
- """ Operator converter for Unsqueeze.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- for axes in attr['axes']:
- inputs[0] = _sym.expand_dims(inputs[0], axis=axes, num_newaxis=1)
- return inputs[0]
-
-
-class Split(OnnxOpConverter):
- """ Operator converter for Split.
- """
-
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- attr['indices_or_sections'] = []
- index = 0
- for i in attr['split'][:-1]:
- index += i
- attr['indices_or_sections'].append(index)
- return AttrCvt(
- op_name='split',
- ignores=['split'])(inputs, attr, params)
-
-
-class Slice(OnnxOpConverter):
- """ Operator converter for Slice.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- if isinstance(attr['starts'], int):
- attr['starts'] = (attr['starts'],)
- attr['ends'] = (attr['ends'],)
-
- try:
- # Update the starts and ends according to axes if required.
- if isinstance(attr['axes'], int):
- attr['axes'] = (attr['axes'],)
-
- if (max(attr['axes']) + 1) != len(attr['axes']):
- new_axes = []
- new_starts = []
- new_ends = []
- pop_index = 0
- for i in range(max(attr['axes']) + 1):
- if i in attr['axes']:
- new_axes.append(i)
- new_starts.append(attr['starts'][pop_index])
- new_ends.append(attr['ends'][pop_index])
- pop_index += 1
- else:
- new_axes.append(i)
- new_starts.append(0)
- new_ends.append(np.iinfo(np.int32).max)
- attr['axes'] = new_axes
- attr['starts'] = new_starts
- attr['ends'] = new_ends
- except KeyError:
- pass
-
- return AttrCvt(op_name='strided_slice',
- transforms={'starts': 'begin',
- 'ends': 'end'},
- ignores=['axes'])(inputs, attr)
-
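When starts/ends are given only for a subset of axes, the code above expands them to cover every leading axis, filling the untouched axes with a full range. Worked out on illustrative values:

import numpy as np

axes, starts, ends = [0, 2], [1, 4], [3, 8]
new_axes, new_starts, new_ends = [], [], []
pop_index = 0
for i in range(max(axes) + 1):
    new_axes.append(i)
    if i in axes:
        new_starts.append(starts[pop_index])
        new_ends.append(ends[pop_index])
        pop_index += 1
    else:
        new_starts.append(0)
        new_ends.append(np.iinfo(np.int32).max)
print(new_starts, new_ends)   # [1, 0, 4] [3, 2147483647, 8]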
-class Gather(OnnxOpConverter):
- """ Operator converter for Gather.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- axis = attr.get('axis', 0)
- return AttrCvt(op_name='take',
- extras={'axis':axis})(inputs, attr)
-
-class LRN(OnnxOpConverter):
- """ Operator converter for Local Response Normalization.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- """LRN support only NCHW format
- https://github.com/onnx/onnx/blob/master/docs/Operators.md#LRN
- """
- axis = 1
- alpha = attr.get('alpha', 0.0001)
- beta = attr.get('beta', 0.75)
- bias = attr.get('bias', 1.0)
- nsize = attr.get('size')
- return _sym.lrn(inputs[0], size=nsize, axis=axis,
- alpha=alpha, beta=beta, bias=bias)
-
-class Maximum(OnnxOpConverter):
- """ Operator converter for Maximum.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- if not isinstance(inputs, list) or len(inputs) < 2:
-            raise ValueError("Expected at least 2 inputs")
- _max = inputs[0]
- for i in range(1, len(inputs)):
- _max = AttrCvt(op_name='broadcast_max')([_max, inputs[i]], {})
- return _max
-
-class Minimum(OnnxOpConverter):
- """ Operator converter for Minimum.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- if not isinstance(inputs, list) or len(inputs) < 2:
-            raise ValueError("Expected at least 2 inputs")
- _min = inputs[0]
- for i in range(1, len(inputs)):
- _min = AttrCvt(op_name='broadcast_min')([_min, inputs[i]], {})
- return _min
-
-class Mean(OnnxOpConverter):
- """ Operator converter for Mean.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- if not isinstance(inputs, list) or len(inputs) < 2:
-            raise ValueError("Expected at least 2 inputs")
- count = len(inputs)
- _sum = inputs[0]
- for i in range(1, count):
- _sum = AttrCvt(op_name='broadcast_add')([_sum, inputs[i]], {})
- return _sum / count
-
-class HardSigmoid(OnnxOpConverter):
- """ Operator converter for HardSigmoid.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- alpha = attr.get('alpha', 0.2)
- beta = attr.get('beta', 0.5)
- transformX = (inputs[0] * alpha) + beta
- attr = {'a_min':0, 'a_max':1}
- return AttrCvt(op_name='clip')([transformX], attr)
-
-class ArgMax(OnnxOpConverter):
- """ Operator converter for ArgMax.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- axis = attr.get('axis', 0)
- keepdims = attr.get('keepdims', True)
- attr = {'axis':axis, 'keepdims':keepdims}
- return AttrCvt(op_name='argmax')(inputs, attr)
-
-class ArgMin(OnnxOpConverter):
- """ Operator converter for ArgMin.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- axis = attr.get('axis', 0)
- keepdims = attr.get('keepdims', True)
- attr = {'axis':axis, 'keepdims':keepdims}
- return AttrCvt(op_name='argmin')(inputs, attr)
-
-class Softmax(OnnxOpConverter):
- """ Operator converter for Softmax.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- # set default value when axis is not set in the model
- if 'axis' not in attr:
- attr['axis'] = 1
- return AttrCvt(
- op_name='softmax',
- transforms={
- 'axis': ('axis', 1),
- })(inputs, attr, params)
-
-class ConstantFill(OnnxOpConverter):
- """ Operator converter for ConstantFill.
- """
- @classmethod
- def _impl_v1(cls, inputs, attr, params):
- is_full = True
- num_inputs = len(inputs)
- if 'shape' in attr:
- if num_inputs > 0:
- raise ImportError(
-                    "Can't set shape and an input tensor at the same time")
- shape = attr.pop('shape')
- else:
- if num_inputs == 0:
- raise ImportError(
- "Either shape attribute or input should be set")
- if 'input_as_shape' in attr and attr['input_as_shape']:
- shape = params[inputs[0].list_output_names()[0]].asnumpy()
- else:
- is_full = False
-
- if not is_full:
- if 'extra_shape' in attr:
- raise ImportError(
- "Extra Shape not supported with fill_like")
-
- out = AttrCvt(
- op_name='full_like',
- transforms={'value': 'fill_value'},
- ignores=['dtype'])(inputs, attr)
- return _sym.cast(out, dtype=attr['dtype'].decode("utf-8"))
- if 'extra_shape' in attr:
- shape = shape + attr.pop('extra_shape')
-
- return AttrCvt(
- op_name='full',
- transforms={'value': 'fill_value'},
- extras={'shape':shape})(inputs, attr)
-
-# compatible operators that do NOT require any conversion.
-_identity_list = []
-
-
-# _convert_map defines maps of name to converter functor(callable)
-# for 1 to 1 mapping, use Renamer if nothing but name is different
-# use AttrCvt if attributes need to be converted
-# for 1 to N mapping(composed), use custom callable functions
-# for N to 1 mapping, currently not supported(?)
-def _get_convert_map(opset):
- return {
- # defs/experimental
- 'Identity': Renamer('copy'),
- # 'Affine'
- 'ThresholdedRelu': ThresholdedRelu.get_converter(opset),
- 'ScaledTanh': ScaledTanh.get_converter(opset),
- 'ParametricSoftplus': ParametricSoftPlus.get_converter(opset),
- 'ConstantFill': ConstantFill.get_converter(opset),
- # 'GivenTensorFill'
- 'FC': AttrCvt('dense', ignores=['axis', 'axis_w']),
- 'Scale': Scale.get_converter(opset),
- # 'GRUUnit'
- # 'ATen'
- 'ImageScaler': ImageScaler.get_converter(opset),
- # 'MeanVarianceNormalization'
- # 'Crop'
- # 'Embedding'
- 'Upsample' : Upsample.get_converter(opset),
- 'SpatialBN': BatchNorm.get_converter(opset),
-
- # defs/generator
- # 'Constant' # Implemented
- # 'RandomUniform'
- # 'RandomNormal'
- # 'RandomUniformLike'
- # 'RandomNormalLike'
-
- # defs/logical
-
- # defs/math
- 'Add': Add.get_converter(opset),
- 'Sub': Sub.get_converter(opset),
- 'Mul': Mul.get_converter(opset),
- 'Div': Div.get_converter(opset),
- 'Neg': Renamer('negative'),
- 'Abs': Absolute.get_converter(opset),
- 'Reciprocal': Reciprocal.get_converter(opset),
- 'Floor': Renamer('floor'),
- 'Ceil': Renamer('ceil'),
- 'Sqrt': Renamer('sqrt'),
- 'Relu': Renamer('relu'),
- 'LeakyRelu': Renamer('leaky_relu'),
- 'Selu': Selu.get_converter(opset),
- 'Elu': Elu.get_converter(opset),
- 'Exp': Renamer('exp'),
- 'Log': Renamer('log'),
- 'Tanh': Renamer('tanh'),
- 'Pow': Renamer('broadcast_pow'),
- 'PRelu': Prelu.get_converter(opset),
- 'Sigmoid': Renamer('sigmoid'),
- 'HardSigmoid': HardSigmoid.get_converter(opset),
- 'Max': Maximum.get_converter(opset),
- 'Min': Minimum.get_converter(opset),
- 'Sum': Sum.get_converter(opset),
- 'Mean': Mean.get_converter(opset),
- 'Clip': AttrCvt('clip', transforms={'min': 'a_min', 'max': 'a_max'}),
- # softmax default axis is different in onnx
- 'Softmax': Softmax.get_converter(opset),
- 'LogSoftmax': AttrCvt('log_softmax', {'axis': ('axis', 1)}),
- # 'Hardmax'
- 'Softsign': Softsign.get_converter(opset),
- 'SoftPlus': SoftPlus.get_converter(opset),
- 'Gemm': Gemm.get_converter(opset),
- 'MatMul': Renamer('matmul'),
-
- # defs/nn
- 'AveragePool': AveragePool.get_converter(opset),
- 'MaxPool': MaxPool.get_converter(opset),
- 'Conv': Conv.get_converter(opset),
- 'ConvTranspose': ConvTranspose.get_converter(opset),
- 'GlobalAveragePool': Renamer('global_avg_pool2d'),
- 'GlobalMaxPool': Renamer('global_max_pool2d'),
- 'BatchNormalization': BatchNorm.get_converter(opset),
- # 'InstanceNormalization'
- # 'LpNormalization'
- 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']),
- 'Flatten': Renamer('flatten'),
- 'LRN': LRN.get_converter(opset),
-
- # defs/reduction
- 'ReduceMax': AttrCvt('max', {'axes': 'axis'}),
- 'ReduceMin': AttrCvt('min', {'axes': 'axis'}),
- 'ReduceSum': AttrCvt('sum', {'axes': 'axis'}),
- 'ReduceMean': AttrCvt('mean', {'axes': 'axis'}),
- # 'ReduceProd'
- # 'ReduceLogSumExp'
- 'ArgMax': ArgMax.get_converter(opset),
- 'ArgMin': ArgMin.get_converter(opset),
-
- # defs/tensor
- 'Cast': Cast.get_converter(opset),
- 'Reshape': Reshape.get_converter(opset),
- 'Concat': Renamer('concatenate'),
- 'Split': Split.get_converter(opset),
- 'Slice': Slice.get_converter(opset),
- 'Transpose': AttrCvt('transpose', {'perm': 'axes'}),
- 'Gather': Gather.get_converter(opset),
- 'Squeeze': AttrCvt('squeeze', {'axes': 'axis'}),
- 'Unsqueeze': Unsqueeze.get_converter(opset),
- 'Pad': Pad.get_converter(opset),
- 'Shape': Shape.get_converter(opset),
- }
-
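Most table entries only rename attributes on their way to the nnvm op. A minimal standalone sketch of that renaming step (the real AttrConverter in .common also handles defaults, ignores and custom checks):

def rename_attrs(attrs, transforms):
    return {transforms.get(k, k): v for k, v in attrs.items()}

# e.g. the 'Transpose' entry maps ONNX 'perm' to nnvm 'axes'
print(rename_attrs({'perm': (0, 2, 3, 1)}, {'perm': 'axes'}))
# {'axes': (0, 2, 3, 1)}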
-
-class GraphProto(object):
- """A helper class for handling nnvm graph copying from pb2.GraphProto.
- Definition: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto
- """
-
- def __init__(self):
- self._nodes = {}
- self._params = {}
- self._renames = {}
- self._num_input = 0
- self._num_param = 0
-
- def from_onnx(self, graph, opset):
- """Construct nnvm nodes from onnx graph.
-        The input names in an onnx graph are vague, often just "1", "2", ...
-        For convenience, we rename the real input names to "input_0",
-        "input_1", ... and the parameters to "param_0", "param_1", ...
-
- Parameters
- ----------
- graph : onnx protobuf object
- The loaded onnx graph
- opset : opset version
-
- Returns
- -------
- sym : nnvm.sym.Symbol
- The returned nnvm symbol
- params : dict
- A dict of name: tvm.nd.array pairs, used as pretrained weights
- """
- # parse network inputs to nnvm, aka parameters
- for init_tensor in graph.initializer:
- if not init_tensor.name.strip():
- raise ValueError("Tensor's name is required.")
- self._params[init_tensor.name] = self._parse_array(init_tensor)
- for i in graph.input:
- # from onnx v0.2, GraphProto.input has type ValueInfoProto,
- # and the name is 'i.name'
- i_name = self._parse_value_proto(i)
- if i_name in self._params:
- # i is a param instead of input
- self._num_param += 1
- self._params[i_name] = self._params.pop(i_name)
- self._nodes[i_name] = _sym.Variable(
- name=i_name, shape=self._params[i_name].shape)
- else:
- self._num_input += 1
- self._nodes[i_name] = _sym.Variable(name=i_name)
- # get list of unsupported ops
- convert_map = _get_convert_map(opset)
- unsupported_ops = set()
- for node in graph.node:
- op_name = node.op_type
- if op_name not in convert_map and \
- op_name != 'Constant' and \
- op_name not in _identity_list:
- unsupported_ops.add(op_name)
- if unsupported_ops:
- msg = 'The following operators are not supported for frontend ONNX: '
- msg += ', '.join(unsupported_ops)
- raise tvm.error.OpNotImplemented(msg)
- # construct nodes, nodes are stored as directed acyclic graph
- for node in graph.node:
- op_name = node.op_type
- attr = self._parse_attr(node.attribute)
- inputs = [self._nodes[self._renames.get(i, i)] for i in node.input]
- if op_name == "Constant":
- t_proto = self._parse_attr(node.attribute)["value"]
- self._num_param += 1
- self._params[node.output[0]] = self._parse_array(t_proto)
- self._nodes[node.output[0]] = _sym.Variable(name=node.output[0],
- shape=list(t_proto.dims))
- else:
- op = self._convert_operator(op_name, inputs, attr, opset)
- node_output = self._fix_outputs(op_name, node.output)
- assert len(node_output) == len(op.list_output_names()), (
- "Number of output mismatch {} vs {} in {}.".format(
- len(node_output), len(op.list_output_names()), op_name))
- for k, i in zip(list(node_output), range(len(node_output))):
- self._nodes[k] = op[i]
- # now return the outputs
- out = [self._nodes[self._parse_value_proto(i)] for i in graph.output]
- if len(out) > 1:
- out = _sym.Group(out)
- else:
- out = out[0]
- return out, self._params
-
- def _parse_value_proto(self, value_proto):
- """Parse ValueProto or raw str."""
- try:
- name = value_proto.name
- except AttributeError:
- name = value_proto
- return name
-
- def _parse_array(self, tensor_proto):
- """Grab data in TensorProto and convert to numpy array."""
- try:
- from onnx.numpy_helper import to_array
- except ImportError as e:
- raise ImportError(
- "Unable to import onnx which is required {}".format(e))
- np_array = to_array(tensor_proto).reshape(tuple(tensor_proto.dims))
- return tvm.nd.array(np_array)
-
- def _parse_attr(self, attr_proto):
- """Convert a list of AttributeProto to a dict, with names as keys."""
- attrs = {}
- for a in attr_proto:
- for f in ['f', 'i', 's']:
- if a.HasField(f):
- attrs[a.name] = getattr(a, f)
- for f in ['floats', 'ints', 'strings']:
- if list(getattr(a, f)):
- assert a.name not in attrs, "Only one type of attr is allowed"
- attrs[a.name] = tuple(getattr(a, f))
- for f in ['t']:
- if a.HasField(f):
- attrs[a.name] = getattr(a, f)
- for f in ['tensors']:
- if list(getattr(a, f)):
- assert a.name not in attrs, "Only one type of attr is allowed"
- attrs[a.name] = tuple(getattr(a, f))
- for f in ['g']:
- if a.HasField(f):
- raise NotImplementedError(
-                    "Field {} is not supported in nnvm.".format(f))
- for f in ['graphs']:
- if list(getattr(a, f)):
- raise NotImplementedError(
-                    "Field {} is not supported in nnvm.".format(f))
- if a.name not in attrs:
- raise ValueError("Cannot parse attribute: \n{}\n.".format(a))
- return attrs
-
- def _convert_operator(self,
- op_name,
- inputs,
- attrs,
- opset,
- identity_list=None,
- convert_map=None):
- """Convert from onnx operator to nnvm operator.
- The converter must specify conversions explicitly for incompatible name, and
- apply handlers to operator attributes.
-
- Parameters
- ----------
- op_name : str
- Operator name, such as Convolution, FullyConnected
- inputs : list of nnvm.Symbol
- List of input symbols.
- attrs : dict
- Dict of operator attributes
- opset : int
- Opset version
- identity_list : list
- List of operators that don't require conversion
- convert_map : dict
-            Dict of name : callable, where name is an op name that requires
-            conversion to nnvm and callable is a function that takes
-            (inputs, attrs, params) and returns the converted nnvm symbol
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
- identity_list = identity_list if identity_list else _identity_list
- convert_map = convert_map if convert_map else _get_convert_map(opset)
- if op_name in identity_list:
- sym = get_nnvm_op(op_name)(*inputs, **attrs)
- elif op_name in convert_map:
- sym = convert_map[op_name](inputs, attrs, self._params)
- else:
- raise tvm.error.OpNotImplemented(
-                'Operator {} is not supported in frontend ONNX.'.format(op_name))
- return sym
-
- def _fix_outputs(self, op_name, outputs):
-        """A hack to handle dropout or similar operators that have more than one
-        output in ONNX.
- """
- if op_name == 'Dropout':
- if len(outputs) == 1:
- return outputs
- # TODO(zhreshold): support dropout mask?
- outputs = outputs[:-1]
- return outputs
-
-
-def from_onnx(model):
- """Load onnx graph which is a python protobuf object into nnvm graph.
- The companion parameters will be handled automatically.
-    The input names in an onnx graph are vague, often just "1", "2", ...
-    For convenience, we rename the real input names to "input_0",
-    "input_1", ... and the parameters to "param_0", "param_1", ...
-
- Parameters
- ----------
- model : protobuf object
- ONNX ModelProto after ONNX v1.1.0
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.ndarray
- Dict of converted parameters stored in tvm.ndarray format
- """
- g = GraphProto()
- graph = model.graph
- try:
- opset = model.opset_import[0].version if model.opset_import else 1
- except AttributeError:
- opset = 1
- sym, params = g.from_onnx(graph, opset)
- return sym, params
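A typical call, sketched under the assumption that the onnx package and this (since removed) nnvm frontend are importable; the file name is illustrative only:

import onnx
from nnvm.frontend import from_onnx

model = onnx.load('model.onnx')
sym, params = from_onnx(model)   # nnvm.Symbol plus a dict of tvm.nd.array weights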
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Util functions shared by the ONNX and Caffe2 frontends."""
-from __future__ import absolute_import as _abs
-from nnvm import graph as _graph
-from nnvm.compiler import graph_util
-
-
-def dimension_picker(prefix, suffix=''):
-    def _impl(attr):
-        kernel = attr['kernel_shape']
-        if len(kernel) == 2:
-            return prefix + '2d' + suffix
- raise NotImplementedError("Only 2d kernel supported.")
-
- return _impl
-
-
-def dimension_constraint():
- def _dim_check(attrs):
- if len(attrs['kernel_shape']) == 2:
- return True
- return False
-
- return _dim_check, "Only 2d kernel supported."
-
-
-def infer_channels(inputs, params, transpose=False):
-    """A hack for getting 'channels' or 'units', since caffe2 doesn't provide
-    these attributes. We check the shape of the provided weights to get the number.
- """
- g = _graph.create(inputs)
- shape_dict = {k: v.shape for k, v in params.items()}
- _, out_shapes = graph_util.infer_shape(g, **shape_dict)
- channels = out_shapes[0][0] if not transpose else out_shapes[0][1]
- return channels
-
-
-def revert_caffe2_pad(pads):
-    """Caffe2 requires two times the normal padding (begin and end values per spatial axis)."""
- if len(pads) == 4:
- pads = pads[:2]
- elif len(pads) == 2:
- pass
- else:
- raise ValueError("Invalid caffe2 type padding: {}".format(pads))
- return pads
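In other words, a 4-element pads list (for a 2-D kernel typically [begin_h, begin_w, end_h, end_w]) is assumed symmetric and only the leading pair is kept; asymmetric end values are silently dropped. For example:

print(revert_caffe2_pad([1, 2, 1, 2]))   # [1, 2] -- symmetric (pad_h, pad_w) kept
print(revert_caffe2_pad([1, 2]))         # [1, 2] -- already two values, unchanged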
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines
-"""TF: Tensorflow frontend."""
-from __future__ import absolute_import as _abs
-from __future__ import print_function
-
-import warnings
-# Numpy support
-import numpy as np
-
-import tvm
-from .. import symbol as _sym
-from .. import graph as _graph
-from .. compiler import graph_util, build_module
-from .common import get_nnvm_op, AttrConverter as AttrConvert
-
-__all__ = ['from_tensorflow']
-
-class AttrCvt(object):
- """A Wrapper to handle some common jobs:
-    """A wrapper around AttrConverter that adds TensorFlow-specific attributes
-    to the ignore list and retains the original node names.
-    """
- excludes=None, disables=None, ignores=None,
- extras=None, custom_check=None):
- self._op_name = op_name
- self._transforms = transforms if transforms else {}
- self._excludes = excludes if excludes else []
- self._disables = disables if disables else []
- self._ignores = ignores if ignores else []
- self._extras = extras if extras else {}
- self._custom_check = custom_check
-
- def __call__(self, inputs, attrs, *args):
- self._ignores.append('_output_shapes')
- self._ignores.append('_input_shapes')
- self._ignores.append('T')
- self._ignores.append('use_cudnn_on_gpu')
- self._ignores.append('_node_name')
- self._ignores.append('is_training')
- self._ignores.append('_target_layout')
- self._ignores.append('_input_0d_mismatch')
- # Retain the names
- try:
- attrs['name'] = attrs['_node_name']
- except KeyError:
- pass
- return AttrConvert(self._op_name, self._transforms, self._excludes,
- self._disables, self._ignores, self._extras,
- self._custom_check)(inputs, attrs, *args)
-
-def _get_pad_pair(input1d, kernel1d, stride1d):
- if input1d % stride1d == 0:
- pad = max(kernel1d - stride1d, 0)
- else:
- pad = max(kernel1d - (input1d % stride1d), 0)
-
- pad_before = pad // 2
- pad_after = pad - pad_before
-
- return [pad_before, pad_after]
-
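This mirrors TensorFlow's 'SAME' rule: add just enough padding to make the output ceil(input / stride) long, with the extra pixel (if any) going to the end. Assuming _get_pad_pair as defined above:

print(_get_pad_pair(224, 7, 2))   # [2, 3]: 224 % 2 == 0, pad = 7 - 2 = 5, split 2/3
print(_get_pad_pair(10, 3, 3))    # [1, 1]: 10 % 3 == 1, pad = 3 - 1 = 2, split 1/1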
-def _math_name_picker(suffix):
-    def _impl(attr):
-        return 'broadcast_' + suffix
- return _impl
-
-def _dimension_picker(prefix, suffix=''):
-    def _impl(attr):
-        kernel = attr['kernel_shape']
-        if len(kernel) == 2:
-            return prefix + '2d' + suffix
- raise tvm.error.OpAttributeUnImplemented(
- 'Non-2D kernels are not supported for operator {}.'.format(prefix))
- return _impl
-
-def _dimension_constraint():
- def _dim_check(attrs):
- if len(attrs['kernel_shape']) == 2:
- return True
- return False
- return _dim_check, "Only 2d kernel supported."
-
-def _infer_channels(inputs, params, transpose=False):
-    """A hack for getting 'channels' or 'units', since tensorflow doesn't provide
-    these attributes. We check the shape of the provided weights to get the number.
- """
- g = _graph.create(inputs)
- shape_dict = {k: v.shape for k, v in params.items()}
- _, out_shapes = graph_util.infer_shape(g, **shape_dict)
- channels = out_shapes[0][0] if not transpose else out_shapes[0][1]
- return channels
-
-def _rsqrt():
- def _impl(inputs, attr, *args):
- return AttrCvt(op_name="__pow_scalar__", extras={'scalar': -0.5})(inputs, attr)
- return _impl
-
-def _argx(func, func_name):
- """ A common wrapper for argmin and argmax operations """
- def _impl(inputs, attr, params):
- try:
- # In Tensorflow, `axis` argument is a Tensor, not attribute. We
- # support the case where it inputs from a scalar constant.
- axis_input_name = inputs[1].list_output_names()[0]
-            axis_input_value = params[axis_input_name].asnumpy()[0]
-        except (IndexError, KeyError):
-            raise TypeError( \
-                "Unsupported argument for `{}` : `axis` should be a constant".format(func_name))
-        return func(inputs[0], axis=axis_input_value, keepdims=False)
- return _impl
-
-def _elemwise(name):
- def _impl(inputs, attr, *args):
-        assert len(inputs) == 2, "{} takes 2 inputs, {} given".format(name, len(inputs))
- op_name = _math_name_picker(name)(attr)
- return get_nnvm_op(op_name)(*inputs)
- return _impl
-
-def _pooling(name):
- def _impl(inputs, attr, params):
-
- attr['data_format'] = attr['data_format'].decode("utf-8")
- flip_layout = False
-
- input_shape = attr['_input_shapes'][inputs[0]]
-
- if attr['data_format'] == 'NHWC':
- attr['kernel_shape'] = (attr['ksize'][1], attr['ksize'][2])
- attr['strides'] = (attr['strides'][1], attr['strides'][2])
- elif attr['data_format'] == 'NCHW':
- attr['kernel_shape'] = (attr['ksize'][2], attr['ksize'][3])
- attr['strides'] = (attr['strides'][2], attr['strides'][3])
- else:
- msg = 'Value {} in attribute "data_format" of operator Pooling is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(attr['data_format']))
-
- if attr['_target_layout'] == "NCHW" and attr['data_format'] == "NHWC":
- tmp_shape = attr['_input_shapes'][inputs[0]]
- input_shape = [tmp_shape[ii] for ii in (0, 3, 1, 2)]
- inputs[0] = _sym.transpose(inputs[0], axes=(0, 3, 1, 2))
- attr['data_format'] = "NCHW"
- flip_layout = True
-
- # Fix padding
- attr['padding'] = attr['padding'].decode("utf-8")
-
- if attr['padding'] == 'VALID':
- attr['padding'] = [0, 0]
- elif attr['padding'] == 'SAME':
- stride_h, stride_w = attr['strides']
- kernel_h, kernel_w = attr['kernel_shape']
- if attr['data_format'] == 'NHWC':
- in_h = input_shape[1]
- in_w = input_shape[2]
- else:
- in_h = input_shape[2]
- in_w = input_shape[3]
-
- pad_v = _get_pad_pair(in_h, kernel_h, stride_h)
- pad_h = _get_pad_pair(in_w, kernel_w, stride_w)
-
- attr['padding'] = [pad_v[0], pad_h[0], pad_v[1], pad_h[1]]
- else:
- msg = 'Value {} in attribute "padding" of operator Pooling is not valid.'
- raise tvm.error.OpAttributeUnImplemented(msg.format(attr['padding']))
-
- if name == "avg_pool":
- attr['count_include_pad'] = False
-
- out = AttrCvt(
- op_name=_dimension_picker(name),
- transforms={
- 'kernel_shape':'pool_size',
- 'data_format':'layout'},
- ignores=['ksize'],
- extras={'ceil_mode': False},
- custom_check=_dimension_constraint())(inputs, attr)
-
- if flip_layout:
- out = _sym.transpose(out, axes=(0, 2, 3, 1))
-
- return out
- return _impl
-
-def _conv(opname):
- def _impl(inputs, attr, params):
- attr['data_format'] = attr['data_format'].decode("utf-8")
- flip_layout = False
-
- # NCHW Layout require weights transpose
- if attr['data_format'] == 'NCHW':
- tmp_shape = attr['_input_shapes'][inputs[1]]
- if opname == 'conv':
- tmp_shape = [tmp_shape[ii] for ii in (3, 2, 0, 1)]
- inputs[1] = _sym.transpose(inputs[1], axes=(3, 2, 0, 1))
- else:
- tmp_shape = [tmp_shape[ii] for ii in (2, 3, 0, 1)]
- inputs[1] = _sym.transpose(inputs[1], axes=(2, 3, 0, 1))
- attr['_input_shapes'][inputs[1]] = tmp_shape
-
- input_shape = attr['_input_shapes'][inputs[0]]
- weights_shape = attr['_input_shapes'][inputs[1]]
-
- if attr['_target_layout'] == "NCHW" and attr['data_format'] == "NHWC":
- input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)]
- inputs[0] = _sym.transpose(inputs[0], axes=(0, 3, 1, 2))
- if opname == 'conv':
- weights_shape = [weights_shape[ii] for ii in (3, 2, 0, 1)]
- inputs[1] = _sym.transpose(inputs[1], axes=(3, 2, 0, 1))
- else:
- weights_shape = [weights_shape[ii] for ii in (2, 3, 0, 1)]
- inputs[1] = _sym.transpose(inputs[1], axes=(2, 3, 0, 1))
-
- attr['data_format'] = "NCHW"
- attr['strides'] = [attr['strides'][ii] for ii in (0, 3, 1, 2)]
- flip_layout = True
-
- if attr['data_format'] == 'NHWC':
- kernel_h, kernel_w, _, depth_mult = weights_shape
- attr['kernel_shape'] = (weights_shape[0], weights_shape[1])
- if opname == 'conv':
- attr['channels'] = weights_shape[3]
- else:
- attr['channels'] = input_shape[3] * depth_mult
-
- if 'dilations' in attr:
- attr['dilations'] = (attr['dilations'][1], attr['dilations'][2])
- attr['strides'] = (attr['strides'][1], attr['strides'][2])
- elif attr['data_format'] == 'NCHW':
- _, depth_mult, kernel_h, kernel_w = weights_shape
- attr['kernel_shape'] = (weights_shape[2], weights_shape[3])
- if opname == 'conv':
- attr['channels'] = weights_shape[0]
- else:
- attr['channels'] = input_shape[1] * depth_mult
- if attr['channels'] < 0:
- attr['channels'] *= -1
-
- if 'dilations' in attr:
- attr['dilations'] = (attr['dilations'][2], attr['dilations'][3])
- attr['strides'] = (attr['strides'][2], attr['strides'][3])
- else:
- msg = 'Value {} in attribute "data_format" of operator Conv is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(attr['data_format']))
-
-
- if opname == 'depthwise':
- if depth_mult > 1:
- raise tvm.error.OpNotImplemented('depth_mult > 1 of operator DepthwiseConv2dNative'
- ' is not supported.')
- attr['groups'] = attr['channels']
-
- # Fix padding
- attr['padding'] = attr['padding'].decode("utf-8")
-
- if attr['padding'] == 'VALID':
- attr['padding'] = [0, 0]
- elif attr['padding'] == 'SAME':
- stride_h, stride_w = attr['strides']
- kernel_h, kernel_w = attr['kernel_shape']
- if attr['data_format'] == 'NHWC':
- in_h = input_shape[1]
- in_w = input_shape[2]
- else:
- in_h = input_shape[2]
- in_w = input_shape[3]
-
- dilation_h = attr['dilations'][0]
- dilation_w = attr['dilations'][1]
- dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
- dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
- pad_v = _get_pad_pair(in_h, dilated_kernel_h, stride_h)
- pad_h = _get_pad_pair(in_w, dilated_kernel_w, stride_w)
-
- if attr['data_format'] == 'NHWC':
- inputs[0] = _sym.pad(data=inputs[0],
- pad_width=((0, 0),
- (pad_v[0], pad_v[1]),
- (pad_h[0], pad_h[1]),
- (0, 0)))
- else:
- inputs[0] = _sym.pad(data=inputs[0],
- pad_width=((0, 0),
- (0, 0),
- (pad_v[0], pad_v[1]),
- (pad_h[0], pad_h[1])))
-
- attr['padding'] = [0, 0]
-
- else:
- msg = 'Value {} in attribute "padding" of operator Conv is not valid.'
- raise tvm.error.OpAttributeInvalid(msg.format(attr['padding']))
-
- if 'kernel_layout' not in attr:
- if opname == 'conv':
- attr['kernel_layout'] = 'HWIO' if attr['data_format'] == 'NHWC' else 'OIHW'
- else:
- attr['kernel_layout'] = 'HWOI' if attr['data_format'] == 'NHWC' else 'OIHW'
-
- out = AttrCvt(
- op_name=_dimension_picker('conv'),
- transforms={
- 'kernel_shape': 'kernel_size',
- 'data_format': 'layout',
- 'dilations': ('dilation', (0, 0)),
- 'group': ('groups', 1)},
- extras={'use_bias': len(inputs) == 3},
- custom_check=_dimension_constraint())(inputs, attr)
-
- if flip_layout:
- out = _sym.transpose(out, axes=(0, 2, 3, 1))
-
- return out
- return _impl
-
-def _decode_image():
- def _impl(inputs, attr, params):
-        # Image decode wrapper: expecting the user to feed decoded input to the
-        # next layer, so drop this layer.
- warnings.warn("DecodeJpeg: It's a pass through, "
- "please handle preprocessing before input")
- return inputs[0]
- return _impl
-
-def _cast():
- def _impl(inputs, attr, params):
- # Convert from tensorflow Dtype to str
- attr['DstT'] = attr['DstT'].name
- return AttrCvt(op_name='cast', transforms={'DstT': 'dtype'},
- ignores=['SrcT', 'Truncate'])(inputs, attr)
- return _impl
-
-def _expand_dims():
- def _impl(inputs, attr, params):
- dim_input = inputs.pop(1)
- axis = params[dim_input.list_output_names()[0]]
- params.pop(dim_input.list_output_names()[0])
- return _expand_dims_0d_aware(inputs[0], attr, axis=axis.asnumpy()[0])
- return _impl
-
-def _resize_bilinear():
- def _impl(inputs, attr, params):
- attr['size'] = attr['_output_shapes'][0][1:3]
- inputs.pop(1)
- # NHWC
- attr['layout'] = 'NHWC'
-
- return AttrCvt(op_name="resize",
- ignores=['Tdim'],
- extras={'method': "BILINEAR"})(inputs, attr)
- return _impl
-
-def _check_numerics():
- def _impl(inputs, attr, params):
- # Making a copy node assuming no need to verify
- return AttrCvt(op_name="copy", ignores=['message'])(inputs, attr)
- return _impl
-
-
-def _matmul():
- def _impl(inputs, attr, params):
- channels = _infer_channels(inputs[1], params, not attr['transpose_b'])
- if attr['transpose_a']:
- inputs[0] = _sym.transpose(inputs[0], axes=(1, 0))
- if not attr['transpose_b']:
- inputs[1] = _sym.transpose(inputs[1], axes=(1, 0))
- return AttrCvt(op_name="dense",
- extras={'use_bias': False, 'units': channels},
- ignores=['transpose_a', 'transpose_b', 'T'])(inputs, attr)
-
- return _impl
-
-def _undef():
- def _impl(inputs, attr, params):
- return _sym.__undef__()
- return _impl
-
-def _identity():
- def _impl(inputs, attr, params):
- return inputs[0]
- return _impl
-
-def _concatV2():
- def _impl(inputs, attr, params):
- pop_node = inputs.pop(len(inputs)-1)
- axis = params[pop_node.list_output_names()[0]]
- params.pop(pop_node.list_output_names()[0])
- return AttrCvt(
- op_name="concatenate", ignores=['T', 'N', 'Tidx'],
- extras={'axis': axis.asnumpy()[0]})(inputs, attr)
- return _impl
-
-def _concat():
- def _impl(inputs, attr, params):
- pop_node = inputs.pop(0)
- axis = params[pop_node.list_output_names()[0]]
- params.pop(pop_node.list_output_names()[0])
- return AttrCvt(
- op_name="concatenate", ignores=['N'],
- extras={'axis': axis.asnumpy()[0]})(inputs, attr)
- return _impl
-
-def _pack():
- def _impl(inputs, attr, params):
- axis = int(attr["axis"])
- inputs_reshaped = [_expand_dims_0d_aware(i, attr, axis=axis, num_newaxis=1) for i in inputs]
- return _sym.concatenate(*inputs_reshaped, axis=axis, name=attr["_node_name"])
-
- return _impl
-
-def _slice():
- def _impl(inputs, attr, params):
- begin = params.pop(inputs[1].list_output_names()[0]).asnumpy().tolist()
- size = params.pop(inputs[2].list_output_names()[0]).asnumpy().tolist()
- data_shape = attr['_input_shapes'][inputs[0]]
- data_dim = len(data_shape)
-        end = list(size)
-        for i in range(data_dim):
-            if size[i] == -1:
-                end[i] = data_shape[i] - begin[i]
-            else:
-                end[i] += begin[i]
-        return _sym.strided_slice(inputs[0], begin=begin, end=end)
- return _impl
-
-def _reshape():
- def _impl(inputs, attr, params):
- try:
- pop_node = inputs[1]
- shape_arg = params.pop(pop_node.list_output_names()[0])
- inputs.pop(1)
-
- return AttrCvt(
- op_name="reshape",
- extras={'shape':tuple(shape_arg.asnumpy())},
- ignores=['Tshape'])(inputs, attr)
- except KeyError:
- # Shape operator is already pruned, hence
- # try to infer shape by precompute prune if possible.
- if all(in_node in params for in_node in inputs[1].list_input_names()):
- graph = _graph.create(_sym.Group(inputs[1]))
- params_pre = {k: params[k] for k in inputs[1].list_input_names()}
- params_new = build_module._run_graph(graph, params_pre)
- inputs.pop(1)
- return AttrCvt(
- op_name="reshape",
- extras={'shape':tuple(params_new[0].asnumpy().flatten())},
- ignores=['Tshape'])(inputs, attr)
- raise tvm.error.OpAttributeUnimplemented(
- 'Attribute "dynamic shape" of operator Reshape is not supported.')
- return _impl
-
-def _bias_add():
- def _impl(inputs, attr, params):
- if attr['data_format'].decode("utf-8") == 'NCHW':
- bias = _sym.reshape(inputs[1], newshape=(1, -1, 1, 1))
- else:
- bias = inputs[1]
- return _sym.broadcast_add(inputs[0], bias)
- return _impl
-
-def _squeeze():
- def _impl(inputs, attr, params):
- return AttrCvt(
- op_name="squeeze",
- transforms={'squeeze_dims':'axis'},
- ignores=['T'])(inputs, attr)
- return _impl
-
-def _fused_batch_norm():
- def _impl(inputs, attr, params):
- # Tensorflow: (data, gamma, beta, moving_mean, moving_variance)
-        # NNVM:       (data, gamma, beta, moving_mean, moving_variance)
- axis = 3
- need_cast = False
-
- if 'data_format' in attr:
- attr['data_format'] = attr['data_format'].decode("utf-8")
- if attr['data_format'] == 'NCHW':
- axis = 1
- if 'U' in attr:
- need_cast = True
- inputs[0] = _sym.cast(inputs[0], dtype=attr['U'].name)
-
- out = AttrCvt(op_name='batch_norm',
- transforms={'scale_after_normalization':'scale',
- 'variance_epsilon':'epsilon'},
- extras={'axis': axis},
- ignores=['data_format', 'U'],
- disables=['momentum'])(inputs, attr)
-
- if need_cast:
- out = _sym.cast(out, dtype=attr['T'].name)
- return out
- return _impl
-
-def _batch_norm():
- def _impl(inputs, attr, params):
- # Rearrange inputs from
- # (data, moving_mean, moving_variance, beta, gamma)
- # to
- # (data, gamma, beta, moving_mean, moving_var)
- new_inputs = [inputs[0], inputs[4], inputs[3], inputs[1], inputs[2]]
-
- axis = 3
- if 'data_format' in attr:
- attr['data_format'] = attr['data_format'].decode("utf-8")
- if attr['data_format'] == 'NCHW':
- axis = 1
-
- return AttrCvt(
- op_name='batch_norm',
- transforms={'scale_after_normalization':'scale', 'variance_epsilon':'epsilon'},
- extras={'axis': axis},
- ignores=['data_format'],
- disables=['momentum'])(new_inputs, attr)
- return _impl
-
-def _relu6():
- def _impl(inputs, attr, params):
- return _sym.clip(inputs[0], a_min=0, a_max=6, name=attr['_node_name'])
- return _impl
-
-def _shape():
- def _impl(inputs, attr, params):
- return np.array(attr['_input_shapes'][inputs[0]], dtype='int32')
- return _impl
-
-def _fill():
- def _impl(inputs, attr, params):
- fill_arg = params.pop(inputs.pop(1).list_output_names()[0])
- new_inputs = []
- return AttrCvt(
- op_name='full',
- extras={'shape':inputs[0],
- 'fill_value':fill_arg.asnumpy()[0], 'dtype':attr['T'].name},
- ignores=['index_type', 'T'])(new_inputs, attr)
- return _impl
-
-def _lrn():
- def _impl(inputs, attr, params):
- attr_new = {}
- depth_radius = attr.get('depth_radius', 5)
- size = (depth_radius * 2) + 1
- attr_new['axis'] = 3 # Fix axis, NHWC format
- attr_new['size'] = size
- attr_new['bias'] = attr.get('bias', 1)
- attr_new['alpha'] = attr.get('alpha', 1) * size
- attr_new['beta'] = attr.get('beta', 0.5)
- return AttrCvt(op_name='lrn')(inputs, attr_new)
- return _impl
-
-def _sum():
- def _impl(inputs, attr, params):
- axis = params.pop(inputs[1].list_output_names()[0]).asnumpy()
- # convert to tuple for preventing invalid parameter format error
- axis = tuple(axis)
- return AttrCvt(
- op_name='sum',
- extras={'axis': axis},
- transforms={'keep_dims':'keepdims'},
- ignores=['name', 'Tidx'])(inputs[0], attr)
- return _impl
-
-def _square():
- def _impl(inputs, attr, params):
- return _sym.elemwise_mul(inputs[0], inputs[0])
- return _impl
-
-def _gather_v2():
- "Tensorflow now support only gatherv2"
- def _impl(inputs, attr, params):
- axis = params[inputs.pop(2).list_output_names()[0]].asnumpy()[0]
- new_input = []
- new_input.append(inputs.pop(0))
- new_input.append(inputs.pop(0))
- return AttrCvt(
- op_name="take",
- extras={'axis':axis},
- ignores=['Tindices', 'Tparams', 'validate_indices', \
- 'Taxis', '_class'])(new_input, attr)
- return _impl
-
-def _infer_out_shapes(inputs, params):
- """A method to get the output shape of an intermediate node in the NNVM graph."""
- g = _graph.create(inputs)
- shape_dict = {k: v.shape for k, v in params.items()}
- _, out_shapes = graph_util.infer_shape(g, **shape_dict)
- return out_shapes
-
-def _stridedSlice():
- def _impl(inputs, attr, params):
- """Strided Slice.
- Operator description: https://www.tensorflow.org/api_docs/python/tf/strided_slice
- Tensorflow mask validation: https://github.com/tensorflow/tensorflow/blob/master/
- tensorflow/core/util/strided_slice_op.cc#L147-L368
- """
- begin = params.pop(inputs[1].list_output_names()[0]).asnumpy().tolist()
- end = params.pop(inputs[2].list_output_names()[0]).asnumpy().tolist()
- stride = params.pop(inputs[3].list_output_names()[0]).asnumpy().tolist()
- begin_mask = int(attr.get('begin_mask', 0))
- end_mask = int(attr.get('end_mask', 0))
- ellipsis_mask = int(attr.get('ellipsis_mask', 0))
- new_axis_mask = int(attr.get('new_axis_mask', 0))
- shrink_axis_mask = int(attr.get('shrink_axis_mask', 0))
- data_shape = attr['_input_shapes'][inputs[0]]
- data_dim = len(data_shape)
- stride_dim = len(stride)
-
- def _transform_mask(stride_dim, ellipsis_mask):
- """Handle mask inputs to create new begin, end, stride and output shape"""
- m_begin = [0] * data_dim
- m_end = [0] * data_dim
- m_stride = [0] * data_dim
- fshape_indices = []
-            # Count new axes after ellipsis_mask; they are considered while applying ellipsis_mask.
- ellipsis_seen = False
- new_axes_after_ellipsis = 0
- for i in range(stride_dim):
- mask = 1 << i
- if ellipsis_seen and (mask & new_axis_mask) != 0:
- new_axes_after_ellipsis += 1
- if (mask & ellipsis_mask) != 0:
- ellipsis_seen = True
- if not ellipsis_seen:
- #Used later for extending the stride attributes in the below loop.
- ellipsis_mask |= (1 << stride_dim)
- stride_dim += 1
- final_index = 0
- for index in range(stride_dim):
- mask = 1 << index
- if mask & ellipsis_mask:
- #Identify the end index for applying ellipsis_mask
- to_index = min(((data_dim - (stride_dim-index)) + 1 \
- + new_axes_after_ellipsis), data_dim)
- for i in range(final_index, to_index):
- m_begin[final_index] = 0
- m_end[final_index] = data_shape[final_index]
- m_stride[final_index] = 1
- fshape_indices.append(final_index)
- final_index += 1
- elif mask &new_axis_mask:
- fshape_indices.append(-1)
- elif not mask & new_axis_mask:
- if final_index == len(m_begin):
- break
- if mask & begin_mask:
- m_begin[final_index] = data_shape[final_index] \
- if stride[index] < 0 else 0
- elif begin[index]:
- m_begin[final_index] = begin[index]
- if mask & end_mask:
- m_end[final_index] = 0 if stride[index] < 0 \
- else data_shape[final_index]
- elif end[index]:
- m_end[final_index] = end[index]
- m_stride[final_index] = stride[index]
- if mask & shrink_axis_mask:
-                        # TensorFlow makes the axis with shrink_axis_mask a dimension of size 1
- m_begin[final_index] = data_shape[final_index] + begin[index] \
- if begin[index] < 0 else begin[index]
- m_end[final_index] = begin[index] + 1
- m_stride[final_index] = 1
- fshape_indices.append(-2)
- else:
- fshape_indices.append(final_index)
-
- final_index += 1
- return m_begin, m_end, m_stride, fshape_indices
-
- fshape_indices = None
- if begin_mask or end_mask or ellipsis_mask or new_axis_mask or shrink_axis_mask:
- begin, end, stride, fshape_indices = _transform_mask(stride_dim, ellipsis_mask)
- out = _sym.strided_slice(inputs[0], begin=begin, end=end, stride=stride)
- out_shape = _infer_out_shapes(out, params)[0]
- if not fshape_indices:
- fshape_indices = range(len(out_shape))
-
- #Create final output shape.
- final_output = []
- for gather_index in fshape_indices:
- if gather_index == -1:
- final_output.append(1)
- elif gather_index == -2:
- pass
- else:
- final_output.append(out_shape[gather_index])
- # Prevent 0-dim tensors which are not accepted by nnvm
- if not final_output:
- final_output.append(1)
- return _sym.reshape(out, shape=tuple(final_output))
- return _impl
-
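The mask handling in `_transform_mask` above is easiest to follow on a concrete case. Below is a hedged, NNVM-free sketch in plain NumPy (the input array and mask values are invented for illustration) of what the converter effectively produces when `shrink_axis_mask` selects a single row of a 2-D tensor:

    import numpy as np

    data = np.arange(12).reshape(3, 4)      # shape (3, 4), values 0..11
    begin, end, stride = [1, 0], [2, 4], [1, 1]
    shrink_axis_mask = 0b01                 # bit 0 set: drop axis 0 from the output

    # strided_slice keeps the sliced axis with length 1 ...
    sliced = data[begin[0]:end[0]:stride[0], begin[1]:end[1]:stride[1]]   # shape (1, 4)
    # ... and the final reshape (fshape_indices would be [-2, 1] here) removes it.
    final = sliced.reshape(4) if shrink_axis_mask & 1 else sliced
    assert final.tolist() == [4, 5, 6, 7]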
-def _LSTMBlockCell():
- def _impl(inputs, in_state_c, in_state_h, attr, params):
- """LSTM Block cell.
- Calculations are described in: https://github.com/tensorflow/tensorflow/blob/
- r1.8/tensorflow/contrib/rnn/python/ops/lstm_ops.py#L41-L114
-
- Parameters
- ----------
- inputs : nnvm.Symbol
- Input data
- in_state_c: list of nnvm.Symbol
- Cell state input values for all the layers
- in_state_h: list of nnvm.Symbol
- Hidden state input values for all the layers
- attrs : dict
- Dict of operator attributes
- params : dict
-            Dict of pretrained weights and biases
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- output: nnvm.Symbol
- Output state value.
- """
- in_data = inputs[0]
- in_weight = inputs[3]
- in_bias = inputs[7]
- forget_bias = attr.pop('forget_bias')
- input_shape = attr['_input_shapes'][inputs[0]]
- weight_shape = attr['_input_shapes'][inputs[3]]
- batch_size, input_size = input_shape[0], input_shape[1]
- num_hidden_layers = weight_shape[1]
- num_hidden = num_hidden_layers // 4
-
- in_data = _sym.reshape(in_data,
- shape=(batch_size, input_size))
- ixh = _sym.concatenate(*[in_data, in_state_h], axis=1)
- in_weight = _sym.transpose(in_weight)
- gates = _sym.dense(ixh, in_weight, in_bias, use_bias=True,
- units=num_hidden_layers)
- gate_list = _sym.split(gates, indices_or_sections=4, axis=1)
- in_gate = _sym.sigmoid(gate_list[0])
- in_transform = _sym.tanh(gate_list[1])
- forget_gate = _sym.sigmoid(gate_list[2])
- forget_gate = forget_gate + forget_bias
- out_gate = _sym.sigmoid(gate_list[3])
- next_c = _sym.broadcast_add(_sym.broadcast_mul(forget_gate, in_state_c),
- _sym.broadcast_mul(in_gate, in_transform))
- next_h = out_gate * _sym.tanh(next_c)
- out_state = _sym.concatenate(*[next_c, next_h])
- out_state = _sym.reshape(out_state,
- shape=(2, batch_size, num_hidden))
- return next_h, out_state
- return _impl
-
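For reference, the gate arithmetic above is the LSTM block cell update. A minimal NumPy sketch of the same computation (the shapes, random weights, and `forget_bias` value are illustrative assumptions only):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    batch, input_size, num_hidden = 1, 3, 2
    x = np.random.randn(batch, input_size)
    h = np.zeros((batch, num_hidden))             # previous hidden state
    c = np.zeros((batch, num_hidden))             # previous cell state
    w = np.random.randn(input_size + num_hidden, 4 * num_hidden)
    b = np.zeros(4 * num_hidden)
    forget_bias = 1.0

    gates = np.concatenate([x, h], axis=1) @ w + b
    i, g, f, o = np.split(gates, 4, axis=1)       # in_gate, in_transform, forget_gate, out_gate
    forget = sigmoid(f) + forget_bias             # the converter above adds forget_bias after the sigmoid
    c_next = forget * c + sigmoid(i) * np.tanh(g)
    h_next = sigmoid(o) * np.tanh(c_next)         # mirrors next_c / next_h above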
-
-def _pad(name):
- def _impl(inputs, attr, params):
- padlist_key = inputs[1].list_output_names()[0]
- if padlist_key in params:
- padlist = params.pop(padlist_key).asnumpy()
- else:
- raise tvm.error.OpAttributeRequired(
- 'Required attribute "{}" not found in operator Pad.'.format(padlist_key))
- paddings = tuple([tuple(l) for l in padlist])
- attr['pad_width'] = paddings
- attr['pad_value'] = 0
- new_inputs = [inputs[0]]
- if name == 'PadV2':
- constant_values = params.pop(inputs[2].list_output_names()[0]).asnumpy()
- attr['pad_value'] = constant_values[0]
- return AttrCvt(
- op_name='pad',
- ignores=['Tpaddings'],)(new_inputs, attr)
- return _impl
-
-
-def _transpose():
- def _impl(inputs, attr, params):
-        # If perm is not specified, axes is left empty;
-        # otherwise its value is taken from params
- param_name = inputs[1].list_output_names()[0]
- axes = params.get(param_name, tvm.nd.array([])).asnumpy()
- return _sym.transpose(inputs[0], axes=tuple(axes))
- return _impl
-
-def _rank():
- def _impl(inputs, attr, params):
- input_shape = attr['_input_shapes'][inputs[0]]
-
- name = attr["_node_name"]
- params[name] = tvm.nd.array([len(input_shape)])
- return _sym.Variable(name=name, shape=params[name].shape)
- return _impl
-
-def _range():
- def _impl(inputs, attr, params):
- start = params.pop(inputs[0].list_output_names()[0]).asnumpy()[0]
- limit = params.pop(inputs[1].list_output_names()[0]).asnumpy()[0]
- delta = params.pop(inputs[2].list_output_names()[0]).asnumpy()[0]
-
- name = attr["_node_name"]
- params[name] = tvm.nd.array([start, limit, delta])
- return _sym.Variable(name=name, shape=params[name].shape)
- return _impl
-
-def _elu():
- def _impl(inputs, attr, params):
- alpha = 1.0
- return -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0])
- return _impl
-
-def _selu():
- def _impl(inputs, attr, params):
- alpha = 1.6732632423543772848170429916717
- gamma = 1.0507009873554804934193349852946
- return gamma * (-alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0]))
- return _impl
-
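The ELU/SELU rewrites above rely on the identity ELU(x) = -alpha * relu(1 - exp(x)) + relu(x), which only needs ops the symbol API already provides. A quick NumPy check of that identity (the test values are chosen arbitrarily):

    import numpy as np

    def relu(x):
        return np.maximum(x, 0)

    alpha = 1.0
    x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    rewritten = -alpha * relu(1 - np.exp(x)) + relu(x)
    reference = np.where(x > 0, x, alpha * (np.exp(x) - 1))   # textbook ELU
    assert np.allclose(rewritten, reference)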
-def _mean():
- def _impl(inputs, attr, params):
- axis = params.pop(inputs[1].list_output_names()[0])
- return AttrCvt(op_name="mean", ignores=['Tdim', 'Tidx'],
- transforms={'keep_dims': 'keepdims'},
- extras={'axis': tuple(axis.asnumpy())})(inputs[0], attr)
- return _impl
-
-def _broadcast(name):
- def _impl(inputs, attr, params):
- op_name = _math_name_picker(name)(attr)
- return AttrCvt(
- op_name=op_name,
- ignores=['name', 'Tidx']
- )(inputs, attr)
- return _impl
-
-def _split(has_size_vector):
- # TF documentation https://www.tensorflow.org/api_docs/python/tf/split
- def _impl(inputs, attr, params):
- try:
- # order and number of inputs are different:
- # if has_size_vector:
- # https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/split-v
- # else:
- # https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/split
-
- # in addition, `axis` and `num_or_size_splits` can be tensors in TensorFlow,
- # we can only support constants
- if has_size_vector:
- input_node_index = 0
- input_axis_index = 2
- size_splits_input_name = inputs[1].list_output_names()[0]
- size_splits = params[size_splits_input_name].asnumpy()
- section_beginnings = np.cumsum(size_splits)[:-1]
- indices_or_sections = tuple(section_beginnings)
- else:
- input_node_index = 1
- input_axis_index = 0
- indices_or_sections = attr['num_split']
- input_node = inputs[input_node_index]
- axis_input_name = inputs[input_axis_index].list_output_names()[0]
- axis_input_value = params[axis_input_name].asnumpy()[0]
- except (IndexError, KeyError):
- raise TypeError( \
- "Unsupported argument for split: `axis` and `num_or_size_splits` " \
- "should be constants")
- return _sym.split(input_node,
- indices_or_sections=indices_or_sections,
- axis=axis_input_value)
- return _impl
-
-def _unpack():
- def _impl(inputs, attr, params):
- input_node = inputs[0]
- axis = attr['axis']
- input_shape = attr['_input_shapes'][input_node]
- axis_length = input_shape[axis]
- if axis_length < 0:
- raise TypeError("Unstack with unknown axis length")
- splitted = _sym.split(input_node,
- indices_or_sections=axis_length,
- axis=axis,
- name=attr.get('_node_name', 'unstack'))
-
- return _sym.Group([_sym.squeeze(split_item, axis=axis) for split_item in splitted])
- return _impl
-
-def _expand_dims_0d_aware(data, attr, axis, num_newaxis=1):
- if data in attr['_input_0d_mismatch']:
- return data if num_newaxis == 1 else \
- _sym.expand_dims(data, axis=axis, num_newaxis=num_newaxis-1)
-
- return _sym.expand_dims(data, axis=axis, num_newaxis=num_newaxis)
-
-def _logical(name):
- def _impl(inputs, attr, params):
- return AttrCvt(op_name=name)(inputs, attr)
- return _impl
-
-# compatible operators that do NOT require any conversion.
-_identity_list = []
-
-# _convert_map defines maps of name to converter functor(callable)
-# for 1 to 1 mapping, use Renamer if nothing but name is different
-# use AttrCvt if attributes need to be converted
-# for 1 to N mapping(composed), use custom callable functions
-# for N to 1 mapping, currently not supported(?)
-_convert_map = {
- 'ArgMax' : _argx(_sym.argmax, 'argmax'),
- 'ArgMin' : _argx(_sym.argmin, 'argmin'),
- 'AvgPool' : _pooling('avg_pool'),
- 'BatchNormWithGlobalNormalization' : _batch_norm(),
- 'BiasAdd' : _bias_add(),
- 'Cast' : _cast(),
- 'Ceil' : AttrCvt('ceil'),
- 'CheckNumerics' : _check_numerics(),
- 'Concat' : _concat(),
- 'ConcatV2' : _concatV2(),
- 'Conv2D' : _conv('conv'),
- 'DecodeJpeg' : _decode_image(),
- 'Elu' : _elu(),
- 'ExpandDims' : _expand_dims(),
- 'Floor' : AttrCvt('floor'),
- 'Identity' : _identity(),
- 'MatMul' : _matmul(),
- 'MaxPool' : _pooling('max_pool'),
- 'Add' : _elemwise('add'),
- 'Sub' : _elemwise('sub'),
- 'Mul' : _elemwise('mul'),
- 'RealDiv' : _elemwise('div'),
- 'Maximum' : _elemwise('max'),
- 'Minimum' : _elemwise('min'),
- 'Sum' : _sum(),
- 'Square' : _square(),
- 'Pack' : _pack(),
- 'Slice' : _slice(),
- 'LeakyRelu' : AttrCvt('leaky_relu'),
- 'Relu' : AttrCvt('relu'),
- 'Reshape' : _reshape(),
- 'ResizeBilinear' : _resize_bilinear(),
- 'Selu' : _selu(),
- 'Softmax' : AttrCvt('softmax', {'axis': ('axis', 1)}),
- 'Rsqrt' : _rsqrt(),
- 'Squeeze' : _squeeze(),
- 'FusedBatchNorm' : _fused_batch_norm(),
- 'FusedBatchNormV2' : _fused_batch_norm(),
- 'Relu6' : _relu6(),
- 'DepthwiseConv2dNative' : _conv('depthwise'),
- 'Shape' : _shape(),
- 'Sigmoid' : AttrCvt('sigmoid'),
- 'Fill' : _fill(),
- 'GatherV2' : _gather_v2(),
- 'StridedSlice' : _stridedSlice(),
- 'LRN' : _lrn(),
- 'Pad' : _pad('Pad'),
- 'PadV2' : _pad('PadV2'),
- 'Range' : _range(),
- 'Rank' : _rank(),
- 'Transpose' : _transpose(),
- 'Tanh' : AttrCvt('tanh'),
- 'Mean' : _mean(),
- 'LogicalAnd' : _logical('logical_and'),
- 'LogicalOr' : _logical('logical_or'),
- 'LogicalNot' : _logical('logical_not'),
- 'Less' : _broadcast('less'),
- 'Greater' : _broadcast('greater'),
- 'LessEqual' : _broadcast('less_equal'),
- 'GreaterEqual' : _broadcast('greater_equal'),
- 'Equal' : _broadcast('equal'),
- 'NotEqual' : _broadcast('not_equal'),
- 'Split' : _split(False),
- 'SplitV' : _split(True),
- 'Unpack' : _unpack(),
-}
-
-# _convert_map_rnn defines maps of rnn operator name to
-# converter functor(callable) for 1 to 1 mapping.
-_convert_map_rnn = {
- 'LSTMBlockCell' : _LSTMBlockCell(),
-}
-
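The two tables above drive a purely name-based dispatch in `_convert_operator` further down. A stripped-down sketch of that lookup (the stub handler and its return value are hypothetical; no NNVM import is needed):

    def _relu6_stub(inputs, attr, params):
        # a real handler returns an NNVM symbol; here we just echo the rewrite
        return ("clip", {"a_min": 0, "a_max": 6})

    convert_map = {"Relu6": _relu6_stub}

    def convert_operator(op_name, inputs, attr, params):
        if op_name in convert_map:
            return convert_map[op_name](inputs, attr, params)
        raise NotImplementedError("Operator %s is not supported." % op_name)

    print(convert_operator("Relu6", [], {}, {}))   # ('clip', {'a_min': 0, 'a_max': 6})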
-class RecurrentNetworks(object):
- """Recurrent network layer handlers.
-
- Handle Layer operations.
-    TODO: Operators like RNN/GRU layers can also be handled here
-
- Parameters
- ----------
- nodes : list
- list of graph nodes used for tensorflow parsing.
-
- out_rnn : list
- List of RecurrentNetwork outputs. This output will be appended to the
- 'head' nodes of the graph.
-
- graph : tensorflow graph definition object
- The loaded tensorflow GraphDef
-
- convert_map : dict
- Dict of name : callable, where name is the op's name that
-        requires conversion to nnvm; the callables are functions which
- take attrs and return (new_op_name, new_attrs)
- """
- def __init__(self, nodes, out_rnn, graph, convert_map):
- self._graph = graph
- self._convert_map = convert_map
- self._nodes = nodes
- self._out_rnn = out_rnn
- self._cur_lstm_layer = 0
- self._layer_name_list = []
- self._recurrent_ops_layer_map = {
- 'LSTMBlockCell' : self._LSTMBlockCellLayer(),
- }
-
- def _LSTMBlockCellLayer(self):
- """LSTMBlockCell layer handler.
-
- Parameters
- ----------
- op_name : str
-            Operator name, e.g. LSTMBlockCell
-
- layer_name : str list
- Layer name is used for creating the state input placeholder.
-
- inputs : nnvm.Symbol
- Input data
-
- attrs : dict
- Dict of operator attributes
-
- params : dict
-            Dict of pretrained weights and biases
-
- num_layers : int
-            Total number of LSTM layers present in the graph
-
- Returns
- -------
- sym : nnvm.sym.Symbol
- The returned nnvm symbol
- """
- def _impl(op_name, layer_name, inputs, attrs, params, num_layers):
- in_state_c_name = layer_name+'_c'
- in_state_h_name = layer_name+'_h'
-
- def _init_state(num_layers, batch_size, num_hidden):
- """Create the initial states for the first layer in the graph."""
- in_state_c = _sym.Variable(in_state_c_name,
- shape=(num_layers, batch_size, num_hidden))
- in_state_h = _sym.Variable(in_state_h_name,
- shape=(num_layers, batch_size, num_hidden))
- return in_state_c, in_state_h
-
- def _get_cur_input_state(in_state_c, in_state_h, num_layers,
- layer, batch_size, num_hidden):
- """Select the appropriate states for the current layer"""
- in_state_c_tup = _sym.split(in_state_c,
- indices_or_sections=num_layers, axis=0)
- in_state_h_tup = _sym.split(in_state_h,
- indices_or_sections=num_layers, axis=0)
- cur_in_state_c = _sym.reshape(in_state_c_tup[layer],
- shape=(batch_size, num_hidden))
- cur_in_state_h = _sym.reshape(in_state_h_tup[layer],
- shape=(batch_size, num_hidden))
- return cur_in_state_c, cur_in_state_h
-
- def _LSTMBlockCellWrapper(inputs, attr, params,
- num_layers, layer):
- """LSTM cell warapper to prepare the inputs"""
- input_shape = attr['_input_shapes'][inputs[0]]
- weight_shape = attr['_input_shapes'][inputs[3]]
- batch_size = input_shape[0]
- num_hidden = weight_shape[1] // 4
-
- if layer == 0:
- #Create initial states placeholder in case of first layer
- in_state_c, in_state_h = _init_state(num_layers,
- batch_size, num_hidden)
- else:
- in_state_c = self._nodes[in_state_c_name]
- in_state_h = self._nodes[in_state_h_name]
-
- cur_in_state_c, cur_in_state_h = _get_cur_input_state( \
- in_state_c, in_state_h,
- num_layers, layer,
- batch_size, num_hidden)
- output, out_state = self._convert_map[op_name](inputs, cur_in_state_c,
- cur_in_state_h,
- attr, params)
- return output, out_state, in_state_c, in_state_h
-
- sym, cur_out_state, in_state_c, in_state_h = \
- _LSTMBlockCellWrapper(inputs, attrs, params,
- num_layers, self._cur_lstm_layer)
- self._nodes[in_state_c_name] = in_state_c
- self._nodes[in_state_h_name] = in_state_h
- cur_out_state = _sym.expand_dims(cur_out_state, axis=0, num_newaxis=1)
- self._out_rnn.append(cur_out_state)
- self._cur_lstm_layer += 1
- return sym
- return _impl
-
- def process_op(self, op_name, inputs, attrs, params):
- """Process recurrent layer operators.
-
-        The dict '_recurrent_ops_layer_map' maps each layer-based operator to its
-        layer handler. The total number of layers is calculated to form the input
-        data shapes.
-
- Parameters
- ----------
- op_name : str
- Operator name, such as LSTMBlockCell
-
- inputs : nnvm.Symbol
- Input data
-
- attrs : dict
- Dict of operator attributes
-
- params : dict
-            Dict of pretrained weights and biases
-
- Returns
- -------
- sym : nnvm.sym.Symbol
- The returned nnvm symbol
- """
- def _get_abs_layer_name(node):
- """Identify the layer name is already handled. Return the absolute name
- """
- if not self._layer_name_list:
- self._layer_name_list.append(node.name)
- return node.name
-
- for _name in self._layer_name_list:
- if _name in node.name:
- abs_name = _name
- else:
- self._layer_name_list.append(node.name)
- abs_name = node.name
- return abs_name
-
- #Find number of layers of this same operator node in the graph
- #and also read the inputs name for the current op.
- num_layers = 0
- for _, node in enumerate(self._graph.node):
- if node.op == op_name:
- layer_name = _get_abs_layer_name(node)
- num_layers += 1
-
- sym = self._recurrent_ops_layer_map[op_name](op_name, layer_name, inputs, attrs,
- params, num_layers)
- return sym
-
-class GraphProto(object):
- """ A helper class for handling nnvm graph copying from Tensorflow GraphDef.
- Definition:
- https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto
- """
- def __init__(self):
- self._nodes = {}
- self._params = {}
- self._output_shapes = {}
- self._num_param = 0
- self._num_rnn_layer = False
- self._outputs_are_0d = {}
- self._input_shapes = {}
-
- def from_tensorflow(self, graph, layout="NHWC", shape=None, outputs=None):
- """Construct nnvm nodes from tensorflow graph definition - GraphDef.
-
- Follow the tensorflow graph definition to parse and convert it to NNVM.
-        Some of the assumptions are listed below.
-
- -> All Placeholders are considered as graph input.
- -> All Const nodes are params.
- -> Last node is assumed as graph output.
- -> _output_shapes : Graph should be frozen with add_shapes=True.
- Or user can pass input shape dictionary optionally.
- -> DecodeJpeg, ResizeBilinear: These are dummy operators.
- Hence user should handle preprocessing outside.
- -> CheckNumerics: No implementation as of now for this.
- Just copies input to output.
-
- Parameters
- ----------
- graph : tensorflow graph definition object
- The loaded tensorflow GraphDef
-
- layout : target layout to be used (Optional)
-            Only NCHW is currently supported, to enable NHWC models on GPU.
-
- shape : Dictionary of input dimensions (Optional)
- Graph level input shape dictionary.
-
- outputs : List of output tensor names (Optional)
- if not specified then the last node is assumed as graph output.
-
- Returns
- -------
- sym : nnvm.sym.Symbol
- The returned nnvm symbol
- params : dict
- A dict of name: tvm.nd.array pairs, used as pretrained weights
- """
-
- try:
- from tensorflow.python.framework import tensor_util
- except ImportError as e:
- raise ImportError(
- "Unable to import tensorflow which is required {}".format(e))
-
- missing_operators = self._parse_import_prerequisites(graph)
-
- if missing_operators:
- msg = 'The following operators are not supported in frontend TensorFlow: {}'
- ops = str(list(missing_operators)).strip('[,]')
- raise tvm.error.OpNotImplemented(msg.format(ops))
-
- for node in graph.node:
- if node.op == 'Placeholder':
- # Give priority to user argument.
- if shape and node.name in shape:
- self._input_shapes[node.name] = list(shape[node.name])
- else:
- self._input_shapes[node.name] = \
- tensor_util.TensorShapeProtoToList(node.attr['shape'].shape)
- for idx, dim in enumerate(self._input_shapes[node.name]):
- if dim < 0:
- self._input_shapes[node.name][idx] = 1
- warnings.warn("Use 1 instead of -1 in shape of operator %s."
- % node.name)
-
- self._nodes[node.name] = _sym.Variable(name=node.name,
- shape=self._input_shapes[node.name])
- self._output_shapes[node.name] = [self._input_shapes[node.name]]
- self._outputs_are_0d[node.name] = [ \
- not tshape if isinstance(tshape, list) else False \
- for tshape in self._output_shapes[node.name]]
-
- # Ignore user's input shape for Non placeholder
- elif node.op == 'Const':
- tensor_value = node.attr['value'].tensor
- self._input_shapes[node.name] = \
- tensor_util.TensorShapeProtoToList(tensor_value.tensor_shape)
- if shape and node.name in shape:
- warnings.warn("Ignore the passed shape. "
- "Shape in graphdef will be used for operator %s." % node.name)
-
- final_op = None
- # Parse the nodes to re-create TF graph using Symbol API of NNVM
- for node in graph.node:
- # Tensorflow doesn't have separate list for params extraction.
- # Operator name 'Const' is treated as a parameter to build NNVM params dict.
-
- input_shapes = {}
- input_0d_mismatch = set()
- attr = self._parse_attr(node.attr)
-
-            # A Variable converted to Const will not have only the value attr
- if 'value' in attr and node.op == 'Const':
- self._output_shapes[node.name] = [self._input_shapes[node.name]]
- elif '_output_shapes' in attr:
- self._output_shapes[node.name] = \
- [tensor_util.TensorShapeProtoToList(tshape) \
- for tshape in attr['_output_shapes']]
- else:
- # Keep the list indexable to avoid key error.
- # Actual value will be filled after node creation.
- # Will infer shapes if the graph is not frozen with add_shapes=True
- self._output_shapes[node.name] = [None]
-
- self._outputs_are_0d[node.name] = [ \
- not tshape if isinstance(tshape, list) else False \
- for tshape in self._output_shapes[node.name]]
-
- if node.op == "Const":
- # All Const nodes are Param nodes, lets parse
- self._num_param += 1
- for key, value in node.attr.items():
- self._parse_param(key, value, node.name)
- if node.name not in self._nodes:
- raise NotImplementedError( \
- "Const {} couldn't be converted to Param.".format(node.name))
-
- attr = self._parse_attr(node.attr)
-
- elif node.op != "Placeholder":
- # Pass the parsed shapes instead
- attr["_output_shapes"] = output_shapes = self._output_shapes[node.name]
-
- # Pass the node name too in attr
- attr["_node_name"] = node.name
-
- # Pass the target layout
- attr["_target_layout"] = layout
-
- # Fill shapes for all inputs in a list
- inputs = []
- for i in node.input:
- # Some TensorFlow operators internally maintain execution layers
- # and their output name includes the layer number along with
- # graph node name. E.g. the node name is 'Model/RNN/cell_0/RnnCell', but the
- # output tensor name is 'Model/RNN/cell_0/RnnCell:0'. In this case,
- # the number has to be ignored for single-output nodes.
- # On the other hand, for multi-output nodes the number is the output index,
- # and the lack of the number implies 0.
- tensor_name = i.split(':')
- node_name = tensor_name[0]
- if node_name in self._nodes:
- in_sym = self._nodes[node_name]
- if len(in_sym.list_output_names()) > 1:
- tensor_slot = int(tensor_name[1]) if len(tensor_name) > 1 else 0
- in_sym = in_sym[tensor_slot]
- input_shape = self._output_shapes[node_name][tensor_slot]
- else:
- tensor_slot = 0
- input_shape = self._output_shapes[node_name][0]
- inputs.append(in_sym)
- input_shapes[in_sym] = input_shape
- # This means the node is 1d in NNVM and 0d in TF.
- # See `_expand_dims_0d_aware`.
- if self._outputs_are_0d[node_name][tensor_slot] and input_shape:
- input_0d_mismatch.add(in_sym)
- attr['_input_shapes'] = input_shapes
- attr['_input_0d_mismatch'] = input_0d_mismatch
-
- inputs = self._fix_extranodes(node.op, attr, inputs)
- op = self._convert_operator(node.op, inputs, attr, graph)
-
- # Check if op is converted to param
- if isinstance(op, np.ndarray):
- self._params[node.name] = tvm.nd.array(op)
- op = _sym.Variable(name=node.name,
- shape=self._params[node.name].shape)
-
- # Assuming only one output.
- self._nodes[node.name] = op
- final_op = op
-
- # Infer shapes even without specifying "add_shapes=True"
- if output_shapes == [None]:
- g = _graph.create(final_op)
- self._output_shapes[node.name] = \
- list(graph_util.infer_shape(g, **self._input_shapes))[-1]
-
- if self._output_shapes[node.name] and shape and node.name in shape:
- assert self._output_shapes[node.name] == list(shape[node.name])
-
-                # Infer shapes if passed explicitly
- node_output = self._nodes[node.name]
- if shape and (not self._output_shapes[node.name][0]
- or -1 in self._output_shapes[node.name][0]):
- g = _graph.create(node_output)
- shape_dict = {k: v.shape for k, v in self._params.items()}
- shape_dict.update(shape)
- _, out_shapes = graph_util.infer_shape(g, **shape_dict)
- self._output_shapes[node.name] = out_shapes
-
- out = []
- if outputs is None:
- out.append(final_op)
- else:
- for out_name in outputs:
- if ":" in out_name:
- out_name, out_num = out_name.split(":")
- out_num = int(out_num)
- out.append(self._nodes[out_name][out_num])
- else:
- out.append(self._nodes[out_name])
-
- #Add the RNN outputs also with 'head' nodes of the nnvm graph
- if self._num_rnn_layer:
- out_rnn = _sym.concatenate(*self._out_rnn, axis=0)
- out.append(out_rnn)
-
- if isinstance(out, list):
- out = _sym.Group(out) if len(out) > 1 else out[0]
-
- return out, self._params
-
- def _parse_import_prerequisites(self, graph):
- """ Calculate the named preconditions from TensorFlow `graph`.
- Return prerequisites for parsing:
- a. Set of operator names which don't have their mapping in TVM, i.e.
- which are not supported
- """
- missing_operators = set()
- for node in graph.node:
- if node.op == "Placeholder":
- pass
- elif node.op == "Const":
- pass
- else:
- if any([node.op in t for t in [_identity_list, _convert_map, _convert_map_rnn]]):
- pass
- else:
- missing_operators.add(node.op)
-
- return missing_operators
-
- def _parse_param(self, key, value, name):
- try:
- from tensorflow.python.framework import tensor_util
- except ImportError as e:
- raise ImportError(
- "Unable to import tensorflow which is required {}".format(e))
-
- if key == 'value':
- np_array = tensor_util.MakeNdarray(value.tensor)
-
- if np_array.dtype == np.dtype(object):
- # Object types are generally tensorflow DT_STRING (DecodeJpeg op).
- # Just leave it as placeholder.
- self._nodes[name] = _sym.Variable(name=name)
- return
-
- array_ndim = len(np_array.shape)
- if array_ndim == 0:
- new_array = np.empty([1], dtype=np_array.dtype)
- new_array[0] = np_array
- self._params[name] = tvm.nd.array(new_array)
- else:
- self._params[name] = tvm.nd.array(np_array)
- self._nodes[name] = _sym.Variable(name=name,
- shape=self._params[name].shape)
- else:
- if key not in ('dtype', '_output_shapes', '_class'):
- raise NotImplementedError \
- ("Other attributes for a Const(param) Node {} ? .".format(key))
-
- def _get_attr(self, buf):
- """Returns the value of the attr of this buf with the given `name`.
-
- Args:
- buf: attrvalue protobuf.
-
- Returns:
- The value of the attr, as a Python object.
-
-        Raises:
-          ImportError: If tensorflow cannot be imported.
- """
- fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"]
-
- x = buf
-
- ret = []
-
- try:
- from tensorflow.python.framework import dtypes
- except ImportError as e:
- raise ImportError(
- "Unable to import tensorflow which is required {}".format(e))
-
- # Treat an empty oneof value as an empty list.
- if not x.WhichOneof("value"):
- return ret
- if x.HasField("list"):
- for f in fields:
- if getattr(x.list, f):
- if f == "type":
- ret += [dtypes.as_dtype(x) for x in list(getattr(x.list, f))]
- else:
- ret += list(getattr(x.list, f))
- else:
- for f in fields:
- if x.HasField(f):
- if f == "type":
- ret = dtypes.as_dtype(getattr(x, f))
- else:
- ret = getattr(x, f)
- return ret
-
- def _parse_attr(self, attr_proto):
- """Convert a list of AttributeProto to a dict, with names as keys."""
- attrs = {}
- for key, value in attr_proto.items():
- attrs[key] = self._get_attr(value)
-
- return attrs
-
- def _convert_rnn_operator(self, op_name, inputs,
- attrs, params, graph, convert_map):
- """Convert RNN and its variant operators to NNVM operators.
-        This converter reads the input states of each layer and
-        also maintains the output states of each layer in a list.
-
- Parameters
- ----------
- op_name : str
- Operator name, such as LSTMBlockCell
- inputs : list of nnvm.Symbol
- List of input symbols.
- attrs : dict
- Dict of operator attributes
- params : dict
-            Dict of pretrained weights and biases
- graph : Tensorflow graph object
- Graph is to find the number of upcoming same operator to
- calculate the number of layers.
- convert_map : dict
- Dict of name : callable, where name is the op's name that
-            requires conversion to nnvm; the callables are functions which
- take attrs and return (new_op_name, new_attrs)
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
- if not self._num_rnn_layer:
- self._out_rnn = []
- self.rnn = RecurrentNetworks(self._nodes, self._out_rnn, graph, convert_map)
- self._num_rnn_layer = True
- sym = self.rnn.process_op(op_name, inputs, attrs, params)
- return sym
-
- def _convert_operator(self, op_name, inputs, attrs,
- graph, identity_list=None, convert_map=None):
- """Convert from Tensorflow operator to nnvm operator.
-        The converter must specify conversions explicitly for incompatible names, and
- apply handlers to operator attributes.
-
- Parameters
- ----------
- op_name : str
- Operator name, such as Conv2D, AvgPool
- inputs : list of nnvm.Symbol
- List of input symbols.
- attrs : dict
- Dict of operator attributes
- identity_list : list
- List of operators that don't require conversion
- convert_map : dict
- Dict of name : callable, where name is the op's name that
-            requires conversion to nnvm; the callables are functions which
- take attrs and return (new_op_name, new_attrs)
-
- Returns
- -------
- sym : nnvm.Symbol
- Converted nnvm Symbol
- """
- identity_list = identity_list if identity_list else _identity_list
- convert_map = convert_map if convert_map else _convert_map
- convert_map_rnn = _convert_map_rnn
- if op_name in identity_list:
- sym = get_nnvm_op(op_name)(*inputs, **attrs)
- elif op_name in convert_map:
- sym = convert_map[op_name](inputs, attrs, self._params)
- elif op_name in convert_map_rnn:
- sym = self._convert_rnn_operator(op_name, inputs, attrs,
- self._params, graph,
- convert_map_rnn)
- else:
- raise tvm.error.OpNotImplemented(
- 'Operator {} is not supported in frontend TensorFlow.'.format(op_name))
- return sym
-
- def _fix_extranodes(self, op_name, attr, inputs):
- if op_name == "Softmax":
-            # The data sometimes needs to be flattened before it goes to softmax.
-            # Needs a relook with the latest softmax axis support.
- op = AttrCvt(op_name='flatten')(inputs, {})
- node_output = op.list_output_names()
- for k, i in zip(list(node_output), range(len(node_output))):
- self._nodes[k] = op[i]
- inputs = [op]
-
- return inputs
-
-def from_tensorflow(graph, layout="NHWC", shape=None, outputs=None):
- """Load tensorflow graph which is a python tensorflow graph object into nnvm graph.
- The companion parameters will be handled automatically.
-
- Parameters
- ----------
- graph : GraphDef object
- Tensorflow GraphDef
-
- layout : target layout to be used (Optional)
-        Only NCHW is currently supported, to enable NHWC models on GPU.
-
- shape : Dictionary of input dimensions (Optional)
- Graph level input shape dictionary.
-
- outputs : List of output tensor names (Optional)
- if not specified then the last node is assumed as graph output.
-
- Returns
- -------
- sym : nnvm.Symbol
- Compatible nnvm symbol
-
- params : dict of str to tvm.ndarray
- Dict of converted parameters stored in tvm.ndarray format
- """
- g = GraphProto()
- sym, params = g.from_tensorflow(graph, layout, shape, outputs)
- return sym, params
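A hedged end-to-end usage sketch for this frontend. It assumes the now-removed nnvm package is importable, a TensorFlow 1.x style API, and a frozen GraphDef file named 'frozen_graph.pb' saved with add_shapes=True; all three are placeholders, not taken from this diff.

    import tensorflow as tf            # TF 1.x API assumed
    import nnvm.frontend

    with tf.gfile.GFile("frozen_graph.pb", "rb") as f:   # placeholder path
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # layout / shape / outputs are the optional arguments documented above
    sym, params = nnvm.frontend.from_tensorflow(graph_def, layout="NHWC")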
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines
-"""NNVM Graph IR API.
-
-This is a developer API that is used to manipulate and transform graphs.
-"""
-from __future__ import absolute_import as _abs
-
-import ctypes
-import json
-from ._base import _LIB
-from ._base import c_array, c_str, nn_uint, py_str, string_types
-from ._base import GraphHandle, SymbolHandle
-from ._base import check_call
-from .symbol import Variable, Symbol, Group as _Group
-
-class GraphIndex(object):
- """Index for quickly accessing graph attributes.
-
- Parameters
- ----------
- graph : Graph
- The graph to create index.
- """
- def __init__(self, graph):
- jgraph = json.loads(create(graph).apply("SaveJSON").json_attr("json"))
- self.nodes = jgraph["nodes"]
- self.entry_ptr = jgraph["node_row_ptr"]
- self._name2nodeid = {n["name"]: i for i, n in enumerate(self.nodes)}
- self.input_names = graph.symbol.list_input_names()
- self.output_entries = jgraph["heads"]
-
- @property
- def num_nodes(self):
- """Number of nodes in graph."""
- return len(self.entry_ptr) - 1
-
- @property
- def num_node_entries(self):
- """Number of nodes in graph."""
- return self.entry_ptr[-1]
-
- def node_id(self, key):
- """Get the node index for a given key.
-
- Parameters
- ----------
- key : str or int
- The node key or index
-
- Returns
- -------
- index : int
- The entry index
- """
- return self._name2nodeid[key]
-
- def entry_id(self, key, value_index=0):
- """Get the entry id of a node entry.
-
- Parameters
- ----------
- key : str or int
- The node key or index
-
- value_index : int
- The value index of output
-
- Returns
- -------
- index : int
- The entry index
- """
- if isinstance(key, (list, tuple)):
- if len(key) != 3:
- raise ValueError("Expect entry index to be tuple of 3 elems")
- key, value_index, _ = key
- idx = self.node_id(key) if isinstance(key, str) else key
- assert value_index < self.entry_ptr[idx + 1]
- return self.entry_ptr[idx] + value_index
-
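`entry_ptr` above is the graph's `node_row_ptr` array: a running prefix sum of per-node output counts, so an entry id can be computed without walking the nodes. A toy illustration with invented counts:

    # node 0 has 1 output, node 1 has 2 outputs, node 2 has 1 output
    node_row_ptr = [0, 1, 3, 4]

    def entry_id(node_idx, value_index=0):
        assert value_index < node_row_ptr[node_idx + 1]   # mirrors the check above
        return node_row_ptr[node_idx] + value_index

    assert entry_id(1, 1) == 2              # the second output of node 1
    assert len(node_row_ptr) - 1 == 3       # num_nodes
    assert node_row_ptr[-1] == 4            # num_node_entries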
-
-
-class Graph(object):
- """Graph is the graph object that can be used to apply optimization pass.
-
- It contains additional graphwise attribute besides the internal symbol.
- """
- _tvm_tcode = 17
-
- # pylint: disable=no-member
- def __init__(self, handle):
- """Initialize the function with handle
-
- Parameters
- ----------
- handle : GraphHandle
- the handle to the underlying C++ Graph
- """
- self.handle = handle
- self._index = None
-
- def __del__(self):
- check_call(_LIB.NNGraphFree(self.handle))
-
- def json_attr(self, key):
- """Get attribute string from the graph.
-
- Parameters
- ----------
- key : str
- The key to get attribute from.
-
- Returns
- -------
- value : str
-            The attribute value of the key; returns None if the attribute does not exist.
- """
- ret = ctypes.c_char_p()
- success = ctypes.c_int()
- check_call(_LIB.NNGraphGetJSONAttr(
- self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
- if success.value != 0:
- json_str = py_str(ret.value)
- return json.loads(json_str)[1]
- return None
-
- def _set_symbol_list_attr(self, key, value):
- """Set the attribute of the graph.
-
- Parameters
- ----------
- key : string
- The key of the attribute
-        value : Symbol or list of Symbol
-            The symbols to set as the node entry list attribute.
- """
- if isinstance(value, list):
- value = _Group(value)
- if not isinstance(value, Symbol):
- raise ValueError("value need to be grouped symbol")
- check_call(_LIB.NNGraphSetNodeEntryListAttr_(
- self.handle, c_str(key), value.handle))
-
- def _set_json_attr(self, key, value, type_name=None):
- """Set the attribute of the graph.
-
- Parameters
- ----------
- key : string
- The key of the attribute
- value : value
-            Any type that can be dumped to json.
- type_name : string
- The typename registered on c++ side.
- """
- if isinstance(value, string_types):
- type_name = 'str'
- elif type_name is None:
- raise ValueError("Need to specify type_name")
- json_value = json.dumps([type_name, value])
- check_call(_LIB.NNGraphSetJSONAttr(
- self.handle, c_str(key), c_str(json_value)))
-
- @property
- def _tvm_handle(self):
- return self.handle.value
-
- @property
- def symbol(self):
- shandle = SymbolHandle()
- check_call(_LIB.NNGraphGetSymbol(self.handle, ctypes.byref(shandle)))
- return Symbol(shandle)
-
- def json(self):
- """Get JSON representation of the graph
-
- Returns
- -------
- json : str
- JSON representation of the graph
- """
- return self.apply("SaveJSON").json_attr("json")
-
- def _tvm_graph_json(self):
- """Get TVM graph json"""
- return self.json()
-
- @property
- def index(self):
- if not self._index:
- self._index = GraphIndex(self)
- return self._index
-
- def ir(self, join_entry_attrs=None, join_node_attrs=None):
- """Get text form of graph ir.
-
- Parameters
- ----------
- join_entry_attrs : list of str
- List of graph NodeEntry attribute to be
- printed along each operator.
-
- join_node_attrs : list of str
- List of graph node attribute to be
- printed along each operator.
- """
- if join_entry_attrs:
- self._set_json_attr("join_entry_attrs", join_entry_attrs, "list_str")
- if join_node_attrs:
- self._set_json_attr("join_node_attrs", join_node_attrs, "list_str")
- return self.apply("PrintGraphIR").json_attr("graphir")
-
- def apply(self, passes):
- """Apply passes to the graph
-
- Parameters
- ----------
- passes : str or list of str
- The passes to be applied
-
- Returns
- -------
- g : Graph
- The transformed graph.
- """
- if isinstance(passes, string_types):
- passes = [passes]
- cpass = c_array(ctypes.c_char_p, [c_str(key) for key in passes])
- ghandle = GraphHandle()
- npass = nn_uint(len(passes))
- check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle)))
- return Graph(ghandle)
-
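Hedged usage sketch for the Graph class above; it assumes the now-removed nnvm package (and its shared library) can be imported, and that `relu` is registered as a symbol op, as the frontend code earlier in this diff suggests:

    import nnvm.graph as graph
    import nnvm.symbol as sym

    x = sym.Variable("x")
    y = sym.relu(x)
    g = graph.create(y)
    print(g.json())    # applies the "SaveJSON" pass internally
    print(g.ir())      # applies the "PrintGraphIR" pass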
-
-def load_json(json_str):
- """Create a new graph by loading from json
-
- Parameters
- ----------
- json_str : str
- The json string
-
- Returns
- -------
- graph : Graph
- The loaded graph
- """
- ret = create(Variable("x"))
- ret._set_json_attr("json", json_str)
- return ret.apply("LoadJSON")
-
-
-def create(symbol):
- """Create a new graph from symbol.
-
- Parameters
- ----------
- symbol : Symbol
- The symbolic graph used to create Graph object.
-
- Returns
- -------
- graph : Graph
- A generated new graph object.
- """
- ghandle = GraphHandle()
- check_call(_LIB.NNGraphCreate(
- symbol.handle, ctypes.byref(ghandle)))
- return Graph(ghandle)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-"""Information about nnvm."""
-from __future__ import absolute_import
-import sys
-import os
-import platform
-
-if sys.version_info[0] == 3:
- import builtins as __builtin__
-else:
- import __builtin__
-
-def find_lib_path():
- """Find NNNet dynamic library files.
-
- Returns
- -------
- lib_path : list(string)
-        List of all found paths to the libraries.
- """
- if hasattr(__builtin__, "NNVM_BASE_PATH"):
- base_path = __builtin__.NNVM_BASE_PATH
- else:
- base_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
-
- if hasattr(__builtin__, "NNVM_LIBRARY_NAME"):
- lib_name = __builtin__.NNVM_LIBRARY_NAME
- else:
- lib_name = "nnvm_compiler" if sys.platform.startswith('win32') else "libnnvm_compiler"
-
- api_path = os.path.join(base_path, '..', '..', 'lib')
- cmake_build_path_win = os.path.join(base_path, '..', '..', '..', 'build', 'Release')
- cmake_build_path = os.path.join(base_path, '..', '..', '..', 'build')
- install_path = os.path.join(base_path, '..', '..', '..')
- dll_path = [base_path, api_path, cmake_build_path_win, cmake_build_path,
- install_path]
-
- if sys.platform.startswith('linux') and os.environ.get('LD_LIBRARY_PATH', None):
- dll_path.extend([p.strip() for p in os.environ['LD_LIBRARY_PATH'].split(":")])
- elif sys.platform.startswith('darwin') and os.environ.get('DYLD_LIBRARY_PATH', None):
- dll_path.extend([p.strip() for p in os.environ['DYLD_LIBRARY_PATH'].split(":")])
- elif sys.platform.startswith('win32') and os.environ.get('PATH', None):
- dll_path.extend([p.strip() for p in os.environ['PATH'].split(";")])
-
- if sys.platform.startswith('win32'):
- vs_configuration = 'Release'
- if platform.architecture()[0] == '64bit':
- dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration))
- dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', 'x64',
- vs_configuration))
- else:
- dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration))
- dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', vs_configuration))
- dll_path = [os.path.join(p, '%s.dll' % lib_name) for p in dll_path]
- elif sys.platform.startswith('darwin'):
- dll_path = [os.path.join(p, '%s.dylib' % lib_name) for p in dll_path]
- else:
- dll_path = [os.path.join(p, '%s.so' % lib_name) for p in dll_path]
-
- lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
- if not lib_path:
- raise RuntimeError('Cannot find the files.\n' +
- 'List of candidates:\n' + str('\n'.join(dll_path)))
- return lib_path
-
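The two `__builtin__` hooks checked above let a build or packaging script pin the library search before the package is imported. A hedged sketch (the base path is a placeholder, and the module name `libinfo` is assumed from this file's role):

    import builtins                                      # '__builtin__' on Python 2

    builtins.NNVM_BASE_PATH = "/opt/nnvm/python/nnvm"    # placeholder install location
    builtins.NNVM_LIBRARY_NAME = "libnnvm_compiler"      # the non-Windows default above

    from nnvm import libinfo                             # assumes the removed package layout
    print(libinfo.find_lib_path()[0])                    # first matching shared library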
-
-# current version
-__version__ = "0.8.0"
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-"""Automatic naming support for symbolic API."""
-from __future__ import absolute_import as _abs
-
-class NameManager(object):
- """NameManager to do automatic naming.
-
-    Users can also inherit from this class to change the naming behavior.
- """
- current = None
-
- def __init__(self):
- self._counter = {}
- self._old_manager = None
-
- def get(self, name, hint):
- """Get the canonical name for a symbol.
-
-        This is the default implementation.
-        When the user specifies a name,
-        the user-specified name will be used.
-
-        When the user does not, a name is generated
-        automatically based on the hint string.
-
- Parameters
- ----------
- name : str or None
- The name user specified.
-
- hint : str
- A hint string, which can be used to generate name.
-
- Returns
- -------
- full_name : str
- A canonical name for the user.
- """
- if name:
- return name
- if hint not in self._counter:
- self._counter[hint] = 0
- name = '%s%d' % (hint, self._counter[hint])
- self._counter[hint] += 1
- return name
-
- def __enter__(self):
- self._old_manager = NameManager.current
- NameManager.current = self
- return self
-
- def __exit__(self, ptype, value, trace):
- assert self._old_manager
- NameManager.current = self._old_manager
-
-
-class Prefix(NameManager):
- """A name manager that always attach a prefix to all names.
-
- Examples
- --------
- >>> import nnvm as nn
- >>> data = nn.symbol.Variable('data')
- >>> with nn.name.Prefix('mynet_'):
- net = nn.symbol.FullyConnected(data, num_hidden=10, name='fc1')
- >>> net.list_arguments()
- ['data', 'mynet_fc1_weight', 'mynet_fc1_bias']
- """
- def __init__(self, prefix):
- super(Prefix, self).__init__()
- self._prefix = prefix
-
- def get(self, name, hint):
- name = super(Prefix, self).get(name, hint)
- return self._prefix + name
-
-# initialize the default name manager
-NameManager.current = NameManager()
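A minimal sketch of the automatic-naming behaviour defined above; it needs no NNVM library, since NameManager and Prefix are pure Python:

    nm = NameManager()
    assert nm.get(None, "dense") == "dense0"   # hint + running counter
    assert nm.get(None, "dense") == "dense1"
    assert nm.get("fc1", "dense") == "fc1"     # an explicit name always wins

    with Prefix("mynet_") as pm:
        assert pm.get(None, "conv") == "mynet_conv0"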
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-import, protected-access
-"""Symbolic graph construction API.
-
-This namespace contains most of the registered operators.
-For detailed list of operators, checkout ``Core Tensor Operators``
-"""
-from __future__ import absolute_import as _abs
-import sys as _sys
-import os as _os
-import ctypes as _ctypes
-from numbers import Number as _Number
-
-import numpy as np
-
-from . import _base
-from ._base import _LIB, check_call as _check_call, _FFI_MODE, _all_var_init
-from .attribute import AttrScope
-from . import _symbol_internal as _internal
-from . import contrib
-
-# Use a different version of SymbolBase.
-# When possible, use cython to speed up part of the computation.
-
-IMPORT_EXCEPT = RuntimeError if _FFI_MODE == "cython" else ImportError
-
-try:
- if _FFI_MODE == "ctypes":
- raise ImportError()
- if _sys.version_info >= (3, 0):
- from ._cy3.symbol import SymbolBase, _init_symbol_module
- else:
- from ._cy2.symbol import SymbolBase, _init_symbol_module
-except IMPORT_EXCEPT:
- # pylint: disable=wrong-import-position
- from ._ctypes.symbol import SymbolBase, _init_symbol_module
-
-
-class Symbol(SymbolBase):
- """Symbol is basic operation unit for symbolic graph composition."""
- # disable dictionary storage, also do not have parent type.
- __slots__ = []
-
- _tvm_tcode = 16
-
- @property
- def _tvm_handle(self):
- return self.handle.value
-
- def __add__(self, other):
- """x.__add__(y) <=> x+y"""
- if isinstance(other, Symbol):
- return __add_symbol__(self, other)
- if isinstance(other, _Number):
- return __add_scalar__(self, scalar=other)
- raise TypeError("type %s not supported" % str(type(other)))
-
- def __radd__(self, other):
- return self.__add__(other)
-
- def __sub__(self, other):
- """x.__sub__(y) <=> x-y"""
- if isinstance(other, Symbol):
- return __sub_symbol__(self, other)
- if isinstance(other, _Number):
- return __sub_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __rsub__(self, other):
- if isinstance(other, _Number):
- return __rsub_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __mul__(self, other):
- """x.__mul__(y) <=> x*y"""
- if isinstance(other, Symbol):
- return __mul_symbol__(self, other)
- if isinstance(other, _Number):
- return __mul_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __rmul__(self, other):
- return self.__mul__(other)
-
- def __div__(self, other):
- """x.__div__(y) <=> x/y"""
- if isinstance(other, Symbol):
- return __div_symbol__(self, other)
- if isinstance(other, _Number):
- return __div_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __rdiv__(self, other):
- if isinstance(other, _Number):
- return __rdiv_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __lshift__(self, other):
- """x.__lshift__(y) <=> x << y"""
- if isinstance(other, _Number):
- return __lshift_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __rshift__(self, other):
- """x.__rshift__(y) <=> x >> y"""
- if isinstance(other, _Number):
- return __rshift_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __truediv__(self, other):
- return self.__div__(other)
-
- def __rtruediv__(self, other):
- return self.__rdiv__(other)
-
- def __pow__(self, other):
- """x.__pow__(y) <=> x**y"""
- if isinstance(other, Symbol):
- return __pow_symbol__(self, other)
- if isinstance(other, _Number):
- return __pow_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __rpow__(self, other):
- if isinstance(other, _Number):
- return __rpow_scalar__(self, scalar=other)
- raise TypeError('type %s not supported' % str(type(other)))
-
- def __neg__(self):
- """x.__neg__() <=> -x"""
- return self.__mul__(-1.0)
-
- def __copy__(self):
- return self.__deepcopy__()
-
- def __deepcopy__(self, _=None):
- """Returns a deep copy of the input object."""
- handle = _base.SymbolHandle()
- _base.check_call(_LIB.NNSymbolCopy(self.handle,
- _ctypes.byref(handle)))
- return Symbol(handle)
-
- def __getitem__(self, index):
- if isinstance(index, _base.string_types):
- idx = None
- for i, name in enumerate(self.list_output_names()):
- if name == index:
- if idx is not None:
- raise ValueError('There are multiple outputs with name \"%s\"' % index)
- idx = i
- if idx is None:
- raise ValueError('Cannot find output that matches name \"%s\"' % index)
- index = idx
- if not isinstance(index, int):
-            raise TypeError('Symbol only supports an integer index to fetch the i-th output')
- handle = _base.SymbolHandle()
- _check_call(_LIB.NNSymbolGetOutput(
- self.handle, _base.nn_uint(index), _ctypes.byref(handle)))
- return Symbol(handle=handle)
-
- def __iter__(self):
- return (self[i] for i in self.list_output_names())
-
- def attr(self, key):
- """Get attribute string from the symbol, this function only works for non-grouped symbol.
-
- Parameters
- ----------
- key : str
- The key to get attribute from.
-
- Returns
- -------
- value : str
-            The attribute value of the key; returns None if the attribute does not exist.
- """
- ret = _ctypes.c_char_p()
- success = _ctypes.c_int()
- _check_call(_LIB.NNSymbolGetAttr(
- self.handle, _base.c_str(key), _ctypes.byref(ret), _ctypes.byref(success)))
- if success.value != 0:
- return _base.py_str(ret.value)
- return None
-
- def list_attr(self, recursive=False):
- """Get all attributes from the symbol.
-
- Parameters
- ----------
- recursive : bool
-            Default `False`. When `recursive` is `True`, recursively list all the
-            attributes of the descendants. The attribute names are pre-pended with
-            the symbol names to avoid conflicts. If `False`, then only attributes
-            that belong to this symbol are returned, and the attribute names will
-            **not** be pre-pended with the symbol name.
- """
- size = _base.nn_uint()
- pairs = _ctypes.POINTER(_ctypes.c_char_p)()
- option = _ctypes.c_int(0) if recursive else _ctypes.c_int(1)
- _check_call(_LIB.NNSymbolListAttrs(
- self.handle, option, _ctypes.byref(size), _ctypes.byref(pairs)))
- return {_base.py_str(pairs[i*2]): _base.py_str(pairs[i*2+1]) for i in range(size.value)}
-
- def get_internals(self):
- """Get a new grouped symbol whose output contains all the internal outputs of this symbol.
-
- Returns
- -------
- sgroup : Symbol
- The internal of the symbol.
- """
- handle = _base.SymbolHandle()
- _check_call(_LIB.NNSymbolGetInternals(
- self.handle, _ctypes.byref(handle)))
- return Symbol(handle=handle)
-
- def get_children(self):
- """Gets a new grouped symbol whose output contains
- inputs to output nodes of the original symbol."""
- handle = _base.SymbolHandle()
- _check_call(_LIB.NNSymbolGetChildren(
- self.handle, _ctypes.byref(handle)))
- ret = Symbol(handle=handle)
- if not ret.list_output_names():
- return None
- return ret
-
- def _get_list_copt(self, option):
- """internal function to get list option"""
- if option == 'all':
- return _ctypes.c_int(0)
- if option == 'read_only':
- return _ctypes.c_int(1)
- if option == 'aux_state':
- return _ctypes.c_int(2)
- raise ValueError("option need to be in {'all', 'read_only, 'aux_state'}")
-
- def list_input_variables(self, option='all'):
- """List all the input variables in the symbol.
-
- Parameters
- ----------
- option : {'all', 'read_only', 'aux_state'}, optional
- The listing option
- - 'all' will list all the arguments.
-            - 'read_only' lists arguments that are read by the graph.
- - 'aux_state' lists arguments that are mutated by the graph as state.
- Returns
- -------
- vars : list of symbol
- List of all the variables
- """
- size = _ctypes.c_uint()
- sarr = _ctypes.POINTER(_base.SymbolHandle)()
- _check_call(_LIB.NNSymbolListInputVariables(
- self.handle, self._get_list_copt(option),
- _ctypes.byref(size), _ctypes.byref(sarr)))
- return [Symbol(_base.SymbolHandle(sarr[i])) for i in range(size.value)]
-
- def list_input_names(self, option='all'):
- """List all the inputs in the symbol.
-
- Parameters
- ----------
- option : {'all', 'read_only', 'aux_state'}, optional
- The listing option
- - 'all' will list all the arguments.
-            - 'read_only' lists arguments that are read by the graph.
- - 'aux_state' lists arguments that are mutated by the graph as state.
- Returns
- -------
- args : list of string
- List of all the arguments.
- """
- size = _ctypes.c_uint()
- sarr = _ctypes.POINTER(_ctypes.c_char_p)()
- _check_call(_LIB.NNSymbolListInputNames(
- self.handle, self._get_list_copt(option),
- _ctypes.byref(size), _ctypes.byref(sarr)))
- return [_base.py_str(sarr[i]) for i in range(size.value)]
-
- def list_output_names(self):
- """List all outputs in the symbol.
-
- Returns
- -------
- returns : list of string
- List of all the outputs.
- """
- size = _ctypes.c_uint()
- sarr = _ctypes.POINTER(_ctypes.c_char_p)()
- _check_call(_LIB.NNSymbolListOutputNames(
- self.handle, _ctypes.byref(size), _ctypes.byref(sarr)))
- return [_base.py_str(sarr[i]) for i in range(size.value)]
-
- def debug_str(self):
- """Get a debug string.
-
- Returns
- -------
- debug_str : string
- Debug string of the symbol.
- """
- debug_str = _ctypes.c_char_p()
- _check_call(_LIB.NNSymbolPrint(
- self.handle, _ctypes.byref(debug_str)))
- return _base.py_str(debug_str.value)
-
- def _add_control_deps(self, deps):
- """Add control flow dependencies.
- This makes the current op depend on the deps.
- Only use when necessary, since
- this function mutates the current symbol node.
-
- Parameters
- ----------
- deps : Symbol or list of Symbol
- The dependencies
- """
- if isinstance(deps, list):
- deps = Group(deps)
- _check_call(_LIB.NNAddControlDeps(
- self.handle, deps.handle))
-
-
-def Variable(name, init=None, **kwargs):
- """Create a symbolic variable with specified name.
-
- Parameters
- ----------
- name : str
- Name of the variable.
- init : Symbol or numpy.ndarray
- Symbol or numpy ndarray of initial value for the variable.
- Note that a symbolic initialization value must be definable
- through InferShape, such as sym.zeros_like(v), where v is an
- input or parameter. Otherwise, pass a numpy ndarray instead.
- kwargs : dict of string -> string
- Additional attributes to set on the variable.
-
- Returns
- -------
- variable : Symbol
- The created variable symbol.
- """
- if not isinstance(name, _base.string_types):
- raise TypeError('Expect a string for variable `name`')
- handle = _base.SymbolHandle()
- _base.check_call(_LIB.NNSymbolCreateVariable(
- _base.c_str(name), _ctypes.byref(handle)))
- ret = Symbol(handle)
- attr = AttrScope.current.get(kwargs)
- if attr:
- ret._set_attr(**attr)
- if init is not None:
- if not isinstance(init, (Symbol, np.ndarray)):
- raise TypeError('Expect a Symbol or numpy ndarray '
- 'for variable `init`')
- _all_var_init[name] = init
- return ret
-
-
-def Group(symbols):
- """Create a symbol that groups symbols together.
-
- Parameters
- ----------
- symbols : list
- List of symbols to be grouped.
-
- Returns
- -------
- sym : Symbol
- The created group symbol.
- """
- ihandles = []
- for sym in symbols:
- if not isinstance(sym, Symbol):
- raise TypeError('Expect Symbols in the list input')
- ihandles.append(sym.handle)
- handle = _base.SymbolHandle()
- _check_call(_LIB.NNSymbolCreateGroup(
- _base.nn_uint(len(ihandles)),
- _base.c_array(_base.SymbolHandle, ihandles),
- _ctypes.byref(handle)))
- return Symbol(handle)
-
-# Set the real symbol class to Symbol
-_init_symbol_module(Symbol, "nnvm")
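For context, a minimal sketch of how the removed symbolic API was typically exercised (hedged: reconstructed from the docstrings above, not taken from the deleted tests):

    # hypothetical usage of the removed nnvm symbol API
    import nnvm.symbol as sym

    x = sym.Variable("x", shape=(1, 2))      # create input variables
    y = sym.Variable("y", shape=(1, 2))
    z = sym.elemwise_add(x, y)               # build a small graph
    grouped = sym.Group([z, x])              # group several outputs together
    print(grouped.list_input_names())        # typically ['x', 'y']
    print(grouped.list_output_names())       # output entry names of the group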
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Utilities for testing and benchmarks"""
-from __future__ import absolute_import as _abs
-
-from .config import ctx_list
-from .utils import create_workload
-from . import mobilenet
-from . import mobilenet_v2
-from . import mlp
-from . import resnet
-from . import vgg
-from . import densenet
-from . import squeezenet
-from . import inception_v3
-from . import dcgan
-from . import dqn
-from . import check_computation
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=cell-var-from-loop,no-else-return
-"""Helper utilities to check functions and their gradients."""
-from __future__ import absolute_import as _abs
-
-import logging
-import numpy as np
-
-import tvm
-from tvm.contrib import graph_runtime
-from tvm.testing import check_numerical_grads
-from tvm import relay
-
-import nnvm
-from nnvm.compiler import graph_util
-from nnvm.compiler.graph_attr import TCODE_TO_DTYPE, DTYPE_TO_TCODE
-from nnvm.to_relay import to_relay
-from .config import ctx_list
-
-def infer_shapes_dtypes(graph, shape=None, dtype=None, fallback_dtype=None):
- """Runs dtype and shape inference passes on a graph and returns the resulting graph
- along with the inferred information.
-
- Parameters
- ----------
- graph : nnvm.graph.Graph
- A graph we want to run inference on.
-
- shape : Dict[str, Tuple[int]] or Tuple[int], optional
- A dict mapping input variable names to shapes.
- By default shapes will be inferred from variables' attributes.
- Note that this parameter takes precedence over variables' attributes.
-
- dtype : Dict[str, str] or str, optional
- A dict mapping input variable names to dtypes, or just a single dtype.
- By default dtypes will be inferred from variables' attributes.
- Note that this parameter takes precedence over variables' attributes.
-
- fallback_dtype : str, optional
- A dtype that will be used for variables whose dtype can't be inferred from other
- variables' dtypes.
-
- Returns
- -------
- graph : nnvm.graph.Graph
- The resulting graph with dtype and shape information on its nodes.
-
- input_shapes : Dict[str, Tuple[int]]
- The inferred shapes of input variables merged with the `shape` dictionary.
-
- input_dtypes : Dict[str, str]
- The inferred dtypes of input variables merged with the `dtype` dictionary.
-
- output_shapes : List[Tuple[int]]
- The inferred shapes of outputs.
-
- output_dtypes : List[str]
- The inferred dtypes of outputs.
- """
- # Preprocess input parameters
- if shape is None:
- provided_shapes = {}
- elif isinstance(shape, dict):
- provided_shapes = shape
- else:
- provided_shapes = {x: shape for x in graph.symbol.list_input_variables()}
-
- if dtype is None:
- provided_dtypes = {}
- elif isinstance(dtype, dict):
- provided_dtypes = dtype
- else:
- provided_dtypes = {x: dtype for x in graph.symbol.list_input_variables()}
-
- provided_shapes = _dict_var_to_dict_str(provided_shapes)
- provided_dtypes = _dict_var_to_dict_str(provided_dtypes)
-
- # The graph may already contain shape and dtype info, so extract it and merge with
- # the user-specified shapes and dtypes (use the user-specified one on contradiction)
- preexisting_shapes = graph.json_attr('shape')
- preexisting_dtypes = graph.json_attr('dtype')
-
- if preexisting_shapes:
- for x in graph.index.input_names:
- if x not in provided_shapes:
- x_shape = tuple(preexisting_shapes[graph.index.entry_id(x)])
- provided_shapes[x] = x_shape
-
- if preexisting_dtypes:
- for x in graph.index.input_names:
- if x not in provided_dtypes:
- x_dtype = TCODE_TO_DTYPE[preexisting_dtypes[graph.index.entry_id(x)]]
- provided_dtypes[x] = x_dtype
-
- # Perform inference
- nnvm.compiler.graph_attr.set_shape_inputs(graph, provided_shapes)
- nnvm.compiler.graph_attr.set_dtype_inputs(graph, provided_dtypes)
-
- graph = graph.apply('InferShape').apply('InferType')
-
- inferred_shapes = graph.json_attr('shape')
- inferred_dtypes = graph.json_attr('dtype')
-
- index = graph.index
-
- output_shapes = [tuple(inferred_shapes[index.entry_id(entry)])
- for entry in index.output_entries]
- output_dtypes = [TCODE_TO_DTYPE[inferred_dtypes[index.entry_id(entry)]]
- for entry in index.output_entries]
-
- # Postprocess the results
- input_shapes = provided_shapes.copy()
- input_dtypes = provided_dtypes.copy()
-
- for x in graph.symbol.list_input_variables():
- x_name = x.attr('name')
- x_entry_id = graph.index.entry_id(x_name)
- input_shapes[x_name] = tuple(inferred_shapes[x_entry_id])
- input_dtypes[x_name] = TCODE_TO_DTYPE[inferred_dtypes[x_entry_id]]
-
- # Merge the original user-specified shapes in case some of them are specified for non-existing
- # variables
- for x_name, x_shape in provided_shapes.items():
- x_shape = tuple(x_shape)
- if input_shapes.get(x_name, x_shape) != x_shape:
- raise RuntimeError("Inferred shape differs from the provided shape.\n"
- "Provided shapes: {}\nInferred shapes: {}"
- .format(provided_shapes, input_shapes))
- else:
- input_shapes[x_name] = x_shape
-
- # Merge the original user-specified dtypes
- for x_name, x_dtype in provided_dtypes.items():
- if not isinstance(x_dtype, str):
- x_dtype = TCODE_TO_DTYPE[x_dtype]
- if input_dtypes.get(x_name, x_dtype) != x_dtype:
- raise RuntimeError("Inferred dtype differs from the provided dtype.\n"
- "Provided dtypes: {}\nInferred dtypes: {}"
- .format(provided_dtypes, input_dtypes))
- else:
- input_dtypes[x_name] = x_dtype
-
- # If some dtypes weren't inferred and there is a fallback dtype, assign it to those variables
- # and repeat the inference
- if fallback_dtype is not None and not all(input_dtypes.values()):
- input_dtypes = {x: input_dtypes[x] if input_dtypes[x] else fallback_dtype
- for x in input_dtypes}
- return infer_shapes_dtypes(graph, input_shapes, input_dtypes, fallback_dtype=None)
-
- return graph, input_shapes, input_dtypes, output_shapes, output_dtypes
-
-def graph_to_function(graph, target, ctx, shape=None, dtype=None):
- """Convert a graph to a function taking a keyword args and returning a list of results
- (both args and results are numpy arrays).
-
- Example::
-
- fun = graph_to_function(graph, "llvm", tvm.cpu(0))
- [res1, res2] = fun(x=np.zeros((1,2)), y=np.zeros((1,)))
-
- Parameters
- ----------
- graph : nnvm.graph.Graph
- A graph we want to convert to a function.
-
- target : str or :any:`tvm.target.Target`
- The build target
-
- ctx : TVMContext
- The context to deploy the module.
-
- shape : Dict[str, Tuple[int]], optional
- A dict mapping input variable names to shapes.
- By default shapes will be inferred from variables' attributes.
- Note that this parameter takes precedence over variables' attributes.
-
- dtype : Dict[str, str] or str, optional
- A dict mapping input variable names to dtypes, or just a single dtype.
- By default dtypes will be inferred from variables' attributes.
- Note that this parameter takes precedence over variables' attributes.
-
- Returns
- -------
- function : Callable[..., List[numpy.ndarray]]
- """
- # Infer missing shapes and dtypes
- graph, shape, dtype, output_shapes, output_dtypes = \
- infer_shapes_dtypes(graph, shape=shape, dtype=dtype)
-
- if None in dtype.values():
- raise ValueError("Input variables with no type: {}".format(dtype))
-
- if not all(shape.values()):
- raise ValueError("Input variables with no shape: {}".format(shape))
-
- compute_graph, lib, params = nnvm.compiler.build(graph, target, shape=shape, dtype=dtype)
- module = graph_runtime.create(compute_graph, lib, ctx)
-
- if params:
- module.set_input(**params)
-
- def run(**kwargs):
- module.run(**kwargs)
- res = []
- for i, (o_shape, o_dtype) in enumerate(zip(output_shapes, output_dtypes)):
- res.append(module.get_output(i, tvm.nd.empty(o_shape, o_dtype)).asnumpy())
- return res
-
- return run
-
-def _dict_var_to_dict_str(dictionary):
- """Convert a Dict[nnvm.Symbol, T] to Dict[str, T]"""
- if isinstance(dictionary, dict):
- return {s.attr('name') if isinstance(s, nnvm.symbol.Symbol) else s:
- dictionary[s] for s in dictionary}
- else:
- return dictionary
-
-def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
- shape=None, dtype=None, in_range=None, values=None,
- exclude_targets=None, only_targets=None,
- additional_params=None,
- numerical_grads=None, numerical_grads_params=None,
- atol=1e-5, rtol=1e-5, quiet=False):
- """Compute the function and/or its gradients on a random input and raise
- an exception if the result doesn't match the reference implementation.
-
- Parameters
- ----------
- symbol : nnvm.Symbol
- A symbol representing the output.
-
- forward : Callable[..., List[numpy.ndarray]], optional
- A reference implementation to compare with.
-
- backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional
- A reference implementation of gradients. Besides the normal inputs it should
- also accept head_grads, which is a list of gradients of some scalar wrt the
- outputs, or just a single gradient if there is only one output.
- Should return either a dict mapping input variable names to the respective
- gradients or a list of gradients wrt variables from grad_input_vars in
- exactly the same order (in alphabetical order by default).
-
- grad_input_vars : List[nnvm.Symbol or str], optional
- A list of variables with respect to which the gradients will be computed.
- None (default) means that all input variables will be used in an alphabetical order.
-
- shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional
- A dict mapping input variable names to shapes, or just a single shape.
- By default shapes will be inferred from variables' attributes (see the Examples).
- Note that this parameter takes precedence over variables' attributes.
-
- dtype : Dict[nnvm.Symbol or str, str] or str, optional
- A dict mapping input variable names to dtypes, or just a single dtype.
- By default dtypes will be inferred from variables' attributes (see the Examples).
- If dtypes cannot be inferred for some variables then float32 will be used as a fallback.
- Note that this parameter takes precedence over variables' attributes.
-
- in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional
- A dict mapping input variable names to ranges or just a single range
- (the same for all variables). Input values will be generated from
- uniform distributions on these ranges. `head_grads` can also be
- assigned a range this way.
-
- values : Dict[nnvm.Symbol or str, numpy.ndarray], optional
- A dict explicitly providing values for some variables instead of random generation.
-
- exclude_targets : Set[str], optional
- Skip compiling and running anything for these targets.
-
- only_targets : Set[str], optional
- Test only for those targets from `ctx_list()` that are also in this set.
-
- additional_params : dict, optional
- A dict of additional parameters which will be passed to forward and backward.
-
- numerical_grads : bool or 'if_possible', optional
- Whether to additionally check against numerically computed gradients. If 'if_possible' or
- None is passed (which is the default) then it will try to create a gradient computation
- graph and then check gradients numerically only if this graph can be created (i.e. if there
- are some operations with unimplemented gradients, it will just issue a warning).
- Checking against numerical gradients is done via the `check_numerical_grads` function.
-
- numerical_grads_params : dict, optional
- Additional parameters for `check_numerical_grads`.
-
- atol : float, optional
- Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.
-
- rtol : float, optional
- Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.
-
- quiet : bool, optional
- Don't dump additional information to stdout on failure.
-
- Examples
- --------
- .. code-block:: python
-
- x = sym.Variable("x", shape=(1, 2))
- y = sym.Variable("y", shape=(1, 2))
-
- # check the function and its gradients both numerically and using a reference function
- check_function(x + 2*y,
- lambda x, y: x + 2*y,
- lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads})
-
- # just check gradients numerically
- check_function(x + 2*y, numerical_grads=True)
-
- # just check the forward computation
- check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False)
-
- # specifying dtype
- check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64')
-
- # dtypes can also be specified during variable creation with dtype codes
- x = sym.Variable("x", dtype=0)
- check_function(x + 1, shape=(2, 2), numerical_grads=True)
- """
- # validate and preprocess the input params
- if numerical_grads is None and forward is None and backward is None:
- raise ValueError("No reference function was passed to check_function. If you only want to "
- "check gradients numerically, pass numerical_grads=True explicitly.")
-
- if numerical_grads is None:
- numerical_grads = 'if_possible'
-
- if numerical_grads not in [False, True, 'if_possible']:
- raise ValueError("numerical_grads must be a bool or 'if_possible', not {}"
- .format(numerical_grads))
-
- if additional_params is None:
- additional_params = {}
-
- input_vars = symbol.list_input_variables()
- input_dict = {x.attr('name'): x for x in input_vars}
-
- if grad_input_vars is None:
- grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name'))
- else:
- grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars]
-
- in_range = _dict_var_to_dict_str(in_range)
- values = _dict_var_to_dict_str(values)
-
- out_len = len(symbol.list_output_names())
-
- # Infer the output shapes and dtypes, and preprocess the shape and dtype params
- forward_graph, shape, dtype, out_shapes, out_dtypes = \
- infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype,
- fallback_dtype='float32')
-
- if not all(out_shapes) or not all(out_dtypes):
- if not quiet:
- print(forward_graph.ir(join_node_attrs=['shape', 'dtype']))
- raise ValueError("Could not infer shapes or dtypes for outputs.\n"
- "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes))
-
- backward_graph = None
-
- # If we want gradients, we have to recreate the graph, but now with gradient computations
- # Note that here we need out_shapes for defining the shape of head grads, so we have to
- # create the graph twice
- if backward is not None or numerical_grads:
- try:
- head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i),
- shape=out_shapes[i],
- dtype=DTYPE_TO_TCODE[out_dtypes[i]])
- for i in range(out_len)]
- grad_symbols = graph_util.gradients([symbol], grad_input_vars,
- grad_ys=head_grads_symbols)
- # Sometimes grads do not depend on head_grads, so head_grads does not appear
- # in the variable list; adding it manually prevents this, making things a bit easier
- backward_graph = \
- nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols))
-
- backward_graph, shape, dtype, out_shapes, out_dtypes = \
- infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype,
- fallback_dtype='float32')
- except nnvm._base.NNVMError as err:
- if backward is None and numerical_grads == "if_possible":
- logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0])
- numerical_grads = False
- backward_graph = None
- else:
- raise
-
- main_graph = backward_graph if backward_graph is not None else forward_graph
-
- # Generate random data for inputs (including head_grads)
-
- np_inputs = {}
-
- for x in main_graph.symbol.list_input_variables():
- x_name = x.attr('name')
- x_shape = shape[x_name]
- x_dtype = dtype[x_name]
-
- if values is not None and x_name in values:
- np_inputs[x_name] = values[x_name].astype(x_dtype)
- continue
-
- low = -1.0
- high = 1.0
- if in_range is not None:
- if isinstance(in_range, dict):
- if x_name in in_range:
- low = in_range[x_name][0]
- high = in_range[x_name][1]
- else:
- low = in_range[0]
- high = in_range[1]
-
- np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype)
-
- np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs
- if not k.startswith('head_grads_')}
-
- nothing_was_done = True
-
- # Compute and compare the results
- for target, ctx in ctx_list():
- if exclude_targets is not None:
- if target in exclude_targets or str(target) in exclude_targets:
- logging.info("Skipping target = %s, ctx = %s", target, ctx)
- continue
- if only_targets is not None:
- if target not in only_targets and str(target) not in only_targets:
- logging.info("Skipping target = %s, ctx = %s", target, ctx)
- continue
-
- logging.info("Checking computation on target = %s, ctx = %s", target, ctx)
-
- debug_stage = None
-
- try:
- nnvm_res = None
-
- debug_stage = "compiling"
- main_function = graph_to_function(main_graph, target, ctx)
-
- # nnvm_res contains the output and gradients (if they are needed)
- debug_stage = "running"
- nnvm_res = main_function(**np_inputs)
-
- try:
- logging.debug("checking to_relay conversion")
- inputs = np_inputs_without_head_grads.copy()
- func, inputs = to_relay(main_graph, shape, dtype, params=inputs)
- with relay.build_config(opt_level=3):
- graph, lib, params = relay.build(func, target=target)
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**inputs)
- m.set_input(**params)
- m.run()
- for i in range(out_len):
- relay_out = m.get_output(i).asnumpy()
- tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol)
- except NotImplementedError as err:
- # the NNVM operator is not supported yet
- logging.warning(err)
-
- if backward_graph is not None:
- grad_var_names = [x.attr('name') for x in grad_input_vars]
- nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])}
-
- if forward is not None:
- nothing_was_done = False
- debug_stage = "checking forward computation"
- logging.debug(debug_stage)
-
- params = {}
- params.update(np_inputs_without_head_grads)
- params.update(additional_params)
- numpy_res = forward(**params)
-
- if isinstance(numpy_res, tuple):
- numpy_res = list(numpy_res)
-
- if not isinstance(numpy_res, list):
- numpy_res = [numpy_res]
-
- if len(numpy_res) != out_len:
- raise ValueError("Forward function returned {} values, but "
- "the nnvm graph returns {} values"
- .format(len(numpy_res), out_len))
-
- for i in range(out_len):
- tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)
-
- if backward is not None:
- nothing_was_done = False
- debug_stage = "checking gradients"
- logging.debug(debug_stage)
-
- np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)]
-
- if out_len == 1:
- np_head_grads = np_head_grads[0]
-
- params = {'head_grads': np_head_grads}
- params.update(np_inputs_without_head_grads)
- params.update(additional_params)
- numpy_grads = backward(**params)
-
- if not isinstance(numpy_grads, dict):
- if isinstance(numpy_grads, tuple):
- numpy_grads = list(numpy_grads)
- if not isinstance(numpy_grads, list):
- numpy_grads = [numpy_grads]
- numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)}
- if len(numpy_grads) != len(grad_var_names):
- raise ValueError("The backward function returns a list of gradients which "
- "does not contain gradients for these variables: {}"
- .format(set(grad_var_names) - set(numpy_grads)))
-
- for x_name in numpy_grads:
- tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
- atol=atol, rtol=rtol)
-
- if numerical_grads:
- nothing_was_done = False
- debug_stage = "checking gradients numerically"
- logging.debug(debug_stage)
-
- forward_function = graph_to_function(forward_graph, target, ctx)
-
- # Since the result may be non-scalar, we have to put another operation on top,
- # so we just multiply by the randomly generated head_grads and then sum everything.
- # This way we can reuse the gradient values which have already been computed.
- def scalar_function(**kwargs):
- res = forward_function(**kwargs)
- return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel())
- for i in range(out_len)])
-
- if numerical_grads_params is None:
- numerical_grads_params = {}
-
- check_numerical_grads(
- scalar_function,
- input_values=np_inputs_without_head_grads,
- grad_values=nnvm_grads,
- **numerical_grads_params)
-
- except:
- if not quiet:
- print("\ncheck_function failed while {}, here is the main graph"
- .format(debug_stage))
- print(main_graph.ir(join_node_attrs=['shape', 'dtype']))
- if nnvm_res is not None:
- print("Generated inputs:")
- print(np_inputs)
- print()
- raise
-
- if nothing_was_done:
- logging.warning("Nothing was done in check_function. Check ctx_list().")
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Configuration about tests"""
-from __future__ import absolute_import as _abs
-
-import os
-import tvm
-
-def ctx_list():
- """Get context list for testcases"""
- device_list = os.environ.get("NNVM_TEST_TARGETS", "")
- device_list = (device_list.split(",") if device_list
- else ["llvm", "cuda"])
- device_list = set(device_list)
- res = [(device, tvm.context(device, 0)) for device in device_list]
- return [x for x in res if x[1].exist]
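A quick sketch of how this helper was driven from the environment (hedged: the import path assumes the pre-removal nnvm.testing package layout; the variable name comes straight from the code above):

    # hypothetical: restrict the test targets before calling ctx_list()
    import os
    os.environ["NNVM_TEST_TARGETS"] = "llvm"   # comma-separated, e.g. "llvm,cuda"

    from nnvm.testing.config import ctx_list
    for target, ctx in ctx_list():             # only targets whose device exists are kept
        print(target, ctx)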
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-Symbol of the generator of DCGAN
-
-Adopted from:
-https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py
-
-Reference:
-Radford, Alec, Luke Metz, and Soumith Chintala.
-"Unsupervised representation learning with deep convolutional generative adversarial networks."
-arXiv preprint arXiv:1511.06434 (2015).
-"""
-from .. import symbol as sym
-from . utils import create_workload
-
-def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)):
- """a deconv layer that enlarges the feature map"""
- target_shape = (oshape[-2], oshape[-1])
-
- pad_y = (kshape[0] - 1) // 2
- pad_x = (kshape[1] - 1) // 2
- adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0]
- adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1]
-
- net = sym.conv2d_transpose(data,
- kernel_size=kshape,
- strides=stride,
- channels=oshape[0],
- padding=(pad_y, pad_x),
- output_padding=(adj_y, adj_x),
- use_bias=False,
- name=name)
- return net
-
-def deconv2d_bn_relu(data, prefix, **kwargs):
- """a block of deconv + batch norm + relu"""
- eps = 1e-5 + 1e-12
- net = deconv2d(data, name="%s_deconv" % prefix, **kwargs)
- net = sym.batch_norm(net, epsilon=eps, name="%s_bn" % prefix)
- net = sym.relu(net, name="%s_act" % prefix)
- return net
-
-def get_symbol(oshape, ngf=128, code=None):
- """get symbol of dcgan generator"""
- assert oshape[-1] == 64, "Only support 64x64 image"
- assert oshape[-2] == 64, "Only support 64x64 image"
-
- code = sym.Variable("data") if code is None else code
- net = sym.dense(code, name="g1", units=4*4*ngf*8, use_bias=False)
- net = sym.relu(net)
- # 4 x 4
- net = sym.reshape(net, shape=(-1, ngf * 8, 4, 4))
- # 8 x 8
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2")
- # 16x16
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3")
- # 32x32
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4")
- # 64x64
- net = deconv2d(
- net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv")
- net = sym.tanh(net)
- return net
-
-
-def get_workload(batch_size, oshape=(3, 64, 64), ngf=128, random_len=100, dtype="float32"):
- """Get benchmark workload for a DCGAN generator
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
- oshape : tuple, optional
- The shape of output image, layout="CHW"
- ngf: int, optional
- The number of final feature maps in the generator
- random_len : int, optional
- The length of random input
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.symbol
- The computational graph
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(oshape=oshape, ngf=ngf)
- return create_workload(net, batch_size, (random_len, ), dtype)
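As a sanity check on the padding arithmetic in deconv2d above (a worked example, not part of the original file): for kshape=(4, 4), stride=(2, 2) and a target 8x8 output, pad_y = pad_x = (4 - 1) // 2 = 1 and adj_y = adj_x = (8 + 2*1 - 4) % 2 = 0, so conv2d_transpose is called with padding=(1, 1) and output_padding=(0, 0), which maps a 4x4 feature map to the desired 8x8 one ((4 - 1)*2 - 2*1 + 4 + 0 = 8).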
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-DenseNet, load model from gluon model zoo
-
-Reference:
-Huang, Gao, et al. "Densely Connected Convolutional Networks." CVPR 2017
-"""
-
-from .utils import create_workload
-from ..frontend.mxnet import _from_mxnet_impl
-
-def get_workload(batch_size, num_classes=1000, num_layers=121, dtype="float32"):
- """Get benchmark workload for mobilenet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- num_layers : int, optional
- The number of layers
-
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- import mxnet as mx
- from mxnet.gluon.model_zoo.vision import get_model
-
- image_shape = (1, 3, 224, 224)
-
- block = get_model('densenet%d' % num_layers, classes=num_classes, pretrained=False)
-
- data = mx.sym.Variable('data')
- sym = block(data)
- sym = mx.sym.SoftmaxOutput(sym)
-
- net = _from_mxnet_impl(sym, {})
-
- return create_workload(net, batch_size, image_shape[1:], dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Symbol of Nature DQN
-
-Reference:
-Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning."
-Nature 518.7540 (2015): 529.
-"""
-
-from .. import symbol as sym
-from . utils import create_workload
-
-def get_symbol(num_actions=18):
- """get symbol of nature dqn"""
- data = sym.Variable(name='data')
- net = sym.conv2d(data, kernel_size=(8, 8), strides=(4, 4), padding=(0, 0),
- channels=32, name='conv1')
- net = sym.relu(net, name='relu1')
- net = sym.conv2d(net, kernel_size=(4, 4), strides=(2, 2), padding=(0, 0),
- channels=64, name='conv2')
- net = sym.relu(net, name='relu2')
- net = sym.conv2d(net, kernel_size=(3, 3), strides=(1, 1), padding=(0, 0),
- channels=64, name='conv3')
- net = sym.relu(net, name='relu3')
- net = sym.flatten(net, name='flatten')
- net = sym.dense(net, units=512, name='fc4')
- net = sym.relu(net, name='relu4')
- net = sym.dense(net, units=num_actions, name='fc5')
-
- return net
-
-
-def get_workload(batch_size, num_actions=18, image_shape=(4, 84, 84), dtype="float32"):
- """Get benchmark workload for a Deep Q Network
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
- num_actions : int, optional
- Number of actions
- image_shape : tuple, optional
- The input image shape
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.symbol
- The computational graph
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_actions=num_actions)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
- Inception V3, suitable for input images of around 299 x 299
-
-Reference:
-Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision."
-arXiv preprint arXiv:1512.00567 (2015).
-
-Adopted from https://github.com/apache/incubator-mxnet/blob/
- master/example/image-classification/symbols/inception-v3.py
-"""
-# pylint: disable=invalid-name,missing-docstring,unused-argument
-from .. import symbol as sym
-from .utils import create_workload
-
-def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''):
- conv = sym.conv2d(data=data, channels=num_filter, kernel_size=kernel,
- strides=stride, padding=pad, use_bias=False,
- name='%s%s_conv2d' % (name, suffix))
- bn = sym.batch_norm(data=conv, name='%s%s_batchnorm' % (name, suffix), epsilon=2e-5)
- act = sym.relu(data=bn, name='%s%s_relu' % (name, suffix))
- return act
-
-def Pooling(data, kernel, stride, pad, pool_type, name):
- if pool_type == 'max':
- return sym.max_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name)
- if pool_type == 'avg':
- return sym.avg_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name,
- count_include_pad=True)
- raise ValueError("Invalid pooling type: " + pool_type)
-
-def Inception7A(data,
- num_1x1,
- num_3x3_red, num_3x3_1, num_3x3_2,
- num_5x5_red, num_5x5,
- pool, proj,
- name):
- tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name))
- tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv')
- tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name),
- suffix='_conv_1')
- tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name),
- suffix='_conv_1')
- tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name),
- suffix='_conv_2')
- pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
- name=('%s_pool_%s_pool' % (pool, name)))
-
- cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv')
- concat = sym.concatenate(*[tower_1x1, tower_5x5, tower_3x3, cproj],
- name='ch_concat_%s_chconcat' % name)
- return concat
-
-# First Downsample
-def Inception7B(data,
- num_3x3,
- num_d3x3_red, num_d3x3_1, num_d3x3_2,
- pool,
- name):
- tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
- name=('%s_conv' % name))
- tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv')
- tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
- name=('%s_tower' % name), suffix='_conv_1')
- tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
- name=('%s_tower' % name), suffix='_conv_2')
- pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0, 0), pool_type="max",
- name=('max_pool_%s_pool' % name))
- concat = sym.concatenate(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7C(data,
- num_1x1,
- num_d7_red, num_d7_1, num_d7_2,
- num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4,
- pool, proj,
- name):
- tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
- tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv')
- tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3),
- name=('%s_tower' % name), suffix='_conv_1')
- tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0),
- name=('%s_tower' % name), suffix='_conv_2')
- tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0),
- name=('%s_tower_1' % name), suffix='_conv_1')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3),
- name=('%s_tower_1' % name), suffix='_conv_2')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0),
- name=('%s_tower_1' % name), suffix='_conv_3')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3),
- name=('%s_tower_1' % name), suffix='_conv_4')
- pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
- name=('%s_pool_%s_pool' % (pool, name)))
- cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1),
- name=('%s_tower_2' % name), suffix='_conv')
- # concat
- concat = sym.concatenate(*[tower_1x1, tower_d7, tower_q7, cproj],
- name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7D(data,
- num_3x3_red, num_3x3,
- num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3,
- pool,
- name):
- tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name),
- suffix='_conv')
- tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
- name=('%s_tower' % name), suffix='_conv_1')
- tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name),
- suffix='_conv')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3),
- name=('%s_tower_1' % name), suffix='_conv_1')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0),
- name=('%s_tower_1' % name), suffix='_conv_2')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2),
- name=('%s_tower_1' % name), suffix='_conv_3')
- pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, pad=(0, 0),
- name=('%s_pool_%s_pool' % (pool, name)))
- # concat
- concat = sym.concatenate(*[tower_3x3, tower_d7_3x3, pooling],
- name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7E(data,
- num_1x1,
- num_d3_red, num_d3_1, num_d3_2,
- num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2,
- pool, proj,
- name):
- tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
- tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv')
- tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1),
- name=('%s_tower' % name), suffix='_mixed_conv')
- tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0),
- name=('%s_tower' % name), suffix='_mixed_conv_1')
- tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name),
- suffix='_conv')
- tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1),
- name=('%s_tower_1' % name), suffix='_conv_1')
- tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1),
- name=('%s_tower_1' % name), suffix='_mixed_conv')
- tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0),
- name=('%s_tower_1' % name), suffix='_mixed_conv_1')
- pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
- name=('%s_pool_%s_pool' % (pool, name)))
- cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name),
- suffix='_conv')
- # concat
- concat = sym.concatenate(
- *[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj],
- name='ch_concat_%s_chconcat' % name)
- return concat
-
-
-def get_symbol(num_classes=1000, **kwargs):
- data = sym.Variable(name="data")
- # stage 1
- conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
- conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
- conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
- pool = Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0),
- name="pool")
- # stage 2
- conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
- conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
- pool1 = Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0),
- name="pool1")
-
- # stage 3
- in3a = Inception7A(pool1, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 32, "mixed")
- in3b = Inception7A(in3a, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 64, "mixed_1")
- in3c = Inception7A(in3b, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 64, "mixed_2")
- in3d = Inception7B(in3c, 384,
- 64, 96, 96,
- "max", "mixed_3")
- # stage 4
- in4a = Inception7C(in3d, 192,
- 128, 128, 192,
- 128, 128, 128, 128, 192,
- "avg", 192, "mixed_4")
- in4b = Inception7C(in4a, 192,
- 160, 160, 192,
- 160, 160, 160, 160, 192,
- "avg", 192, "mixed_5")
- in4c = Inception7C(in4b, 192,
- 160, 160, 192,
- 160, 160, 160, 160, 192,
- "avg", 192, "mixed_6")
- in4d = Inception7C(in4c, 192,
- 192, 192, 192,
- 192, 192, 192, 192, 192,
- "avg", 192, "mixed_7")
- in4e = Inception7D(in4d, 192, 320,
- 192, 192, 192, 192,
- "max", "mixed_8")
- # stage 5
- in5a = Inception7E(in4e, 320,
- 384, 384, 384,
- 448, 384, 384, 384,
- "avg", 192, "mixed_9")
- in5b = Inception7E(in5a, 320,
- 384, 384, 384,
- 448, 384, 384, 384,
- "max", 192, "mixed_10")
- # pool
- pool = Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", pad=(0, 0),
- name="global_pool")
- flatten = sym.flatten(data=pool, name="flatten")
- fc1 = sym.dense(data=flatten, units=num_classes, name='fc1')
- softmax = sym.softmax(data=fc1, name='softmax')
- return softmax
-
-def get_workload(batch_size=1, num_classes=1000,
- image_shape=(3, 299, 299), dtype="float32", **kwargs):
- """Get benchmark workload for InceptionV3
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Initializer of parameters."""
-import numpy as np
-
-class Initializer(object):
- """The base class of an initializer."""
- def __init__(self, **kwargs):
- self._kwargs = kwargs
-
- def __call__(self, desc, arr):
- """Initialize an array
-
- Parameters
- ----------
- desc : str
- Initialization pattern descriptor.
-
- arr : NDArray
- The array to be initialized.
- """
- if desc.endswith('weight'):
- self._init_weight(desc, arr)
- elif desc.endswith('bias'):
- self._init_bias(desc, arr)
- elif desc.endswith('gamma'):
- self._init_gamma(desc, arr)
- elif desc.endswith('beta'):
- self._init_beta(desc, arr)
- elif desc.endswith('mean'):
- self._init_mean(desc, arr)
- elif desc.endswith('var'):
- self._init_var(desc, arr)
- else:
- self._init_default(desc, arr)
-
- def _init_bias(self, _, arr):
- arr[:] = 0.0
-
- def _init_gamma(self, _, arr):
- arr[:] = 1.0
-
- def _init_beta(self, _, arr):
- arr[:] = 0.0
-
- def _init_mean(self, _, arr):
- arr[:] = 0.0
-
- def _init_var(self, _, arr):
- arr[:] = 1.0
-
- def _init_weight(self, name, arr):
- """Abstract method to Initialize weight."""
- raise NotImplementedError("Must override it")
-
- def _init_default(self, name, _):
- raise ValueError(
- 'Unknown initialization pattern for %s. ' \
- 'Default initialization is now limited to '\
- '"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \
- 'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
-
-
-class Xavier(Initializer):
- """ "Xavier" initialization for weights
-
- Parameters
- ----------
- rnd_type: str, optional
- Random generator type, can be ``'gaussian'`` or ``'uniform'``.
-
- factor_type: str, optional
- Can be ``'avg'``, ``'in'``, or ``'out'``.
-
- magnitude: float, optional
- Scale of random number.
- """
- def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
- super(Xavier, self).__init__(rnd_type=rnd_type,
- factor_type=factor_type,
- magnitude=magnitude)
- self.rnd_type = rnd_type
- self.factor_type = factor_type
- self.magnitude = float(magnitude)
-
- def _init_weight(self, name, arr):
- shape = arr.shape
- hw_scale = 1.
- if len(shape) < 2:
- raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at'
- ' least 2D.'.format(name))
- if len(shape) > 2:
- hw_scale = np.prod(shape[2:])
- fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
- factor = 1.
- if self.factor_type == "avg":
- factor = (fan_in + fan_out) / 2.0
- elif self.factor_type == "in":
- factor = fan_in
- elif self.factor_type == "out":
- factor = fan_out
- else:
- raise ValueError("Incorrect factor type")
- # Hack for mobilenet, because there is less connectivity
- if "depthwise" in name:
- factor = 3 * 3
- scale = np.sqrt(self.magnitude / factor)
- if self.rnd_type == "uniform":
- arr[:] = np.random.uniform(-scale, scale, size=arr.shape)
- else:
- raise ValueError("Unknown random type")
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-a simple multilayer perceptron
-"""
-from .. import symbol as sym
-from . utils import create_workload
-
-def get_symbol(num_classes=1000):
- data = sym.Variable('data')
- data = sym.flatten(data=data)
- fc1 = sym.dense(data=data, name='fc1', units=128)
- act1 = sym.relu(data=fc1, name='relu1')
- fc2 = sym.dense(data=act1, name='fc2', units=64)
- act2 = sym.relu(data=fc2, name='relu2')
- fc3 = sym.dense(data=act2, name='fc3', units=num_classes)
- mlp = sym.softmax(data=fc3, name='softmax')
- return mlp
-
-def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"):
- """Get benchmark workload for a simple multilayer perceptron
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Helper utility to get mobilenet workload for testing."""
-# pylint: disable=invalid-name
-from __future__ import absolute_import as _abs
-
-from .. import symbol as sym
-from . utils import create_workload
-
-def conv_block(data, name, channels,
- kernel_size=(3, 3), strides=(1, 1), padding=(1, 1),
- epsilon=1e-5):
- """Helper function to construct conv-bn-relu"""
- # convolution + bn + relu
- conv = sym.conv2d(data=data, channels=channels,
- kernel_size=kernel_size, strides=strides,
- padding=padding, use_bias=False,
- layout="NCHW", name=name + "_conv")
- bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + "_bn")
- act = sym.relu(data=bn, name=name + "_relu")
- return act
-
-def separable_conv_block(data, name, depthwise_channels,
- pointwise_channels, kernel_size=(3, 3),
- downsample=False, padding=(1, 1),
- epsilon=1e-5):
- """Helper function to get a separable conv block"""
- if downsample:
- strides = (2, 2)
- else:
- strides = (1, 1)
- # depthwise convolution + bn + relu
- conv1 = sym.conv2d(data=data, channels=depthwise_channels,
- groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
- padding=padding, use_bias=False, layout="NCHW",
- name=name + "_depthwise_conv1")
- bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
- act1 = sym.relu(data=bn1, name=name + "_relu1")
- # pointwise convolution + bn + relu
- conv2 = sym.conv2d(data=act1, channels=pointwise_channels, kernel_size=(1, 1), strides=(1, 1),
- padding=(0, 0), use_bias=False, layout="NCHW", name=name + "_conv2")
- bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + "_bn2")
- act2 = sym.relu(data=bn2, name=name + "_relu2")
- return act2
-
-def mobile_net(num_classes=1000, alpha=1.0, is_shallow=False):
- """Function to construct a MobileNet"""
- data = sym.Variable("data")
- body = conv_block(data, "conv_block_1", int(32*alpha), strides=(2, 2))
- body = separable_conv_block(body, "separable_conv_block_1",
- int(32*alpha), int(64*alpha))
- body = separable_conv_block(body, "separable_conv_block_2",
- int(64*alpha), int(128*alpha), downsample=True)
- body = separable_conv_block(body, "separable_conv_block_3",
- int(128*alpha), int(128*alpha))
- body = separable_conv_block(body, "separable_conv_block_4",
- int(128*alpha), int(256*alpha), downsample=True)
- body = separable_conv_block(body, "separable_conv_block_5",
- int(256*alpha), int(256*alpha))
- body = separable_conv_block(body, "separable_conv_block_6",
- int(256*alpha), int(512*alpha), downsample=True)
- if is_shallow:
- body = separable_conv_block(body, "separable_conv_block_7",
- int(512*alpha), int(1024*alpha), downsample=True)
- body = separable_conv_block(body, "separable_conv_block_8",
- int(1024*alpha), int(1024*alpha))
- else:
- for i in range(7, 12):
- body = separable_conv_block(body, "separable_conv_block_%d" % i,
- int(512*alpha), int(512*alpha))
- body = separable_conv_block(body, "separable_conv_block_12",
- int(512*alpha), int(1024*alpha), downsample=True)
- body = separable_conv_block(body, "separable_conv_block_13",
- int(1024*alpha), int(1024*alpha))
- pool = sym.global_avg_pool2d(data=body, name="pool")
- flatten = sym.flatten(data=pool, name="flatten")
- fc = sym.dense(data=flatten, units=num_classes, use_bias=False, name="fc")
- softmax = sym.softmax(data=fc, name="softmax")
- return softmax
-
-
-def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"):
- """Get benchmark workload for mobilenet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = mobile_net(num_classes=num_classes, alpha=1.0, is_shallow=False)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-MobileNetV2, load model from gluon model zoo
-
-Reference:
-Inverted Residuals and Linear Bottlenecks:
-Mobile Networks for Classification, Detection and Segmentation
-https://arxiv.org/abs/1801.04381
-"""
-
-from .utils import create_workload
-from ..frontend.mxnet import _from_mxnet_impl
-
-def get_workload(batch_size, num_classes=1000, multiplier=1.0, dtype="float32"):
- """Get benchmark workload for mobilenet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- multiplier : float, optional
- The channel width multiplier of the MobileNetV2 model
-
- dtype : str, optional
- The data type
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- import mxnet as mx
- from mxnet.gluon.model_zoo.vision.mobilenet import MobileNetV2
-
- image_shape = (1, 3, 224, 224)
-
- block = MobileNetV2(multiplier=multiplier, classes=num_classes)
-
- data = mx.sym.Variable('data')
- sym = block(data)
- sym = mx.sym.SoftmaxOutput(sym)
-
- net = _from_mxnet_impl(sym, {})
-
- return create_workload(net, batch_size, image_shape[1:], dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implemented the following paper:
-
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
-'''
-# pylint: disable=unused-argument
-from .. import symbol as sym
-from . utils import create_workload
-
-def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True):
- """Return ResNet Unit symbol for building ResNet
- Parameters
- ----------
- data : str
- Input data
- num_filter : int
- Number of output channels
- bnf : int
- Bottle neck channels factor with regard to num_filter
- stride : tuple
- Stride used in convolution
-    dim_match : bool
-        True if the number of channels of the input and output are the same,
-        otherwise they differ
- name : str
- Base name of the operators
- """
- if bottle_neck:
- bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1')
- act1 = sym.relu(data=bn1, name=name + '_relu1')
- conv1 = sym.conv2d(
- data=act1, channels=int(num_filter*0.25), kernel_size=(1, 1),
- strides=stride, padding=(0, 0), use_bias=False, name=name + '_conv1')
- bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2')
- act2 = sym.relu(data=bn2, name=name + '_relu2')
- conv2 = sym.conv2d(
- data=act2, channels=int(num_filter*0.25), kernel_size=(3, 3),
- strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2')
- bn3 = sym.batch_norm(data=conv2, epsilon=2e-5, name=name + '_bn3')
- act3 = sym.relu(data=bn3, name=name + '_relu3')
- conv3 = sym.conv2d(
- data=act3, channels=num_filter, kernel_size=(1, 1),
- strides=(1, 1), padding=(0, 0), use_bias=False, name=name + '_conv3')
- if dim_match:
- shortcut = data
- else:
- shortcut = sym.conv2d(
- data=act1, channels=num_filter, kernel_size=(1, 1),
- strides=stride, use_bias=False, name=name+'_sc')
- return sym.elemwise_add(conv3, shortcut)
- else:
- bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1')
- act1 = sym.relu(data=bn1, name=name + '_relu1')
- conv1 = sym.conv2d(
- data=act1, channels=num_filter, kernel_size=(3, 3),
- strides=stride, padding=(1, 1), use_bias=False, name=name + '_conv1')
- bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2')
- act2 = sym.relu(data=bn2, name=name + '_relu2')
- conv2 = sym.conv2d(
- data=act2, channels=num_filter, kernel_size=(3, 3),
- strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2')
- if dim_match:
- shortcut = data
- else:
- shortcut = sym.conv2d(
- data=act1, channels=num_filter, kernel_size=(1, 1),
- strides=stride, use_bias=False, name=name+'_sc')
- return sym.elemwise_add(conv2, shortcut)
-
-def resnet(units, num_stages, filter_list, num_classes, image_shape,
- bottle_neck=True):
- """Return ResNet symbol of
- Parameters
- ----------
- units : list
- Number of units in each stage
- num_stages : int
- Number of stage
- filter_list : list
- Channel size of each stage
- num_classes : int
- Ouput size of symbol
- dataset : str
- Dataset type, only cifar10 and imagenet supports
- """
- num_unit = len(units)
- assert num_unit == num_stages
- data = sym.Variable(name='data')
- data = sym.batch_norm(data=data, epsilon=2e-5, scale=False, name='bn_data')
- (_, height, _) = image_shape
- if height <= 32: # such as cifar10
- body = sym.conv2d(
- data=data, channels=filter_list[0], kernel_size=(3, 3),
- strides=(1, 1), padding=(1, 1), use_bias=False, name="conv0")
- else: # often expected to be 224 such as imagenet
- body = sym.conv2d(
- data=data, channels=filter_list[0], kernel_size=(7, 7),
- strides=(2, 2), padding=(3, 3), use_bias=False, name="conv0")
- body = sym.batch_norm(data=body, epsilon=2e-5, name='bn0')
- body = sym.relu(data=body, name='relu0')
- body = sym.max_pool2d(data=body, pool_size=(3, 3), strides=(2, 2), padding=(1, 1))
-
- for i in range(num_stages):
- body = residual_unit(
- body, filter_list[i+1], (1 if i == 0 else 2, 1 if i == 0 else 2),
- False, name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck)
- for j in range(units[i]-1):
- body = residual_unit(
- body, filter_list[i+1], (1, 1), True,
- name='stage%d_unit%d' % (i + 1, j + 2), bottle_neck=bottle_neck)
- bn1 = sym.batch_norm(data=body, epsilon=2e-5, name='bn1')
- relu1 = sym.relu(data=bn1, name='relu1')
- # Although kernel is not used here when global_pool=True, we should put one
- pool1 = sym.global_avg_pool2d(data=relu1, name='pool1')
- flat = sym.flatten(data=pool1)
- fc1 = sym.dense(data=flat, units=num_classes, name='fc1')
- return sym.softmax(data=fc1, name='softmax')
-
-def get_symbol(num_classes, num_layers=50, image_shape=(3, 224, 224), **kwargs):
- """
- Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
- Original author Wei Wu
- """
- (_, height, _) = image_shape
- if height <= 28:
- num_stages = 3
- if (num_layers-2) % 9 == 0 and num_layers >= 164:
- per_unit = [(num_layers-2)//9]
- filter_list = [16, 64, 128, 256]
- bottle_neck = True
- elif (num_layers-2) % 6 == 0 and num_layers < 164:
- per_unit = [(num_layers-2)//6]
- filter_list = [16, 16, 32, 64]
- bottle_neck = False
- else:
- raise ValueError("no experiments done on num_layers {}".format(num_layers))
- units = per_unit * num_stages
- else:
- if num_layers >= 50:
- filter_list = [64, 256, 512, 1024, 2048]
- bottle_neck = True
- else:
- filter_list = [64, 64, 128, 256, 512]
- bottle_neck = False
- num_stages = 4
- if num_layers == 18:
- units = [2, 2, 2, 2]
- elif num_layers == 34:
- units = [3, 4, 6, 3]
- elif num_layers == 50:
- units = [3, 4, 6, 3]
- elif num_layers == 101:
- units = [3, 4, 23, 3]
- elif num_layers == 152:
- units = [3, 8, 36, 3]
- elif num_layers == 200:
- units = [3, 24, 36, 3]
- elif num_layers == 269:
- units = [3, 30, 48, 8]
- else:
- raise ValueError("no experiments done on num_layers {}".format(num_layers))
-
- return resnet(units=units,
- num_stages=num_stages,
- filter_list=filter_list,
- num_classes=num_classes,
- image_shape=image_shape,
- bottle_neck=bottle_neck)
-
-def get_workload(batch_size=1, num_classes=1000, num_layers=18,
- image_shape=(3, 224, 224), dtype="float32", **kwargs):
- """Get benchmark workload for resnet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- num_layers : int, optional
- Number of layers
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, num_layers=num_layers,
- image_shape=image_shape, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=unused-argument
-
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-from .. import symbol as sym
-from . utils import create_workload
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
- net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
- left = _make_fire_conv(net, expand1x1_channels, 1, 0)
- right = _make_fire_conv(net, expand3x3_channels, 3, 1)
- # NOTE : Assume NCHW layout here
- net = sym.concatenate(left, right, axis=1)
-
- return net
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
- net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size),
- padding=(padding, padding))
- net = sym.relu(net)
- return net
-
-# Net
-def get_symbol(num_classes, version, **kwargs):
- """Get symbol of SqueezeNet
-
- Parameters
- ----------
- num_classes: int
- The number of classification results
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
- """
- assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}:"
- "1.0 or 1.1 expected".format(version=version))
- net = sym.Variable("data")
- if version == '1.0':
- net = sym.conv2d(net, channels=96, kernel_size=(7, 7), strides=(2, 2), padding=(3, 3))
- net = sym.relu(net)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 32, 128, 128)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 64, 256, 256)
- else:
- net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1))
- net = sym.relu(net)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 32, 128, 128)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = _make_fire(net, 64, 256, 256)
- net = sym.dropout(net, rate=0.5)
- net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
- net = sym.relu(net)
- net = sym.global_avg_pool2d(net)
- net = sym.flatten(net)
- return sym.softmax(net)
-
-def get_workload(batch_size=1, num_classes=1000, version='1.0',
- image_shape=(3, 224, 224), dtype="float32", **kwargs):
- """Get benchmark workload for SqueezeNet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, version=version, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Helper utility to create common workload for testing."""
-from __future__ import absolute_import as _abs
-
-import numpy as np
-import tvm
-from ..compiler import graph_util
-from .. import graph
-from . init import Xavier
-
-def create_workload(net, batch_size, image_shape=(3, 224, 224),
- dtype="float32", initializer=None, seed=0):
- """Helper function to create benchmark workload for input network
-
- Parameters
- ----------
- net : nnvm.Symbol
- The selected network symbol to use
-
- batch_size : int
- The batch size used in the model
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- initializer : Initializer
- The initializer used
-
- seed : int
- The seed used in initialization.
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- if image_shape is None:
- image_shape = (3, 224, 224)
- data_shape = (batch_size,) + image_shape
- params = {}
- g = graph.create(net)
- input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
- shape_dict = dict(zip(g.index.input_names, input_shapes))
- np.random.seed(seed)
- initializer = initializer if initializer else Xavier()
- for k, v in shape_dict.items():
- if k == "data":
- continue
- init_value = np.zeros(v).astype(dtype)
- initializer(k, init_value)
- params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0))
- return net, params
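
A minimal usage sketch of the helper above, assuming the legacy nnvm.testing layout these files lived in (the module path is illustrative and is exactly what this change removes):

    from nnvm.testing import resnet

    # Build an 18-layer ResNet symbol together with randomly initialized parameters.
    net, params = resnet.get_workload(batch_size=1, num_layers=18)
    print(type(net))    # nnvm.symbol.Symbol
    print(len(params))  # number of initialized weight/bias tensors
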
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""References:
-
-Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
-large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
-"""
-from .. import symbol as sym
-from . utils import create_workload
-
-def get_feature(internel_layer, layers, filters, batch_norm=False):
- """Get VGG feature body as stacks of convoltions."""
- for i, num in enumerate(layers):
- for j in range(num):
- internel_layer = sym.conv2d(
- data=internel_layer, kernel_size=(3, 3), padding=(1, 1),
- channels=filters[i], name="conv%s_%s"%(i + 1, j + 1))
- if batch_norm:
- internel_layer = sym.batch_norm(
- data=internel_layer, name="bn%s_%s" %(i + 1, j + 1))
- internel_layer = sym.relu(data=internel_layer, name="relu%s_%s" %(i + 1, j + 1))
- internel_layer = sym.max_pool2d(
- data=internel_layer, pool_size=(2, 2), strides=(2, 2), name="pool%s"%(i + 1))
- return internel_layer
-
-def get_classifier(input_data, num_classes):
- """Get VGG classifier layers as fc layers."""
- flatten = sym.flatten(data=input_data, name="flatten")
- fc6 = sym.dense(data=flatten, units=4096, name="fc6")
- relu6 = sym.relu(data=fc6, name="relu6")
- drop6 = sym.dropout(data=relu6, rate=0.5, name="drop6")
- fc7 = sym.dense(data=drop6, units=4096, name="fc7")
- relu7 = sym.relu(data=fc7, name="relu7")
- drop7 = sym.dropout(data=relu7, rate=0.5, name="drop7")
- fc8 = sym.dense(data=drop7, units=num_classes, name="fc8")
- return fc8
-
-def get_symbol(num_classes, num_layers=11, batch_norm=False):
- """
- Parameters
- ----------
- num_classes : int, default 1000
- Number of classification classes.
- num_layers : int
-        Number of layers for the variant of vgg. Options are 11, 13, 16, 19.
- batch_norm : bool, default False
- Use batch normalization.
- """
- vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
- 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
- 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
- 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])}
- if num_layers not in vgg_spec:
-        raise ValueError("Invalid num_layers {}. Choices are 11, 13, 16, 19.".format(num_layers))
- layers, filters = vgg_spec[num_layers]
- data = sym.Variable(name="data")
- feature = get_feature(data, layers, filters, batch_norm)
- classifier = get_classifier(feature, num_classes)
- symbol = sym.softmax(data=classifier, name='softmax')
- return symbol
-
-def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224),
- dtype="float32", **kwargs):
- """Get benchmark workload for VGG nets.
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
-        Number of classes
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, unused-argument
-"""Convert an NNVM graph to Relay."""
-import numpy
-
-from tvm import relay, nd
-from tvm.relay import op, expr, var
-from tvm.relay.frontend.common import StrAttrsDict
-from tvm.relay.frontend.nnvm_common import _rename, _binop_scalar, _rbinop_scalar, \
- _elemwise_sum, _softmax_op, _compare, _reduce
-from .symbol import Symbol
-from .compiler import graph_attr
-from .graph import create as graph_create
-
-def _nn_batch_flatten(children, attrs, odtype='float32'):
- assert len(children) == 1
- return op.nn.batch_flatten(children[0])
-
-
-def _dense(children, attrs, odtype='float32'):
- use_bias = attrs.get_bool('use_bias', True)
- units = attrs.get_int('units')
- dense = op.nn.dense(children[0], children[1], units=units)
- if use_bias:
- return op.nn.bias_add(dense, children[2])
- else:
- return dense
-
-def _conv2d(children, attrs, odtype='float32'):
- use_bias = attrs.get_bool('use_bias', True)
-
- if use_bias:
- data, weight, bias = children
- else:
- data, weight = children
-
- kernel_size = attrs.get_int_tuple('kernel_size')
- channels = attrs.get_int('channels')
- strides = attrs.get_int_tuple('strides', (1, 1))
- padding = attrs.get_int_tuple('padding', (0, 0))
- dilation = attrs.get_int_tuple('dilation', (1, 1))
- groups = attrs.get_int('groups', 1)
- data_layout = attrs.get_str('layout', 'NCHW')
- kernel_layout = attrs.get_str('kernel_layout', 'OIHW')
- out_layout = ''
- out_dtype = attrs.get_str('out_dtype', '')
-
- conv_out = op.nn.conv2d(
- data,
- weight,
- kernel_size=kernel_size,
- channels=channels,
- strides=strides,
- padding=padding,
- dilation=dilation,
- groups=groups,
- data_layout=data_layout,
- kernel_layout=kernel_layout,
- out_layout=out_layout,
- out_dtype=out_dtype)
-
- if use_bias:
- return op.nn.bias_add(conv_out, bias)
- else:
- return conv_out
-
-
-def _conv2d_transpose(children, attrs, odtype='float32'):
- use_bias = attrs.get_bool('use_bias', False)
-
- if use_bias:
- data, weight, bias = children
- else:
- data, weight = children
-
- strides = attrs.get_int_tuple('strides', (1, 1))
- padding = attrs.get_int_tuple('padding', (0, 0))
- dilation = attrs.get_int_tuple('dilation', (1, 1))
- groups = attrs.get_int('groups', 1)
- data_layout = attrs.get_str('layout', 'NCHW')
- kernel_layout = attrs.get_str('kernel_layout', 'OIHW')
- out_dtype = attrs.get_str('out_dtype', '')
-
- out_conv2d = op.nn.conv2d_transpose(
- data,
- weight,
- strides=strides,
- padding=padding,
- dilation=dilation,
- groups=groups,
- data_layout=data_layout,
- kernel_layout=kernel_layout,
- out_dtype=out_dtype)
-
- if use_bias:
- return op.nn.bias_add(out_conv2d, bias)
- else:
- return out_conv2d
-
-
-def _batch_norm(children, attrs, odtype='float32'):
-    data, gamma, beta, moving_mean, moving_var = children
- axis = attrs.get_int('axis', 1)
- epsilon = attrs.get_float('epsilon', 1e-05)
- center = attrs.get_bool('center', True)
- scale = attrs.get_bool('scale', True)
-
- return op.nn.batch_norm(
- data,
- gamma,
- beta,
- moving_mean,
-        moving_var,
- axis=axis,
- epsilon=epsilon,
- center=center,
- scale=scale)[0]
-
-
-def _max_pool2d(children, attrs, odtype='float32'):
- assert len(children) == 1
- data = children[0]
- pool_size = attrs.get_int_tuple('pool_size', (1, 1))
- strides = attrs.get_int_tuple('strides', (1, 1))
- padding = attrs.get_int_tuple('padding', (0, 0))
- layout = attrs.get_str('layout', 'NCHW')
- ceil_mode = attrs.get_bool('ceil_mode', False)
-
- return op.nn.max_pool2d(
- data,
- pool_size=pool_size,
- strides=strides,
- padding=padding,
- layout=layout,
- ceil_mode=ceil_mode)
-
-
-def _reshape(children, attrs, odtype='float32'):
- data = children[0]
- shape = attrs.get_int_list('shape')
- return op.reshape(data, shape)
-
-
-def _transpose(children, attrs, odtype='float32'):
- axes = attrs.get_int_list('axes', None)
- return op.transpose(children[0], axes=axes)
-
-
-def _clip(children, attrs, odtype='float32'):
- a_min = attrs.get_float('a_min')
- a_max = attrs.get_float('a_max')
- return op.clip(children[0], a_min, a_max)
-
-
-def _cast(children, attrs, odtype='float32'):
- data = children[0]
- dtype = attrs.get_str('dtype')
- return data.astype(dtype)
-
-
-def _expand_dims(children, attrs, odtype='float32'):
- data = children[0]
- axis = attrs.get_int('axis')
- num_newaxis = attrs.get_int('num_newaxis', 1)
- return op.transform.expand_dims(data, axis, num_newaxis=num_newaxis)
-
-
-def broadcast_to(children, attrs, odtype='float32'):
- # TODO(@jroesch) export broadcast to?
- data = children[0]
- shape = attrs.get_int_tuple('shape')
- array = numpy.zeros(shape).astype(odtype)
- rconst = relay.Constant(nd.array(array))
- return op.broadcast_to_like(data, rconst)
-
-
-def _global_avg_pool2d(children, attrs, odtype='float32'):
- data = children[0]
- layout = attrs.get_str('layout', "NCHW")
- return op.nn.global_avg_pool2d(data, layout)
-
-
-def _avg_pool2d(children, attrs, odtype='float32'):
- data = children[0]
- pool_size = attrs.get_int_tuple('pool_size', (1, 1))
- strides = attrs.get_int_tuple('strides', (1, 1))
- padding = attrs.get_int_tuple('padding', (0, 0))
- layout = attrs.get_str('layout', "NCHW")
- ceil_mode = attrs.get_bool('ceil_mode', False)
-    count_include_pad = attrs.get_bool('count_include_pad', False)
- return op.nn.avg_pool2d(
- data,
- pool_size=pool_size,
- strides=strides,
- padding=padding,
- layout=layout,
- ceil_mode=ceil_mode,
- count_include_pad=count_include_pad)
-
-
-def _upsampling(children, attrs, odtype='float32'):
- scale = attrs.get_int('scale')
- layout = attrs.get_str('layout', 'NCHW')
- method = attrs.get_str('method', 'NEAREST_NEIGHBOR')
- return op.nn.upsampling(
- children[0],
- scale_h=scale,
- scale_w=scale,
- layout=layout,
- method=method)
-
-
-def _pad(children, attrs, odtype='float32'):
- pad_value = attrs.get_float('pad_value', 0.0)
- pad_width = attrs.get_tuple_tuple_int('pad_width')
- return op.nn.pad(children[0], pad_width, pad_value=pad_value)
-
-def _leaky_relu(children, attrs, odtype='float32'):
- alpha = attrs.get_float('alpha')
- return op.nn.leaky_relu(children[0], alpha)
-
-
-def _full_like(children, attrs, odtype='float32'):
- fill_value = relay.const(attrs.get_float('fill_value'), dtype='float32')
- return op.full_like(children[0], fill_value)
-
-
-def _strided_slice(children, attrs, odtype='float32'):
- begin = attrs.get_int_list('begin')
- end = attrs.get_int_list('end')
- strides = attrs.get_int_list('stride', None)
- return op.strided_slice(children[0], begin, end, strides=strides)
-
-
-def _split(children, attrs, odtype='float32'):
- indices_or_sections = None
- try:
- indices_or_sections = attrs.get_int('indices_or_sections', None)
- except ValueError:
- indices_or_sections = indices_or_sections or attrs.get_int_tuple(
- 'indices_or_sections')
-
- axis = attrs.get_int('axis', 0)
-
- return op.split(children[0], indices_or_sections, axis)
-
-def _squeeze(children, attrs, odtype='float32'):
- axis = attrs.get_int_tuple('axis', None)
- axis = [axis] if isinstance(axis, int) else axis
-
- return op.squeeze(children[0], axis)
-
-def _concatenate(children, attrs, odtype='float32'):
- axis = attrs.get_int('axis', 1)
- return op.concatenate(children, axis)
-
-def _dropout(children, attrs, odtype='float32'):
- rate = attrs.get_float('rate', 0.5)
- return op.nn.dropout(children[0], rate)
-
-def _mean(children, attrs, odtype='float32'):
- axis = attrs.get_int_tuple('axis', None)
- keepdims = attrs.get_bool('keepdims')
-
- return op.mean(children[0], axis, keepdims)
-
-
-def _prelu(children, attrs, odtype='float32'):
- axis = attrs.get_int('axis', 1)
- return op.nn.prelu(children[0], children[1], axis)
-
-
-def _lrn(children, attrs, odtype='float32'):
- size = attrs.get_int("size", 5)
- axis = attrs.get_int("axis", 1)
- bias = attrs.get_float("bias", 2)
- alpha = attrs.get_float("alpha", 1e-05)
- beta = attrs.get_float("beta", 0.75)
- return op.nn.lrn(children[0], size, axis, bias, alpha, beta)
-
-
-def _l2_normalize(children, attrs, odtype='float32'):
- eps = attrs.get_float('eps')
- axis = attrs.get_int_tuple('axis', None)
- return op.nn.l2_normalize(children[0], eps, axis)
-
-
-def _take(children, attrs, odtype='float32'):
- axis = attrs.get_int('axis', None)
- return op.take(children[0], children[1], axis)
-
-
-def _matmul(children, attrs, odtype='float32'):
- input_1_t = op.transpose(children[1], axes=(1, 0))
- return op.nn.dense(children[0], input_1_t)
-
-
-def _collapse_sum(children, attrs, odtype='float32'):
- for key in ["axis", "keepdims", "exclude"]:
- if key in attrs.attrs:
- raise NotImplementedError("Parameter '" + key + "' is not supported.")
- return op.collapse_sum_like(children[0], children[1])
-
-
-def _not_implemented(new_op):
- def _impl(children, attrs, odtype='float32'):
- raise NotImplementedError(str(new_op) + " is not implemented.")
- return _impl
-
-
-NNVM_OP_2_RELAY_OP = {
- 'flatten': _nn_batch_flatten,
- 'dense': _dense,
- 'softmax': _softmax_op(op.nn.softmax),
- 'log_softmax': _softmax_op(op.nn.log_softmax),
- 'conv2d': _conv2d,
- 'batch_norm': _batch_norm,
- 'max_pool2d': _max_pool2d,
- 'reshape': _reshape,
- 'transpose': _transpose,
- 'dropout': _dropout,
- 'mean': _mean,
- # Addition
- '__add_scalar__': _binop_scalar(op.add),
- 'broadcast_add' : _rename(op.add),
- 'elemwise_add' : _rename(op.add),
- # Subtraction
- '__sub_scalar__' : _binop_scalar(op.subtract),
- '__rsub_scalar__': _rbinop_scalar(op.subtract),
- 'broadcast_sub' : _rename(op.subtract),
- 'elemwise_sub' : _rename(op.subtract),
- # Multiply
- '__mul_scalar__': _binop_scalar(op.multiply),
- 'broadcast_mul' : _rename(op.multiply),
- 'elemwise_mul' : _rename(op.multiply),
- # Division
- '__div_scalar__': _binop_scalar(op.divide),
- 'broadcast_div' : _rename(op.divide),
- 'elemwise_div' : _rename(op.divide),
- 'broadcast_mod' : _rename(op.mod),
- # Negative
- 'negative': _rename("negative"),
- # Power
- '__pow_scalar__': _binop_scalar(op.power),
- '__rpow_scalar__': _rbinop_scalar(op.power),
- 'broadcast_pow': _rename(op.power),
- # Sum
- 'sum': _reduce(op.sum),
- 'elemwise_sum': _elemwise_sum,
- 'collapse_sum': _collapse_sum,
- 'broadcast_max': _rename(op.maximum),
- 'broadcast_min': _rename(op.minimum),
-
-    # Comparison
- 'greater': _compare(op.greater),
- 'broadcast_greater': _compare(op.greater),
- 'greater_equal': _compare(op.greater_equal),
- 'broadcast_greater_equal': _compare(op.greater_equal),
- 'less': _compare(op.less),
- 'broadcast_less': _compare(op.less),
- 'less_equal': _compare(op.less_equal),
- 'broadcast_less_equal': _compare(op.less_equal),
- 'broadcast_equal': _compare(op.equal),
- 'broadcast_not_equal': _compare(op.not_equal),
-
- # Activations
- 'sigmoid': _rename('sigmoid'),
- 'relu': _rename('nn.relu'),
- 'exp': _rename('exp'),
- 'log': _rename('log'),
- 'tanh': _rename('tanh'),
- 'leaky_relu': _leaky_relu,
- 'prelu': _prelu,
- 'clip': _clip,
- 'round': _rename('round'),
- 'cast': _cast,
- 'expand_dims': _expand_dims,
- 'broadcast_to': broadcast_to,
- '__lshift_scalar__': _binop_scalar(op.left_shift),
- '__rshift_scalar__': _binop_scalar(op.right_shift),
- 'broadcast_left_shift': _rename(op.left_shift),
- 'broadcast_right_shift': _rename(op.right_shift),
- 'copy': _rename(op.copy),
- 'global_avg_pool2d': _global_avg_pool2d,
- 'avg_pool2d': _avg_pool2d,
- 'conv2d_transpose': _conv2d_transpose,
- 'upsampling': _upsampling,
- 'pad': _pad,
- 'full_like': _full_like,
- 'strided_slice': _strided_slice,
- 'split': _split,
- 'squeeze': _squeeze,
- 'concatenate': _concatenate,
- 'abs': _rename(op.abs),
- 'ceil': _rename(op.ceil),
- 'floor': _rename(op.floor),
- 'trunc': _rename(op.trunc),
- 'take': _take,
- 'lrn': _lrn,
-    'l2_normalize': _l2_normalize,
- 'matmul': _matmul,
- 'zeros_like': _rename(op.zeros_like),
- 'reshape_like': _rename(op.reshape_like),
- 'ones_like': _rename(op.ones_like),
-
- 'expand_like': _not_implemented("expand_like"),
- 'gather_nd': _not_implemented("gather_nd"),
- 'block_grad': _not_implemented("block_grad"),
-}
-
-
-def to_relay(graph, shape_dict, dtype_dict, params):
- """Convert an NNVM graph into the corresponding Relay expression.
-
- Parameters
- ----------
- graph : Graph
- The input graph.
-
- shape_dict : dict of str to shape
- The input shape.
-
- dtype_dict : dict of str to str/dtype
-        The input dtypes.
-
- params : dict of str to array
- The parameters.
-
- Returns
- -------
- (expr, params) : Tuple[relay.Expr, dict of str to array]
- The corresponding Relay expression and parameters.
- """
- if isinstance(graph, Symbol):
- graph = graph_create(graph)
-
- param_shapes = dict((k, params[k].shape) for k in params)
- shape_dict = shape_dict.copy()
- shape_dict.update(param_shapes)
- graph = graph_attr.set_shape_inputs(graph, shape_dict)
- graph = graph_attr.set_dtype_inputs(graph, dtype_dict)
- graph = graph.apply(["InferShape", "InferType"])
- shape = graph.json_attr("shape")
- dtype = [graph_attr.TCODE_TO_DTYPE[di] for di in graph.json_attr("dtype")]
-
- gidx = graph.index
- relay_map = {}
- fn_params = []
-
- for nid, node in enumerate(gidx.nodes):
- children = []
- for i in node['inputs']:
- child = relay_map[i[0]]
- if isinstance(child, expr.TupleWrapper):
- children.append(child[i[1]])
- else:
- children.append(child)
-
- oshape = shape[gidx.entry_id(nid, 0)]
- odtype = dtype[gidx.entry_id(nid, 0)]
- attrs = node.get("attrs", {})
- node_name = node["name"]
- op_name = node["op"]
-
- if op_name == "null":
- v = var(node_name, shape=oshape, dtype=odtype)
- fn_params.append(v)
- relay_map[nid] = v
- else:
- if op_name in NNVM_OP_2_RELAY_OP:
- str_attrs = StrAttrsDict(attrs)
- call = NNVM_OP_2_RELAY_OP[op_name](children, str_attrs, odtype)
- relay_map[nid] = call
- else:
- raise Exception(
- "nnvm.to_relay: unsupported operator: {0}".format(op_name))
-
- outputs = []
- for nid, idx, _ in gidx.output_entries:
- output = relay_map[nid]
- if isinstance(output, expr.TupleWrapper):
- outputs.append(output[idx])
- else:
- outputs.append(output)
-
- if len(outputs) == 1:
- body = outputs[0]
- else:
- body = expr.Tuple(outputs)
-
- func = relay.Function(fn_params, body)
- return func, params
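
A minimal sketch of how the converter above was invoked, assuming the legacy nnvm package is still importable (the symbol, shapes and module path here are illustrative only):

    import nnvm.symbol as sym
    from nnvm.to_relay import to_relay

    # A tiny two-operator NNVM graph: dense followed by relu.
    data = sym.Variable("data")
    net = sym.relu(sym.dense(data=data, units=10, use_bias=False, name="fc"))

    # Convert to a Relay function; real weights would normally come from create_workload.
    func, params = to_relay(net, {"data": (1, 8)}, {"data": "float32"}, params={})
    print(func)
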
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Tensor operator property registry
-
-Provide information to lower and schedule tensor operators.
-"""
-from .attr_dict import AttrDict
-from . import tensor
-from . import nn
-from . import transform
-from . import reduction
-from . import vision
-from . import image
-
-from .registry import OpPattern
-from .registry import register_compute, register_schedule, register_pattern
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Attr dictionary object used by schedule functions"""
-import tvm
-
-_dict_get = tvm.get_global_func("nnvm.compiler._dict_get")
-_dict_size = tvm.get_global_func("nnvm.compiler._dict_size")
-_dict_keys = tvm.get_global_func("nnvm.compiler._dict_keys")
-
-class AttrDict(object):
- """Attribute dictionary in nnvm.
-
- Used by python registration of compute and schedule function.
- AttrDict is passed as the first argument to schedule and compute function.
- """
- _tvm_tcode = 18
-
- def __init__(self, handle):
- self.handle = handle
-
- def __del__(self):
- tvm.nd.free_extension_handle(self.handle, 18)
-
- @property
- def _tvm_handle(self):
- return self.handle.value
-
- def __getitem__(self, key):
- return _dict_get(self, key)
-
- def keys(self):
- """Get list of keys in the dict.
-
- Returns
- -------
- keys : list of str
- List of keys
- """
- return [x.value for x in _dict_keys(self)]
-
- def get_int_tuple(self, key):
- """Get tuple of integer from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- tuple : tuple of int
- The result tuple
- """
- return tuple(int(x) for x in self[key][1:-1].split(",") if x)
-
- def get_int_pair_tuple(self, key):
- """Get tuple of integer pairs from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- tuple : tuple of int pairs
- The result tuple
- """
- flat = [int(x.strip(' [] ')) for x in self[key][1:-1].split(",")]
- return tuple((flat[i], flat[i+1]) for i in range(0, len(flat), 2))
-
- def get_int(self, key):
- """Get integer from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- value : int
- The result value
- """
- return int(self[key])
-
- def get_float_tuple(self, key):
- """Get tuple of float from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- tuple : tuple of float
- The result tuple
- """
- return tuple(float(x) for x in self[key][1:-1].split(",") if x)
-
- def get_float(self, key):
- """Get float from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- value : float
- The result value
- """
- return float(self[key])
-
- def get_bool(self, key):
- """Get bool from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- value : bool
- The result value
- """
- lowercase = self[key].lower()
- if lowercase == "1":
- return True
- if lowercase == "0":
- return False
- if lowercase == "true":
- return True
- if lowercase == "false":
- return False
- raise ValueError("Wrong bool format for key %s" % key)
-
- def get_str(self, key):
- """Get string from attr dict
-
- Parameters
- ----------
- key : str
- The attr key
-
- Returns
- -------
- value : str
- The result value
- """
- return self[key]
-
- def __repr__(self):
- return str({k : self[k] for k in self.keys()})
-
-
-tvm.register_extension(AttrDict, AttrDict)
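
The parsing helpers above rely on NNVM handing attribute values over as plain strings, e.g. "(3, 3)" for tuples and "1"/"True" for booleans. A standalone sketch of that convention with hypothetical values (no TVM handle required):

    raw = {"kernel_size": "(3, 3)", "use_bias": "1", "epsilon": "1e-05"}
    kernel_size = tuple(int(x) for x in raw["kernel_size"][1:-1].split(",") if x)  # (3, 3)
    use_bias = raw["use_bias"].lower() in ("1", "true")                            # True
    epsilon = float(raw["epsilon"])                                                # 1e-05
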
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""Definition of image ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-from . import registry as reg
-from .registry import OpPattern
-
-# resize
-@reg.register_schedule("resize")
-def schedule_resize(_, outs, target):
- """Schedule definition of resize"""
- with tvm.target.create(target):
- return topi.generic.schedule_injective(outs)
-
-reg.register_pattern("resize", OpPattern.INJECTIVE)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument, missing-docstring, no-else-return
-"""Definition of nn ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-from topi.util import get_const_int, get_const_tuple
-from .tensor import _fschedule_broadcast, _fschedule_injective
-from . import registry as reg
-from .registry import OpPattern
-
-# relu
-reg.register_schedule("relu", _fschedule_broadcast)
-reg.register_pattern("relu", OpPattern.ELEMWISE)
-
-
-# leaky_relu
-reg.register_schedule("leaky_relu", _fschedule_broadcast)
-reg.register_pattern("leaky_relu", OpPattern.ELEMWISE)
-
-# prelu
-reg.register_schedule("prelu", _fschedule_broadcast)
-reg.register_pattern("prelu", OpPattern.BROADCAST)
-
-# flatten
-reg.register_schedule("flatten", _fschedule_broadcast)
-reg.register_pattern("flatten", OpPattern.INJECTIVE)
-
-
-# pad
-reg.register_schedule("pad", _fschedule_broadcast)
-reg.register_pattern("pad", OpPattern.INJECTIVE)
-
-
-# layout transform
-reg.register_schedule("__layout_transform__", _fschedule_injective)
-reg.register_pattern("__layout_transform__", OpPattern.INJECTIVE)
-
-
-@reg.register_schedule("softmax")
-def schedule_softmax(_, outs, target):
- """Schedule definition of softmax"""
- with tvm.target.create(target):
- return topi.generic.schedule_softmax(outs)
-
-reg.register_pattern("softmax", OpPattern.OPAQUE)
-
-
-# log softmax
-@reg.register_schedule("log_softmax")
-def schedule_log_softmax(_, outs, target):
- """Schedule definition of softmax"""
- with tvm.target.create(target):
- return topi.generic.schedule_softmax(outs)
-
-# Mark log_softmax as extern as we do not fuse it in all cases
-reg.register_pattern("log_softmax", OpPattern.OPAQUE)
-
-
-# dense
-@reg.register_compute("dense")
-def compute_dense(attrs, inputs, _):
- """Compute definition of dense"""
- if attrs.get_bool("use_bias"):
- return topi.nn.dense(inputs[0], inputs[1], inputs[2])
- return topi.nn.dense(inputs[0], inputs[1])
-
-@reg.register_schedule("dense")
-def schedule_dense(_, outs, target):
- """Schedule definition of dense"""
- with tvm.target.create(target):
- return topi.generic.schedule_dense(outs)
-
-reg.register_pattern("dense", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-#matmul
-reg.register_pattern("matmul", OpPattern.OUT_ELEMWISE_FUSABLE)
-reg.register_schedule("matmul", _fschedule_injective)
-
-# conv2d
-@reg.register_compute("conv2d")
-def compute_conv2d(attrs, inputs, _):
- """Compute definition of conv2d"""
- padding = attrs.get_int_tuple("padding")
- strides = attrs.get_int_tuple("strides")
- dilation = attrs.get_int_tuple("dilation")
- groups = attrs.get_int("groups")
- channels = attrs.get_int("channels")
- layout = attrs["layout"]
- kernel_layout = attrs["kernel_layout"]
- out_dtype = attrs["out_dtype"]
- out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
- assert layout in ["NCHW", "NHWC", "NCHW4c"]
- (dilation_h, dilation_w) = dilation
- if dilation_h < 1 or dilation_w < 1:
- raise ValueError("dilation should be positive value")
-
- if groups == 1 and layout == 'NCHW4c' and inputs[0].dtype == 'int8':
- # pylint: disable=assignment-from-no-return
- out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding,
- dilation, layout, out_dtype)
- # pylint: enable=assignment-from-no-return
- elif groups == 1:
- out = topi.nn.conv2d(
- inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype)
- elif layout == "NCHW" and \
- groups == get_const_int(inputs[0].shape[1]) and \
- groups == channels:
- out = topi.nn.depthwise_conv2d_nchw(
- inputs[0], inputs[1], strides, padding, dilation, out_dtype)
- elif layout in ["NCHW", "NCHW4c"]:
- out = topi.nn.group_conv2d_nchw(inputs[0], inputs[1], strides, padding, dilation, groups,
- out_dtype)
- elif layout == "NHWC" and \
- kernel_layout == "HWOI" and \
- groups == get_const_int(inputs[0].shape[3]) and \
- groups == channels:
- out = topi.nn.depthwise_conv2d_nhwc(
- inputs[0], inputs[1], strides, padding, dilation, out_dtype)
- else:
- raise ValueError("not support arbitrary group number for now")
-
- if attrs.get_bool("use_bias"):
- bias = inputs[2]
- expand_axis = 1 if layout in ["NCHW", "NCHW4c"] else 0
- bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
- out = topi.add(out, bias)
- return out
-
-@reg.register_schedule("conv2d")
-def schedule_conv2d(attrs, outs, target):
- """Schedule definition of conv2d"""
- groups = attrs.get_int("groups")
- channels = attrs.get_int("channels")
- layout = attrs["layout"]
- kernel_layout = attrs["kernel_layout"]
-
- with tvm.target.create(target):
- if groups == 1 and layout == "NCHW":
- return topi.generic.schedule_conv2d_nchw(outs)
- elif groups == 1 and layout == "NCHW4c":
- return topi.generic.schedule_conv2d_nchw(outs)
- elif groups == 1 and layout == "NHWC":
- return topi.generic.schedule_conv2d_nhwc(outs)
- elif groups == channels and layout == "NCHW":
- return topi.generic.schedule_depthwise_conv2d_nchw(outs)
- elif groups == channels and layout == "NHWC" and kernel_layout == "HWOI":
- return topi.generic.schedule_depthwise_conv2d_nhwc(outs)
- elif layout in ["NCHW", "NCHW4c"]:
- return topi.generic.schedule_group_conv2d_nchw(outs)
- else:
- raise ValueError("No compatible schedule")
-
-@reg.register_alter_op_layout("conv2d")
-def alter_conv2d_layout(attrs, inputs, tinfos):
- """Replace conv2d op with other layouts or algorithms"""
- import nnvm.symbol as sym
-
- # map relay op names to nnvm op names
- sym.contrib_conv2d_winograd_without_weight_transform = \
- sym.contrib.conv2d_winograd_without_weight_transform
- sym.contrib_conv2d_winograd_weight_transform = \
- sym.contrib.conv2d_winograd_weight_transform
- sym.contrib_conv2d_winograd_nnpack_without_weight_transform = \
- sym.contrib.conv2d_winograd_nnpack_without_weight_transform
- sym.contrib_conv2d_winograd_nnpack_weight_transform = \
- sym.contrib.conv2d_winograd_nnpack_weight_transform
- sym.nn = sym
-
- # map relay argument names to nnvm argument names
- raw_reshape = sym.reshape
- def _reshape(*args, **kwargs):
- if "newshape" in kwargs:
- kwargs['shape'] = kwargs.pop('newshape')
- return raw_reshape(*args, **kwargs)
- sym.reshape = _reshape
-
- return topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, sym)
-
-reg.register_pattern("conv2d", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-# convolution NCHWc
-@reg.register_compute("_contrib_conv2d_NCHWc")
-def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
- """Compute definition of conv2d NCHWc"""
- padding = attrs.get_int_tuple("padding")
- strides = attrs.get_int_tuple("strides")
- dilation = attrs.get_int_tuple("dilation")
- out_channel = attrs.get_int("channels")
- groups = attrs.get_int("groups")
- layout = attrs.get_str("layout")
- out_layout = attrs.get_str("out_layout")
- out_dtype = attrs.get_str("out_dtype")
- out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
- if layout == "NCHW":
- _, in_channel, _, _ = get_const_tuple(inputs[0].shape)
- else:
- _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
- in_channel = in_channel_chunk * in_channel_block
- assert dilation == (1, 1), "not support dilate now"
- if groups == 1:
- # pylint: disable=assignment-from-no-return
- out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], strides, padding, dilation,
- layout, out_layout, out_dtype)
- # pylint: enable=assignment-from-no-return
- elif groups == in_channel and groups == out_channel:
- # pylint: disable=assignment-from-no-return
- out = topi.nn.depthwise_conv2d_NCHWc(inputs[0], inputs[1], strides, padding,
- dilation, layout, out_layout, out_dtype)
- # pylint: enable=assignment-from-no-return
- else:
- raise ValueError("not support arbitrary group number > 1 for now")
- if attrs.get_bool("use_bias"):
- bias = inputs[2]
- bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
- out = topi.add(out, bias)
- return out
-
-@reg.register_schedule("_contrib_conv2d_NCHWc")
-def schedule_contrib_conv2d_NCHWc(attrs, outs, target):
- """Schedule definition of conv2d NCHWc"""
- groups = attrs.get_int("groups")
- out_channel = attrs.get_int("channels")
- with tvm.target.create(target):
- if groups == 1:
- return topi.generic.schedule_conv2d_NCHWc(outs)
- elif groups == out_channel:
- return topi.generic.schedule_depthwise_conv2d_NCHWc(outs)
- else:
- raise ValueError("not support group number > 1 for now")
-
-reg.register_pattern("_contrib_conv2d_NCHWc", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-@reg.register_compute("_contrib_conv2d_winograd_weight_transform")
-def compute_contrib_conv2d_winograd_weight_transform(attrs, inputs, _):
- return topi.nn.conv2d_winograd_weight_transform(inputs[0], attrs.get_int('tile_size'))
-
-@reg.register_schedule("_contrib_conv2d_winograd_weight_transform")
-def schedule_contrib_conv2d_winograd_weight_transform(attrs, outs, target):
- with tvm.target.create(target):
- return topi.generic.schedule_conv2d_winograd_weight_transform(outs)
-
-reg.register_pattern("_contrib_conv2d_winograd_weight_transform", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-@reg.register_compute("_contrib_conv2d_winograd_without_weight_transform")
-def compute_contrib_conv2d_winograd_without_weight_transform(attrs, inputs, _):
- """Compute definition of conv2d NCHWc"""
- padding = attrs.get_int_tuple("padding")
- strides = attrs.get_int_tuple("strides")
- dilation = attrs.get_int_tuple("dilation")
- groups = attrs.get_int("groups")
- layout = attrs.get_str("layout")
- out_dtype = attrs.get_str("out_dtype")
- tile_size = attrs.get_int("tile_size")
- out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
- assert dilation == (1, 1), "Do not support dilate now"
-    assert groups == 1, "Do not support arbitrary group number"
-
- # pylint: disable=assignment-from-no-return
- out = topi.nn.conv2d_winograd_without_weight_transform(
- inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype,
- tile_size)
-
- if attrs.get_bool("use_bias"):
- bias = inputs[2]
- bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
- out = topi.add(out, bias)
- return out
-
-@reg.register_schedule("_contrib_conv2d_winograd_without_weight_transform")
-def schedule_contrib_conv2d_winograd_without_weight_transform(attrs, outs, target):
- with tvm.target.create(target):
- return topi.generic.schedule_conv2d_winograd_without_weight_transform(outs)
-
-reg.register_pattern("_contrib_conv2d_winograd_without_weight_transform",
- OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-@reg.register_compute("_contrib_conv2d_winograd_nnpack_weight_transform")
-def compute_contrib_conv2d_winograd_nnpack_weight_transform(attrs, inputs, _):
- convolution_algorithm = attrs.get_int('convolution_algorithm')
-    out_dtype = attrs.get_str('out_dtype')
-    return topi.nn.conv2d_winograd_nnpack_weight_transform(
-        inputs[0], convolution_algorithm, out_dtype)
-
-
-@reg.register_schedule("_contrib_conv2d_winograd_nnpack_weight_transform")
-def schedule_contrib_conv2d_winograd_nnpack_weight_transform(attrs, outs, target):
- with tvm.target.create(target):
- return topi.generic.schedule_conv2d_winograd_nnpack_weight_transform(outs)
-
-reg.register_pattern("_contrib_conv2d_winograd_nnpack_weight_transform", OpPattern.OPAQUE)
-
-
-@reg.register_compute("_contrib_conv2d_winograd_nnpack_without_weight_transform")
-def compute_contrib_conv2d_winograd_nnpack_without_weight_transform(attrs, inputs, _):
- padding = attrs.get_int_tuple("padding")
- strides = attrs.get_int_tuple("strides")
- dilation = attrs.get_int_tuple("dilation")
- groups = attrs.get_int("groups")
- layout = attrs.get_str("layout")
- out_dtype = attrs.get_str("out_dtype")
- out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
- assert dilation == (1, 1), "Do not support dilate now"
-    assert groups == 1, "Do not support arbitrary group number"
-
- # pylint: disable=assignment-from-no-return
- out = topi.nn.conv2d_winograd_nnpack_without_weight_transform(
- inputs[0], inputs[1], inputs[2] if attrs.get_bool("use_bias") else None,
- strides, padding, dilation, layout, out_dtype)
- return out
-
-@reg.register_schedule("_contrib_conv2d_winograd_nnpack_without_weight_transform")
-def schedule_contrib_conv2d_winograd_nnpack_without_weight_transform(attrs, outs, target):
- with tvm.target.create(target):
- return topi.generic.schedule_conv2d_winograd_nnpack_without_weight_transform(outs)
-
-reg.register_pattern("_contrib_conv2d_winograd_nnpack_without_weight_transform",
- OpPattern.OPAQUE)
-
-
-# conv2d_transpose
-@reg.register_compute("conv2d_transpose")
-def compute_conv2d_transpose(attrs, inputs, _):
- """Compute definition of conv2d_transpose"""
- padding = attrs.get_int_tuple("padding")
- strides = attrs.get_int_tuple("strides")
- dilation = attrs.get_int_tuple("dilation")
- groups = attrs.get_int("groups")
- out_dtype = attrs.get_str("out_dtype")
- layout = attrs["layout"]
- out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
-
- assert layout == "NCHW", "only support nchw for now"
- assert dilation == (1, 1), "not support dilate now"
- assert groups == 1, "only support groups == 1 for now"
-
- out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding, out_dtype)
- if attrs.get_bool("use_bias"):
- bias = inputs[2]
- bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
- out = topi.add(out, bias)
- output_padding = attrs.get_int_tuple("output_padding")
- out = topi.nn.pad(out, \
- [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
- return out
-
-@reg.register_schedule("conv2d_transpose")
-def schedule_conv2d_transpose(attrs, outs, target):
- """Schedule definition of conv2d_transpose"""
- with tvm.target.create(target):
- return topi.generic.schedule_conv2d_transpose_nchw(outs)
-
-reg.register_pattern("conv2d_transpose", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-# max_pool2d
-@reg.register_schedule("max_pool2d")
-def schedule_max_pool2d(attrs, outs, target):
- """Schedule definition of max_pool2d"""
- layout = attrs["layout"]
- with tvm.target.create(target):
- return topi.generic.schedule_pool(outs, layout)
-
-reg.register_pattern("max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-# avg_pool2d
-@reg.register_schedule("avg_pool2d")
-def schedule_avg_pool2d(attrs, outs, target):
- """Schedule definition of avg_pool2d"""
- layout = attrs["layout"]
- with tvm.target.create(target):
- return topi.generic.schedule_pool(outs, layout)
-
-reg.register_pattern("avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-# global_max_pool2d
-@reg.register_schedule("global_max_pool2d")
-def schedule_global_max_pool2d(_, outs, target):
- """Schedule definition of global_max_pool2d"""
- with tvm.target.create(target):
- return topi.generic.schedule_adaptive_pool(outs)
-
-reg.register_pattern("global_max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-
-# global_avg_pool2d
-@reg.register_schedule("global_avg_pool2d")
-def schedule_global_avg_pool2d(_, outs, target):
- """Schedule definition of global_avg_pool2d"""
- with tvm.target.create(target):
- return topi.generic.schedule_adaptive_pool(outs)
-
-reg.register_pattern("global_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
-
-# upsampling
-@reg.register_schedule("upsampling")
-def schedule_upsampling(_, outs, target):
- """Schedule definition of upsampling"""
- with tvm.target.create(target):
- return topi.generic.schedule_injective(outs)
-
-reg.register_pattern("upsampling", OpPattern.INJECTIVE)
-
-@reg.register_compute("lrn")
-def compute_lrn(attrs, inputs, _):
- """Compute definition of lrn"""
- size = attrs.get_int("size")
- axis = attrs.get_int("axis")
- alpha = attrs.get_float("alpha")
- beta = attrs.get_float("beta")
- bias = attrs.get_float("bias")
- return topi.nn.lrn(inputs[0], size, axis, alpha, beta, bias)
-
-@reg.register_schedule("lrn")
-def schedule_lrn(attrs, outs, target):
- """Schedule definition of lrn"""
- with tvm.target.create(target):
- return topi.generic.schedule_lrn(outs)
-
-reg.register_pattern("lrn", OpPattern.OPAQUE)
-
-@reg.register_compute("l2_normalize")
-def compute_l2_normalize(attrs, inputs, _):
- """Compute definition of l2 normalize"""
- eps = attrs.get_float("eps")
- axis = attrs.get_int_tuple("axis")
- return topi.nn.l2_normalize(inputs[0], eps, axis)
-
-@reg.register_schedule("l2_normalize")
-def schedule_l2_normalize(attrs, outs, target):
- """Schedule definition of l2 normalize"""
- with tvm.target.create(target):
- return topi.generic.schedule_l2_normalize(outs)
-
-reg.register_pattern("l2_normalize", OpPattern.OUT_ELEMWISE_FUSABLE)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""Reduction ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-import topi.cuda
-from . import registry as reg
-from .registry import OpPattern
-
-def _schedule_reduce(_, outs, target):
- """Generic schedule for reduce"""
- with tvm.target.create(target):
- return topi.generic.schedule_reduce(outs)
-
-
-_fschedule_reduce = tvm.convert(_schedule_reduce)
-
-def _compute_reduce(f):
- """auxiliary function"""
- def _compute(attrs, inputs, out_info):
- axis = attrs.get_int_tuple("axis")
- keepdims = attrs.get_bool("keepdims")
- if axis:
- return f(inputs[0], axis=axis, keepdims=keepdims)
- return f(inputs[0], keepdims=keepdims)
- return _compute
-
-# sum
-reg.register_pattern("sum", OpPattern.COMM_REDUCE)
-reg.register_schedule("sum", _fschedule_reduce)
-
-# max
-reg.register_pattern("max", OpPattern.COMM_REDUCE)
-reg.register_schedule("max", _fschedule_reduce)
-
-# min
-reg.register_pattern("min", OpPattern.COMM_REDUCE)
-reg.register_schedule("min", _fschedule_reduce)
-
-# collapse sum
-reg.register_pattern("collapse_sum", OpPattern.COMM_REDUCE)
-reg.register_schedule("collapse_sum", _fschedule_reduce)
-
-# argmax
-reg.register_pattern("argmax", OpPattern.COMM_REDUCE)
-reg.register_schedule("argmax", _fschedule_reduce)
-
-# argmin
-reg.register_pattern("argmin", OpPattern.COMM_REDUCE)
-reg.register_schedule("argmin", _fschedule_reduce)
-
-# mean
-reg.register_pattern("mean", OpPattern.COMM_REDUCE)
-reg.register_schedule("mean", _fschedule_reduce)
-
-# product
-reg.register_pattern("prod", OpPattern.COMM_REDUCE)
-reg.register_schedule("prod", _fschedule_reduce)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name
-"""Information registry to register operator information for compiler"""
-import tvm
-
-class OpPattern(object):
- """Operator generic patterns
-
- See Also
- --------
- top.tag : Contains explanation of the tag type.
- """
- # Elementwise operator
- ELEMWISE = 0
- # Broadcast operator
- BROADCAST = 1
- # Injective mapping
- INJECTIVE = 2
- # Commutative reduction
- COMM_REDUCE = 3
- # Complex op, can still fuse ewise into it
- OUT_ELEMWISE_FUSABLE = 4
- # Not fusable opaque op
- OPAQUE = 8
-
-_register_compute = tvm.get_global_func("nnvm._register_compute")
-_register_schedule = tvm.get_global_func("nnvm._register_schedule")
-_register_pattern = tvm.get_global_func("nnvm._register_pattern")
-_register_alter_op_layout = tvm.get_global_func("nnvm.compiler._register_alter_op_layout")
-
-def register_compute(op_name, f=None, level=10):
- """Register compute function for operator
-
- Parameters
- ----------
- op_name : str
- The name of operator
-
- f : function
- The compute function
-
- level : int
- The priority level
-
- Returns
- -------
- fregister : function
- Register function if f is not specified.
- """
- def register(myf):
- """internal register function"""
- _register_compute(op_name, myf, level)
- return myf
- return register(f) if f else register
-
-
-def register_schedule(op_name, f=None, level=10):
- """Register schedule function for operator
-
- Parameters
- ----------
- op_name : str
- The name of operator
-
- f : function
- The schedule function
-
- level : int
- The priority level
-
- Returns
- -------
- fregister : function
- Register function if f is not specified.
- """
- def register(myf):
- """internal register function"""
- _register_schedule(op_name, myf, level)
- return myf
- return register(f) if f else register
-
-
-def register_pattern(op_name, pattern, level=10):
- """Register pattern code for operator
-
- Parameters
- ----------
- op_name : str
- The name of operator
-
- pattern : int
- The pattern code.
-
- level : int
- The priority level
- """
- _register_pattern(op_name, pattern, level)
-
-
-def register_alter_op_layout(op_name, f=None, level=10):
- """Register alter layout function for operator
-
- Parameters
- ----------
- op_name : str
- The name of operator
-
- f : function
- The alter layout function
-
- level : int
- The priority level
-
- Returns
- -------
- fregister : function
- Register function if f is not specified.
- """
- def register(myf):
- """internal register function"""
- _register_alter_op_layout(op_name, myf, level)
- return myf
- return register(f) if f else register
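As a usage sketch (hypothetical operator name "my_op", mirroring the registrations in the other removed files of this module):

    import tvm
    import topi
    from . import registry as reg
    from .registry import OpPattern

    @reg.register_compute("my_op")
    def compute_my_op(attrs, inputs, out_info):
        """Compute definition of my_op (identity, for illustration only)."""
        return topi.identity(inputs[0])

    @reg.register_schedule("my_op")
    def schedule_my_op(attrs, outs, target):
        """Schedule definition of my_op."""
        with tvm.target.create(target):
            return topi.generic.schedule_injective(outs)

    reg.register_pattern("my_op", OpPattern.ELEMWISE)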
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""Tensor ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-import topi.cuda
-from . import registry as reg
-from .registry import OpPattern
-
-def _schedule_injective(_, outs, target):
- """Generic schedule for binary bcast"""
- with tvm.target.create(target):
- return topi.generic.schedule_injective(outs)
-
-def _compute_binary_scalar(f):
- """auxiliary function"""
- @tvm.tag_scope(topi.tag.ELEMWISE)
- def _compute(attrs, x, _):
- x = x[0]
- scalar = attrs.get_float("scalar")
- scalar = tvm.const(scalar, x.dtype)
- return tvm.compute(x.shape, lambda *i: f(x(*i), scalar))
- return _compute
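An illustrative sketch (hypothetical; the scalar ops below are registered here without an explicit Python compute) of what this helper builds:

    compute_add_scalar = _compute_binary_scalar(lambda x, c: x + c)
    # compute_add_scalar(attrs, [data], None) yields an elementwise tensor computing
    # data[i] + attrs.get_float("scalar"), with the scalar cast to data.dtype.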
-
-
-def _compute_unary(f):
- """auxiliary function"""
- def _compute(attrs, x, _):
- return f(x[0])
- return _compute
-
-
-def _compute_binary(f):
- """auxiliary function"""
- def _compute(attrs, x, _):
- return f(x[0], x[1])
- return _compute
-
-
-_fschedule_injective = tvm.convert(_schedule_injective)
-_fschedule_broadcast = _fschedule_injective
-_fschedule_elemwise = _fschedule_injective
-
-# Assign requires special treatment in the compiler.
-# Its compute and schedule are designed as a
-# copy from rhs to the output.
-reg.register_pattern("_assign", OpPattern.OPAQUE)
-reg.register_schedule("_assign", _fschedule_broadcast)
-
-# copy
-reg.register_pattern("copy", OpPattern.ELEMWISE)
-reg.register_schedule("copy", _fschedule_broadcast)
-
-# cast
-reg.register_pattern("cast", OpPattern.ELEMWISE)
-reg.register_schedule("cast", _fschedule_broadcast)
-
-# floor
-reg.register_pattern("floor", OpPattern.ELEMWISE)
-reg.register_schedule("floor", _fschedule_broadcast)
-
-# ceil
-reg.register_pattern("ceil", OpPattern.ELEMWISE)
-reg.register_schedule("ceil", _fschedule_broadcast)
-
-# round
-reg.register_pattern("round", OpPattern.ELEMWISE)
-reg.register_schedule("round", _fschedule_broadcast)
-
-# abs
-reg.register_pattern("abs", OpPattern.ELEMWISE)
-reg.register_schedule("abs", _fschedule_broadcast)
-
-# trunc
-reg.register_pattern("trunc", OpPattern.ELEMWISE)
-reg.register_schedule("trunc", _fschedule_broadcast)
-
-# exp
-reg.register_pattern("exp", OpPattern.ELEMWISE)
-reg.register_schedule("exp", _fschedule_broadcast)
-
-# sqrt
-reg.register_pattern("sqrt", OpPattern.ELEMWISE)
-reg.register_schedule("sqrt", _fschedule_broadcast)
-
-# log
-reg.register_pattern("log", OpPattern.ELEMWISE)
-reg.register_schedule("log", _fschedule_broadcast)
-
-# tanh
-reg.register_pattern("tanh", OpPattern.ELEMWISE)
-reg.register_schedule("tanh", _fschedule_broadcast)
-
-# negative
-reg.register_pattern("negative", OpPattern.ELEMWISE)
-reg.register_schedule("negative", _fschedule_broadcast)
-
-# sigmoid
-reg.register_pattern("sigmoid", OpPattern.ELEMWISE)
-reg.register_schedule("sigmoid", _fschedule_broadcast)
-
-# add_scalar
-reg.register_pattern("__add_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__add_scalar__", _fschedule_broadcast)
-
-# sub_scalar
-reg.register_pattern("__sub_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__sub_scalar__", _fschedule_broadcast)
-
-# rsub_scalar
-reg.register_pattern("__rsub_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__rsub_scalar__", _fschedule_broadcast)
-
-# mul_scalar
-reg.register_pattern("__mul_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__mul_scalar__", _fschedule_broadcast)
-
-# div_scalar
-reg.register_pattern("__div_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__div_scalar__", _fschedule_broadcast)
-
-# rdiv_scalar
-reg.register_pattern("__rdiv_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__rdiv_scalar__", _fschedule_broadcast)
-
-# pow_scalar
-reg.register_pattern("__pow_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__pow_scalar__", _fschedule_broadcast)
-
-# rpow_scalar
-reg.register_pattern("__rpow_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__rpow_scalar__", _fschedule_broadcast)
-
-# lshift_scalar
-reg.register_pattern("__lshift_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__lshift_scalar__", _fschedule_broadcast)
-
-# rshift_scalar
-reg.register_pattern("__rshift_scalar__", OpPattern.ELEMWISE)
-reg.register_schedule("__rshift_scalar__", _fschedule_broadcast)
-
-# logical_and
-reg.register_pattern("logical_and", OpPattern.ELEMWISE)
-reg.register_schedule("logical_and", _fschedule_broadcast)
-
-# logical_or
-reg.register_pattern("logical_or", OpPattern.ELEMWISE)
-reg.register_schedule("logical_or", _fschedule_broadcast)
-
-# logical_not
-reg.register_pattern("logical_not", OpPattern.ELEMWISE)
-reg.register_schedule("logical_not", _fschedule_broadcast)
-
-# elemwise_add
-reg.register_pattern("elemwise_add", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_add", _fschedule_broadcast)
-
-# elemwise_sub
-reg.register_pattern("elemwise_sub", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_sub", _fschedule_broadcast)
-
-# elemwise_mul
-reg.register_pattern("elemwise_mul", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_mul", _fschedule_broadcast)
-
-# elemwise_div
-reg.register_pattern("elemwise_div", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_div", _fschedule_broadcast)
-
-# elemwise_mod
-reg.register_pattern("elemwise_mod", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_mod", _fschedule_broadcast)
-
-# elemwise_pow
-reg.register_pattern("elemwise_pow", OpPattern.BROADCAST)
-reg.register_schedule("elemwise_pow", _fschedule_broadcast)
-
-# broadcast_add
-reg.register_pattern("broadcast_add", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_add", _fschedule_broadcast)
-
-# broadcast_sub
-reg.register_pattern("broadcast_sub", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_sub", _fschedule_broadcast)
-
-# broadcast_mul
-reg.register_pattern("broadcast_mul", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_mul", _fschedule_broadcast)
-
-# broadcast_div
-reg.register_pattern("broadcast_div", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_div", _fschedule_broadcast)
-
-# broadcast mod
-reg.register_pattern("broadcast_mod", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_mod", _fschedule_broadcast)
-
-# broadcast max
-reg.register_pattern("broadcast_max", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_max", _fschedule_broadcast)
-
-# broadcast min
-reg.register_pattern("broadcast_min", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_min", _fschedule_broadcast)
-
-# broadcast pow
-reg.register_pattern("broadcast_pow", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_pow", _fschedule_broadcast)
-
-# broadcast left_shift
-reg.register_pattern("broadcast_left_shift", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_left_shift", _fschedule_broadcast)
-
-# broadcast right_shift
-reg.register_pattern("broadcast_right_shift", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_right_shift", _fschedule_broadcast)
-
-# broadcast greater
-reg.register_pattern("broadcast_greater", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_greater", _fschedule_broadcast)
-
-# broadcast less
-reg.register_pattern("broadcast_less", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_less", _fschedule_broadcast)
-
-# broadcast equal
-reg.register_pattern("broadcast_equal", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_equal", _fschedule_broadcast)
-
-# broadcast not_equal
-reg.register_pattern("broadcast_not_equal", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_not_equal", _fschedule_broadcast)
-
-# broadcast greater_equal
-reg.register_pattern("broadcast_greater_equal", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_greater_equal", _fschedule_broadcast)
-
-# broadcast less_equal
-reg.register_pattern("broadcast_less_equal", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_less_equal", _fschedule_broadcast)
-
-# broadcast_to
-reg.register_pattern("broadcast_to", OpPattern.BROADCAST)
-reg.register_schedule("broadcast_to", _fschedule_broadcast)
-
-# clip
-reg.register_pattern("clip", OpPattern.ELEMWISE)
-reg.register_schedule("clip", _fschedule_elemwise)
-
-# elemwise sum
-reg.register_pattern("elemwise_sum", OpPattern.ELEMWISE)
-reg.register_schedule("elemwise_sum", _fschedule_elemwise)
-
-# full
-reg.register_pattern("full", OpPattern.OUT_ELEMWISE_FUSABLE)
-reg.register_schedule("full", _fschedule_elemwise)
-
-# full_like
-reg.register_pattern("full_like", OpPattern.ELEMWISE)
-reg.register_schedule("full_like", _fschedule_elemwise)
-
-# zeros
-reg.register_pattern("zeros", OpPattern.OUT_ELEMWISE_FUSABLE)
-reg.register_schedule("zeros", _fschedule_elemwise)
-
-# zeros_like
-reg.register_pattern("zeros_like", OpPattern.ELEMWISE)
-reg.register_schedule("zeros_like", _fschedule_elemwise)
-
-# ones
-reg.register_pattern("ones", OpPattern.OUT_ELEMWISE_FUSABLE)
-reg.register_schedule("ones", _fschedule_elemwise)
-
-# ones_like
-reg.register_pattern("ones_like", OpPattern.ELEMWISE)
-reg.register_schedule("ones_like", _fschedule_elemwise)
-
-# greater
-reg.register_pattern("greater", OpPattern.ELEMWISE)
-reg.register_schedule("greater", _fschedule_elemwise)
-
-# less
-reg.register_pattern("less", OpPattern.ELEMWISE)
-reg.register_schedule("less", _fschedule_elemwise)
-
-# block_grad
-reg.register_compute("block_grad", _compute_unary(topi.identity))
-reg.register_pattern("block_grad", OpPattern.ELEMWISE)
-reg.register_schedule("block_grad", _fschedule_elemwise)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""Tensor transformation ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-from .tensor import _fschedule_broadcast, _fschedule_injective
-from . import registry as reg
-from .registry import OpPattern
-
-# expand_dims
-reg.register_pattern("expand_dims", OpPattern.BROADCAST)
-reg.register_schedule("expand_dims", _fschedule_broadcast)
-
-# expand_like
-@reg.register_compute("expand_like")
-def compute_expand_like(attrs, inputs, _):
- """Compute definition of expand_like"""
- if len(inputs[0].shape) == len(inputs[1].shape):
- # If the number of dimensions is unchanged, this is just a broadcast
- return topi.broadcast_to(inputs[0], inputs[1].shape)
-
- exclude = attrs.get_bool("exclude")
- axis = attrs.get_int_tuple("axis")
- if exclude:
- exclude_axis = (axis,) if isinstance(axis, int) else axis
- axis = []
- for item in range(len(inputs[1].shape)):
- if item not in exclude_axis:
- axis.append(item)
- axis = tuple(axis)
-
- return topi.transform.expand_like(inputs[0], inputs[1], axis)
-reg.register_pattern("expand_like", OpPattern.BROADCAST)
-reg.register_schedule("expand_like", _fschedule_broadcast)
-
-# reshape_like
-@reg.register_compute("reshape_like")
-def compute_reshape_like(attrs, inputs, out_info):
- """Compute definition of reshape_like"""
- return topi.reshape(inputs[0], inputs[1].shape)
-reg.register_pattern("reshape_like", OpPattern.INJECTIVE)
-reg.register_schedule("reshape_like", _fschedule_injective)
-
-# transpose
-reg.register_pattern("transpose", OpPattern.INJECTIVE)
-reg.register_schedule("transpose", _fschedule_injective)
-
-# flip
-reg.register_pattern("flip", OpPattern.INJECTIVE)
-reg.register_schedule("flip", _fschedule_injective)
-
-# reshape
-reg.register_pattern("reshape", OpPattern.INJECTIVE)
-reg.register_schedule("reshape", _fschedule_injective)
-
-# squeeze
-reg.register_pattern("squeeze", OpPattern.INJECTIVE)
-reg.register_schedule("squeeze", _fschedule_injective)
-
-# concatenate
-@reg.register_schedule("concatenate")
-def schedule_concatenate(_, outs, target):
- """Schedule definition of concatenate"""
- with tvm.target.create(target):
- return topi.generic.schedule_concatenate(outs)
-
-reg.register_pattern("concatenate", OpPattern.INJECTIVE)
-
-# split
-reg.register_pattern("split", OpPattern.INJECTIVE)
-reg.register_schedule("split", _fschedule_injective)
-
-# take
-reg.register_pattern("take", OpPattern.INJECTIVE)
-reg.register_schedule("take", _fschedule_injective)
-
-# strided_slice
-reg.register_pattern("strided_slice", OpPattern.INJECTIVE)
-reg.register_schedule("strided_slice", _fschedule_injective)
-
-# slice_like
-reg.register_pattern("slice_like", OpPattern.INJECTIVE)
-reg.register_schedule("slice_like", _fschedule_injective)
-
-# where
-reg.register_pattern("where", OpPattern.INJECTIVE)
-reg.register_schedule("where", _fschedule_injective)
-
-# gather_nd
-reg.register_pattern("gather_nd", OpPattern.INJECTIVE)
-reg.register_schedule("gather_nd", _fschedule_injective)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=invalid-name, unused-argument
-"""Definition of nn ops"""
-from __future__ import absolute_import
-
-import tvm
-import topi
-from . import registry as reg
-from .registry import OpPattern
-
-@reg.register_compute("yolo_reorg")
-def compute_reorg(attrs, inputs, _):
- """Compute definition of reorg"""
- return topi.vision.reorg(inputs[0], attrs.get_int("stride"))
-
-@reg.register_schedule("yolo_reorg")
-def schedule_reorg(attrs, outs, target):
- """Schedule definition of reorg"""
- with tvm.target.create(target):
- return topi.generic.schedule_injective(outs)
-
-reg.register_pattern("yolo_reorg", OpPattern.INJECTIVE)
-
-# multibox_prior
-@reg.register_schedule("multibox_prior")
-def schedule_multibox_prior(_, outs, target):
- """Schedule definition of multibox_prior"""
- with tvm.target.create(target):
- return topi.generic.schedule_multibox_prior(outs)
-
-@reg.register_compute("multibox_prior")
-def compute_multibox_prior(attrs, inputs, _):
- """Compute definition of multibox_prior"""
- sizes = attrs.get_float_tuple('sizes')
- ratios = attrs.get_float_tuple('ratios')
- steps = attrs.get_float_tuple('steps')
- offsets = attrs.get_float_tuple('offsets')
- clip = attrs.get_bool('clip')
-
- return topi.vision.ssd.multibox_prior(inputs[0], sizes, ratios,
- steps, offsets, clip)
-
-reg.register_pattern("multibox_prior", OpPattern.OPAQUE)
-
-# multibox_transform_loc
-@reg.register_schedule("multibox_transform_loc")
-def schedule_multibox_transform_loc(_, outs, target):
- """Schedule definition of multibox_detection"""
- with tvm.target.create(target):
- return topi.generic.schedule_multibox_transform_loc(outs)
-
-@reg.register_compute("multibox_transform_loc")
-def compute_multibox_transform_loc(attrs, inputs, _):
- """Compute definition of multibox_detection"""
- clip = attrs.get_bool('clip')
- threshold = attrs.get_float('threshold')
- variance = attrs.get_float_tuple('variances')
-
- return topi.vision.ssd.multibox_transform_loc(inputs[0], inputs[1], inputs[2],
- clip, threshold, variance)
-
-reg.register_pattern("multibox_detection", OpPattern.OPAQUE)
-
-# non-maximum suppression
-@reg.register_schedule("non_max_suppression")
-def schedule_nms(_, outs, target):
- """Schedule definition of non_max_suppression"""
- with tvm.target.create(target):
- return topi.generic.schedule_nms(outs)
-
-@reg.register_compute("non_max_suppression")
-def compute_nms(attrs, inputs, _):
- """Compute definition of non_max_suppression"""
- return_indices = attrs.get_bool('return_indices')
- max_output_size = attrs.get_int('max_output_size')
- iou_threshold = attrs.get_float('iou_threshold')
- force_suppress = attrs.get_bool('force_suppress')
- top_k = attrs.get_int('top_k')
- id_index = attrs.get_int('id_index')
- invalid_to_bottom = attrs.get_bool('invalid_to_bottom')
-
- return topi.vision.non_max_suppression(inputs[0], inputs[1],
- max_output_size=max_output_size,
- iou_threshold=iou_threshold,
- force_suppress=force_suppress,
- top_k=top_k, id_index=id_index,
- return_indices=return_indices,
- invalid_to_bottom=invalid_to_bottom)
-
-reg.register_pattern("non_max_suppression", OpPattern.OPAQUE)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import os
-import sys
-from setuptools import find_packages
-from distutils.core import setup
-
-def config_cython():
- # temporarily disable Cython for now
- # as NNVM uses local DLL build
- return []
- try:
- from Cython.Build import cythonize
- from distutils.extension import Extension
- if sys.version_info >= (3, 0):
- subdir = "_cy3"
- else:
- subdir = "_cy2"
- ret = []
- path = "nnvm/cython"
-
- for fn in os.listdir(path):
- if not fn.endswith(".pyx"):
- continue
- ret.append(Extension(
- "nnvm/%s/%s" % (subdir, fn[:-4]),
- ["nnvm/cython/%s" % fn],
- include_dirs=["../include/"],
- language="c++"))
- return cythonize(ret)
- except ImportError:
- print("Cython is not installed, will compile without cython module")
- return []
-
-# We cannot import `libinfo.py` in setup.py directly since __init__.py
-# will be invoked, which introduces dependencies.
-CURRENT_DIR = os.path.dirname(__file__)
-libinfo_py = os.path.join(CURRENT_DIR, './nnvm/libinfo.py')
-libinfo = {'__file__': libinfo_py}
-exec(compile(open(libinfo_py, "rb").read(), libinfo_py, 'exec'), libinfo, libinfo)
-
-__version__ = libinfo['__version__']
-if not os.getenv('CONDA_BUILD'):
- LIB_PATH = libinfo['find_lib_path']()
- _, LIB_NAME = os.path.split(LIB_PATH[0])
- curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
- rpath = os.path.relpath(LIB_PATH[0], curr_path)
- setup_kwargs = dict(
- include_package_data=True,
- data_files=[('nnvm', [rpath])]
- )
-else:
- setup_kwargs = {}
-
-setup(name='nnvm',
- version=__version__,
- description="NNVM: Open Compiler for AI Frameworks",
- zip_safe=False,
- install_requires=[
- 'numpy'
- ],
- packages=find_packages(),
- url='https://github.com/dmlc/nnvm',
- **setup_kwargs)
-- c_api: NNVM C API
-- core: NNVM core data structure
-- pass: NNVM pass
-
-The following components form the generic NNVM compiler and define the tensor operator set:
-
-- top: NNVM core tensor operators
-- compiler: NNVM compiler toolchain
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file alter_op_layout.cc
- * \brief Alter the operator layouts. Keep inferred layouts (if any) from previous stages.
- * e.g., convolution may calculate faster with the NCHW16c layout.
- */
-#include <nnvm/pass.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/layout.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/pass_functions.h>
-#include <tvm/operation.h>
-#include <algorithm>
-#include <functional>
-#include "compile_engine.h"
-#include "graph_transform.h"
-
-namespace nnvm {
-namespace compiler {
-namespace {
-
-tvm::Array<tvm::Tensor> GetTensorInfo(const IndexedGraph& idx_graph,
- const uint32_t nid,
- const ShapeVector& shape_vec,
- const DTypeVector& dtype_vec) {
- tvm::Array<tvm::Tensor> vec;
- for (uint32_t i = 0; i < idx_graph[nid].source->num_outputs(); ++i) {
- tvm::Array<tvm::Expr> shape;
- for (int64_t x : shape_vec[idx_graph.entry_id(nid, i)]) {
- CHECK_LE(x, static_cast<int64_t>(std::numeric_limits<int>::max()));
- shape.push_back(tvm::make_const(tvm::DataType::Int(32), x));
- }
- vec.push_back(tvm::placeholder(
- shape, GetTVMType(dtype_vec[idx_graph.entry_id(nid, i)])));
- }
- return vec;
-}
-
-Graph AlterOpLayout(const Graph& src) {
- static auto& falter_op_layout =
- Op::GetAttr<nnvm::compiler::FTVMAlterOpLayout >("FTVMAlterOpLayout");
-
- const ShapeVector& shape_vec = src.GetAttr<ShapeVector>("shape");
- const DTypeVector& dtype_vec = src.GetAttr<DTypeVector>("dtype");
- const IndexedGraph& idx_graph = src.indexed_graph();
-
- std::vector<std::vector<Layout> > in_layouts_of_node(idx_graph.num_nodes());
- std::vector<std::vector<Layout> > out_layouts_of_node(idx_graph.num_nodes());
- std::unordered_map<const Node*, uint32_t> unchanged_nodes;
-
- if (src.HasAttr("layout")) {
- // record layouts so that LayoutTransform pass can fix layouts correctly,
- // e.g., conv2d can be replaced by some contrib implementation
- // whose layout is different from the original one
- // (which was imported from a model file).
- const auto& layouts = src.GetAttr<std::vector<Layout> >("layout");
- for (uint32_t nid = 0; nid < idx_graph.num_nodes(); ++nid) {
- const auto &inode = idx_graph[nid];
- // record input layouts for all nodes,
- // while replaced nodes will ignore the records here and have undefined input layouts.
- std::vector<Layout> in_layout;
- for (const auto& e : inode.inputs) {
- in_layout.emplace_back(layouts[idx_graph.entry_id(e)]);
- }
- in_layouts_of_node[nid] = in_layout;
-
- std::vector<Layout> out_layout;
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- out_layout.emplace_back(layouts[idx_graph.entry_id(nid, i)]);
- }
- out_layouts_of_node[nid] = out_layout;
- }
- }
-
- auto transform = [&](uint32_t nid,
- const NodePtr& n,
- std::vector<NodeEntry>* ret) {
- nnvm::compiler::FTVMAlterOpLayout fn_alter_op_layout =
- falter_op_layout.get(n->op(), nullptr);
- if (fn_alter_op_layout == nullptr) {
- // will restore the original input layouts later.
- unchanged_nodes[n.get()] = nid;
- return false;
- }
-
- // construct parameters for registered function
- std::vector<Symbol> op_inputs;
- tvm::Array<tvm::Tensor> tensor_infos;
- CHECK_EQ(n->num_inputs(), idx_graph[nid].inputs.size());
- for (uint32_t i = 0; i < n->num_inputs(); ++i) {
- const nnvm::NodeEntry& input = n->inputs[i];
- // input operator
- Symbol op_input;
- op_input.outputs.push_back(input);
- op_inputs.push_back(op_input);
-
- // input tinfo, extracted from the original graph
- // because that is where infer_shape & infer_type were applied.
- tvm::Array<tvm::Tensor> op_output_tinfos =
- GetTensorInfo(idx_graph, idx_graph[nid].inputs[i].node_id,
- shape_vec, dtype_vec);
- tensor_infos.push_back(op_output_tinfos[input.index]);
- }
- // call back the registered function to get a new operator.
- Symbol op;
- bool do_alter =
- fn_alter_op_layout(n->attrs, Symbol::CreateGroup(op_inputs), tensor_infos, &op);
-
- if (do_alter) {
- *ret = op.outputs;
- } else {
- // will restore the original input layouts later.
- unchanged_nodes[n.get()] = nid;
- }
- return do_alter;
- };
-
- Graph ret = nnvm::compiler::GraphTransform(src, transform);
-
- if (src.HasAttr("layout")) {
- // restore the layouts to return graph
- const auto& ret_idx = ret.indexed_graph();
- std::vector<Layout> ret_layouts(ret_idx.num_node_entries(), Layout::Undef());
- for (uint32_t nid = 0; nid < ret_idx.num_nodes(); ++nid) {
- const auto& inode = ret_idx[nid];
- if (unchanged_nodes.count(inode.source)) {
- const std::vector<Layout>& in_layouts =
- in_layouts_of_node[unchanged_nodes[inode.source]];
- for (uint32_t i = 0; i < inode.inputs.size(); ++i) {
- const auto& e = inode.inputs[i];
- ret_layouts[ret_idx.entry_id(e)] = in_layouts[i];
- }
- const std::vector<Layout>& out_layouts =
- out_layouts_of_node[unchanged_nodes[inode.source]];
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- ret_layouts[ret_idx.entry_id(nid, i)] = out_layouts[i];
- }
- }
- }
-
- // cannot call indexed_graph() before returning the original Graph,
- // thus create a new one.
- nnvm::Graph new_ret;
- new_ret.outputs = ret.outputs;
- new_ret.attrs["layout"] = std::make_shared<any>(std::move(ret_layouts));
- return new_ret;
- }
-
- return ret;
-}
-
-// register pass
-NNVM_REGISTER_PASS(AlterOpLayout)
-.set_body(AlterOpLayout)
-.set_change_graph(true);
-
-} // namespace
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file compile_engine.cc
- * \brief The compile engine.
- */
-#include <dmlc/common.h>
-#include <tvm/ir.h>
-#include <tvm/operation.h>
-#include <nnvm/graph.h>
-#include <nnvm/node.h>
-#include <nnvm/pass_functions.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <mutex>
-#include <tuple>
-#include <vector>
-#include <limits>
-#include <unordered_map>
-#include "graph_hash.h"
-#include "compile_engine.h"
-
-namespace nnvm {
-namespace compiler {
-
-using namespace tvm;
-
-/*!
- * \brief Get type flag from TVM Type
- *
- * \param type the tvm type.
- * \return the corresponding type flag
- */
-int GetTypeFlag(tvm::DataType type) {
- if (type == tvm::DataType::Float(32)) return 0;
- if (type == tvm::DataType::Float(64)) return 1;
- if (type == tvm::DataType::Float(16)) return 2;
- if (type == tvm::DataType::UInt(8)) return 3;
- if (type == tvm::DataType::Int(32)) return 4;
- if (type == tvm::DataType::Int(8)) return 5;
- if (type == tvm::DataType::Int(64)) return 6;
- if (type == tvm::DataType::Int(16)) return 7;
- if (type == tvm::DataType::UInt(16)) return 8;
- if (type == tvm::DataType::UInt(32)) return 9;
- if (type == tvm::DataType::UInt(64)) return 10;
- if (type == tvm::DataType::UInt(1)) return 11;
- LOG(FATAL) << "cannot convert " << type;
- return 0;
-}
-// convert from type flag to tvm type.
-DataType GetTVMType(int type_flag) {
- switch (type_flag) {
- case 0:
- return tvm::DataType::Float(32);
- case 1:
- return tvm::DataType::Float(64);
- case 2:
- return tvm::DataType::Float(16);
- case 3:
- return tvm::DataType::UInt(8);
- case 4:
- return tvm::DataType::Int(32);
- case 5:
- return tvm::DataType::Int(8);
- case 6:
- return tvm::DataType::Int(64);
- case 7:
- return tvm::DataType::Int(16);
- case 8:
- return tvm::DataType::UInt(16);
- case 9:
- return tvm::DataType::UInt(32);
- case 10:
- return tvm::DataType::UInt(64);
- case 11:
- return tvm::DataType::UInt(1);
- default:
- LOG(FATAL) << "unknown type_flag=" << type_flag;
- return DataType::Float(32);
- }
-}
-
-// internal compile engine
-class CompileEngine {
- public:
- static CompileEngine* Global() {
- static CompileEngine inst;
- return &inst;
- }
- // Lower a graph, possibly returning a cached op.
- GraphFunc Lower(Graph graph,
- const Array<tvm::Tensor>& inputs,
- const std::string& target,
- int master_idx) {
- GraphKey key = GraphKeyNode::make(graph, inputs, target);
- std::lock_guard<std::mutex> lock(mutex_);
- auto it = cache_.find(key);
- if (it != cache_.end()) {
- ++(it->second->use_count);
- return it->second->graph_func;
- }
- GraphFunc f = DoLower(key->graph, key->inputs, key->target, master_idx);
- auto n = tvm::make_node<GraphCacheEntryNode>();
- n->graph_func = f;
- n->use_count = 1;
- n->master_idx = master_idx;
- cache_[key] = GraphCacheEntry(n);
- return f;
- }
- // List all items in the cache.
- Array<NodeRef> ListCacheItems() {
- std::lock_guard<std::mutex> lock(mutex_);
- Array<NodeRef> items;
- for (auto& kv : cache_) {
- items.push_back(kv.first);
- auto n = tvm::make_node<GraphCacheEntryNode>(*(kv.second.operator->()));
- items.push_back(GraphCacheEntry(n));
- }
- return items;
- }
- // Find the function given graph key.
- GraphCacheEntry Find(const GraphKey& key) {
- std::lock_guard<std::mutex> lock(mutex_);
- auto it = cache_.find(key);
- if (it != cache_.end()) {
- return it->second;
- } else {
- return GraphCacheEntry();
- }
- }
- // Set the given function on given graph key.
- void Set(const GraphKey& key, GraphFunc func) {
- std::lock_guard<std::mutex> lock(mutex_);
- auto n = tvm::make_node<GraphCacheEntryNode>();
- n->graph_func = func;
- n->use_count = 1;
- cache_[key] = GraphCacheEntry(n);
- }
- // Clear the function cache.
- void Clear() {
- std::lock_guard<std::mutex> lock(mutex_);
- cache_.clear();
- }
-
- // get schedule and its args
- std::tuple<Schedule, Array<tvm::Tensor>, Graph>
- GetScheduleArgs(Graph graph,
- const Array<tvm::Tensor> &inputs,
- const std::string &target,
- int master_idx,
- std::string *readable_name,
- Array<tvm::Tensor> *outputs) {
- // shape, type
- static auto& fcompute =
- nnvm::Op::GetAttr<FTVMCompute>("FTVMCompute");
- static auto& fschedule =
- nnvm::Op::GetAttr<FTVMSchedule>("FTVMSchedule");
-
- std::vector<TShape> ishape;
- std::vector<int> idtype;
-
- for (const tvm::Tensor t : inputs) {
- std::vector<dim_t> shape;
- for (Expr v : t->shape) {
- CHECK(v.as<tvm::ir::IntImm>());
- shape.push_back(v.as<tvm::ir::IntImm>()->value);
- }
- ishape.emplace_back(TShape(shape.begin(), shape.end()));
- idtype.emplace_back(GetTypeFlag(t->dtype));
- }
- graph = pass::InferShape(graph, ishape);
- graph = pass::InferType(graph, idtype);
-
- const ShapeVector& shape_vec = graph.GetAttr<ShapeVector>("shape");
- const DTypeVector& dtype_vec = graph.GetAttr<DTypeVector>("dtype");
- const IndexedGraph& idx = graph.indexed_graph();
- CHECK_EQ(inputs.size(), idx.input_nodes().size());
-
- std::vector<tvm::Tensor> tensor_vec(idx.num_node_entries());
- for (size_t i = 0; i < idx.input_nodes().size(); ++i) {
- uint32_t nid = idx.input_nodes()[i];
- tensor_vec[idx.entry_id(nid, 0)] = inputs[i];
- }
-
- std::ostringstream readable_name_os;
- readable_name_os << "fuse";
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- Array<Tensor> op_inputs, out_info;
- readable_name_os << "_" << inode.source->op()->name;
- // input array
- for (const IndexedGraph::NodeEntry& e : inode.inputs) {
- const tvm::Tensor& t = tensor_vec[idx.entry_id(e)];
- CHECK(t.defined());
- op_inputs.push_back(t);
- }
- // output hint
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- Array<Expr> shape;
- for (int64_t x : shape_vec[idx.entry_id(nid, i)]) {
- CHECK_LE(x, static_cast<int64_t>(std::numeric_limits<int>::max()));
- shape.push_back(make_const(DataType::Int(32), x));
- }
- out_info.push_back(
- placeholder(shape,
- GetTVMType(dtype_vec[idx.entry_id(nid, i)])));
- }
- // get default
- Array<Tensor> out = fcompute[inode.source->op()](
- inode.source->attrs, op_inputs, out_info);
- CHECK_EQ(out.size(), inode.source->num_outputs());
-
- // check that output dimensions also match
- // This check is to make sure the NNVM operator shape inference matches the Compute result.
- // Missing this check may let the build pass but lead to runtime errors.
- for (uint32_t i = 0; i < out.size(); ++i) {
- CHECK_EQ(out[i].ndim(), out_info[i].ndim()) << inode.source->op()->name;
- tvm::Tensor inferred_tensor = out[i];
- tvm::Tensor computed_tensor = out_info[i];
- for (uint32_t j = 0; j < inferred_tensor->shape.size(); ++j) {
- if ((as_const_int(inferred_tensor->shape[j])) &&
- (as_const_int(computed_tensor->shape[j])))
- CHECK_EQ((*as_const_int(inferred_tensor->shape[j])),
- (*as_const_int(computed_tensor->shape[j]))) << inode.source->op()->name;
- }
- }
-
- // schedule on root node, and use master's schedule
- for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
- uint32_t eid = idx.entry_id(nid, index);
- tensor_vec[eid] = out[index];
- }
- }
- // Schedule on final output.
- Array<Tensor> all_args = inputs;
- Array<Tensor> outs;
- for (const IndexedGraph::NodeEntry& e : idx.outputs()) {
- const tvm::Tensor& t = tensor_vec[idx.entry_id(e)];
- CHECK(t.defined());
- outs.push_back(t);
- all_args.push_back(t);
- }
-
- Schedule sch = fschedule[idx[master_idx].source->op()](
- idx[master_idx].source->attrs, outs, target);
-
- // store extra return values
- if (readable_name != nullptr) {
- *readable_name = readable_name_os.str();
- }
- if (outputs != nullptr) {
- *outputs = outs;
- }
-
- return std::make_tuple(sch, all_args, graph);
- }
-
- // run the actual lowering process
- GraphFunc DoLower(Graph graph,
- const Array<tvm::Tensor>& inputs,
- const std::string& target,
- int master_idx) {
- std::string readable_name;
- Array<tvm::Tensor> all_args;
- Array<tvm::Tensor> outputs;
- Schedule sch;
-
- std::tie(sch, all_args, graph) = GetScheduleArgs(
- graph, inputs, target, master_idx,
- &readable_name, &outputs);
-
- auto gf = tvm::make_node<GraphFuncNode>();
- gf->target = target;
- gf->func_name = GetUniqeName(readable_name);
- gf->inputs = inputs;
- gf->outputs = outputs;
- static const PackedFunc& flower = GetPackedFunc("nnvm.compiler.lower");
- gf->funcs = flower(sch, all_args, gf->func_name, graph);
- return GraphFunc(gf);
- }
-
- private:
- // Get unique name
- std::string GetUniqeName(std::string name) {
- while (true) {
- auto it = name_map_.find(name);
- if (it == name_map_.end()) {
- name_map_[name] = 1;
- return name;
- } else {
- std::ostringstream os;
- os << name << "_" << it->second;
- ++(it->second);
- name = os.str();
- }
- }
- return name;
- }
-
- // global mutex
- std::mutex mutex_;
- // the name map
- std::unordered_map<std::string, int> name_map_;
- // the compiler cache
- std::unordered_map<GraphKey, GraphCacheEntry,
- GraphKeyHash, GraphKeyEqual> cache_;
-};
-
-GraphFunc GraphLower(Graph graph,
- const Array<tvm::Tensor>& inputs,
- const std::string& target,
- int master_idx) {
- return CompileEngine::Global()->Lower(
- graph, inputs, target, master_idx);
-}
-
-// Expose cache to front end
-TVM_REGISTER_GLOBAL("nnvm.compiler.ListCacheItems")
-.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) {
- *rv = CompileEngine::Global()->ListCacheItems();
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler.ClearCache")
-.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) {
- CompileEngine::Global()->Clear();
- });
-
-// NOTE: this involves graph lookup and can be slow
-TVM_REGISTER_GLOBAL("nnvm.compiler.GetCacheItem")
-.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) {
- *rv = CompileEngine::Global()->Find(args[0]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler.SetCacheItem")
-.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) {
- CompileEngine::Global()->Set(args[0], args[1]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler.GraphKeyGetGraph")
-.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) {
- *rv = args[0].operator GraphKey()->graph;
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler.MakeGraphKey")
-.set_body_typed(GraphKeyNode::make);
-
-// This can be used to extract workloads from nnvm compiler
-TVM_REGISTER_GLOBAL("nnvm.compiler.CacheItem2ScheduleArgs")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- Array<tvm::NodeRef> item = args[0];
-
- const GraphKeyNode *key = reinterpret_cast<const GraphKeyNode *>(item[0].get());
- const GraphCacheEntryNode *value = reinterpret_cast<const GraphCacheEntryNode *>(item[1].get());
-
- // extract arguments from cached item
- Graph graph = key->graph;
- const Array<tvm::Tensor> &inputs = key->inputs;
- std::string target = args[1];
- int master_idx = value->master_idx;
-
- Schedule sch;
- Array<tvm::Tensor> all_args;
- std::tie(sch, all_args, graph) =
- CompileEngine::Global()->GetScheduleArgs(
- graph, inputs, target, master_idx, nullptr, nullptr);
-
- Array<tvm::NodeRef> ret;
- ret.push_back(sch);
- ret.push_back(all_args);
- *rv = ret;
- });
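These registrations expose the compile-engine cache to Python through packed functions; a minimal sketch of calling them (assuming the nnvm compiler library is loaded so the globals exist):

    import tvm

    list_items = tvm.get_global_func("nnvm.compiler.ListCacheItems")
    clear_cache = tvm.get_global_func("nnvm.compiler.ClearCache")

    items = list_items()   # alternating GraphKey / GraphCacheEntry objects
    clear_cache()          # drop all cached lowered functions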
-
-TVM_REGISTER_NODE_TYPE(GraphFuncNode);
-TVM_REGISTER_NODE_TYPE(GraphCacheEntryNode);
-
-TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable)
-.set_dispatch<GraphFuncNode>([](const ObjectRef& ref, IRPrinter* p) {
- auto* op = static_cast<const GraphFuncNode*>(ref.get());
- p->stream << "GraphFunc(name=" << op->func_name
- << ", addr=" << op << ")";
-});
-
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file compile_engine.h
- * \brief Internal engine to compile a subgraph fragment and cache compilation.
- */
-#ifndef NNVM_COMPILER_COMPILE_ENGINE_H_
-#define NNVM_COMPILER_COMPILE_ENGINE_H_
-
-#include <nnvm/graph.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/tuple.h>
-#include <nnvm/pass.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/packed_func_ext.h>
-#include <tvm/runtime/packed_func.h>
-#include <tvm/operation.h>
-#include <tvm/lowered_func.h>
-#include <string>
-#include <utility>
-#include "graph_hash.h"
-
-namespace nnvm {
-namespace compiler {
-
-/*! \brief A TVM Node to represent compiled graph function */
-struct GraphFuncNode : public tvm::Node {
- /*! \brief Compiled target */
- std::string target;
- /*! \brief Function name */
- std::string func_name;
- /*! \brief The inputs to the function */
- tvm::Array<Tensor> inputs;
- /*! \brief The outputs to the function */
- tvm::Array<Tensor> outputs;
- /*! \brief The lowered functions */
- tvm::Array<tvm::LoweredFunc> funcs;
-
- void VisitAttrs(tvm::AttrVisitor* v) {
- v->Visit("target", &target);
- v->Visit("func_name", &func_name);
- v->Visit("inputs", &inputs);
- v->Visit("outputs", &outputs);
- v->Visit("funcs", &funcs);
- }
-
- static constexpr const char* _type_key = "GraphFunc";
- TVM_DECLARE_NODE_TYPE_INFO(GraphFuncNode, tvm::Node);
-};
-
-TVM_DEFINE_NODE_REF(GraphFunc, GraphFuncNode);
-
-/*! \brief Cache Entry in the graph */
-struct GraphCacheEntryNode : public tvm::Node {
- /*! \brief The graph function */
- GraphFunc graph_func;
- /*! \brief Usage statistics */
- int use_count{0};
- /*! \brief Index of the master node for calling schedule*/
- int master_idx;
-
- void VisitAttrs(tvm::AttrVisitor* v) {
- v->Visit("graph_func", &graph_func);
- v->Visit("use_count", &use_count);
- v->Visit("master_idx", &master_idx);
- }
- static constexpr const char* _type_key = "GraphCacheEntry";
- TVM_DECLARE_NODE_TYPE_INFO(GraphCacheEntryNode, tvm::Node);
-};
-
-class GraphCacheEntry : public ::tvm::NodeRef {
- public:
- GraphCacheEntry() {}
- explicit GraphCacheEntry(::tvm::NodePtr<::tvm::Node> n) : NodeRef(n) {}
- GraphCacheEntryNode* operator->() {
- return static_cast<GraphCacheEntryNode*>(get_mutable());
- }
- using ContainerType = GraphCacheEntryNode;
-};
-
-/*!
- * \brief Call compile engine to lower a graph with given inputs.
- *
- * \param graph The graph to be compiled
- * \param inputs The input specification.
- * \param target The build target
- * \param master_idx The index of master node for calling schedule
- *
- * \return func A lowered tvm function.
- */
-GraphFunc GraphLower(Graph graph,
- const Array<tvm::Tensor>& inputs,
- const std::string& target,
- int master_idx);
-
-/*!
- * \brief Get type flag from TVM Type
- *
- * \param type the tvm type
- * \return the corresponding type flag
- */
-int GetTypeFlag(tvm::DataType type);
-
-/*!
- * \brief Get TVM Type from type flag
- *
- * \param type_flag the type flag
- * \return corresponding TVM type
- */
-tvm::DataType GetTVMType(int type_flag);
-
-} // namespace compiler
-} // namespace nnvm
-
-#endif // NNVM_COMPILER_COMPILE_ENGINE_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file fold_scale_axis.cc
- * \brief Fold scaling parameters along an axis into the weight of conv/dense.
-*/
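The rewrite this pass performs follows from linearity of conv/dense in the weights; as a sketch of the multiply-consumer case, a per-output-channel scale s applied after conv2d can be folded into the weight:

    \mathrm{conv2d}(x, W)_{n,o,h,w} \cdot s_o = \mathrm{conv2d}(x, W')_{n,o,h,w},
    \qquad W'_{o,i,k_h,k_w} = s_o \, W_{o,i,k_h,k_w}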
-#include <nnvm/graph.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/pass.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include "pattern_util.h"
-#include "graph_transform.h"
-
-namespace nnvm {
-namespace compiler {
-
-enum FoldScaleKind {
- // No folding is applied
- kNone,
- // The folding decision is pending; folding may still happen along this chain.
- kPending,
- // The original operator that contains the scale.
- kProvider,
- // The final consumer of the axis scale using multiply,
- // likely a conv or dense operator.
- kMulConsumer,
- // The final consumer of the axis scale using division
- kDivConsumer
-};
-
-struct FoldChainInfo {
- // Entry kind
- FoldScaleKind kind{kNone};
- // The output axis to be folded
- int axis{0};
- // Source node in the fold chain
- int source{0};
-};
-
-// The entry of folding chains on which
-// we should perform folding on
-struct FoldChainEntry {
- // Fold information
- FoldChainInfo info;
- // Number of outgoing forks
- // in forward propagation.
- int fork_count{0};
- // Following field only used by provider.
- // The input index
- int fold_input_index{1};
- // The scale entry
- NodeEntry scale_entry;
-};
-
-// Try to propagate axis scaling backward,
-// given that we know the status of the current fold axis.
-// Returns whether the forward signal is consumed.
-using FScaleAxisBackward = std::function<
- bool(const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- const FoldChainInfo& out_info,
- std::vector<FoldChainInfo>* in_info)>;
-
-
-// Try to propagate axis scaling forward,
-// given that we know the status of one of its inputs is pending;
-// also updates the other inputs' info.
-// Returns whether the forward signal is consumed.
-using FScaleAxisForward = std::function<
- bool(const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- std::vector<FoldChainInfo>* in_info,
- FoldChainInfo* out_info)>;
-
-
-// Detect whether an axis-scaling pattern is present
-bool DetectScaleAxis(const IndexedGraph& idx,
- uint32_t nid,
- const ShapeVector& shape_vec,
- const std::vector<uint32_t>& ref_count,
- bool is_forward,
- std::vector<FoldChainEntry>* chain) {
- const IndexedGraph::Node& inode = idx[nid];
- static const Op* bcast_mul = Op::Get("broadcast_mul");
- static const Op* expand_dims = Op::Get("expand_dims");
- if (inode.source->op() != bcast_mul) return false;
- const TShape& oshape = shape_vec[idx.entry_id(nid, 0)];
- CHECK_NE(oshape.ndim(), 0);
- if (oshape.ndim() <= 1) return false;
- for (int i = 0; i < 2; ++i) {
- const IndexedGraph::NodeEntry& a = inode.inputs[i];
- const IndexedGraph::NodeEntry& b = inode.inputs[1 - i];
- std::pair<int, int> axis =
- MatchBroadcast1DAxis(oshape, shape_vec[idx.entry_id(a)]);
- if (axis.first != -1 &&
- shape_vec[idx.entry_id(b)] == oshape) {
- if (ref_count[a.node_id] != 1) return false;
- if (is_forward && ref_count[nid] != 1) return false;
- if (!is_forward && ref_count[b.node_id] != 1) return false;
- const IndexedGraph::Node& anode = idx[a.node_id];
- // mark the current entry.
- FoldChainEntry& e = (*chain)[nid];
- if (anode.source->is_variable()) {
- e.fold_input_index = 1 - i;
- e.scale_entry = inode.source->inputs[1 - i];
- } else if (anode.source->op() == expand_dims &&
- shape_vec[idx.entry_id(anode.source->inputs[0])].ndim() == 1) {
- e.fold_input_index = 1 - i;
- e.scale_entry = anode.source->inputs[0];
- } else {
- return false;
- }
- e.info.axis = axis.first;
- e.info.kind = kPending;
- e.info.source = nid;
- e.fork_count = 1;
- // In the backward message passing,
- // we need to eagerly pass it to the input.
- // In the forward message passing,
- // we will "pull" the message from the input.
- if (!is_forward) {
- FoldChainEntry& enext = (*chain)[b.node_id];
- enext.info.axis = e.info.axis;
- enext.info.kind = kPending;
- enext.info.source = nid;
- }
- return true;
- }
- }
- return false;
-}
-
-Graph FoldScaleAxis(Graph src) {
- // Operator pattern
- static auto& fbackward =
- nnvm::Op::GetAttr<FScaleAxisBackward>("FScaleAxisBackward");
- static auto& fforward =
- nnvm::Op::GetAttr<FScaleAxisForward>("FScaleAxisForward");
- const IndexedGraph& idx = src.indexed_graph();
- const ShapeVector& shape_vec = src.GetAttr<ShapeVector>("shape");
- std::vector<uint32_t> ref_count = GetNodeRefCounts(idx);
- std::vector<FoldChainEntry> bwd_chain(idx.num_nodes());
- std::vector<FoldChainEntry> fwd_chain(idx.num_nodes());
- // shape hint for the inference.
- std::vector<TShape> in_shape, out_shape;
-
- // perform backward folding.
- for (uint32_t i = idx.num_nodes(); i != 0; --i) {
- uint32_t nid = i - 1;
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- if (DetectScaleAxis(idx, nid, shape_vec,
- ref_count, false, &bwd_chain)) continue;
- if (bwd_chain[nid].info.kind != kPending) continue;
- // if referred to by multiple nodes, propagation is not possible
- if (ref_count[nid] != 1 || !fbackward.count(inode.source->op())) {
- bwd_chain[nid].info.kind = kNone; continue;
- }
- // get input shape and output shape.
- in_shape.clear(); out_shape.clear();
- for (const IndexedGraph::NodeEntry& e : inode.inputs) {
- in_shape.push_back(shape_vec[idx.entry_id(e)]);
- }
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- out_shape.push_back(shape_vec[idx.entry_id(nid, i)]);
- }
- std::vector<FoldChainInfo> in_info(in_shape.size(), FoldChainInfo());
- bool consumed = fbackward[inode.source->op()](
- inode.source->attrs,
- in_shape,
- out_shape,
- bwd_chain[nid].info,
- &in_info);
- CHECK_EQ(in_info.size(), in_shape.size());
- // propagate back.
- bool can_prop = true;
- for (size_t i = 0; i < in_info.size(); ++i) {
- const IndexedGraph::NodeEntry& e = inode.inputs[i];
- if (ref_count[e.node_id] != 1 ||
- idx[e.node_id].source->num_outputs() != 1) {
- can_prop = false; break;
- }
- }
- if (!can_prop) continue;
- for (size_t i = 0; i < in_info.size(); ++i) {
- const IndexedGraph::NodeEntry& e = inode.inputs[i];
- bwd_chain[e.node_id].info = in_info[i];
- }
- // mark as consumed by making the source the provider.
- if (consumed) {
- bwd_chain[bwd_chain[nid].info.source].info.kind = kProvider;
- }
- }
-
-
- // perform forward folding.
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- // skip scales that are already folded in backward.
- if (bwd_chain[nid].info.kind == kProvider) continue;
- if (DetectScaleAxis(idx, nid, shape_vec,
- ref_count, true, &fwd_chain)) continue;
- if (inode.source->num_outputs() != 1) continue;
- // Do state update
- // get input shape and output shape.
- std::vector<FoldChainInfo> in_info;
- FoldChainInfo out_info;
- int num_inpending = 0;
- in_shape.clear(); out_shape.clear();
- for (const IndexedGraph::NodeEntry& e : inode.inputs) {
- in_shape.push_back(shape_vec[idx.entry_id(e)]);
- // input information
- in_info.push_back(fwd_chain[e.node_id].info);
- if (fwd_chain[e.node_id].info.kind == kPending) {
- ++num_inpending;
- }
- }
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- out_shape.push_back(shape_vec[idx.entry_id(nid, i)]);
- }
- if (num_inpending != 1 ||
- !fforward.count(inode.source->op())) continue;
- bool consumed = fforward[inode.source->op()](
- inode.source->attrs,
- in_shape,
- out_shape,
- &in_info,
- &out_info);
- // update input info
- for (size_t i = 0; i < in_info.size(); ++i) {
- fwd_chain[inode.inputs[i].node_id].info = in_info[i];
- }
- if (consumed) {
- fwd_chain[nid].info = out_info;
- for (size_t i = 0; i < in_info.size(); ++i) {
- if (in_info[i].kind == kPending) {
- if (--fwd_chain[in_info[i].source].fork_count == 0) {
- fwd_chain[in_info[i].source].info.kind = kProvider;
- }
- }
- }
- } else {
- // not consumed: propagate the pending info if possible
- if (inode.source->num_outputs() == 1) {
- fwd_chain[nid].info = out_info;
- if (out_info.kind == kPending) {
- // When there are multiple references to the input,
- // every path has to be consumed.
- fwd_chain[out_info.source].fork_count += ref_count[nid] - 1;
- }
- }
- }
- }
-
- auto transform = [&](uint32_t nid, const NodePtr& n, std::vector<NodeEntry>* ret) {
- NodeEntry rvalue = NodeEntry{n, 0, 0};
- {
- // Backward chain
- const FoldChainEntry& e = bwd_chain[nid];
- if (e.info.kind == kMulConsumer &&
- bwd_chain[e.info.source].info.kind == kProvider) {
- const FoldChainEntry& se = bwd_chain[e.info.source];
- CHECK_EQ(n->num_outputs(), 1);
- NodeEntry scale = ExpandBiasToMatchAxis(
- se.scale_entry,
- shape_vec[idx.entry_id(nid, 0)].ndim(),
- shape_vec[idx.entry_id(se.scale_entry)].ndim(),
- e.info.axis);
- rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc",
- {rvalue, scale});
- } else if (e.info.kind == kProvider) {
- rvalue = n->inputs[e.fold_input_index];
- }
- }
- // Note that the value might get transformed twice if it
- // folds values from both the forward and backward chains.
- {
- // forward chain
- const FoldChainEntry& e = fwd_chain[nid];
- if (e.info.kind == kMulConsumer &&
- fwd_chain[e.info.source].info.kind == kProvider) {
- const FoldChainEntry& se = fwd_chain[e.info.source];
- CHECK_EQ(n->num_outputs(), 1);
- NodeEntry scale = ExpandBiasToMatchAxis(
- se.scale_entry,
- shape_vec[idx.entry_id(nid, 0)].ndim(),
- shape_vec[idx.entry_id(se.scale_entry)].ndim(),
- e.info.axis);
- rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc",
- {rvalue, scale});
- } else if (e.info.kind == kDivConsumer &&
- fwd_chain[e.info.source].info.kind == kProvider) {
- const FoldChainEntry& se = fwd_chain[e.info.source];
- CHECK_EQ(n->num_outputs(), 1);
- NodeEntry scale = ExpandBiasToMatchAxis(
- se.scale_entry,
- shape_vec[idx.entry_id(nid, 0)].ndim(),
- shape_vec[idx.entry_id(se.scale_entry)].ndim(),
- e.info.axis);
- rvalue = MakeNode("broadcast_div", n->attrs.name + "_sc",
- {rvalue, scale});
- } else if (e.info.kind == kProvider) {
- rvalue = n->inputs[e.fold_input_index];
- }
- }
- if (rvalue.node == n) {
- return false;
- } else {
- *ret = {rvalue};
- return true;
- }
- };
- return GraphTransform(src, transform);
-}
-
-NNVM_REGISTER_PASS(FoldScaleAxis)
-.set_body(FoldScaleAxis);
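
For orientation, here is a minimal sketch of how the pass registered above would be invoked from C++, mirroring how other passes are applied by name later in this change (for example "PlanMemory" in GraphCompile). The setup around it is an assumption for illustration only and assumes the same namespace context as the files above; FoldScaleAxis reads the "shape" attribute, so shapes must already be attached to the graph.

// Illustrative only: apply the pass registered above by name.
nnvm::Graph g;
g.outputs = net.outputs;                              // hypothetical symbol `net`
g.attrs["shape"] = std::make_shared<any>(shape_vec);  // shapes must be known up front
g = nnvm::ApplyPass(g, "FoldScaleAxis");              // folds detected scales away
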
-
-// property registration.
-bool ReluScaleAxisBackward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- const FoldChainInfo& out_info,
- std::vector<FoldChainInfo>* in_axis) {
- (*in_axis)[0] = out_info;
- return false;
-}
-
-bool ReluScaleAxisForward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- std::vector<FoldChainInfo>* in_info,
- FoldChainInfo* out_info) {
- *out_info = (*in_info)[0];
- return false;
-}
-
-NNVM_REGISTER_OP(relu)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", ReluScaleAxisBackward);
-
-NNVM_REGISTER_OP(leaky_relu)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", ReluScaleAxisBackward);
-
-NNVM_REGISTER_OP(relu)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", ReluScaleAxisForward);
-
-NNVM_REGISTER_OP(leaky_relu)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", ReluScaleAxisForward);
-
-// property registration.
-template <typename T>
-bool Pool2DBackward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- const FoldChainInfo& out_info,
- std::vector<FoldChainInfo>* in_axis) {
- const T& param = nnvm::get<T>(attrs.parsed);
- if (out_info.axis == 1 && param.layout == "NCHW") {
- (*in_axis)[0] = out_info;
- }
- return false;
-}
-
-template <typename T>
-bool Pool2DForward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- std::vector<FoldChainInfo>* in_info,
- FoldChainInfo* out_info) {
- const T& param = nnvm::get<T>(attrs.parsed);
- if ((*in_info)[0].axis == 1 && param.layout == "NCHW") {
- *out_info = (*in_info)[0];
- }
- return false;
-}
-
-NNVM_REGISTER_OP(max_pool2d)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", Pool2DBackward<top::MaxPool2DParam>);
-
-NNVM_REGISTER_OP(avg_pool2d)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", Pool2DBackward<top::AvgPool2DParam>);
-
-NNVM_REGISTER_OP(max_pool2d)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", Pool2DForward<top::MaxPool2DParam>);
-
-NNVM_REGISTER_OP(avg_pool2d)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", Pool2DForward<top::AvgPool2DParam>);
-
-
-
-bool BroadcastAddSubScaleAxisBackward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- const FoldChainInfo& out_info,
- std::vector<FoldChainInfo>* in_axis) {
- if (out_info.kind != kPending) return false;
- for (int i = 0; i < 2; ++i) {
- std::pair<int, int> m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]);
- if (m.second != -1 &&
- in_shape[i] == out_shape[0] &&
- m.first == out_info.axis) {
- (*in_axis)[i].kind = kPending;
- (*in_axis)[i].axis = out_info.axis;
- (*in_axis)[i].source = out_info.source;
- (*in_axis)[1 - i].kind = kMulConsumer;
- (*in_axis)[1 - i].axis = m.second;
- (*in_axis)[1 - i].source = out_info.source;
- return false;
- }
- }
- return false;
-}
-
-bool BroadcastAddSubScaleAxisForward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- std::vector<FoldChainInfo>* in_info,
- FoldChainInfo* out_info) {
- for (int i = 0; i < 2; ++i) {
- if ((*in_info)[i].kind == kPending) {
- std::pair<int, int> m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]);
- if (m.second != -1 &&
- in_shape[i] == out_shape[0] &&
- m.first == (*in_info)[i].axis) {
- out_info->kind = kPending;
- out_info->axis = m.first;
- out_info->source = (*in_info)[i].source;
- (*in_info)[1 - i].kind = kDivConsumer;
- (*in_info)[1 - i].axis = m.second;
- (*in_info)[1 - i].source = (*in_info)[i].source;
- return false;
- }
- }
- }
- return false;
-}
-
-NNVM_REGISTER_OP(broadcast_add)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward);
-
-NNVM_REGISTER_OP(broadcast_sub)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward);
-
-NNVM_REGISTER_OP(broadcast_add)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", BroadcastAddSubScaleAxisForward);
-
-NNVM_REGISTER_OP(broadcast_sub)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", BroadcastAddSubScaleAxisForward);
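
The four registrations above rest on a simple identity: a pending scale entering one operand of a broadcast add/sub can be carried past the node if the other operand is divided by the scale in the forward direction (kDivConsumer) or multiplied by it in the backward direction (kMulConsumer). A scalar sanity check, purely illustrative:

#include <cassert>

// forward:  (x * s) + b == (x + b / s) * s   -> the bias becomes a kDivConsumer
// backward: (x + b) * s == (x * s) + (b * s) -> the bias becomes a kMulConsumer
void CheckBroadcastFoldIdentities() {
  double x = 3.0, s = 2.0, b = 5.0;
  assert(x * s + b == (x + b / s) * s);
  assert((x + b) * s == x * s + b * s);
}
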
-
-bool Conv2DScaleAxisBackward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- const FoldChainInfo& out_info,
- std::vector<FoldChainInfo>* in_axis) {
- using top::Conv2DParam;
- const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
- if (out_info.kind != kPending) return false;
- // only optimize for kernel layout OIHW for now
- if (param.kernel_layout == "OIHW" && out_info.axis == 1) {
- (*in_axis)[1].kind = kMulConsumer;
- (*in_axis)[1].axis = 0;
- (*in_axis)[1].source = out_info.source;
- if (param.use_bias) {
- (*in_axis)[2].kind = kMulConsumer;
- (*in_axis)[2].axis = 0;
- (*in_axis)[2].source = out_info.source;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool Conv2DScaleAxisForward(
- const NodeAttrs& attrs,
- const std::vector<TShape>& in_shape,
- const std::vector<TShape>& out_shape,
- std::vector<FoldChainInfo>* in_info,
- FoldChainInfo* out_info) {
- using top::Conv2DParam;
- const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
- if ((*in_info)[0].kind != kPending) return false;
- // only optimize for the OIHW kernel layout for now
- if (param.kernel_layout == "OIHW" && (*in_info)[0].axis == 1) {
- // Check whether it is depthwise conv2d
- if (param.use_bias) {
- CHECK_EQ(in_shape.size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_shape.size(), 2U) << "Input:[data, weight]";
- }
-
- auto dshape = in_shape.at(0);
- CHECK_EQ(dshape.ndim(), 4U) << "Input data shape should be 4D";
-
- // TODO(FrozenGene): Currently, we don't support conv2d's groups != in channels.
- if (param.groups > 1 && dshape[1] != param.groups) {
- LOG(WARNING) << "FoldScaleAxis optimization doesn't support conv2d "
- << "with groups != in channels. We will skip FoldScaleAxis "
- << "optimization for this op.";
- return false;
- }
-
-
- // The input channel count equals groups, which means depthwise conv2d.
- bool is_depthwise_conv2d = (dshape[1] == param.groups);
-
- // If it is a depthwise convolution, the weight fold axis should be axis 0.
- // For example:
- // data shape [1,54,63,127], weights shape [54,1,3,3], scale shape [54].
- // A depthwise convolution's weights shape means the input channels have been divided
- // into `groups` parts. Here, we divide 54 channels into 54 parts, each of size 1.
- // The first dimension of the weights shape is the number of parts (it maps to the
- // input shape's channel axis), so for a depthwise convolution we should not fold
- // along the same axis as a traditional convolution (i.e. OIHW).
-
- // Background of this algorithm:
-
- // Original Graph:
- // Graph(%x,
- // %in_scale,
- // %weight,
- // %bias,
- // %out_scale) {
- // %1 = __add_scalar__(%x, scalar='1')
- // %3 = expand_dims(%in_scale, num_newaxis='2', axis='1')
- // %4 = broadcast_mul(%1, %3)
- // %7 = conv2d(%4, %weight, %bias, padding='(1, 1)', kernel_size='(3, 3)', channels='2')
- // %8 = relu(%7)
- // %10 = expand_dims(%out_scale, num_newaxis='2', axis='1')
- // %11 = broadcast_mul(%8, %10)
- // ret %11
- // }
-
- // Optimized Graph:
- // Graph(%x,
- // %weight,
- // %out_scale,
- // %in_scale,
- // %bias) {
- // %1 = __add_scalar__(%x, scalar='1')
- // %4 = expand_dims(%out_scale, num_newaxis='3', axis='1')
- // %5 = broadcast_mul(%weight, %4)
- // %7 = expand_dims(%in_scale, num_newaxis='2', axis='1')
- // %8 = broadcast_mul(%5, %7)
- // %10 = broadcast_mul(%bias, %out_scale)
- // %11 = conv2d(%1, %8, %10, padding='(1, 1)', kernel_size='(3, 3)', channels='2')
- // %12 = relu(%11)
- // ret %12
- // }
-
- // Conv2DScaleAxisForward needs in_scale; Conv2DScaleAxisBackward needs out_scale.
- // in_scale applies to the input data's channel (in_channel); out_scale applies to
- // conv2d's result, i.e. to the weight's output channel.
- // So by default Conv2DScaleAxisForward folds along axis 1 (the weights' input channel)
- // and Conv2DScaleAxisBackward folds along axis 0 (the weights' output channel).
- // Depthwise convolution is another story, as explained above.
- (*in_info)[1].kind = kMulConsumer;
- (*in_info)[1].axis = is_depthwise_conv2d ? 0 : 1;
- (*in_info)[1].source = (*in_info)[0].source;
- return true;
- } else {
- return false;
- }
-}
-
-NNVM_REGISTER_OP(conv2d)
-.set_attr<FScaleAxisBackward>("FScaleAxisBackward", Conv2DScaleAxisBackward);
-
-NNVM_REGISTER_OP(conv2d)
-.set_attr<FScaleAxisForward>("FScaleAxisForward", Conv2DScaleAxisForward);
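
To summarize the axis choice made by the forward rule, here is a small self-contained helper that mirrors the logic above; it is illustrative only, and the name PickWeightFoldAxis is hypothetical rather than part of the pass.

#include <string>

// Returns the OIHW weight axis along which an input-channel scale can be folded,
// or -1 when the case is not handled (grouped but non-depthwise conv2d).
inline int PickWeightFoldAxis(const std::string& kernel_layout,
                              int groups, int in_channels) {
  if (kernel_layout != "OIHW") return -1;
  if (groups > 1 && in_channels != groups) return -1;  // matches the warning above
  const bool is_depthwise_conv2d = (in_channels == groups);
  return is_depthwise_conv2d ? 0 : 1;                  // depthwise folds along axis 0
}
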
-
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_compile.cc
- * \brief Compile a graph. It lowers the graph nodes into low level IR.
- */
-
-#include <dmlc/parameter.h>
-#include <nnvm/compiler/packed_func_ext.h>
-#include <nnvm/graph.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/pass.h>
-#include <nnvm/pass_functions.h>
-#include <nnvm/tuple.h>
-#include <tvm/lowered_func.h>
-#include <tvm/runtime/packed_func.h>
-
-#include "compile_engine.h"
-#include "graph_fuse.h"
-#include "graph_runtime.h"
-#include "pattern_util.h"
-
-namespace nnvm {
-namespace compiler {
-
-using namespace tvm;
-
-// Decorate the result of PlanMemory
-// This function does two things:
-// - Give separate memory to each variable.
- // - Tie the memory of output/lhs in an assign node properly
- //   so that executing assign has the intended side effect.
-nnvm::Graph DecorateMemoryPlan(
- nnvm::Graph g,
- const std::vector<int>& assign_flag) {
- const IndexedGraph& idx = g.indexed_graph();
- StorageVector storage_vec = g.MoveCopyAttr<StorageVector>("storage_id");
- g.attrs.erase("storage_allocated_bytes");
- g.attrs.erase("storage_inplace_index");
- size_t num_not_allocated = g.MoveCopyAttr<size_t>(
- "storage_num_not_allocated");
- CHECK_EQ(num_not_allocated, 0U)
- << "Can only build inference graph with all statically allocated memory";
-
- // Reassign variable ids so that they are all distinct.
- int max_id = 0;
- for (size_t i = 0; i < storage_vec.size(); ++i) {
- max_id = std::max(storage_vec[i] + 1, max_id);
- }
- for (uint32_t nid : idx.input_nodes()) {
- storage_vec[idx.entry_id(nid, 0)] = max_id++;
- }
- // Tie up the assign node storage properly.
- for (uint32_t nid = 0 ; nid < idx.num_nodes(); ++nid) {
- if (assign_flag[nid] == 0) continue;
- const auto& inode = idx[nid];
- int var_storage_id = storage_vec[idx.entry_id(inode.inputs[0])];
- storage_vec[idx.entry_id(nid, 0)] = var_storage_id;
-
- if (assign_flag[nid] == 2) {
- storage_vec[idx.entry_id(inode.inputs[1])] = var_storage_id;
- }
- }
- g.attrs["storage_id"] = std::make_shared<any>(std::move(storage_vec));
- return g;
-}
-
-nnvm::Graph GraphCompile(const nnvm::Graph& g) {
- // Get attributes from the graph.
- const ShapeVector& shape_vec = g.GetAttr<ShapeVector>("shape");
- const DTypeVector& dtype_vec = g.GetAttr<DTypeVector>("dtype");
- const GroupVec& group_vec = g.GetAttr<GroupVec>("group_root");
- const MasterVec& master_vec = g.GetAttr<MasterVec>("group_master");
- const PatternVec& pattern_vec = g.GetAttr<PatternVec>("pattern");
-
- CHECK(g.HasAttr("fused_entry")) << "Fusion hasn't been applied yet.";
- FuseEntryVec fuse_entries = g.GetAttr<FuseEntryVec>("fused_entry");
-
- std::string target = g.GetAttr<std::string>("target");
- std::string target_host;
-
- if (g.HasAttr("target_host")) {
- target_host = g.GetAttr<std::string>("target_host");
- }
- // Specially handle assign.
- const nnvm::Op* assign_op = nnvm::Op::Get("_assign");
-
- // Start lowering.
- Array<tvm::LoweredFunc> func_list;
- std::unordered_set<const tvm::Node*> func_set;
- const IndexedGraph& idx = g.indexed_graph();
-
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- int root_id = group_vec[nid];
- if (static_cast<int>(nid) != root_id) continue;
- int master = master_vec[root_id];
- FuseEntry& fe = fuse_entries[root_id];
-
- const IndexedGraph& subidx = fe.subgraph.indexed_graph();
- CHECK_EQ(subidx.input_nodes().size(), fe.imap.size());
- CHECK_EQ(subidx.input_nodes().size(), fe.input_info.size());
-
- Array<Tensor> inputs;
- for (uint32_t sub_input_id : subidx.input_nodes()) {
- auto it = fe.input_info.find(subidx[sub_input_id].source);
- inputs.push_back(it->second);
- }
- // Find master idx in the subgraph.
- int sub_master_idx = -1;
- for (uint32_t i = 0; i < subidx.num_nodes(); i++) {
- if (subidx[i].source->op() == idx[master].source->op()) {
- sub_master_idx = i;
- break;
- }
- }
- CHECK_NE(sub_master_idx, -1) << "Master node not found in the subgraph.";
- fe.compiled_func = GraphLower(fe.subgraph, inputs, target, sub_master_idx);
- for (LoweredFunc f : fe.compiled_func->funcs) {
- if (!func_set.count(f.get())) {
- func_set.insert(f.get());
- func_list.push_back(f);
- }
- }
- }
-
- const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op");
-
- std::unordered_map<uint32_t, nnvm::NodePtr> old_new;
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) {
- // Only copy name since that is sufficient.
- nnvm::NodePtr np = nnvm::Node::Create();
- np->attrs.name = inode.source->attrs.name;
- old_new[nid] = np;
- continue;
- }
- int root_id = group_vec[nid];
- if (static_cast<int>(nid) != root_id) continue;
-
- // Handle normal op
- FuseEntry& fe = fuse_entries[root_id];
- const IndexedGraph& subidx = fe.subgraph.indexed_graph();
- nnvm::NodePtr np = nnvm::Node::Create();
- np->attrs.op = tvm_op;
- np->attrs.name = inode.source->attrs.name;
- TVMOpParam param;
- param.func_name = fe.compiled_func->func_name;
- param.num_inputs = static_cast<uint32_t>(fe.imap.size());
- param.num_outputs = static_cast<uint32_t>(fe.subgraph.outputs.size());
- param.flatten_data = fe.flatten_data;
- param.UpdateDict(&(np->attrs.dict));
- np->attrs.parsed = std::move(param);
-
- for (uint32_t sub_input_id : subidx.input_nodes()) {
- // Need to make sure the subgraph input order is consistent with the order of
- // the graph inputs.
- auto rit = fe.reverse_imap.find(subidx[sub_input_id].source);
- CHECK(rit != fe.reverse_imap.end());
- const IndexedGraph::NodeEntry& e = rit->second;
- auto it = old_new.find(e.node_id);
- CHECK(it != old_new.end())
- << "cannot find node_id=" << e.node_id;
- np->inputs.emplace_back(
- nnvm::NodeEntry{it->second, e.index, e.version});
- }
- for (const uint32_t node_id : inode.control_deps) {
- auto it = old_new.find(node_id);
- CHECK(it != old_new.end());
- np->control_deps.emplace_back(it->second);
- }
- old_new[nid] = np;
- }
- nnvm::Graph ret;
- for (const auto& e : idx.outputs()) {
- auto it = old_new.find(group_vec[e.node_id]);
- CHECK(it != old_new.end())
- << "cannot find node_id=" << e.node_id;
- ret.outputs.emplace_back(
- nnvm::NodeEntry{it->second, e.index, e.version});
- }
-
- // Reference counter of each op node.
- // For now, always store the result when an op is referred to more than once.
- std::vector<uint32_t> ref_count = GetNodeRefCounts(idx);
- for (const auto& e : idx.outputs()) {
- // This line will realize all the outputs.
- ref_count[e.node_id] += 1;
- }
-
- const IndexedGraph& new_idx = ret.indexed_graph();
-
- // Handling assign:
- //
- // assign is a special operator that mutates the variable.
- // Currently assign is implemented as output = copy(input[1]).
- // Then we run DecorateMemoryPlan to force
- // output.storage = input[0].storage
- //
- std::vector<int> assign_flag(new_idx.num_nodes(), 0);
- ShapeVector new_shape_vec = ShapeVector(new_idx.num_node_entries(), TShape());
- DTypeVector new_dtype_vec = DTypeVector(new_idx.num_node_entries());
- std::vector<std::string> new_dltype_vec(new_idx.num_node_entries());
-
- for (const auto& kv : old_new) {
- uint32_t nid = kv.first;
- const auto& inode = idx[nid];
- uint32_t new_nid = new_idx.node_id(kv.second.get());
- if (inode.source->op() == assign_op) {
- // Check if rhs of assign can be computed inplace.
- // If yes, we can simply set that memory to be assign target
- // and change assign to nop.
- const IndexedGraph::NodeEntry& rhs = inode.inputs[1];
- if (ref_count[rhs.node_id] <= 1 &&
- !(idx[rhs.node_id].source->is_variable()) &&
- pattern_vec[group_vec[rhs.node_id]] <= kBroadcast) {
- assign_flag[new_nid] = 2;
- TVMOpParam& param = dmlc::get<TVMOpParam>(kv.second->attrs.parsed);
- param.func_name = "__nop";
- param.UpdateDict(&(kv.second->attrs.dict));
- } else {
- assign_flag[new_nid] = 1;
- }
- }
- for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
- uint32_t new_eid = new_idx.entry_id(new_idx.node_id(kv.second.get()), i);
- uint32_t old_eid = idx.entry_id(nid, i);
- new_shape_vec[new_eid] = shape_vec[old_eid];
- new_dtype_vec[new_eid] = dtype_vec[old_eid];
- new_dltype_vec[new_eid] = tvm::runtime::TVMType2String(
- GetDLType(dtype_vec[old_eid]));
- }
- }
- ret.attrs["shape"] = std::make_shared<any>(std::move(new_shape_vec));
- ret.attrs["dtype"] = std::make_shared<any>(std::move(new_dtype_vec));
- ret.attrs["dltype"] = std::make_shared<any>(std::move(new_dltype_vec));
-
- // Setup module
- static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
- tvm::runtime::Module module = fbuild(func_list, target, target_host);
- ret.attrs["module"] = std::make_shared<any>(std::move(module));
- ret = nnvm::ApplyPass(ret, "PlanMemory");
- ret = DecorateMemoryPlan(ret, assign_flag);
- return ret;
-}
-
-NNVM_REGISTER_PASS(GraphCompile)
- .set_body(GraphCompile)
- .depend_graph_attr("shape")
- .depend_graph_attr("dtype")
- .depend_graph_attr("fused_entry")
- .depend_graph_attr("group_root")
- .depend_graph_attr("pattern")
- .depend_graph_attr("group_master");
-
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_fuse.cc
- * \brief Fuse the operators together.
- */
-#include <dmlc/parameter.h>
-#include <nnvm/compiler/packed_func_ext.h>
-#include <nnvm/graph.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/pass.h>
-#include <nnvm/pass_functions.h>
-#include <nnvm/tuple.h>
-#include <tvm/lowered_func.h>
-#include <tvm/runtime/packed_func.h>
-#include <memory>
-#include <utility>
-#include <limits>
-#include <unordered_map>
-
-#include "graph_fuse.h"
-#include "graph_runtime.h"
-#include "pattern_util.h"
-
-namespace nnvm {
-namespace compiler {
-using namespace tvm;
-
-// Partition the graph into segments.
-// Each segment will be compiled into one operator.
-// Also mark the properties of each segment.
-nnvm::Graph GraphFindFusibleGroups(nnvm::Graph g) {
- const IndexedGraph& idx = g.indexed_graph();
- int opt_level = 2;
- if (g.attrs.count("opt_level") != 0) {
- opt_level = g.MoveCopyAttr<int>("opt_level");
- }
-
- // Get attributes from the graph
- const ShapeVector& shape_vec = g.GetAttr<ShapeVector>("shape");
-
- // Reference counter of each op node
- // For now, always store the result when an op is referred to more than once.
- std::vector<uint32_t> ref_count = GetNodeRefCounts(idx);
- for (const auto& e : idx.outputs()) {
- // this line will realize all the outputs
- ref_count[e.node_id] += 1;
- }
- // Pattern for the subgraph
- PatternVec pattern_vec(idx.num_nodes(), kOpaque);
- // Whether node can be fused to parent.
- std::vector<FuseRule> fuse_vec(idx.num_nodes(), FuseRule::kUknown);
- // Master node id of fusion segment.
- std::vector<int> master_vec(idx.num_nodes(), -1);
- // Operator pattern
- static auto& op_pattern = nnvm::Op::GetAttr<TOpPattern>("TOpPattern");
-
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) {
- fuse_vec[nid] = FuseRule::kRealize; continue;
- }
- TOpPattern pt = op_pattern.get(inode.source->op(), kOpaque);
-
- if (pt <= kBroadcast) {
- // Check if we can fuse to the master.
- int chosen_master = -1;
- bool ewise = inode.source->num_outputs() == 1;
- bool mark_as_injective = false;
- for (const auto& e : inode.inputs) {
- if (fuse_vec[e.node_id] == FuseRule::kUknown) {
- TOpPattern ipt = pattern_vec[e.node_id];
- if (ipt != kElemWise) ewise = false;
- if (ipt <= kBroadcast) {
- fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
- } else if (ipt == kInjective) {
- fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
- mark_as_injective = true;
- } else if (ipt == kOutEWiseFusable &&
- chosen_master == -1 &&
- shape_vec[idx.entry_id(nid, 0)] == shape_vec[idx.entry_id(e)]) {
- chosen_master = master_vec[e.node_id];
- fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
- } else {
- fuse_vec[e.node_id] = FuseRule::kRealize;
- }
- }
- if (ewise) {
- if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) {
- ewise = false;
- }
- }
- }
- master_vec[nid] = chosen_master;
- if (chosen_master != -1) {
- pt = kOutEWiseFusable;
- } else if (mark_as_injective) {
- pt = kInjective;
- } else {
- pt = ewise ? kElemWise : kBroadcast;
- }
- } else if (pt == kInjective || pt == kCommReduce) {
- // Fuse to the comm reduce or injective
- for (const auto& e : inode.inputs) {
- if (fuse_vec[e.node_id] == FuseRule::kUknown) {
- TOpPattern ipt = pattern_vec[e.node_id];
- if (ipt <= kInjective) {
- fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
- } else {
- fuse_vec[e.node_id] = FuseRule::kRealize;
- }
- }
- }
- if (pt == kCommReduce) {
- master_vec[nid] = nid;
- }
- } else {
- // Realize
- master_vec[nid] = nid;
- for (const auto& e : inode.inputs) {
- if (fuse_vec[e.node_id] == FuseRule::kUknown) {
- fuse_vec[e.node_id] = FuseRule::kRealize;
- if (master_vec[e.node_id] == -1) {
- master_vec[e.node_id] = e.node_id;
- }
- }
- }
- }
-
- pattern_vec[nid] = pt;
- if (ref_count[nid] > 1 || opt_level < 1) {
- fuse_vec[nid] = FuseRule::kRealize;
- if (master_vec[nid] == -1) {
- master_vec[nid] = nid;
- }
- }
- }
-
- // Point to the group root id of each node.
- GroupVec group_vec(idx.num_nodes(), -1);
- std::vector<std::vector<uint32_t> > node_ids_per_group(idx.num_nodes());
- for (uint32_t i = idx.num_nodes(); i != 0; --i) {
- uint32_t nid = i - 1;
- const auto& inode = idx[nid];
- bool is_root = false;
- if (group_vec[nid] == -1) {
- group_vec[nid] = nid;
- node_ids_per_group[nid].push_back(nid);
- is_root = true;
- }
-
- // Check if injective op and out_ewise_fusable op (e.g. conv2d) are in the same group.
- bool parent_out_ewise = false;
- bool parent_injective = false;
- for (const auto& e : inode.inputs) {
- if (fuse_vec[e.node_id] != FuseRule::kFuseToMaster) continue;
- TOpPattern pt = pattern_vec[e.node_id];
- if (pt == kOutEWiseFusable) {
- parent_out_ewise = true;
- } else if (pt == kInjective) {
- parent_injective = true;
- }
- }
- // Change the master node from out_ewise_fusable op to itself
- if (parent_injective && parent_out_ewise) {
- master_vec[nid] = nid;
- if (!is_root) {
- // Children nodes in the same group might be pointing to a master node in a different group.
- for (uint32_t j : node_ids_per_group[group_vec[nid]]) {
- master_vec[j] = nid;
- }
- }
- }
-
- // Propagate the group id.
- for (const auto& e : inode.inputs) {
- TOpPattern pt = pattern_vec[e.node_id];
- if (parent_out_ewise && parent_injective) {
- if (pt == kOutEWiseFusable) {
- continue; // Do not fuse out_ewise_fusable op
- } else if (pt == kInjective) {
- master_vec[e.node_id] = nid;
- }
- }
- if (fuse_vec[e.node_id] == FuseRule::kFuseToMaster) {
- CHECK(group_vec[e.node_id] == -1 ||
- group_vec[e.node_id] == group_vec[nid]);
- group_vec[e.node_id] = group_vec[nid];
- node_ids_per_group[group_vec[nid]].push_back(e.node_id);
- }
- }
- }
-
- /*
- The above algorithm will not fuse a node whose output is fed to more than one
- child node. This is because, in general, it does not make sense to fuse multiple
- child branches with their parent, as in the following example.
-
- conv2d
- / | \
- / | \
- op op op
- | | |
- | | |
-
- However, when all child branches meet at a certain node, there is a possibility for
- further operator fusion. For example, all nodes in the following subgraph can be fused
- into a single node if the three 'in-between' nodes and the bottom node are all
- element-wise operations.
-
- conv2d
- / | \
- / | \
- op op op
- \ | /
- \ | /
- elemwise add
- |
-
- This pattern is not uncommon. For example, it arises when a conv2d op is followed by an
- exponential linear unit. If bias add and batch normalization are also present, they can be fused as well.
-
- In fact, the above fusion algorithm already fuses the three in-between nodes and the element-wise
- add node in the figure above. The following code fuses the conv2d node with the already
- fused child nodes. The following patterns are supported:
-
- * Any number of child nodes from the top node.
- * The path from the top node to the bottom node can contain any number of element-wise ops.
-
- The only restriction is that in-between nodes cannot have more than one child.
-
- The overview of the algorithm below is as follows:
-
- 1. Check if all child nodes are fused into a single op by the existing fusion algorithm.
- 2. Fuse the parent node to the child nodes, and update its group id to be the children's group id.
- 3. If the parent node originally belongs to another group (for example, conv + batch norm),
- propagate the new group id to the grandparent and upward.
- */
- if (opt_level >= 1) {
- std::vector<std::vector<uint32_t> > children_group_ids(idx.num_nodes());
- for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- CHECK_NE(group_vec[nid], -1);
- if (inode.inputs.size() != 1) continue;
- const uint32_t parent_nid = inode.inputs[0].node_id;
- // if parent node has more than one child, record each child's group id.
- if (ref_count[parent_nid] > 1) children_group_ids[parent_nid].push_back(group_vec[nid]);
- }
-
- std::vector<int> new_group_id(idx.num_nodes(), -1);
- for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) {
- if (new_group_id[group_vec[nid]] != -1) {
- // propagate new group id from child
- group_vec[nid] = new_group_id[group_vec[nid]];
- }
- TOpPattern pt = op_pattern.get(idx[nid].source->op(), kOpaque);
- if (pt == kOpaque) continue;
- const auto& group_ids = children_group_ids[nid];
- if (group_ids.size() <= 1) continue;
- const uint32_t child_group_id = group_ids[0];
- const auto& children_node_ids = node_ids_per_group[child_group_id];
-
- auto is_same_group_id = [child_group_id](uint32_t id) {
- return id == child_group_id;
- };
- auto is_fusible_pattern = [&idx](uint32_t child_nid) {
- TOpPattern child_pt = op_pattern.get(idx[child_nid].source->op(), kOpaque);
- return child_pt <= kBroadcast;
- };
- // fuse this node with children if
- // all children belong to the same group and
- // all nodes in the group are element wise or broadcast op.
- const bool can_be_fused = std::all_of(group_ids.begin(), group_ids.end(), is_same_group_id) &&
- std::all_of(children_node_ids.begin(), children_node_ids.end(), is_fusible_pattern);
-
- if (can_be_fused) {
- new_group_id[group_vec[nid]] = child_group_id;
- group_vec[nid] = child_group_id;
- for (uint32_t nid2 : node_ids_per_group[child_group_id]) {
- pattern_vec[nid2] = pattern_vec[nid];
- master_vec[nid2] = master_vec[nid];
- }
- }
- }
- }
-
- g.attrs["group_root"] = std::make_shared<any>(std::move(group_vec));
- g.attrs["group_master"] = std::make_shared<any>(std::move(master_vec));
- g.attrs["pattern"] = std::make_shared<any>(std::move(pattern_vec));
- return g;
-}
-
-NNVM_REGISTER_PASS(GraphFindFusibleGroups)
-.set_body(GraphFindFusibleGroups)
-.depend_graph_attr("shape")
-.depend_graph_attr("dtype");
-
-// Fuse the partitioned graph into segments.
-// Create a new graph with fused nodes.
-// Also inherit attribute shape, dltype from the previous graph.
-nnvm::Graph GraphFuse(nnvm::Graph g) {
- CHECK(g.HasAttr("group_root") && g.HasAttr("pattern"))
- << "GraphFindFusibleGroups pass hasn't been applied yet.";
-
- const IndexedGraph& idx = g.indexed_graph();
- // Get attributes from the graph
- const ShapeVector& shape_vec = g.GetAttr<ShapeVector>("shape");
- const DTypeVector& dtype_vec = g.GetAttr<DTypeVector>("dtype");
- const GroupVec& group_vec = g.GetAttr<GroupVec>("group_root");
- const PatternVec& pattern_vec = g.GetAttr<PatternVec>("pattern");
-
- // Specially handle assign op.
- const nnvm::Op* assign_op = nnvm::Op::Get("_assign");
-
- FuseEntryVec fuse_entries(idx.num_nodes());
- // Setup inputs and placeholder.
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- CHECK_GE(group_vec[nid], 0);
- int root_id = group_vec[nid];
- FuseEntry& fe = fuse_entries[root_id];
- fe.flatten_data = (pattern_vec[root_id] == kElemWise ||
- inode.source->op() == assign_op);
- for (const auto& e : inode.inputs) {
- if (group_vec[e.node_id] != root_id && fe.imap.count(e) == 0) {
- Array<Expr> shape;
- if (fe.flatten_data) {
- // Element-wise ops support flattened data.
- int64_t prod = 1;
- for (int64_t x : shape_vec[idx.entry_id(e)]) {
- prod *= x;
- }
- CHECK_LE(prod, static_cast<int64_t>(std::numeric_limits<int>::max()));
- shape.push_back(make_const(DataType::Int(32), prod));
- } else {
- for (int64_t x : shape_vec[idx.entry_id(e)]) {
- CHECK_LE(x, static_cast<int64_t>(std::numeric_limits<int>::max()));
- shape.push_back(make_const(DataType::Int(32), x));
- }
- }
- std::ostringstream os_name;
- os_name << "input" << fe.imap.size();
- Tensor data = placeholder(
- shape, DataType(GetDLType(dtype_vec[idx.entry_id(e)])),
- os_name.str());
- NodeEntry garg = Symbol::CreateVariable(os_name.str()).outputs[0];
- fe.imap[e] = garg;
- fe.reverse_imap[garg.node.get()] = e;
- fe.input_info[garg.node.get()] = std::move(data);
- }
- }
- }
-
- // Setup the Subgraph
- std::vector<NodeEntry> subgraph_vec(idx.num_node_entries());
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- int root_id = group_vec[nid];
- FuseEntry& fe = fuse_entries[root_id];
- // Create a subgraph node.
- NodePtr gnode = Node::Create();
- gnode->attrs = inode.source->attrs;
- // Set input entries for the subgraph node.
- for (const auto& e : inode.inputs) {
- if (group_vec[e.node_id] != root_id) {
- auto it = fe.imap.find(e);
- CHECK(it != fe.imap.end());
- gnode->inputs.push_back(it->second);
- } else {
- const NodeEntry& ne = subgraph_vec[idx.entry_id(e)];
- CHECK(!idx[e.node_id].source->is_variable());
- CHECK(ne.node != nullptr);
- gnode->inputs.push_back(ne);
- }
- }
- // Schedule on the root node and use the master's schedule
- if (static_cast<int>(nid) != root_id) {
- for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
- uint32_t eid = idx.entry_id(nid, index);
- subgraph_vec[eid] = NodeEntry{gnode, index, 0};
- }
- } else {
- for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
- fe.subgraph.outputs.push_back(NodeEntry{gnode, index, 0});
- }
- }
- }
- g.attrs["fused_entry"] = std::make_shared<any>(std::move(fuse_entries));
- return g;
-}
-
-NNVM_REGISTER_PASS(GraphFuse)
- .set_body(GraphFuse)
- .set_change_graph(true)
- .provide_graph_attr("fused_entry")
- .depend_graph_attr("shape")
- .depend_graph_attr("dtype")
- .depend_graph_attr("group_root")
- .depend_graph_attr("group_master");
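
Together with GraphCompile above, these passes form a small lowering pipeline. A hedged sketch of the intended ordering, applied by name as the pass registrations allow; the attribute setup ("shape", "dtype", "target") is abbreviated here and assumed to have happened already.

// Illustrative ordering only; each pass reads the graph attributes listed in
// its depend_graph_attr declarations above.
nnvm::Graph g;  // assume "shape", "dtype" and "target" attributes are already set
g = nnvm::ApplyPass(g, "GraphFindFusibleGroups");  // provides group_root / group_master / pattern
g = nnvm::ApplyPass(g, "GraphFuse");               // provides fused_entry
g = nnvm::ApplyPass(g, "GraphCompile");            // lowers fused groups, attaches "module"
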
-
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_fuse.h
- * \brief Definition of structs used by graph fusion
-*/
-#ifndef NNVM_COMPILER_GRAPH_FUSE_H_
-#define NNVM_COMPILER_GRAPH_FUSE_H_
-
-#include <nnvm/graph.h>
-#include <vector>
-#include <unordered_map>
-
-#include "compile_engine.h"
-
-namespace nnvm {
-namespace compiler {
-
-// The single fuse rule.
-enum class FuseRule {
- kUknown,
- kFuseToMaster,
- kRealize
-};
-
-/*!
- * \brief Get DLDataType from dtype flag.
- *
- * \param type_flag The data type flag
- * \return corresponding DLDataType
- */
-inline DLDataType GetDLType(int type_flag) {
- return GetTVMType(type_flag);
-}
-
-struct INodeEntryHash {
- size_t operator()(const IndexedGraph::NodeEntry& e) const {
- return e.node_id;
- }
-};
-
-struct INodeEntryEqual {
- size_t operator()(const IndexedGraph::NodeEntry &a,
- const IndexedGraph::NodeEntry &b) const {
- return a.node_id == b.node_id && a.index == b.index;
- }
-};
-
-// Auxiliary data structure for representing fused op.
-struct FuseEntry {
- // Subgraph of the fragment
- Graph subgraph;
- // The input map
- std::unordered_map<IndexedGraph::NodeEntry, nnvm::NodeEntry, INodeEntryHash,
- INodeEntryEqual>
- imap;
- // Reverse map to the old input entry
- std::unordered_map<const Node *, IndexedGraph::NodeEntry> reverse_imap;
- // TVM Placeholder for inputs
- std::unordered_map<const Node *, Tensor> input_info;
- // Whether we can flatten data
- bool flatten_data;
- // The corresponding function.
- GraphFunc compiled_func;
-};
-
-// GroupVec stores the root node ids of the fused nodes.
-using GroupVec = std::vector<int>;
-
-// MasterVec stores master node ids of fused groups.
-using MasterVec = std::vector<int>;
-
-// FuseEntryVec stores fused entries.
-using FuseEntryVec = std::vector<FuseEntry>;
-
-// PatternVec stores operator patterns.
-using PatternVec = std::vector<TOpPattern>;
-
-} // namespace compiler
-} // namespace nnvm
-
-#endif // NNVM_COMPILER_GRAPH_FUSE_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_deep_compare.cc
- * \brief Deep compare two graph structure
- */
-#include <dmlc/common.h>
-#include <nnvm/graph.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/packed_func_ext.h>
-#include <tvm/ir.h>
-#include <tvm/runtime/packed_func.h>
-#include <functional>
-#include <vector>
-#include <utility>
-#include <algorithm>
-#include "node_attr.h"
-#include "graph_hash.h"
-
-namespace nnvm {
-namespace compiler {
-
-using namespace tvm;
-using tvm::ir::IntImm;
-
-size_t HashPlaceHolder(const Tensor& t) {
- size_t key = t->shape.size();
- key = dmlc::HashCombine(key, (t->dtype.code() << 8) | t->dtype.bits());
- for (Expr s : t->shape) {
- if (const IntImm* op = s.as<IntImm>()) {
- key = dmlc::HashCombine(key, op->value);
- }
- }
- return key;
-}
-
-bool PlaceHolderEqual(const Tensor& a, const Tensor& b) {
- if (a->shape.size() != b->shape.size()) return false;
- if (a->dtype != b->dtype) return false;
- for (size_t i = 0; i < a->shape.size(); ++i) {
- const IntImm* a_value = a->shape[i].as<IntImm>();
- const IntImm* b_value = b->shape[i].as<IntImm>();
- if (a_value && b_value == nullptr) return false;
- if (b_value && a_value == nullptr) return false;
- if (a_value == nullptr && b_value == nullptr) {
- continue;
- }
- if (a_value->value != b_value->value) return false;
- }
- return true;
-}
-
-size_t GraphKeyHash::Hash(const GraphKey& gkey) {
- if (gkey->cache_hash_key_ != 0) return gkey->cache_hash_key_;
- size_t key = dmlc::HashCombine(GraphHash(gkey->graph), gkey->target);
- key = dmlc::HashCombine(key, gkey->inputs.size());
- for (size_t i = 0; i < gkey->inputs.size(); ++i) {
- key = dmlc::HashCombine(key, HashPlaceHolder(gkey->inputs[i]));
- }
- if (key == 0) key = 1;
- gkey->cache_hash_key_ = key;
- return key;
-}
-
-bool GraphKeyEqual::Equal(const GraphKey& a,
- const GraphKey& b) {
- if (a->target != b->target) return false;
- if (a->inputs.size() != b->inputs.size()) return false;
- for (size_t i = 0; i < a->inputs.size(); ++i) {
- if (!PlaceHolderEqual(a->inputs[i], b->inputs[i])) return false;
- }
- if (GraphDeepCompare(a->graph, b->graph, false).length() != 0) return false;
- return true;
-}
-
-GraphKey GraphKeyNode::make(Graph graph,
- tvm::Array<Tensor> inputs,
- std::string target) {
- auto n = tvm::make_node<GraphKeyNode>();
- n->graph = std::move(graph);
- n->inputs = inputs;
- n->target = std::move(target);
- return GraphKey(n);
-}
-
-TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable)
-.set_dispatch<GraphKeyNode>([](const ObjectRef& ref, IRPrinter* p) {
- auto* op = static_cast<const GraphKeyNode*>(ref.get());
- p->stream << "GraphKeyNode("<< op << ")";
-});
-
-
-// Run graph hash
-size_t GraphHash(const Graph& graph) {
- const IndexedGraph& idx = graph.indexed_graph();
- size_t key = 0;
- // Combine a linearized sequence of ops in subgraph
- key = dmlc::HashCombine(key, idx.num_nodes());
- std::hash<std::string> str_hash;
- std::vector<size_t> hash_temp;
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const IndexedGraph::Node& inode = idx[nid];
- // Use the op name instead of the op address so it is deterministic across runs.
- if (inode.source->is_variable()) continue;
- key = dmlc::HashCombine(key, inode.source->op()->name);
- hash_temp.clear();
- for (const auto& kv : GetAttrDict(inode.source->attrs)) {
- hash_temp.push_back(dmlc::HashCombine(str_hash(kv.first), kv.second));
- }
- // Sort to make the result deterministic,
- // since unordered_map iteration order is not.
- std::sort(hash_temp.begin(), hash_temp.end());
- for (size_t value : hash_temp) {
- key = dmlc::HashCombine(key, value);
- }
- }
- return key;
-}
-
-// Deep compare the graph structure, not considering the graph attributes.
-// Returns a non-empty error message if the graphs mismatch.
-// The comparator does not match names of intermediate nodes.
-// compare_variable_attr controls whether variable attributes are also compared.
-std::string GraphDeepCompare(const Graph& a,
- const Graph& b,
- bool compare_variable_attr) {
- const IndexedGraph& idxa = a.indexed_graph();
- const IndexedGraph& idxb = b.indexed_graph();
- std::ostringstream err;
- if (idxa.num_nodes() != idxb.num_nodes()) {
- err << "Number of nodes mismatch (" << idxa.num_nodes() << " v.s " << idxb.num_nodes() << ")";
- return err.str();
- }
- if (idxa.num_node_entries() != idxb.num_node_entries()) {
- err << "Number of node entry mismatch";
- return err.str();
- }
- if (idxa.outputs().size() != idxb.outputs().size()) {
- err << "Number of outputs mismatch";
- return err.str();
- }
- for (size_t i = 0; i < idxa.outputs().size(); ++i) {
- if (idxa.outputs()[i].node_id != idxb.outputs()[i].node_id ||
- idxa.outputs()[i].index != idxb.outputs()[i].index) {
- err << "Output entry mismatch";
- return err.str();
- }
- }
- if (idxa.input_nodes().size() != idxb.input_nodes().size()) {
- err << "Number of inputs mismatch";
- return err.str();
- }
-
- for (uint32_t nid = 0; nid < idxa.num_nodes(); ++nid) {
- const IndexedGraph::Node& anode = idxa[nid];
- const IndexedGraph::Node& bnode = idxb[nid];
- if (anode.source->op() != bnode.source->op()) {
- err << "Node mismatch ";
- return err.str();
- }
- if (anode.source->is_variable()) {
- CHECK(bnode.source->is_variable());
- if (!compare_variable_attr) continue;
- }
- AttrDict adict = GetAttrDict(anode.source->attrs);
- AttrDict bdict = GetAttrDict(bnode.source->attrs);
-
- auto fmatch = [&err, &anode](const AttrDict& adict, const AttrDict& bdict) {
- for (const auto& kv : adict) {
- auto it = bdict.find(kv.first);
- if (it != bdict.end()) {
- if (it->second != kv.second) {
- err << "Node attr mismatch, op=" << anode.source->attrs.name
- << " attr_key=" << kv.first << " " << it->second
- << " v.s. " << kv.second;
- return false;
- }
- } else {
- err << "One attr_key=" << kv.first << " is missing in another "
- << "op=" << anode.source->attrs.name;
- return false;
- }
- }
- return true;
- };
- if (!fmatch(adict, bdict)) return err.str();
- if (adict.size() != bdict.size()) {
- CHECK(!fmatch(bdict, adict));
- return err.str();
- }
- if (anode.inputs.size() != bnode.inputs.size()) {
- err << "Node input mismatch, op=" << anode.source->attrs.name;
- return err.str();
- }
- if (anode.control_deps.size() != bnode.control_deps.size()) {
- err << "Node control_deps mismatch, op=" << anode.source->attrs.name;
- return err.str();
- }
- for (size_t i = 0; i < anode.inputs.size(); ++i) {
- const IndexedGraph::NodeEntry& ae = anode.inputs[i];
- const IndexedGraph::NodeEntry& be = bnode.inputs[i];
- if (ae.node_id != be.node_id ||
- ae.index != be.index ||
- ae.version != be.version) {
- err << "Node input entry mismatch, op=" << anode.source->attrs.name;
- return err.str();
- }
- }
- for (size_t i = 0; i < anode.control_deps.size(); ++i) {
- if (anode.control_deps[i] != bnode.control_deps[i]) {
- err << "Node control_dep entry mismatch, op=" << anode.source->attrs.name;
- return err.str();
- }
- }
- }
- return "";
-}
-
-TVM_REGISTER_GLOBAL("nnvm.graph.DeepCompare")
-.set_body_typed(GraphDeepCompare);
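
For completeness, a tiny sketch of how this comparator is typically used (the graphs a and b are assumed to exist; this is illustration, not code from the file): directly from C++, as GraphKeyEqual::Equal does above, or by name through the packed-function registry.

// Direct C++ use: an empty string means the two graphs match structurally.
std::string diff = GraphDeepCompare(a, b, /*compare_variable_attr=*/false);
CHECK(diff.empty()) << "graphs differ: " << diff;
// The same comparator is also reachable by name as "nnvm.graph.DeepCompare"
// through the global function registry populated just above.
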
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_hash.h
- * \brief The graph hashing function.
- */
-#ifndef NNVM_COMPILER_GRAPH_HASH_H_
-#define NNVM_COMPILER_GRAPH_HASH_H_
-
-#include <dmlc/common.h>
-#include <nnvm/graph.h>
-#include <tvm/operation.h>
-#include <string>
-#include <utility>
-
-namespace nnvm {
-namespace compiler {
-
-class GraphKey;
-
-/*! \brief Key to a graph compiler cache */
-struct GraphKeyNode : public tvm::Node {
- /*! \brief The graph structure */
- Graph graph;
- /*! \brief The inputs to the function */
- tvm::Array<Tensor> inputs;
- /*! \brief The target */
- std::string target;
- // Cached internal hash key, invisible to the user.
- // The cached hash key is guaranteed to never be 0.
- mutable size_t cache_hash_key_{0};
-
- void VisitAttrs(tvm::AttrVisitor* v) {
- v->Visit("inputs", &inputs);
- v->Visit("target", &target);
- }
-
- static GraphKey make(Graph graph,
- tvm::Array<Tensor> inputs,
- std::string target);
- static constexpr const char* _type_key = "GraphKey";
- TVM_DECLARE_NODE_TYPE_INFO(GraphKeyNode, tvm::Node);
-};
-
-TVM_DEFINE_NODE_REF(GraphKey, GraphKeyNode);
-
-/*! \brief Hashing function for graph key */
-struct GraphKeyHash {
- size_t operator()(const GraphKey& gkey) const {
- return Hash(gkey);
- }
- static size_t Hash(const GraphKey& gkey);
-};
-
-/*! \brief Equality function for graph key */
-struct GraphKeyEqual {
- bool operator()(const GraphKey& a,
- const GraphKey& b) const {
- return Equal(a, b);
- }
- static bool Equal(const GraphKey& a, const GraphKey& b);
-};
-
-/*!
- * \brief Create a hash code for a given graph.
- * \return The hash code of the graph.
- */
-size_t GraphHash(const Graph& graph);
-
-/*!
- * \brief Deep compare two graphs.
- * \param a The first graph.
- * \param b The second graph.
- * \param compare_variable_attr Whether to also compare variable attributes.
- * \return An empty string if they are equal, otherwise an error message.
- */
-std::string GraphDeepCompare(const Graph& a,
- const Graph& b,
- bool compare_variable_attr);
-} // namespace compiler
-} // namespace nnvm
-
-#endif // NNVM_COMPILER_GRAPH_HASH_H_
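
A brief usage sketch for these key types: GraphKeyHash and GraphKeyEqual make GraphKey usable as a hash-map key, for example as a compile-engine cache key. The map below and the use of GraphFunc/GraphLower are assumptions for illustration, borrowed from how GraphCompile calls GraphLower elsewhere in this change, not definitions from this header.

#include <unordered_map>

// Illustrative cache keyed by GraphKey.
std::unordered_map<GraphKey, GraphFunc, GraphKeyHash, GraphKeyEqual> cache;

GraphKey key = GraphKeyNode::make(graph, inputs, "llvm");
auto it = cache.find(key);  // hashes the graph structure plus input placeholders
if (it == cache.end()) {
  cache[key] = GraphLower(graph, inputs, "llvm", /*master_idx=*/0);  // hypothetical fill
}
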
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_runtime.cc
- * \brief Interface code with TVM graph runtime.
-*/
-#include <dmlc/memory_io.h>
-#include <tvm/runtime/registry.h>
-
-#include <utility>
-#include "graph_runtime.h"
-
-namespace nnvm {
-namespace compiler {
-
-using tvm::Object;
-using tvm::ObjectPtr;
-using tvm::runtime::TVMArgs;
-using tvm::runtime::TVMRetValue;
-using tvm::runtime::PackedFunc;
-
-DMLC_REGISTER_PARAMETER(TVMOpParam);
-
-// parser
-inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) {
- TVMOpParam param;
- param.Init(attrs->dict);
- attrs->parsed = std::move(param);
-}
-
-NNVM_REGISTER_OP(tvm_op)
-.set_attr_parser(TVMOpParamParser)
-.set_num_inputs([](const NodeAttrs& attrs) {
- const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
- return param.num_inputs;
- })
-.set_num_outputs([](const NodeAttrs& attrs) {
- const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
- return param.num_outputs;
- });
-
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- CHECK_EQ(args.size() % 2, 0u);
- size_t num_params = args.size() / 2;
- std::vector<std::string> names;
- names.reserve(num_params);
- std::vector<DLTensor*> arrays;
- arrays.reserve(num_params);
- for (size_t i = 0; i < num_params * 2; i += 2) {
- names.emplace_back(args[i].operator std::string());
- arrays.emplace_back(args[i + 1].operator DLTensor*());
- }
- std::string bytes;
- dmlc::MemoryStringStream strm(&bytes);
- dmlc::Stream* fo = &strm;
- uint64_t header = kTVMNDArrayListMagic, reserved = 0;
- fo->Write(header);
- fo->Write(reserved);
- fo->Write(names);
- {
- uint64_t sz = static_cast<uint64_t>(arrays.size());
- fo->Write(sz);
- for (size_t i = 0; i < sz; ++i) {
- tvm::runtime::SaveDLTensor(fo, arrays[i]);
- }
- }
- TVMByteArray arr;
- arr.data = bytes.c_str();
- arr.size = bytes.length();
- *rv = arr;
- });
-
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- std::string bytes = args[0];
- std::vector<std::string> names;
- dmlc::MemoryStringStream memstrm(&bytes);
- dmlc::Stream* strm = &memstrm;
- uint64_t header, reserved;
- CHECK(strm->Read(&header))
- << "Invalid parameters file format";
- CHECK(header == kTVMNDArrayListMagic)
- << "Invalid parameters file format";
- CHECK(strm->Read(&reserved))
- << "Invalid parameters file format";
- CHECK(strm->Read(&names))
- << "Invalid parameters file format";
- uint64_t sz;
- CHECK_EQ(strm->Read(&sz, sizeof(sz)), sizeof(sz)) << "Invalid parameters file format";
- size_t size = static_cast<size_t>(sz);
- CHECK(size == names.size())
- << "Invalid parameters file format";
- tvm::Array<NDArrayWrapper> ret;
- for (size_t i = 0; i < size; ++i) {
- tvm::runtime::NDArray temp;
- temp.Load(strm);
- auto n = tvm::make_node<NDArrayWrapperNode>();
- n->name = std::move(names[i]);
- n->array = temp;
- ret.push_back(NDArrayWrapper(n));
- }
- *rv = ret;
- });
-
-TVM_REGISTER_NODE_TYPE(NDArrayWrapperNode);
-} // namespace compiler
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_runtime.h
- * \brief Interface code with TVM graph runtime.
-*/
-#ifndef NNVM_COMPILER_GRAPH_RUNTIME_H_
-#define NNVM_COMPILER_GRAPH_RUNTIME_H_
-
-#include <nnvm/graph.h>
-#include <tvm/base.h>
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <tvm/runtime/ndarray.h>
-#include <vector>
-#include <string>
-
-namespace nnvm {
-namespace compiler {
-
-/*! \brief Magic number for NDArray list file */
-constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7;
-
-struct TVMOpParam : public dmlc::Parameter<TVMOpParam> {
- std::string func_name;
- uint32_t num_inputs;
- uint32_t num_outputs;
- uint32_t flatten_data;
-
- DMLC_DECLARE_PARAMETER(TVMOpParam) {
- DMLC_DECLARE_FIELD(func_name);
- DMLC_DECLARE_FIELD(num_inputs).set_default(1);
- DMLC_DECLARE_FIELD(num_outputs).set_default(1);
- DMLC_DECLARE_FIELD(flatten_data).set_default(0);
- }
-};
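
A short sketch of the round trip this parameter struct supports, matching how GraphCompile serializes it into a node's attribute dict and how the tvm_op parser in graph_runtime.cc reads it back; the values here are made up for illustration.

#include <string>
#include <unordered_map>

// Illustrative only.
TVMOpParam param;
param.func_name = "fuse_example";   // hypothetical fused function name
param.num_inputs = 2;
param.num_outputs = 1;
param.flatten_data = 0;

std::unordered_map<std::string, std::string> dict;
param.UpdateDict(&dict);            // write the fields into a string dict

TVMOpParam parsed;
parsed.Init(dict);                  // parse them back, as TVMOpParamParser does
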
-
-
-/*!
- * \brief wrapper node container for exchange.
- */
-struct NDArrayWrapperNode : public ::tvm::Node {
- std::string name;
- tvm::runtime::NDArray array;
-
- void VisitAttrs(tvm::AttrVisitor* v) {
- v->Visit("name", &name);
- v->Visit("array", &array);
- }
-
- static constexpr const char* _type_key = "NDArrayWrapper";
- TVM_DECLARE_NODE_TYPE_INFO(NDArrayWrapperNode, tvm::Node);
-};
-
-TVM_DEFINE_NODE_REF(NDArrayWrapper, NDArrayWrapperNode);
-
-} // namespace compiler
-} // namespace nnvm
-
-#endif // NNVM_COMPILER_GRAPH_RUNTIME_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file graph_transform.h
- * \brief A graph transform helper that does local pattern matching and mutates nodes.
-*/
-#ifndef NNVM_COMPILER_GRAPH_TRANSFORM_H_
-#define NNVM_COMPILER_GRAPH_TRANSFORM_H_
-
-#include <nnvm/graph.h>
-#include <vector>
-#include <utility>
-#include <unordered_map>
-
-namespace nnvm {
-namespace compiler {
-
-/*!
- * \brief Transform the graph to build a new Graph, in post DFS order.
- *
- * Automatically copies a node when some of its children or control_deps have changed.
- * This function is not called on Variable nodes.
- *
- * \param graph The original graph
- *
- * \param ftransform Function of (int nid, const NodePtr& node, std::vector<NodeEntry>* out) -> bool
- *
- * If an empty vector is returned, the original entries are kept.
- *
- * \tparam FTransform The transformation function.
- */
-template<typename FTransform>
-Graph GraphTransform(Graph graph, FTransform ftransform) {
- const IndexedGraph& idx = graph.indexed_graph();
- // new nodes
- std::vector<NodeEntry> new_entry_map(idx.num_node_entries());
- std::vector<bool> updated(idx.num_node_entries(), false);
-
- // setup inputs and placeholder.
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- bool need_copy = false;
- for (const IndexedGraph::NodeEntry& e : inode.inputs) {
- if (updated[idx.entry_id(e)]) {
- need_copy = true; break;
- }
- }
- if (!need_copy) {
- for (const uint32_t cid : inode.control_deps) {
- const auto& cnode = idx[cid];
- for (uint32_t i = 0 ; i < cnode.source->num_outputs(); ++i) {
- if (updated[idx.entry_id(cid, i)]) {
- need_copy = true;
- }
- }
- if (need_copy) break;
- }
- }
-
- if (!need_copy) {
- std::vector<NodeEntry> ret;
- if (ftransform(nid, inode.weak_ref.lock(), &ret)) {
- CHECK_EQ(ret.size(), static_cast<size_t>(inode.source->num_outputs()));
- for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) {
- updated[idx.entry_id(nid, i)] = true;
- new_entry_map[idx.entry_id(nid, i)] = ret[i];
- }
- }
- } else {
- NodePtr node = Node::Create();
- node->attrs = inode.source->attrs;
- for (size_t i = 0; i < inode.inputs.size(); ++i) {
- const IndexedGraph::NodeEntry& e = inode.inputs[i];
- if (updated[idx.entry_id(e)]) {
- node->inputs.push_back(new_entry_map[idx.entry_id(e)]);
- } else {
- node->inputs.push_back(inode.source->inputs[i]);
- }
- }
- for (size_t i = 0; i < inode.control_deps.size(); ++i) {
- const uint32_t cid = inode.control_deps[i];
- const auto& cnode = idx[cid];
- CHECK_NE(cnode.source->num_outputs(), 0U);
- NodePtr selected_ptr;
- for (uint32_t j = 0 ; j < cnode.source->num_outputs(); ++j) {
- NodePtr cptr = updated[idx.entry_id(cid, j)] ?
- new_entry_map[idx.entry_id(cid, j)].node : inode.source->control_deps[i];
- if (selected_ptr == nullptr) {
- selected_ptr = std::move(cptr);
- } else {
- CHECK(selected_ptr.get() == cptr.get())
- << "Control dependency node changed to more than one node";
- }
- }
- node->control_deps.push_back(selected_ptr);
- }
- std::vector<NodeEntry> ret;
- if (ftransform(nid, node, &ret)) {
- CHECK_EQ(ret.size(), static_cast<size_t>(inode.source->num_outputs()));
- for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) {
- updated[idx.entry_id(nid, i)] = true;
- new_entry_map[idx.entry_id(nid, i)] = ret[i];
- }
- } else {
- for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) {
- updated[idx.entry_id(nid, i)] = true;
- new_entry_map[idx.entry_id(nid, i)] = NodeEntry{node, i, 0};
- }
- }
- }
- }
- Graph ret;
- for (size_t i = 0; i < idx.outputs().size(); ++i) {
- const IndexedGraph::NodeEntry& e = idx.outputs()[i];
- if (updated[idx.entry_id(e)]) {
- ret.outputs.push_back(new_entry_map[idx.entry_id(e)]);
- } else {
- ret.outputs.push_back(graph.outputs[i]);
- }
- }
- return ret;
-}
-
-} // namespace compiler
-} // namespace nnvm
-
-#endif // NNVM_COMPILER_GRAPH_TRANSFORM_H_
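A usage sketch for the GraphTransform helper removed above, assuming the nnvm headers it shipped with; the EliminateCopy pass and the choice of the "copy" operator are illustrative only, not code that existed in the tree.

    #include <nnvm/graph.h>
    #include <utility>
    #include <vector>
    #include "graph_transform.h"

    namespace nnvm {
    namespace compiler {

    // Rewrite every single-output "copy" node to forward its input directly.
    // Returning false from the callback keeps a node (and its entries) as-is.
    Graph EliminateCopy(Graph g) {
      auto transform = [](uint32_t nid, const NodePtr& n,
                          std::vector<NodeEntry>* ret) {
        if (n->is_variable()) return false;
        if (n->op()->name != "copy") return false;
        *ret = {n->inputs[0]};  // one entry per output of the matched node
        return true;
      };
      return GraphTransform(std::move(g), transform);
    }

    }  // namespace compiler
    }  // namespace nnvm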
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file node_attr.h
- * \brief utility to access node attributes
-*/
-#ifndef NNVM_COMPILER_NODE_ATTR_H_
-#define NNVM_COMPILER_NODE_ATTR_H_
-
-#include <nnvm/op.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <unordered_map>
-#include <string>
-
-namespace nnvm {
-namespace compiler {
-
-using AttrDict = std::unordered_map<std::string, std::string>;
-/*!
- * \brief Get canonicalized attr dict from node
- * \param attrs The node attrs
- * \return The attribute dict
- */
-inline AttrDict GetAttrDict(const NodeAttrs& attrs) {
- static auto& fgetdict = nnvm::Op::GetAttr<FGetAttrDict>("FGetAttrDict");
- if (fgetdict.count(attrs.op)) {
- return fgetdict[attrs.op](attrs);
- } else {
- return attrs.dict;
- }
-}
-
-} // namespace compiler
-} // namespace nnvm
-#endif // NNVM_COMPILER_NODE_ATTR_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file packed_func_ext.cc
- * \brief Registration of extension type.
- */
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/op.h>
-#include <nnvm/compiler/packed_func_ext.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <tvm/runtime/c_runtime_api.h>
-#include "node_attr.h"
-#include "compile_engine.h"
-
-namespace tvm {
-namespace runtime {
-
-TVM_REGISTER_EXT_TYPE(nnvm::Graph);
-TVM_REGISTER_EXT_TYPE(nnvm::Symbol);
-TVM_REGISTER_EXT_TYPE(nnvm::compiler::AttrDict);
-
-} // namespace runtime
-} // namespace tvm
-
-namespace nnvm {
-DMLC_JSON_ENABLE_ANY(int, int);
-} // namespace nnvm
-
-namespace nnvm {
-namespace compiler {
-
-using tvm::Tensor;
-using tvm::Array;
-using tvm::Node;
-using tvm::runtime::TVMArgs;
-using tvm::runtime::TVMRetValue;
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._dict_get")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- const AttrDict& dict = args[0].AsExtension<AttrDict>();
- std::string key = args[1];
- auto it = dict.find(key);
- if (it != dict.end()) {
- *rv = it->second;
- } else {
- *rv = nullptr;
- }
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._dict_size")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- const AttrDict& dict = args[0].AsExtension<AttrDict>();
- *rv = static_cast<int64_t>(dict.size());
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._dict_keys")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- const AttrDict& dict = args[0].AsExtension<AttrDict>();
- tvm::Array<tvm::Expr> keys;
- for (const auto& kv : dict) {
- keys.push_back(kv.first);
- }
- *rv = keys;
- });
-
-TVM_REGISTER_GLOBAL("nnvm.compiler._register_alter_op_layout")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown
- PackedFunc* f = new PackedFunc(args[1].operator PackedFunc());
- Op& op = ::dmlc::Registry<nnvm::Op>::Get()->__REGISTER_OR_GET__(args[0]);
- auto fpack = [f](const NodeAttrs& attrs,
- const Symbol& inputs,
- const Array<Tensor>& tinfos,
- Symbol* ret_symbol) {
- TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, tinfos);
- if (ret.type_code() == TVMTypeCode::kNull) {
- return false;
- }
- CHECK_EQ(ret.type_code(), tvm::runtime::extension_type_info<Symbol>::code)
- << " expected " << "Symbol (code = " << tvm::runtime::extension_type_info<Symbol>::code
- << ") but get code = " << ret.type_code();
- *ret_symbol = *(static_cast<Symbol*>(ret.value().v_handle));
- return true;
- };
- op.set_attr<FTVMAlterOpLayout>("FTVMAlterOpLayout", fpack, args[2]);
-});
-
-// custom version of TVM compute
-TVM_REGISTER_GLOBAL("nnvm._register_compute")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown
- PackedFunc* f = new PackedFunc(args[1].operator PackedFunc());
- Op& op = ::dmlc::Registry<nnvm::Op>::Get()->__REGISTER_OR_GET__(args[0]);
- auto fcompute = [f](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info)
- -> Array<Tensor> {
- TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, out_info);
- if (ret.IsObjectRef<tvm::Tensor>()) {
- return {ret.operator Tensor()};
- } else {
- return ret;
- }
- };
- op.set_attr<FTVMCompute>("FTVMCompute", fcompute, args[2]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm._register_schedule")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown
- PackedFunc* f = new PackedFunc(args[1].operator PackedFunc());
- Op& op = ::dmlc::Registry<nnvm::Op>::Get()->__REGISTER_OR_GET__(args[0]);
- auto fschedule = [f](const NodeAttrs& attrs,
- const Array<Tensor>& outs,
- const std::string& target) {
- return (*f)(GetAttrDict(attrs), outs, target).operator Schedule();
- };
- op.set_attr<FTVMSchedule>("FTVMSchedule", fschedule, args[2]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm._register_pattern")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- Op& op = ::dmlc::Registry<nnvm::Op>::Get()->__REGISTER_OR_GET__(args[0]);
- op.set_attr<TOpPattern>("TOpPattern", args[1].operator int(), args[2]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm.graph._move_module")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- const nnvm::Graph& g = args[0].AsExtension<Graph>();
- *rv = const_cast<nnvm::Graph*>(&g)->
- MoveCopyAttr<tvm::runtime::Module>(args[1]);
- });
-
-TVM_REGISTER_GLOBAL("nnvm.graph._move_graph")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
- const nnvm::Graph& g = args[0].AsExtension<Graph>();
- std::string key = args[1];
- if (g.attrs.count(key)) {
- *rv = const_cast<nnvm::Graph*>(&g)->
- MoveCopyAttr<nnvm::Graph>(key);
- } else {
- *rv = nullptr;
- }
- });
-} // namespace compiler
-} // namespace nnvm
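The globals registered in the file removed above were reached through the PackedFunc registry of the pre-0.6 TVM API. A minimal lookup sketch follows; error handling and argument construction are omitted, and the function name wrapping the lookup is hypothetical.

    #include <dmlc/logging.h>
    #include <tvm/runtime/registry.h>
    #include <tvm/runtime/packed_func.h>

    void LookupDictSize() {
      // Registry::Get returns nullptr when the global was never registered,
      // e.g. once the nnvm_compiler library is no longer built.
      const tvm::runtime::PackedFunc* f =
          tvm::runtime::Registry::Get("nnvm.compiler._dict_size");
      CHECK(f != nullptr) << "nnvm.compiler._dict_size is not registered";
      // (*f)(dict) would return the entry count of an AttrDict extension
      // argument; building such an argument is omitted here.
    }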
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file pattern_util.h
- * \brief Utilities for doing various pattern matching in graph.
-*/
-#ifndef NNVM_COMPILER_PATTERN_UTIL_H_
-#define NNVM_COMPILER_PATTERN_UTIL_H_
-
-#include <nnvm/graph.h>
-#include <vector>
-#include <utility>
-#include <string>
-#include <unordered_map>
-
-namespace nnvm {
-namespace compiler {
-
-/*!
- * \brief Find the axis in oshape such that
- *   bias_shape = [1, 1, ..., oshape[axis], 1, ..., 1]
- *
- * This is used to detect a bias or scaling factor on the channel dimension.
- * \param oshape The output shape
- * \param bias_shape The shape of the bias or scaling factor.
- * \return Pair of matched axes in oshape and bias_shape if found, {-1, -1} otherwise.
- */
-inline std::pair<int, int> MatchBroadcast1DAxis(
- const TShape& oshape, const TShape& bias_shape) {
- dim_t axis_dim = bias_shape.ndim();
- for (dim_t i = bias_shape.ndim(); i != 0; --i, --axis_dim) {
- if (bias_shape[i - 1] != 1) break;
- }
- // everything is 1
- if (axis_dim == 0) {
- return {oshape.ndim() - bias_shape.ndim(), 0};
- }
- axis_dim = axis_dim - 1;
- // The bias shape is not 1D
- for (dim_t i = 0; i < axis_dim; ++i) {
- if (bias_shape[i] != 1) return {-1, -1};
- }
- int axis = static_cast<int>(
- oshape.ndim() - bias_shape.ndim() + axis_dim);
- if (oshape[axis] != bias_shape[axis_dim]) return {-1, -1};
- return {axis, axis_dim};
-}
-
-/*!
- * \brief Expand bias dimension to match needed axis.
- *
- * \param bias The bias NodeEntry
- * \param out_dim output dimension.
- * \param bias_dim The current bias dimension.
- * \param axis The axis we want to match on.
- */
-inline NodeEntry
-ExpandBiasToMatchAxis(NodeEntry bias,
- int out_dim,
- int bias_dim,
- int axis) {
- if (bias_dim != 1) {
- bias = MakeNode("squeeze", bias.node->attrs.name + "_sqz", {bias});
- }
- int num_pad_axis = out_dim - axis - 1;
- if (num_pad_axis > 0) {
- std::unordered_map<std::string, std::string> kwargs{
- {"axis", "1"},
- {"num_newaxis", std::to_string(num_pad_axis)}};
- return MakeNode("expand_dims", bias.node->attrs.name + "_expand",
- {bias}, kwargs);
-
- } else {
- return bias;
- }
-}
-
-/*!
- * \brief Get the reference count of each node.
- * \param idx The IndexedGraph
- * \return ref_count vector with one entry per node.
- */
-inline std::vector<uint32_t>
-GetNodeRefCounts(const IndexedGraph& idx) {
- std::vector<uint32_t> ref_count(idx.num_nodes(), 0);
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
- const auto& inode = idx[nid];
- if (inode.source->is_variable()) continue;
- for (const auto& e : inode.inputs) {
- ++ref_count[e.node_id];
- }
- }
- for (const auto& e : idx.outputs()) {
- // this line will realize all the outputs
- ref_count[e.node_id] += 1;
- }
- return ref_count;
-}
-} // namespace compiler
-} // namespace nnvm
-#endif // NNVM_COMPILER_PATTERN_UTIL_H_
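Worked example of the helpers removed above: for oshape = (1, 64, 32, 32) and bias_shape = (64, 1, 1), MatchBroadcast1DAxis strips the trailing ones, matches the single non-unit dimension 64 against oshape, and returns {1, 0}, i.e. axis 1 of the output shape and axis 0 of the bias shape. ExpandBiasToMatchAxis(bias, 4, 3, 1) then squeezes the bias to shape (64,) and re-expands it to (64, 1, 1), which broadcasts against an NCHW tensor along the channel axis.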
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file precompute_prune.cc
- * \brief Split the graph into a pre-compute graph and an execution graph.
- *
- * The pre-compute graph outputs parameters that can be consumed
- * by the execution graph during the execution phase.
- */
-#include <nnvm/graph.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/pass.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <unordered_set>
-
-namespace nnvm {
-namespace compiler {
-
-nnvm::Graph PrecomputePrune(nnvm::Graph src) {
- const auto& plist
- = src.GetAttr<std::vector<std::string> >("param_name_list");
- std::unordered_set<std::string> params(plist.begin(), plist.end());
-
- std::unordered_set<nnvm::Node*> pruned;
- nnvm::NodeEntryMap<nnvm::NodePtr> entry_var;
- std::unordered_set<std::string> unique_name;
- // number of edges that are not variable
- int non_var_edge = 0;
-
- auto replace_pruned_entry = [&] (const NodeEntry& e) {
- if (!entry_var.count(e)) {
- if (!e.node->is_variable()) {
- ++non_var_edge;
- }
- nnvm::NodePtr var = nnvm::Node::Create();
- var->attrs.name = e.node->attrs.name;
- if (e.version) {
- var->attrs.name += "_" + std::to_string(e.version);
- }
- if (e.node->num_outputs() != 1) {
- var->attrs.name += "_output" + std::to_string(e.index);
- }
- entry_var.emplace(e, var);
- CHECK(!unique_name.count(var->attrs.name));
- unique_name.insert(var->attrs.name);
- return nnvm::NodeEntry{var, 0, 0};
- } else {
- return nnvm::NodeEntry{entry_var.at(e), 0, 0};
- }
- };
-
- DFSVisit(src.outputs, [&](const nnvm::NodePtr& n) {
- bool can_be_pruned = true;
- if (n->is_variable()) {
- if (params.count(n->attrs.name)) {
- pruned.emplace(n.get());
- }
- can_be_pruned = false;
- }
-
- for (const auto& e : n->inputs) {
- if (!pruned.count(e.node.get())) {
- can_be_pruned = false;
- }
- }
- if (can_be_pruned) {
- pruned.emplace(n.get());
- } else {
- // scan again to find edge nodes, skip variables
- for (auto& e : n->inputs) {
- if (pruned.count(e.node.get())) {
- e = replace_pruned_entry(e);
- }
- }
- }
- });
-
- // nothing being pruned.
- if (non_var_edge == 0) {
- return src;
- }
-
- for (auto& e : src.outputs) {
- if (pruned.count(e.node.get())) {
- e = replace_pruned_entry(e);
- }
- }
-
- nnvm::Graph pre_graph;
- pre_graph.outputs.reserve(entry_var.size());
- std::vector<std::string> output_names;
- output_names.reserve(entry_var.size());
-
- for (auto kv : entry_var) {
- pre_graph.outputs.emplace_back(kv.first);
- output_names.emplace_back(kv.second->attrs.name);
- }
- // new parameter list
- pre_graph.attrs["output_names"] =
- std::make_shared<dmlc::any>(std::move(output_names));
- src.attrs["precompute_graph"] =
- std::make_shared<dmlc::any>(std::move(pre_graph));
- return src;
-}
-
-NNVM_REGISTER_PASS(PrecomputePrune)
-.set_body(PrecomputePrune);
-} // namespace compiler
-} // namespace nnvm
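In effect, the pass removed above marks every parameter variable as prunable, then marks any node whose inputs are all prunable; each edge that crosses from the pruned region back into the remaining graph is replaced by a fresh variable. If nothing but bare variables would be precomputed (non_var_edge stays 0) the graph is returned unchanged. Otherwise the pruned subgraph is attached to the returned graph as the "precompute_graph" attribute, carrying an "output_names" attribute that names the variables its outputs must be bound to before the execution graph runs.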
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file simplify_inference.cc
- * \author Ziheng Jiang
-*/
-#include <nnvm/graph.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/pass.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include "graph_transform.h"
-#include "pattern_util.h"
-
-namespace nnvm {
-namespace compiler {
-
-std::vector<NodeEntry>
-BatchNormToInferUnpack(const nnvm::NodeAttrs& attrs,
- nnvm::NodeEntry data,
- nnvm::NodeEntry gamma,
- nnvm::NodeEntry beta,
- nnvm::NodeEntry moving_mean,
- nnvm::NodeEntry moving_var,
- TShape dshape,
- TShape bshape) {
- CHECK_NE(dshape.ndim(), 0);
- CHECK(attrs.op);
- static const Op* bn_op = Op::Get("batch_norm");
- CHECK(attrs.op == bn_op);
- const auto& param = nnvm::get<top::BatchNormParam>(attrs.parsed);
- std::string bn_name = attrs.name;
-
- // transform batch_norm(data) to scale * data + shift
- NodeEntry var_add_eps = MakeNode(
- "__add_scalar__", bn_name + "_add_eps",
- {moving_var}, {{"scalar", std::to_string(param.epsilon)}});
-
- NodeEntry sqrt = MakeNode(
- "sqrt", bn_name + "_sqrt", {var_add_eps});
-
- NodeEntry scale = MakeNode(
- "__rdiv_scalar__", bn_name + "_div",
- {sqrt}, {{"scalar", "1"}});
-
- if (param.scale) {
- scale = MakeNode(
- "elemwise_mul", bn_name + "_gamma_mul_div",
- {scale, gamma});
- }
-
- NodeEntry neg_mean = MakeNode(
- "negative", bn_name + "_neg_mean", {moving_mean});
-
- NodeEntry shift = MakeNode(
- "elemwise_mul", bn_name + "_neg_mean_mul_a",
- {neg_mean, scale});
-
- if (param.center) {
- shift = MakeNode(
- "elemwise_add", bn_name + "_add_beta", {shift, beta});
- }
- int axis = param.axis;
- scale = ExpandBiasToMatchAxis(scale, dshape.ndim()-bshape.ndim()+1, 1, axis);
- shift = ExpandBiasToMatchAxis(shift, dshape.ndim()-bshape.ndim()+1, 1, axis);
-
- NodeEntry out = MakeNode("broadcast_mul", bn_name + "_a_mul_data",
- {data, scale});
- out = MakeNode("broadcast_add", bn_name + "_out",
- {out, shift});
- // It is invalid to ref the other values of BN after inference transform.
- NodeEntry undef = MakeNode("__undef__", "undef", {});
- return {out, undef, undef};
-}
-
-Graph SimplifyInference(nnvm::Graph src) {
- // Get attributes from the graph
- const IndexedGraph& idx = src.indexed_graph();
- const ShapeVector& shape_vec = src.GetAttr<ShapeVector>("shape");
- auto transform = [&](uint32_t nid, const NodePtr& n, std::vector<NodeEntry>* ret) {
- if (n->is_variable()) return false;
- static const Op* bn_op = Op::Get("batch_norm");
- static const Op* dropout_op = Op::Get("dropout");
- if (n->op() == bn_op) {
- *ret = BatchNormToInferUnpack(
- n->attrs,
- n->inputs[0],
- n->inputs[1],
- n->inputs[2],
- n->inputs[3],
- n->inputs[4],
- shape_vec[idx.entry_id(nid, 0)],
- shape_vec[idx.entry_id(nid, 1)]);
- return true;
- } else if (n->op() == dropout_op) {
- NodeEntry undef = MakeNode("__undef__", "undef", {});
- *ret = {n->inputs[0], undef};
- return true;
- } else {
- return false;
- }
- };
- return GraphTransform(src, transform);
-}
-
-NNVM_REGISTER_PASS(SimplifyInference)
-.set_body(SimplifyInference)
-.set_change_graph(true);
-
-} // namespace compiler
-} // namespace nnvm
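The folding removed above is the standard inference-time simplification: with scale = gamma / sqrt(moving_var + epsilon) (just 1 / sqrt(moving_var + epsilon) when param.scale is false) and shift = beta - moving_mean * scale (the beta term dropped when param.center is false), batch_norm(data) collapses to broadcast_mul(data, scale) followed by broadcast_add with shift, while the mean and variance outputs are replaced by __undef__ placeholders. Dropout nodes are likewise rewritten to pass their data input straight through.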
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#include <nnvm/pass.h>
#include <nnvm/graph_attr_types.h>
#include <nnvm/op_attr_types.h>
-#include <nnvm/top/tensor.h>
#include <memory>
#include "graph_algorithm.h"
namespace nnvm {
namespace pass {
namespace {
- using namespace nnvm::top;
+
// Return bytes of data flag.
static int GetDTypeSize(int type_flag) {
switch (type_flag) {
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file elemwise_op_common.h
- * \brief Common operator utilities
- */
-#ifndef NNVM_TOP_ELEMWISE_OP_COMMON_H_
-#define NNVM_TOP_ELEMWISE_OP_COMMON_H_
-
-#include <nnvm/layout.h>
-#include <nnvm/top/nn.h>
-#include <string>
-#include <vector>
-#include <utility>
-#include <functional>
-#include "op_common.h"
-
-namespace nnvm {
-namespace top {
-
-template<typename AttrType, bool (*is_none)(const AttrType&),
- bool (*assign)(AttrType*, const AttrType&), bool reverse_infer,
- std::string (*attr_string)(const AttrType&),
- int n_in = -1, int n_out = -1>
-inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs,
- std::vector<AttrType> *in_attrs,
- std::vector<AttrType> *out_attrs,
- const AttrType& none) {
- AttrType dattr = none;
- size_t in_size = in_attrs->size();
- size_t out_size = out_attrs->size();
- if (n_in != -1)
- in_size = static_cast<size_t>(n_in);
- if (n_out != -1)
- out_size = static_cast<size_t>(n_out);
-
- auto deduce = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
- for (size_t i = 0; i < size; ++i) {
- CHECK(assign(&dattr, (*vec)[i]))
- << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
- << name << ": " << "expected " << attr_string(dattr)
- << ", got " << attr_string((*vec)[i]);
- }
- };
- deduce(in_attrs, in_size, "input");
- if (reverse_infer) deduce(out_attrs, out_size, "output");
-
- auto write = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
- for (size_t i = 0; i < size; ++i) {
- CHECK(assign(&(*vec)[i], dattr))
- << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
- << name << ": " << "expected " << attr_string(dattr)
- << ", got " << attr_string((*vec)[i]);
- }
- };
- write(in_attrs, in_size, "input");
- write(out_attrs, out_size, "output");
-
- if (is_none(dattr)) return false;
- return true;
-}
-
-template<int n_in, int n_out>
-inline bool ElemwiseShape(const NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- if (n_in != -1) {
- CHECK_EQ(in_attrs->size(), static_cast<size_t>(n_in)) << " in operator " << attrs.name;
- }
- if (n_out != -1) {
- CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out)) << " in operator " << attrs.name;
- }
- return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
- attrs, in_attrs, out_attrs, TShape());
-}
-
-template<int n_in, int n_out>
-inline bool ElemwiseType(const NodeAttrs& attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- if (n_in != -1) {
- CHECK_EQ(in_attrs->size(), static_cast<size_t>(n_in)) << " in operator " << attrs.name;
- }
- if (n_out != -1) {
- CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out)) << " in operator " << attrs.name;
- }
- return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
- attrs, in_attrs, out_attrs, -1);
-}
-
-inline bool ElementWiseReduceShape(const NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- CHECK_EQ(out_attrs->size(), 1);
- return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
- attrs, in_attrs, out_attrs, TShape());
-}
-
-inline bool ElementWiseReduceType(const NodeAttrs& attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- CHECK_EQ(out_attrs->size(), 1);
- return ElemwiseAttr<int, type_is_none, type_assign, true, type_string>(
- attrs, in_attrs, out_attrs, -1);
-}
-
-template<int n_in, int n_out>
-inline bool ElemwiseFixedLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts,
- const std::function<Layout(const Layout& in)>& finfer) {
- const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast<size_t>(n_in);
- const size_t out_size = (n_out == -1) ? out_layouts->size() : static_cast<size_t>(n_out);
-
- auto deduce = [&](Layout *target, const std::vector<Layout> *vec,
- size_t size, const char *name) {
- for (size_t i = 0; i < size; ++i) {
- if (vec->at(i).defined()) {
- if (!target->defined()) {
- *target = vec->at(i);
- }
- CHECK_EQ(*target, vec->at(i))
- << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
- << name << ": " << "expected " << *target
- << ", got " << vec->at(i);
- }
- }
- };
-
- Layout in, last_in, out;
- deduce(&in, in_layouts, in_size, "input");
- deduce(&last_in, last_in_layouts, in_size, "input (last infer pass)");
- deduce(&out, out_layouts, out_size, "output");
-
- if (!last_in.defined()) {
- last_in = in;
- } else {
- // Otherwise copy the in_layout produced by the last infer pass into in_layout,
- // and let the LayoutTransform pass
- // insert a layout_transform node to fix the input layout.
- in = last_in;
- }
-
- out = finfer(in);
-
- auto write = [](std::vector<Layout> *vec, Layout& value, size_t size) {
- for (size_t i = 0; i < size; ++i) {
- vec->at(i) = value;
- }
- };
- if (in.defined()) write(in_layouts, in, in_size);
- if (out.defined()) write(out_layouts, out, out_size);
-
- return true;
-}
-
-/*! \brief Fix the input layout as the previous inferred (if any) and copy to output */
-template<int n_in, int n_out>
-inline bool ElemwiseFixedLayoutCopyToOut(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- return ElemwiseFixedLayout<n_in, n_out>(
- attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) {
- return in;
- });
-}
-
-/*! \brief Fix the input layout as the previous inferred (if any) and do not define output */
-template<int n_in, int n_out>
-inline bool ElemwiseFixedLayoutUnknownOut(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- return ElemwiseFixedLayout<n_in, n_out>(
- attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) {
- return Layout::Undef();
- });
-}
-
-/*! \brief take arbitrary input layout and copy to output */
-template<int n_in, int n_out>
-inline bool ElemwiseArbitraryLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast<size_t>(n_in);
- const size_t out_size = (n_out == -1) ? out_layouts->size() : static_cast<size_t>(n_out);
-
- Layout in;
- for (size_t i = 0; i < in_size; ++i) {
- if (!in.defined()) in = in_layouts->at(i);
- CHECK_EQ(in, in_layouts->at(i))
- << "Incompatible attr in node " << attrs.name << " at " << i
- << "-th input: expected " << in
- << ", got " << in_layouts->at(i);
- }
-
- if (in.defined()) {
- for (size_t i = 0; i < out_size; ++i) {
- out_layouts->at(i) = in;
- }
- }
-
- return true;
-}
-
-/*!
- * \brief try to convert right layout to left layout if they are different.
- * if the converting fails, it will use the last inferred layouts.
- */
-inline bool ElemwiseBinaryKeepLeftLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- CHECK_EQ(in_layouts->size(), 2U);
- CHECK_EQ(last_in_layouts->size(), 2U);
- CHECK_EQ(out_layouts->size(), 1U);
-
- const Layout& lhs_last = (*last_in_layouts)[0];
- const Layout& rhs_last = (*last_in_layouts)[1];
- CHECK((lhs_last.defined() && rhs_last.defined()) ||
- (!lhs_last.defined() && !rhs_last.defined()));
-
- const Layout& lhs = (*in_layouts)[0];
- const Layout& rhs = (*in_layouts)[1];
-
- if (!lhs.defined() && !rhs.defined()) {
- CHECK(!lhs_last.defined() && !rhs_last.defined())
- << "Lost input layouts in node " << attrs.name
- << ": last inferred lhs=" << lhs_last << ", rhs=" << rhs_last;
- return true;
- } else if (!lhs.defined()) {
- CHECK(!lhs_last.defined() && !rhs_last.defined());
- in_layouts->at(0) = rhs;
- out_layouts->at(0) = rhs;
- return true;
- } else if (!rhs.defined()) {
- CHECK(!lhs_last.defined() && !rhs_last.defined());
- in_layouts->at(1) = lhs;
- out_layouts->at(0) = lhs;
- return true;
- }
-
- if (lhs == rhs) {
- // for same layout, we can always do binary calculation
- // and pass the layout to next layer
- out_layouts->at(0) = lhs;
- return true;
- }
-
- if (rhs.convertible(lhs)) {
- in_layouts->at(1) = lhs;
- out_layouts->at(0) = lhs;
- } else {
- CHECK(lhs_last.defined() && rhs_last.defined())
- << "Incompatible input layouts in node " << attrs.name
- << ". lhs: " << lhs << ", rhs: " << rhs;
- CHECK(lhs_last == rhs_last);
- in_layouts->at(0) = lhs_last;
- in_layouts->at(1) = rhs_last;
- out_layouts->at(0) = lhs_last;
- }
-
- return true;
-}
-
-#define NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
- NNVM_REGISTER_OP(name) \
- .set_num_inputs(1) \
- .set_num_outputs(1) \
- .set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
- .set_attr<FInferType>("FInferType", ElemwiseType<1, 1>) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- ElemwiseArbitraryLayout<1, 1>) \
- .set_attr<FInplaceOption>("FInplaceOption", \
- [](const NodeAttrs& attrs){ \
- return std::vector<std::pair<int, int> >{{0, 0}}; \
- }) \
- .add_argument("data", "Tensor", "The input tensor.")
-
-
-#define NNVM_REGISTER_INIT_OP(name) \
- NNVM_REGISTER_OP(name) \
- .set_num_inputs(0) \
- .set_num_outputs(1)
-
-
-#define NNVM_REGISTER_INIT_LIKE_OP(name) \
- NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \
- .set_attr<FGradient>("FGradient", MakeZeroGradNodes) \
- .add_argument("data", "Symbol", "The input")
-
-
-#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \
- NNVM_REGISTER_OP(name) \
- .set_num_inputs(2) \
- .set_num_outputs(1) \
- .set_attr<FInferShape>("FInferShape", ElemwiseShape<2, 1>) \
- .set_attr<FInferType>("FInferType", ElemwiseType<2, 1>) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- ElemwiseBinaryKeepLeftLayout) \
- .set_attr<FInplaceOption>("FInplaceOption", \
- [](const NodeAttrs& attrs) { \
- return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
- }) \
- .add_argument("lhs", "Tensor", "first input") \
- .add_argument("rhs", "Tensor", "second input")
-
-
-#define NNVM_REGISTER_ELEMWISE_REDUCE_OP(name) \
- NNVM_REGISTER_OP(name) \
- .set_num_inputs([](const NodeAttrs& attrs) { \
- return static_cast<uint32_t>( \
- dmlc::get<ElementWiseReduceParam>(attrs.parsed).num_args); \
- }) \
- .set_attr_parser(ParamParser<ElementWiseReduceParam>) \
- .set_attr<FGetAttrDict>("FGetAttrDict", \
- ParamGetAttrDict<ElementWiseReduceParam>) \
- .set_attr<nnvm::FInferShape>("FInferShape", \
- ElementWiseReduceShape) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- ElemwiseFixedLayoutCopyToOut<-1, 1>) \
- .set_attr<nnvm::FInferType>("FInferType", ElementWiseReduceType) \
- .add_argument("args", "Symbol[]", "Positional input arguments")
-
-
-#define NNVM_REGISTER_INDICATOR_OP(name) \
- NNVM_REGISTER_OP(name) \
- .set_num_outputs(1) \
- .set_attr<FInferType>( \
- "FInferType", [](const NodeAttrs& attrs, \
- std::vector<int>* in_attrs, \
- std::vector<int>* out_attrs) { \
- CHECK_EQ(out_attrs->size(), 1U); \
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, \
- static_cast<int>(kFloat32)); \
- return true; \
- }) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- ElemwiseFixedLayoutUnknownOut<1, 1>) \
- .set_attr<FGradient>( \
- "FGradient", [](const NodePtr& n, \
- const std::vector<NodeEntry>& ograds) { \
- return MakeZeroGradNodes(n, ograds); \
- })
-
-
-} // namespace top
-} // namespace nnvm
-#endif // NNVM_TOP_ELEMWISE_OP_COMMON_H_
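A concrete reading of the ElemwiseAttr unification removed above: for a binary op registered with ElemwiseShape<2, 1>, input shapes {(2, 3), unknown} and an unknown output unify to the one known shape, so all three slots end up as (2, 3) and the function returns true; if every shape is still unknown it returns false so that shape inference can retry in a later pass. ElemwiseType runs the same unification over dtype codes, with -1 as the 'none' value.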
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file resize.cc
- * \brief Property def of resize operators.
- */
-#include <tvm/operation.h>
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/layout.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include "../nn/nn_common.h"
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/elemwise.h"
-#include "topi/transform.h"
-#include "topi/image/resize.h"
-#include "resize.h"
-
-namespace nnvm {
-namespace top {
-using tvm::Expr;
-using tvm::Array;
-using tvm::Tensor;
-using nnvm::compiler::FTVMCompute;
-
-DMLC_REGISTER_PARAMETER(ResizeParam);
-
-inline bool ResizeInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- CHECK_EQ(out_shape->size(), 1U);
- TShape dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
- dshape = ConvertLayout(dshape, param.layout, kNCHW);
-
- TShape oshape = dshape;
- oshape[2] = param.size[0];
- oshape[3] = param.size[1];
-
- oshape = ConvertLayout(oshape, kNCHW, param.layout);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
-
- return true;
-}
-
-inline bool ResizeLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
- CHECK_EQ(in_layouts->size(), 1U);
- CHECK_EQ(out_layouts->size(), 1U);
- const Layout layout(param.layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout);
- NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout);
- return true;
-}
-
-NNVM_REGISTER_OP(resize)
-.describe(R"(Perform resize to input array with nearest neighbour or bilinear interpolation.
-
-- **data**: data is 4D array of shape
- (batch_size, channels, in_height, in_width) for NCHW
- (batch_size, in_height, in_width, channels) for NHWC
-
-- **out**: Output is 4D array of shape
- for layout NCHW
- (batch_size, channels, size[0], size[1])
-
- for layout NHWC
- (batch_size, size[0], size[1], channels)
-
-)" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(ResizeParam::__FIELDS__())
-.set_attr_parser(ParamParser<ResizeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ResizeParam>)
-.set_attr<FInferShape>("FInferShape", ResizeInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ResizeLayout)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ResizeParam& param = nnvm::get<ResizeParam>(attrs.parsed);
- Array<Expr> oshape;
- if (param.layout == "NCHW") {
- oshape.push_back(out_info[0]->shape[2]);
- oshape.push_back(out_info[0]->shape[3]);
- } else {
- oshape.push_back(out_info[0]->shape[1]);
- oshape.push_back(out_info[0]->shape[2]);
- }
-
- return Array<Tensor>{ topi::image::resize(inputs[0], oshape, param.layout,
- param.align_corners, param.method)};
-})
-.set_support_level(2);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file resize.h
- */
-#ifndef NNVM_TOP_IMAGE_RESIZE_H_
-#define NNVM_TOP_IMAGE_RESIZE_H_
-
-#include <string>
-#include <vector>
-#include <utility>
-#include <iostream>
-#include <sstream>
-
-namespace nnvm {
-namespace top {
-
-struct ResizeParam : public dmlc::Parameter<ResizeParam> {
- TShape size;
- std::string layout;
- std::string method;
- bool align_corners;
-
- DMLC_DECLARE_PARAMETER(ResizeParam) {
- DMLC_DECLARE_FIELD(size)
- .describe("Output size");
- DMLC_DECLARE_FIELD(layout)
- .set_default("NCHW")
- .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc."
- "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
- "dimensions respectively. Resize is applied on the 'H' and"
- "'W' dimensions.");
- DMLC_DECLARE_FIELD(method)
- .set_default("BILINEAR")
- .describe("Specify the mode to use for scaling."
- "NEAREST_NEIGHBOR - Nearest Neighbor"
- "BILINEAR - Bilinear Interpolation");
- DMLC_DECLARE_FIELD(align_corners)
- .set_default(false)
- .describe("Should be true to preserve the values at the corner pixels");
- }
-};
-
-} // namespace top
-} // namespace nnvm
-#endif // NNVM_TOP_IMAGE_RESIZE_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file convolution.cc
- * \brief Convolution operators
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/layout.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include <tvm/tensor.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <tvm/operation.h>
-#include "nn_common.h"
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/nn.h"
-
-
-using tvm::Tensor;
-using tvm::Array;
-using nnvm::compiler::FTVMCompute;
-
-namespace nnvm {
-namespace top {
-
-// conv2d
-DMLC_REGISTER_PARAMETER(Conv2DParam);
-
-inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- static const Layout kOIHW("OIHW");
-
- const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
-
- const Layout in_layout(param.layout);
- const Layout kernel_layout(param.kernel_layout);
- CHECK(in_layout.convertible(kNCHW))
- << "Conv only support input layouts that are convertible from NCHW."
- << " But got " << in_layout;
- CHECK(kernel_layout.convertible(kOIHW))
- << "Conv only support kernel layouts that are convertible from OIHW."
- << " But got "<< kernel_layout;
-
- Layout out_layout(param.out_layout);
- if (!out_layout.defined()) out_layout = in_layout;
- CHECK(out_layout.convertible(kNCHW))
- << "Conv only support output layouts that are convertible from NCHW."
- << " But got " << out_layout;
-
- if (param.use_bias) {
- CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
- }
- CHECK_EQ(out_shape->size(), 1U);
-
- TShape dshape = in_shape->at(0);
- if (dshape.ndim() == 0) return false;
- dshape = ConvertLayout(dshape, in_layout, kNCHW);
-
- CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D";
- CHECK_EQ(param.kernel_size.ndim(), 2U);
- CHECK_EQ(param.strides.ndim(), 2U)
- << "incorrect stride size: " << param.strides;
- CHECK_EQ(param.dilation.ndim(), 2U)
- << "incorrect dilate size: " << param.dilation;
- CHECK_EQ(dshape[1] % param.groups, 0U)
- << "input channels must divide group size";
- CHECK_EQ(param.channels % param.groups, 0U)
- << "output channels must divide group size";
-
- TShape wshape({param.channels,
- dshape[1] / param.groups,
- param.kernel_size[0],
- param.kernel_size[1]});
-
- wshape = ConvertLayout(wshape, kOIHW, kernel_layout);
-
- if (in_shape->at(Conv2DParam::kWeight).ndim() == 0) {
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kWeight, wshape);
- }
- if (param.use_bias) {
- static const Layout default_bias_layout("C");
- TShape bias_shape({param.channels});
- auto oc_block = out_layout.subsizeof('C');
- if (oc_block > 0) {
- size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 1 : 0;
- bias_shape = ConvertLayout(bias_shape, default_bias_layout,
- default_bias_layout.split('C', split_axis, oc_block));
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kBias, bias_shape);
- }
- // dilation
- dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0];
- dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1];
- TShape oshape({dshape[0], param.channels, 0, 0});
- if (dshape[2] != 0) {
- oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1;
- }
- if (dshape[3] != 0) {
- oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1;
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout));
- // Perform incomplete shape inference. Fill in the missing values in data shape.
- // 1) We can always fill in the batch_size.
- // 2) We can back-calculate the input height/width if the corresponding stride is 1.
- oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW);
- dshape[0] = oshape[0];
- if (oshape[2] && param.strides[0] == 1) {
- dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0];
- }
- if (oshape[3] && param.strides[1] == 1) {
- dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1];
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kData,
- ConvertLayout(dshape, kNCHW, in_layout));
- // Check whether the kernel sizes are valid
- if (dshape[2] != 0) {
- CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0])
- << "kernel size exceed input";
- }
- if (dshape[3] != 0) {
- CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1])
- << "kernel size exceed input";
- }
- return true;
-}
-
-template<class Param>
-inline bool WinogradConv2DInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- static const Layout kOIHW("OIHW");
-
- const Param& param = nnvm::get<Param>(attrs.parsed);
-
- const Layout in_layout(param.layout);
- const Layout kernel_layout(param.kernel_layout);
- CHECK(in_layout.convertible(kNCHW))
- << "Conv only support input layouts that are convertible from NCHW."
- << " But got " << in_layout;
- CHECK(kernel_layout.convertible(kOIHW))
- << "Conv only support kernel layouts that are convertible from OIHW."
- << " But got "<< kernel_layout;
-
- Layout out_layout(param.out_layout);
- if (!out_layout.defined()) out_layout = in_layout;
- CHECK(out_layout.convertible(kNCHW))
- << "Conv only support output layouts that are convertible from NCHW."
- << " But got " << out_layout;
-
- if (param.use_bias) {
- CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
- }
- CHECK_EQ(out_shape->size(), 1U);
-
- TShape dshape = in_shape->at(0);
- if (dshape.ndim() == 0) return false;
- dshape = ConvertLayout(dshape, in_layout, kNCHW);
-
- CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D";
- CHECK_EQ(param.kernel_size.ndim(), 2U);
- CHECK_EQ(param.strides.ndim(), 2U)
- << "incorrect stride size: " << param.strides;
- CHECK_EQ(param.dilation.ndim(), 2U)
- << "incorrect dilate size: " << param.dilation;
- CHECK_EQ(dshape[1] % param.groups, 0U)
- << "input channels must divide group size";
- CHECK_EQ(param.channels % param.groups, 0U)
- << "output channels must divide group size";
-
- // NOTE: Do not check weight shape here!
- // Different backend requires different layout to compute
- // the batch gemm stage in winograd efficiently, but we want to
- // make this NNVM symbol work for all backends.
- // So we accept all weight shapes, and assume the TOPI developers
- // can handle this correctly in alter_op_layout.
-
- if (param.use_bias) {
- static const Layout default_bias_layout("C");
- TShape bias_shape({param.channels});
- auto oc_block = out_layout.subsizeof('C');
- if (oc_block > 0) {
- size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 1 : 0;
- bias_shape = ConvertLayout(bias_shape, default_bias_layout,
- default_bias_layout.split('C', split_axis, oc_block));
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kBias, bias_shape);
- }
- // dilation
- dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0];
- dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1];
- TShape oshape({dshape[0], param.channels, 0, 0});
- if (dshape[2] != 0) {
- oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1;
- }
- if (dshape[3] != 0) {
- oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1;
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout));
- // Perform incomplete shape inference. Fill in the missing values in data shape.
- // 1) We can always fill in the batch_size.
- // 2) We can back-calculate the input height/width if the corresponding stride is 1.
- oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW);
- dshape[0] = oshape[0];
- if (oshape[2] && param.strides[0] == 1) {
- dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0];
- }
- if (oshape[3] && param.strides[1] == 1) {
- dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1];
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kData,
- ConvertLayout(dshape, kNCHW, in_layout));
- // Check whether the kernel sizes are valid
- if (dshape[2] != 0) {
- CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0])
- << "kernel size exceed input";
- }
- if (dshape[3] != 0) {
- CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1])
- << "kernel size exceed input";
- }
- return true;
-}
-
-template <typename PARAM>
-inline bool Conv2DInferType(const nnvm::NodeAttrs& attrs,
- std::vector<int>* in_type,
- std::vector<int>* out_type) {
- const PARAM& param = nnvm::get<PARAM>(attrs.parsed);
- if (param.use_bias) {
- CHECK_EQ(in_type->size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_type->size(), 2U) << "Input:[data, weight]";
- }
- CHECK_EQ(out_type->size(), 1U);
- if (param.out_dtype != -1) {
- CHECK(!type_is_none((*in_type)[0]));
- for (size_t i = 1; i < in_type->size(); ++i) {
- NNVM_ASSIGN_INPUT_TYPE(attrs, *in_type, i, (*in_type)[0]);
- }
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype);
- } else {
- ElemwiseType<-1, 1>(attrs, in_type, out_type);
- }
- return true;
-}
-
-
-template<typename PARAM>
-inline bool Conv2DCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const PARAM& param = nnvm::get<PARAM>(attrs.parsed);
-
- const Layout in_layout(param.layout);
- Layout out_layout(param.out_layout);
- if (!out_layout.defined()) out_layout = in_layout;
-
- const Layout kernel_layout(param.kernel_layout);
- if (param.use_bias) {
- CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]";
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
- // automatically decide bias layout
- Layout bias_layout("C");
- auto oc_block = out_layout.subsizeof('C');
- if (oc_block > 0) {
- size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 1 : 0;
- bias_layout = bias_layout.split('C', split_axis, oc_block);
- }
- NNVM_ASSIGN_LAYOUT(*ilayouts, 2, bias_layout);
- } else {
- CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]";
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
- }
-
- CHECK_EQ(olayouts->size(), 1U);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, out_layout);
-
- return true;
-}
-
-NNVM_REGISTER_OP(conv2d)
-.describe(R"code(2D convolution layer (e.g. spatial convolution over images).
-
-This layer creates a convolution kernel that is convolved
-with the layer input to produce a tensor of
-outputs. If `use_bias` is True,
-a bias vector is created and added to the outputs.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, in_channels, height, width) if `layout` is `NCHW`.
-- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
-- **bias**: (channels,)
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
- (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_argument("weight", "4D Tensor", "Weight matrix.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(Conv2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<Conv2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<Conv2DParam>)
-.set_attr<FInferShape>("FInferShape", Conv2DInferShape)
-.set_attr<FInferType>("FInferType", Conv2DInferType<Conv2DParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DCorrectLayout<Conv2DParam>)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<Conv2DParam>)
-.set_support_level(2)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return MakeGradNode("_conv2d_grad", n,
- {ograds[0], n->inputs[Conv2DParam::kData],
- n->inputs[Conv2DParam::kWeight]},
- n->attrs.dict);
-});
-
-NNVM_REGISTER_OP(_contrib_conv2d_NCHWc)
-.describe(R"code(2D convolution layer (e.g. spatial convolution over images).
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "5D Tensor", "Packed input data.")
-.add_argument("weight", "6D Tensor", "Packed weight matrix.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(Conv2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<Conv2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<Conv2DParam>)
-.set_attr<FInferShape>("FInferShape", Conv2DInferShape)
-.set_attr<FInferType>("FInferType", Conv2DInferType<Conv2DParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DCorrectLayout<Conv2DParam>)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<Conv2DParam>)
-.set_support_level(2);
-
-NNVM_REGISTER_OP(_contrib_conv2d_winograd_weight_transform)
-.describe(R"code(Weight transformation of winograd fast convolution algorithm.
-Separate this into another nnvm symbol in order to enable Precompute Pass to compute the
-weight transformation in advance.
-
-- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
-)code" NNVM_ADD_FILELINE)
-.add_argument("weight", "4D Tensor", "Weight tensor.")
-.add_arguments(WinogradWeightTransformParam::__FIELDS__())
-.set_attr_parser(ParamParser<WinogradWeightTransformParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<WinogradWeightTransformParam>)
-.set_attr<FInferShape>("FInferShape", [](const nnvm::NodeAttrs& attrs,
- std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape) {
- const auto& param = nnvm::get<WinogradWeightTransformParam>(attrs.parsed);
- const TShape &wshape = (*in_shape)[0];
-
- CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor";
-
- TShape oshape({param.tile_size + wshape[2] - 1,
- param.tile_size + wshape[3] - 1,
- wshape[0],
- wshape[1]});
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
- })
-.set_attr<FCorrectLayout>("FCorrectLayot", [](const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- Layout layout("OIHW");
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout);
- return true;
-})
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_support_level(5);
-
-DMLC_REGISTER_PARAMETER(WinogradWeightTransformParam);
-
-NNVM_REGISTER_OP(_contrib_conv2d_winograd_without_weight_transform)
-.describe(R"code(Compute conv2d with winograd algorithm.
-
-- **data**: Input is 4D array of shape (batch_size, in_channels, height, width)
-- **weight**: Any shape
- We do not check shape for this input tensor.
-
-- **bias**: (channels,)
-- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width)
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_argument("weight", "Tensor", "Transformed weight tensor.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(WinogradConv2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<WinogradConv2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<WinogradConv2DParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<WinogradConv2DParam>)
-.set_attr<FInferShape>("FInferShape", WinogradConv2DInferShape<WinogradConv2DParam>)
-.set_attr<FInferType>("FInferType", Conv2DInferType<WinogradConv2DParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DCorrectLayout<WinogradConv2DParam>)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<WinogradConv2DParam>)
-.set_support_level(5);
-
-DMLC_REGISTER_PARAMETER(WinogradConv2DParam);
-
-
-inline bool Conv2DWinogradNNPACKWTInferType(const nnvm::NodeAttrs& attrs,
- std::vector<int>* in_type,
- std::vector<int>* out_type) {
- const WinogradNNPACKWeightTransformParam& param =
- nnvm::get<WinogradNNPACKWeightTransformParam>(attrs.parsed);
-
- CHECK_EQ(in_type->size(), 1U) << "Input:[weight]";
- CHECK_EQ(out_type->size(), 1U);
-
- if (param.out_dtype != -1) {
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype);
- } else {
- ElemwiseType<1, 1>(attrs, in_type, out_type);
- }
- return true;
-}
-
-NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_weight_transform)
-.describe(R"code(Weight transformation of winograd fast convolution algorithm.
-This is kept as a separate nnvm symbol so that the Precompute pass can compute the
-weight transformation in advance.
-- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
-)code" NNVM_ADD_FILELINE)
-.add_argument("weight", "4D Tensor", "Weight tensor.")
-.add_arguments(WinogradNNPACKWeightTransformParam::__FIELDS__())
-.set_attr_parser(ParamParser<WinogradNNPACKWeightTransformParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<WinogradNNPACKWeightTransformParam>)
-.set_attr<FInferShape>("FInferShape", [](const nnvm::NodeAttrs& attrs,
- std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape) {
- const TShape &wshape = (*in_shape)[0];
- CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor";
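-  // NNPACK's Winograd kernels operate on fixed 8x8 transformed tiles, hence the hard-coded shape.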
- TShape oshape({wshape[0], wshape[1], 8, 8});
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-})
-.set_attr<FCorrectLayout>("FCorrectLayout", [](const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- Layout layout("OIHW");
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout);
- return true;
-})
-.set_attr<FInferType>("FInferType", Conv2DWinogradNNPACKWTInferType)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_support_level(5);
-
-DMLC_REGISTER_PARAMETER(WinogradNNPACKWeightTransformParam);
-
-NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_without_weight_transform)
-.describe(R"code(Compute conv2d with winograd nnpack.
-- **data**: Input is 4D array of shape (batch_size, in_channels, height, width)
-- **weight**: Any shape
- We do not check shape for this input tensor.
-- **bias**: (channels,)
-- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width)
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_argument("weight", "4D Tensor", "Transformed weight tensor.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(Conv2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<Conv2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<Conv2DParam>)
-.set_attr<FInferShape>("FInferShape", WinogradConv2DInferShape<Conv2DParam>)
-.set_attr<FInferType>("FInferType", Conv2DInferType<Conv2DParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DCorrectLayout<Conv2DParam>)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<Conv2DParam>)
-.set_support_level(5);
-
-
-NNVM_REGISTER_OP(_conv2d_grad)
- .describe(R"code(2D convolution grad.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("ograd", "4D Tensor", "Output grad.")
-.add_argument("data", "4D Tensor", "Input data of conv2d.")
-.add_argument("weight", "4D Tensor", "Input weight.")
-.set_num_inputs(3)
-.set_num_outputs(UseBiasNumInputs<Conv2DParam>)
-.set_attr<FListOutputNames>("FListOutputNames", UseBiasListInputNames<Conv2DParam>)
-.set_attr_parser(ParamParser<Conv2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DParam>)
-.set_attr<FInferShape>(
- "FInferShape", [](const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- const Conv2DParam& param = nnvm::get<Conv2DParam>(attrs.parsed);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kData, in_attrs->at(1));
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kWeight, in_attrs->at(2));
- if (param.use_bias) {
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kBias, TShape({param.channels}));
- }
- return true;
-})
-.set_attr<FInferType>("FInferType", ElemwiseType<3, -1>)
-.set_attr<TIsBackward>("TIsBackward", true);
-
-
-DMLC_REGISTER_PARAMETER(Conv2DTransposeParam);
-
-inline bool Conv2DTransposeInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- static const Layout kOIHW("OIHW");
- const Conv2DTransposeParam& param = nnvm::get<Conv2DTransposeParam>(attrs.parsed);
- const Layout layout(param.layout);
- const Layout kernel_layout(param.kernel_layout);
- if (param.use_bias) {
- CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
- }
- CHECK_EQ(out_shape->size(), 1U);
-
- const TShape& dshape = (*in_shape)[Conv2DTransposeParam::kData];
- if (dshape.ndim() == 0) return false;
- TShape dshape_nchw = ConvertLayout(dshape, layout, kNCHW);
-
- CHECK_EQ(dshape_nchw[1] % param.groups, 0U)
-      << "input channels must be divisible by the number of groups";
- CHECK_EQ(param.channels % param.groups, 0U)
-      << "output channels must be divisible by the number of groups";
- CHECK_EQ(param.kernel_size.ndim(), 2U)
- << "incorrect kernel size: " << param.kernel_size;
- CHECK_EQ(param.strides.ndim(), 2U)
- << "incorrect stride size: " << param.strides;
- CHECK_EQ(param.dilation.ndim(), 2U)
- << "incorrect dilate size: " << param.dilation;
-
- TShape wshape({dshape_nchw[1],
- param.channels / param.groups,
- param.kernel_size[0],
- param.kernel_size[1]});
- wshape = ConvertLayout(wshape, kOIHW, kernel_layout);
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DTransposeParam::kWeight, wshape);
-
- if (param.use_bias) {
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape,
- Conv2DTransposeParam::kBias,
- TShape({param.channels}));
- }
- // dilation
- dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0];
- dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1];
- // output shape.
- TShape oshape({dshape_nchw[0], param.channels, 0, 0});
- oshape[2] = (param.strides[0] * (dshape_nchw[2] - 1) + dilated_ksize_y -
- 2 * param.padding[0] + param.output_padding[0]);
-
- oshape[3] = (param.strides[1] * (dshape_nchw[3] - 1) + dilated_ksize_x -
- 2 * param.padding[1] + param.output_padding[1]);
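-  // e.g. a 32x32 input with strides=2, kernel_size=3, padding=1, output_padding=1
-  // gives 2*(32-1) + 3 - 2*1 + 1 = 64 in each spatial dimension.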
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0,
- ConvertLayout(oshape, kNCHW, layout));
- return true;
-}
-
-inline bool Conv2DTransposeCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const Conv2DTransposeParam& param = nnvm::get<Conv2DTransposeParam>(attrs.parsed);
-
- const Layout in_layout(param.layout);
-
- const Layout kernel_layout(param.kernel_layout);
- if (param.use_bias) {
- CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]";
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 2, Layout("C"));
- } else {
- CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]";
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout);
- }
-
- CHECK_EQ(olayouts->size(), 1U);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, in_layout);
-
- return true;
-}
-
-NNVM_REGISTER_OP(conv2d_transpose)
-.describe(R"code(Transposed 2D convolution layer (sometimes called Deconvolution).
-
-The need for transposed convolutions generally arises
-from the desire to use a transformation going in the opposite direction
-of a normal convolution, i.e., from something that has the shape of the
-output of some convolution to something that has the shape of its input
-while maintaining a connectivity pattern that is compatible with
-said convolution.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, in_channels, height, width) if `layout` is `NCHW`.
-- **weight**: (in_channels, channels, kernel_size[0], kernel_size[1])
-- **bias**: (channels,)
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
-  (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
-
- out_height and out_width are calculated as::
- out_height = (height-1)*strides[0]-2*padding[0]+kernel_size[0]+output_padding[0]
- out_width = (width-1)*strides[1]-2*padding[1]+kernel_size[1]+output_padding[1]
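-
-  For example, a 3x3 input with strides=(2, 2), padding=(1, 1), kernel_size=(3, 3)
-  and no output padding yields a 5x5 output: (3-1)*2-2*1+3+0 = 5.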
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_argument("weight", "4D Tensor", "Weight matrix.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(Conv2DTransposeParam::__FIELDS__())
-.set_attr_parser(ParamParser<Conv2DTransposeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<Conv2DTransposeParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<Conv2DTransposeParam>)
-.set_attr<FInferShape>("FInferShape", Conv2DTransposeInferShape)
-.set_attr<FInferType>("FInferType", Conv2DInferType<Conv2DTransposeParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Conv2DTransposeCorrectLayout)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<Conv2DTransposeParam>)
-.set_support_level(2);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nn.cc
- * \brief Property def of nn operators.
- */
-#include <tvm/operation.h>
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/layout.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include "nn_common.h"
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/nn/dense.h"
-#include "topi/nn.h"
-#include "topi/nn/softmax.h"
-
-namespace nnvm {
-namespace top {
-
-using tvm::Var;
-using tvm::Expr;
-using tvm::Tensor;
-using tvm::Array;
-using nnvm::compiler::FTVMCompute;
-
-// dense
-DMLC_REGISTER_PARAMETER(DenseParam);
-
-inline bool DenseInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const DenseParam& param = nnvm::get<DenseParam>(attrs.parsed);
- if (param.use_bias) {
- CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
- } else {
- CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
- }
- CHECK_EQ(out_shape->size(), 1U);
- // reverse infer
- if ((*out_shape)[0].ndim() != 0) {
- TShape dshape = (*out_shape)[0];
- dshape[dshape.ndim() - 1] = 0;
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kData, dshape);
- }
- dim_t num_inputs = 0;
- if ((*in_shape)[DenseParam::kData].ndim() != 0) {
- TShape oshape = (*in_shape)[DenseParam::kData];
- num_inputs = oshape[oshape.ndim() - 1];
- oshape[oshape.ndim() - 1] = param.units;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kWeight,
- TShape({param.units, num_inputs}));
- if (param.use_bias) {
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kBias, TShape({param.units}));
- }
- return true;
-}
-
-NNVM_REGISTER_OP(dense)
-.describe(R"code(Applies a linear transformation: :math:`Y = XW^T + b`.
-
-- **data**: `(x1, x2, ..., xn, input_dim)`
-- **weight**: `(units, input_dim)`
-- **bias**: `(units,)`
-- **out**: `(x1, x2, ..., xn, units)`
-
-The learnable parameters include both ``weight`` and ``bias``.
-
-If ``use_bias`` is set to be false, then the ``bias`` term is ignored.
-
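-For example, ``data`` of shape ``(32, 128)`` with ``units=64`` uses ``weight`` of shape
-``(64, 128)`` and ``bias`` of shape ``(64,)``, and produces ``out`` of shape ``(32, 64)``.
-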
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "nD Tensor", "Input data.")
-.add_argument("weight", "2D Tensor", "Weight matrix.")
-.add_argument("bias", "1D Tensor", "Bias parameter.")
-.add_arguments(DenseParam::__FIELDS__())
-.set_attr_parser(ParamParser<DenseParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<DenseParam>)
-.set_num_outputs(1)
-.set_num_inputs(UseBiasNumInputs<DenseParam>)
-.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<DenseParam>)
-.set_attr<FInferShape>("FInferShape", DenseInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
-// leave weight & bias layout undefined
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- const DenseParam& param = nnvm::get<DenseParam>(n->attrs.parsed);
-
- NodeEntry data_grad = MakeNode("matmul",
- n->attrs.name + "_data_grad",
- {ograds[0], n->inputs[DenseParam::kWeight]});
- NodeEntry w_grad_sub = MakeNode("matmul",
- n->attrs.name + "_weight_grad_sub0",
- {ograds[0], n->inputs[DenseParam::kData]},
- {{"transpose_a", "true"}});
- TShape w_reduce_axis = {0, -1};
- std::ostringstream w_oss; w_oss << w_reduce_axis;
- NodeEntry w_grad = MakeNode("sum", n->attrs.name + "_weight_grad",
- {w_grad_sub},
- {{"axis", w_oss.str()}, {"exclude", "true"}});
- std::vector<NodeEntry> grads = {data_grad, w_grad};
-
- if (param.use_bias) {
- TShape axis = {-1};
- std::ostringstream b_oss; b_oss << axis;
- grads.push_back(MakeNode("sum", n->attrs.name + "_bias_grad",
- {ograds[0]},
- {{"axis", b_oss.str()}, {"exclude", "true"}}));
- }
- return grads;
-})
-.set_support_level(1);
-
-// relu
-NNVM_REGISTER_ELEMWISE_UNARY_OP(relu)
-.describe(R"code(Computes rectified linear.
-
-.. math::
- max(input, 0)
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::relu(inputs[0], 0.0f) };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = relu(x)
- // grad = indicator(x > 0) * ograd
- NodeEntry sub0 = MakeNode("zeros_like", n->attrs.name + "_sub0",
- {n->inputs[0]});
- NodeEntry sub1 = MakeNode("greater", n->attrs.name + "_sub1",
- {n->inputs[0], sub0}, {{"exclude", "true"}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad",
- {ograds[0], sub1})
- };
-})
-.set_support_level(1);
-
-// dropout
-DMLC_REGISTER_PARAMETER(DropoutParam);
-
-NNVM_REGISTER_OP(dropout)
-.describe(R"(Applies dropout operation to input array.
-
-- During training, each element of the input is set to zero with probability p.
- The whole array is rescaled by :math:`1/(1-p)` to keep the expected
- sum of the input unchanged.
-
-)" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input to which dropout will be applied")
-.add_arguments(DropoutParam::__FIELDS__())
-.set_attr_parser(ParamParser<DropoutParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<DropoutParam>)
-.set_num_inputs(1)
-.set_num_outputs(2)
-.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 2>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 2>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>)
-.set_attr<FNumVisibleOutputs>("FNumVisibleOutputs", [](const NodeAttrs& attrs) {
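-    // Only the first output ("output") is visible to users; the dropout mask stays internal.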
- return 1;
- })
-.set_attr<FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"output", "mask"};
- })
-.set_support_level(1);
-
-// batchnorm
-DMLC_REGISTER_PARAMETER(BatchNormParam);
-
-inline bool BatchNormInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const BatchNormParam& param = nnvm::get<BatchNormParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 5U)
- << "Input:[data, gamma, beta, moving_mean, moving_var]";
- CHECK_EQ(out_shape->size(), 3U);
- const TShape &dshape = in_shape->at(0);
- if (dshape.ndim() == 0) return false;
-  CHECK((size_t)param.axis < dshape.ndim());
-
- TShape bshape({dshape[param.axis]});
- if (in_shape->at(1).ndim() == 0) in_shape->at(1) = bshape;
- if (in_shape->at(2).ndim() == 0) in_shape->at(2) = bshape;
- if (in_shape->at(3).ndim() == 0) in_shape->at(3) = bshape;
- if (in_shape->at(4).ndim() == 0) in_shape->at(4) = bshape;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, dshape);
- out_shape->at(1) = in_shape->at(3);
- out_shape->at(2) = in_shape->at(4);
- return true;
-}
-
-inline bool BatchNormCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- const BatchNormParam& param = nnvm::get<BatchNormParam>(attrs.parsed);
- CHECK_EQ(in_layouts->size(), 5U);
- CHECK_EQ(last_in_layouts->size(), 5U);
- CHECK_EQ(out_layouts->size(), 3U);
-
- Layout data_layout = in_layouts->at(0);
- const Layout& origin_data_layout = last_in_layouts->at(0);
- Layout param_layout("C");
- if (data_layout.defined()) {
- if (data_layout.indexof('C') != param.axis) {
- CHECK(origin_data_layout.defined())
- << "Channel in data layout " << data_layout
- << " is not at index " << param.axis;
- // convert it to the original one.
- data_layout = origin_data_layout;
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout);
- } else if (data_layout.indexof('c') >= 0 &&
- static_cast<uint32_t>(data_layout.indexof('c')) != (data_layout.ndim()-1)) {
- CHECK(origin_data_layout.defined())
- << "sub-channel c in data layout " << data_layout
-          << " is not at the final dimension";
- // convert it to the original one.
- data_layout = origin_data_layout;
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout);
- } else {
- for (Layout::LayoutDim axis : data_layout) {
- if (Layout::is_subdim(axis) && axis != 'c') {
- CHECK(origin_data_layout.defined())
- << "sub-axis other than c appears in data layout " << data_layout;
- // convert it to the original one.
- data_layout = origin_data_layout;
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout);
- break;
- }
- }
- }
-
- // decide the param layout
- if (data_layout.defined()) {
- auto channel_block = data_layout.subsizeof('C');
- if (channel_block > 0) {
- param_layout = param_layout.split('C', 1, channel_block);
- }
- }
- }
-
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, data_layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 1, param_layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 2, param_layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 3, param_layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 4, param_layout);
-
- NNVM_ASSIGN_LAYOUT(*out_layouts, 0, data_layout);
- NNVM_ASSIGN_LAYOUT(*out_layouts, 1, param_layout);
- NNVM_ASSIGN_LAYOUT(*out_layouts, 2, param_layout);
- return true;
-}
-
-NNVM_REGISTER_OP(batch_norm)
-.describe(R"(Batch normalization layer (Ioffe and Szegedy, 2015).
-Normalizes the input at each batch, i.e. applies a transformation
-that maintains the mean activation close to 0 and the activation
-standard deviation close to 1.
-
-.. math::
-
- data\_mean[i] = mean(data[:,i,:,...]) \\
- data\_var[i] = var(data[:,i,:,...])
-
-Then compute the normalized output, which has the same shape as input, as following:
-
-.. math::
-
- out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]
-
-Both *mean* and *var* return a scalar by treating the input as a vector.
-
-Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` have shape *(k,)*.
-
-Besides the inputs and the outputs, this operator accepts two auxiliary
-states, ``moving_mean`` and ``moving_var``, which are *k*-length
-vectors. They are global statistics for the whole dataset, which are updated
-by::
-
- moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
- moving_var = moving_var * momentum + data_var * (1 - momentum)
-
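-For example, with ``momentum = 0.9`` each update moves the running statistics one tenth
-of the way toward the statistics of the current batch.
-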
-The parameter ``axis`` specifies which axis of the input shape denotes
-the 'channel' (separately normalized groups). The default is 1. Specifying -1 sets the channel
-axis to be the last item in the input shape.
-
-.. note::
- This operator can be optimized away for inference.
-)" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input to which batch normalization will be applied")
-.add_argument("gamma", "Tensor", "The gamma scale factor")
-.add_argument("beta", "Tensor", "The beta offset factor")
-.add_argument("moving_mean", "Tensor", "running mean of input")
-.add_argument("moving_var", "Tensor", "running variance of input")
-.add_arguments(BatchNormParam::__FIELDS__())
-.set_attr_parser(ParamParser<BatchNormParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<BatchNormParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", BatchNormCorrectLayout)
-.set_num_inputs(5)
-.set_num_outputs(3)
-.set_attr<FInferShape>("FInferShape", BatchNormInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<5, 3>)
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"data", "gamma", "beta", "moving_mean", "moving_var"};
- })
-.set_attr<FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"output", "mean", "var"};
- })
-.set_attr<FNumVisibleOutputs>("FNumVisibleOutputs", [](const NodeAttrs& attrs) {
- return 1;
- })
-.set_attr<FMutateInputs>("FMutateInputs", [](const NodeAttrs& attrs) {
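-    // moving_mean (input 3) and moving_var (input 4) are updated in place.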
- return std::vector<uint32_t>{3, 4};
- })
-.set_support_level(1);
-
-// softmax
-DMLC_REGISTER_PARAMETER(SoftmaxParam);
-
-NNVM_REGISTER_OP(softmax)
-.describe(R"code(Computes softmax.
-
-.. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}
-
-.. note::
- This operator can be optimized away for inference.
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(SoftmaxParam::__FIELDS__())
-.set_attr_parser(ParamParser<SoftmaxParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<SoftmaxParam>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
- return Array<Tensor>{ topi::nn::softmax(inputs[0], param.axis) };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // grad_x = grad_y dot jacobian of softmax
- //
- // jacobian of softmax
- // [-y1y1 + y1, -y1y2, ... ]
- // [ ... , -y2y2 + y2, ... ]
- // [ ... ... ]
- // [ ... ,-ynyn + yn]
- //
- // grad_x =
- // [-y1*(ograd1*y1 - ograd1 + ograd2*y2 + ...),
- // -y2*(ograd1*y1 - ograd2 + ograd2*y2 + ...),
- // ...
- // -yn*(ograd1*y1 - ogradn + ograd2*y2 + ...)]
-
- // grad_x = ograd elemwise_mul output
- // grad_x = sum(grad_x, keepdim, axis)
- // grad_x = grad_x broadcast_mul output
- // grad_x = neg grad_x
- // grad_x = grad_x + ograd elemwise_mul output
- const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
- NodeEntry output = NodeEntry{n, 0, 0};
- NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output});
- NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0},
- {{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
- NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub1, output});
- return std::vector<NodeEntry> {
- MakeNode("elemwise_sub", n->attrs.name + "_grad", {sub0, sub2})
- };
-});
-
-// log_softmax
-NNVM_REGISTER_OP(log_softmax)
-.describe(R"code(Computes log softmax.
-
-.. math:: \text{log_softmax}(x)_i = \log \frac{exp(x_i)}{\sum_j exp(x_j)}
-
-.. note::
- This operator can be optimized away for inference.
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(SoftmaxParam::__FIELDS__())
-.set_attr_parser(ParamParser<SoftmaxParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<SoftmaxParam>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
- CHECK(param.axis == -1 || param.axis == static_cast<int32_t>(inputs[0].ndim()) - 1)
- << "log_softmax currently only works on last dimension";
- return Array<Tensor>{ topi::nn::log_softmax(inputs[0]) };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // grad_x = grad_y dot jacobian of logsoftmax
- //
- // jacobian of logsoftmax
- // [-y1 + 1, -y2, ... ]
- // [ ... , -y2 + 1, ... ]
- // [ ... ... ]
- // [ ... ,-yn + 1]
- //
- // grad_x =
- // [ograd1 - exp(y1)*(ograd1 + ... + ogradn),
- // ograd2 - exp(y2)*(ograd1 + ... + ogradn),
- // ...
- // ogradn - exp(yn)*(ograd1 + ... + ogradn)]
-
- // grad_x = sum(ograd, keepdim, axis)
- // sigma = exp(output)
- // grad_x = grad_x elemwise_mul sigma
- // grad_x = neg grad_x
- // grad_x = grad_x + ograd
- const SoftmaxParam& param = nnvm::get<SoftmaxParam>(n->attrs.parsed);
- NodeEntry output = NodeEntry{n, 0, 0};
- NodeEntry sub0 = MakeNode("sum", n->attrs.name + "_grad_sub0", {ograds[0]},
- {{"axis", std::to_string(param.axis)}, {"keepdims", "true"}});
- NodeEntry sub1 = MakeNode("exp", n->attrs.name + "_grad_sub1", {output});
- NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub0, sub1});
- return std::vector<NodeEntry> {
- MakeNode("elemwise_sub", n->attrs.name + "_grad", {ograds[0], sub2})
- };
-})
-.set_support_level(1);
-
-// leaky_relu
-DMLC_REGISTER_PARAMETER(LeakyReLUParam);
-
-NNVM_REGISTER_OP(leaky_relu)
-.describe(R"code(Leaky version of a Rectified Linear Unit.
-
-`y = x > 0 ? x : alpha * x`
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(LeakyReLUParam::__FIELDS__())
-.set_attr_parser(ParamParser<LeakyReLUParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<LeakyReLUParam>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const LeakyReLUParam& param = nnvm::get<LeakyReLUParam>(attrs.parsed);
- return Array<Tensor>{ topi::leaky_relu(inputs[0], param.alpha) };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = leak_relu(x)
- // grad = indicator(x > 0) + alpha * indicator(x < 0)
- const LeakyReLUParam& param = nnvm::get<LeakyReLUParam>(n->attrs.parsed);
- NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero",
- {n->inputs[0]});
- NodeEntry sub0 = MakeNode("greater", n->attrs.name + "_pos_grad",
- {n->inputs[0], zero});
- NodeEntry sub1 = MakeNode("less", n->attrs.name + "_neg_grad",
- {n->inputs[0], zero});
- NodeEntry sub2 = MakeNode("__mul_scalar__", n->attrs.name + "_neg_mul_2",
- {sub1},
- {{"scalar", std::to_string(param.alpha)}});
- NodeEntry sub3 = MakeNode("elemwise_add", n->attrs.name + "_sub3", {sub0, sub2});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad", {ograds[0], sub3})
- };
-})
-.set_support_level(1);
-
-// prelu
-DMLC_REGISTER_PARAMETER(PReLUParam);
-
-inline bool PReluInferShape(const nnvm::NodeAttrs &attrs,
- std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape) {
-  const PReLUParam &param = nnvm::get<PReLUParam>(attrs.parsed);
- TShape dshape = in_shape->at(0);
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape);
-
- // The case of parametric relu
-  CHECK(size_t(param.axis) < dshape.ndim())
-    << "Wrong axis (" << param.axis << ") value.";
-
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 1, TShape({dshape[param.axis]}));
-
- TShape oshape(dshape);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-inline bool PReluCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- const PReLUParam& param = nnvm::get<PReLUParam>(attrs.parsed);
- CHECK_EQ(in_layouts->size(), 2U);
- CHECK_EQ(last_in_layouts->size(), 2U);
- CHECK_EQ(out_layouts->size(), 1U);
-
- const Layout& data_layout = last_in_layouts->at(0).defined() ?
- last_in_layouts->at(0) : in_layouts->at(0);
- if (data_layout.defined()) {
- CHECK(data_layout.indexof('C') == param.axis && !data_layout.contains('c'))
- << "Channel in data layout " << data_layout
- << " is not at index " << param.axis;
- }
-
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, data_layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 1, Layout("C"));
- NNVM_ASSIGN_LAYOUT(*out_layouts, 0, data_layout);
-
- return true;
-}
-
-NNVM_REGISTER_OP(prelu)
-.describe(R"code(Parametric version of a Rectified Linear Unit.
-It accepts two arguments: an input ``x`` and a channelwise slope ``alpha``
-and computes the output as :math:`y = x > 0 ? x : alpha * x`,
-where :math:`*` is a channelwise multiplication applied to each sample in the batch.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_argument("alpha", "Tensor", "Input channelwise alpha.")
-.add_arguments(PReLUParam::__FIELDS__())
-.set_attr_parser(ParamParser<PReLUParam>)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", PReluInferShape)
-.set_attr<FCorrectLayout>("FCorrectLayout", PReluCorrectLayout)
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"data", "alpha"};
- })
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const PReLUParam& param = nnvm::get<PReLUParam>(attrs.parsed);
- return Array<Tensor>{ topi::prelu(inputs[0], inputs[1], param.axis)};
- })
-.set_support_level(4);
-
-DMLC_REGISTER_PARAMETER(PadParam);
-
-inline bool PadInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const PadParam& param = nnvm::get<PadParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- CHECK_EQ(out_shape->size(), 1U);
- TShape dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
- CHECK_EQ(param.pad_width.ndim(), dshape.ndim());
- TShape oshape = dshape;
- for (uint32_t i = 0; i < dshape.ndim(); i++) {
- CHECK_EQ(param.pad_width[i].ndim(), 2U);
- int pad_before = param.pad_width[i][0];
- int pad_after = param.pad_width[i][1];
- oshape[i] = dshape[i] + pad_before + pad_after;
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(pad)
-.describe(R"code(Pad for n-D tensor.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "n-D Tensor", "Input data.")
-.add_arguments(PadParam::__FIELDS__())
-.set_attr_parser(ParamParser<PadParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<PadParam>)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_attr<FInferShape>("FInferShape", PadInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const PadParam& param = nnvm::get<PadParam>(attrs.parsed);
- auto pad_width = param.pad_width;
- CHECK(pad_width.ndim() == inputs[0]->shape.size() &&
- pad_width[0].ndim() == 2)
- << "Illegal pad_width";
- Array<tvm::Expr> pad_before;
- for (size_t i = 0; i < pad_width.ndim(); ++i) {
- pad_before.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][0]));
- }
- Array<tvm::Expr> pad_after;
- for (size_t i = 0; i < pad_width.ndim(); ++i) {
- pad_after.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][1]));
- }
- return Array<Tensor>{ topi::pad(inputs[0], pad_before, pad_after,
- tvm::make_const(inputs[0]->dtype, param.pad_value)) };
-})
-.set_support_level(1);
-
-// layout transformer
-DMLC_REGISTER_PARAMETER(LayoutTransformParam);
-
-inline bool LayoutTransformInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
- CHECK_EQ(out_attrs->size(), 1U);
- const LayoutTransformParam& param = nnvm::get<LayoutTransformParam>(attrs.parsed);
- const TShape &dshape = (*in_attrs)[0];
- if (dshape.ndim() == 0) return false;
- const TShape &oshape = ConvertLayout(dshape,
- Layout(param.src_layout),
- Layout(param.dst_layout));
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(__layout_transform__)
-.describe(R"code(Transform the input data layout.
-
-For transforming from NCHW to NCHW16c, the `__layout_transform__` operator reshapes
-the input array as output[n, C, h, w, c] = data[n, C*16 + c, h, w]
-
-)code" NNVM_ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(LayoutTransformParam::__FIELDS__())
-.set_attr_parser(ParamParser<LayoutTransformParam>)
-.set_attr<FInferShape>("FInferShape", LayoutTransformInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>(
- "FCorrectLayout", [](const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const LayoutTransformParam& param = nnvm::get<LayoutTransformParam>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), 1U);
- CHECK_EQ(olayouts->size(), 1U);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, Layout(param.src_layout));
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, Layout(param.dst_layout));
- return true;
-})
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& outputs) {
- const LayoutTransformParam& param = nnvm::get<LayoutTransformParam>(attrs.parsed);
- return Array<Tensor>{
- topi::layout_transform(inputs[0], param.src_layout, param.dst_layout)
- };
-})
-.set_support_level(1);
-
-DMLC_REGISTER_PARAMETER(LRNParam);
-
-inline bool LRNInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- TShape dshape = (*in_shape)[0];
- TShape oshape = dshape;
-
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(lrn)
-.describe(R"code(Local response normalization (LRN) layer.)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.set_attr_parser(ParamParser<LRNParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<LRNParam>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", LRNInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_support_level(1);
-
-DMLC_REGISTER_PARAMETER(L2NormalizeParam);
-
-inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- TShape dshape = (*in_shape)[0];
- TShape oshape = dshape;
-
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(l2_normalize)
-.describe(R"code(L2 normalization layer.)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.set_attr_parser(ParamParser<L2NormalizeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<L2NormalizeParam>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", L2NormalizeInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>)
-.set_support_level(1);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nn_common.h
- * \brief Common utilities for nn ops.
- */
-#ifndef NNVM_TOP_NN_NN_COMMON_H_
-#define NNVM_TOP_NN_NN_COMMON_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <nnvm/layout.h>
-#include <nnvm/top/nn.h>
-#include <string>
-#include <vector>
-#include <utility>
-#include <algorithm>
-
-namespace nnvm {
-namespace top {
-
-template<typename ParamType>
-inline uint32_t UseBiasNumInputs(const NodeAttrs& attrs) {
- const ParamType& param = get<ParamType>(attrs.parsed);
- return param.use_bias ? 3 : 2;
-}
-
-template<typename ParamType>
-inline std::vector<std::string> UseBiasListInputNames(const NodeAttrs& attrs) {
- const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
- if (param.use_bias) {
- return {"data", "weight", "bias"};
- } else {
- return {"data", "weight"};
- }
-}
-
-/*!
- * \brief Convert shape in src_layout to shape in dst_layout
- * \param src original shape
- * \param src_layout layout of original shape
- * \param dst_layout target layout
- * \return shape in target layout
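- *   e.g. shape (1, 32, 224, 224) in layout NCHW converts to (1, 2, 224, 224, 16) in NCHW16c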
- */
-inline TShape ConvertLayout(TShape src, const Layout& src_layout, const Layout& dst_layout) {
- if (src_layout == dst_layout) {
- return src;
- } else if (!src_layout.defined()) {
- LOG(FATAL) << "cannot convert undefined layout to " << dst_layout;
- } else if (!dst_layout.defined()) {
- LOG(FATAL) << "cannot convert " << src_layout << " to undefined layout";
- }
-
- CHECK(src_layout.convertible(dst_layout)) << "cannot convert from "
- << src_layout << " to " << dst_layout;
-
- TShape dst(dst_layout.ndim());
- for (size_t i = 0; i < src_layout.ndim(); ++i) {
- Layout::LayoutDim src_dim = src_layout[i];
- if (Layout::is_superdim(src_dim)) {
- int dst_major_pos = dst_layout.indexof(Layout::to_superdim(src_dim));
- int dst_minor_pos = dst_layout.indexof(Layout::to_subdim(src_dim));
- int src_minor_pos = src_layout.indexof(Layout::to_subdim(src_dim));
- int src_factor = src_layout.subsizeof(src_dim);
- int dst_factor = dst_layout.subsizeof(src_dim);
-
- uint32_t src_dim_size = src[i];
- if (src_minor_pos >= 0) {
- CHECK_EQ(src_factor, src[src_minor_pos]) << "src shape " << src
- << " does not agree with layout " << src_layout;
- src_dim_size *= src_factor;
- }
-
- dst[dst_major_pos] = src_dim_size;
- if (dst_minor_pos >= 0) {
- CHECK_GT(dst_factor, 0);
- CHECK_LE(dst_factor, src_dim_size) << "Converting " << src
- << " from " << src_layout
- << " to " << dst_layout
- << ": cannot split dimension size of "
- << src_dim_size << " by " << dst_factor;
- dst[dst_major_pos] /= dst_factor;
- dst[dst_minor_pos] = dst_factor;
- }
- }
- }
- return dst;
-}
-
-} // namespace top
-} // namespace nnvm
-
-#endif // NNVM_TOP_NN_NN_COMMON_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-/*!
- * \file pooling.cc
- * \brief Property def of pooling operators.
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/util.h>
-#include <nnvm/top/nn.h>
-#include "nn_common.h"
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/nn/pooling.h"
-
-namespace nnvm {
-namespace top {
-using namespace tvm;
-using namespace nnvm::compiler;
-
-DMLC_REGISTER_PARAMETER(MaxPool2DParam);
-
-template <typename T>
-inline bool Pool2DInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const T& param = nnvm::get<T>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- CHECK_EQ(out_shape->size(), 1U);
-
- TShape dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
-
- CHECK_GE(dshape.ndim(), 2U)
-    << "Pool2D only supports input >= 2-D: input must have height and width";
-
- Layout layout(param.layout);
- CHECK(layout.contains('H') && layout.contains('W') &&
- !layout.contains('h') && !layout.contains('w'))
- << "Invalid layout " << layout
- << ". Pool2D layout must have H and W, which cannot be split";
-
- const auto hidx = layout.indexof('H');
- const auto widx = layout.indexof('W');
-
- dim_t pad_h, pad_w;
- if (param.padding.ndim() == 1) {
- pad_h = param.padding[0] * 2;
- pad_w = param.padding[0] * 2;
- } else if (param.padding.ndim() == 2) {
- // (top, left)
- pad_h = param.padding[0] * 2;
- pad_w = param.padding[1] * 2;
- } else if (param.padding.ndim() == 4) {
- // (top, left, bottom, right)
- pad_h = param.padding[0] + param.padding[2];
- pad_w = param.padding[1] + param.padding[3];
- } else {
- return false;
- }
-
- TShape oshape = dshape;
- CHECK(param.pool_size[0] <= dshape[hidx] + pad_h)
- << "pool size (" << param.pool_size[0] << ") exceeds input (" << dshape[hidx]
- << " padded to " << (dshape[hidx] + pad_h) << ")";
- CHECK(param.pool_size[1] <= dshape[widx] + pad_w)
- << "pool size (" << param.pool_size[1] << ") exceeds input (" << dshape[widx]
- << " padded to " << (dshape[widx] + pad_w) << ")";
-
- if (!param.ceil_mode) {
- oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0]) /
- param.strides[0]) + 1;
- oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1]) /
- param.strides[1]) + 1;
- } else {
- oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0] +
- param.strides[0] - 1) / param.strides[0]) + 1;
- oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1] +
- param.strides[1] - 1) / param.strides[1]) + 1;
- }
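-  // e.g. a 32x32 input with 3x3 pooling, stride 2 and padding 1 on every side:
-  // floor((32 + 2 - 3) / 2) + 1 = 16 in both spatial dimensions.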
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-template <typename T>
-inline bool Pool2DCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
-  const T &param = nnvm::get<T>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), 1);
- CHECK_EQ(last_ilayouts->size(), 1);
- CHECK_EQ(olayouts->size(), 1);
-
- Layout input = (*ilayouts)[0];
- const Layout layout(param.layout);
-
- if (input.defined()) {
- CHECK(input.convertible(layout)) << "Invalid input layout " << input;
- if (input.indexof('W') != layout.indexof('W') ||
- input.indexof('H') != layout.indexof('H') ||
- input.contains('w') || input.contains('h')) {
- // as long as the index doesn't change for width and height
- // pool2d can keep the input layout.
- input = layout;
- }
- } else {
- input = layout;
- }
-
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, input);
-
- return true;
-}
-
-NNVM_REGISTER_OP(max_pool2d)
-.describe(R"code(Max pooling operation for 2D data.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, channels, height, width) if `layout` is `NCHW`.
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
- (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
- out_height and out_width are calculated as::
-
- out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1
- out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1
-
- where padding will be an expanded array based on number of values passed as::
- one int : all sides same padding used.
- two int : bottom, right use same as top and left.
- four int: padding width in the order of (top, left, bottom, right).
-
- When `ceil_mode` is `True`, ceil will be used instead of floor in this
- equation.
-
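-  For example, ``height = 7`` with ``pool_size = (2, 2)``, ``strides = (2, 2)`` and no
-  padding gives out_height = floor(5/2)+1 = 3, or ceil(5/2)+1 = 4 when `ceil_mode` is `True`.
-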
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(MaxPool2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<MaxPool2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MaxPool2DParam>)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_attr<FInferShape>("FInferShape", Pool2DInferShape<MaxPool2DParam>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Pool2DCorrectLayout<MaxPool2DParam>)
-.set_attr<FTVMCompute>("FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const MaxPool2DParam& param = nnvm::get<MaxPool2DParam>(attrs.parsed);
- auto pool_size = ShapeToArray(param.pool_size);
- auto strides = ShapeToArray(param.strides);
- auto padding = ShapeToArray(param.padding);
- auto ceil_mode = param.ceil_mode;
-
- Layout layout(param.layout);
- CHECK(layout.convertible(Layout("NCHW")))
- << "max_pool2d currently only supports layouts that are convertible from NCHW";
- CHECK_EQ(layout.indexof('h'), -1) << "max_pool2d does not support input split on height";
- CHECK_EQ(layout.indexof('w'), -1) << "max_pool2d does not support input split on width";
-
- CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
-    << "Pool2D only supports 4-D input (e.g., NCHW)"
- << " or 5-D input (last dimension is a split of channel)";
-
- if (param.padding.ndim() == 1) {
- padding.push_back(padding[0]);
- padding.push_back(padding[0]);
- padding.push_back(padding[0]);
- } else if (param.padding.ndim() == 2) {
- padding.push_back(padding[0]);
- padding.push_back(padding[1]);
- }
-
- return Array<Tensor>{
- topi::nn::pool(inputs[0], pool_size, strides, padding,
- topi::nn::kMaxPool, ceil_mode, layout.name())};
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return MakeGradNode("_max_pool2d_grad", n,
- {ograds[0], n->inputs[0], NodeEntry{n, 0, 0}},
- n->attrs.dict);
-})
-.set_support_level(2);
-
-NNVM_REGISTER_OP(_max_pool2d_grad)
- .describe(R"code(Max pooling 2D grad.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("ograd", "4D Tensor", "Output grad.")
-.add_argument("input", "4D Tensor", "Input data of max_pool2d grad.")
-.add_argument("output", "4D Tensor", "Output data of max_pool2d grad.")
-.set_num_inputs(3)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<MaxPool2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MaxPool2DParam>)
-.set_attr<FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
-.set_attr<FInferType>("FInferType", ElemwiseType<3, 1>)
-.set_attr<TIsBackward>("TIsBackward", true);
-
-DMLC_REGISTER_PARAMETER(AvgPool2DParam);
-
-NNVM_REGISTER_OP(avg_pool2d)
-.describe(R"code(Average pooling operation for 2D data.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, channels, height, width) if `layout` is `NCHW`.
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
- (batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
- out_height and out_width are calculated as::
-
- out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1
- out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1
-
- where padding will be an expanded array based on number of values passed as::
- one int : all sides same padding used.
- two int : bottom, right use same as top and left.
- four int: padding width in the order of (top, left, bottom, right).
-
- When `ceil_mode` is `True`, ceil will be used instead of floor in this
- equation.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(AvgPool2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<AvgPool2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<AvgPool2DParam>)
-.set_attr<FInferShape>("FInferShape", Pool2DInferShape<AvgPool2DParam>)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", Pool2DCorrectLayout<AvgPool2DParam>)
-.set_attr<FTVMCompute>("FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const AvgPool2DParam& param = nnvm::get<AvgPool2DParam>(attrs.parsed);
- auto pool_size = ShapeToArray(param.pool_size);
- auto strides = ShapeToArray(param.strides);
- auto padding = ShapeToArray(param.padding);
- auto ceil_mode = param.ceil_mode;
- auto count_include_pad = param.count_include_pad;
-
- Layout layout(param.layout);
- CHECK(layout.convertible(Layout("NCHW")))
- << "avg_pool2d currently only supports layouts that are convertible from NCHW";
- CHECK_EQ(layout.indexof('h'), -1) << "avg_pool2d does not support input split on height";
- CHECK_EQ(layout.indexof('w'), -1) << "avg_pool2d does not support input split on width";
-
- CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
-    << "Pool2D only supports 4-D input (e.g., NCHW)"
- << " or 5-D input (last dimension is a split of channel)";
-
- if (param.padding.ndim() == 1) {
- padding.push_back(padding[0]);
- padding.push_back(padding[0]);
- padding.push_back(padding[0]);
- } else if (param.padding.ndim() == 2) {
- padding.push_back(padding[0]);
- padding.push_back(padding[1]);
- }
-
- return Array<Tensor>{
- topi::nn::pool(inputs[0], pool_size, strides, padding,
- topi::nn::kAvgPool, ceil_mode, layout.name(), count_include_pad)};
-})
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_support_level(2);
-
-
-DMLC_REGISTER_PARAMETER(GlobalPool2DParam);
-
-inline bool GlobalPool2DInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- const GlobalPool2DParam& param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- CHECK_EQ(out_shape->size(), 1U);
-
- TShape dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
-
- CHECK_GE(dshape.ndim(), 2U)
-    << "Pool2D only supports input >= 2-D: input must have height and width";
-
- Layout layout(param.layout);
- CHECK(layout.contains('H') && layout.contains('W') &&
- !layout.contains('h') && !layout.contains('w'))
- << "Invalid layout " << layout
- << ". Pool2D layout must have H and W, which cannot be split";
-
- const auto hidx = layout.indexof('H');
- const auto widx = layout.indexof('W');
-
- TShape oshape = dshape;
- oshape[hidx] = oshape[widx] = 1;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-inline bool GlobalPool2DCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
-  const GlobalPool2DParam &param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), 1);
- CHECK_EQ(last_ilayouts->size(), 1);
- CHECK_EQ(olayouts->size(), 1);
-
- Layout input = (*ilayouts)[0];
- const Layout layout(param.layout);
-
- if (input.defined()) {
- CHECK(input.convertible(layout)) << "Invalid input layout " << input;
- if (input.indexof('W') != layout.indexof('W') ||
- input.indexof('H') != layout.indexof('H') ||
- input.contains('w') || input.contains('h')) {
- // as long as the index doesn't change for width and height
- // pool2d can keep the input layout.
- input = layout;
- }
- } else {
- input = layout;
- }
-
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, input);
-
- return true;
-}
-
-NNVM_REGISTER_OP(global_max_pool2d)
-.describe(R"code(Global max pooling operation for 2D data.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, channels, height, width) if `layout` is `NCHW`.
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
- (batch_size, channels, 1, 1) if `layout` is `NCHW`.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(GlobalPool2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<GlobalPool2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<GlobalPool2DParam>)
-.set_attr<FInferShape>("FInferShape", GlobalPool2DInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", GlobalPool2DCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const GlobalPool2DParam& param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
- Layout layout(param.layout);
- CHECK(layout.convertible(Layout("NCHW")))
- << "global_max_pool2d currently only supports layouts that are convertible from NCHW";
- CHECK_EQ(layout.indexof('h'), -1)
- << "global_max_pool2d does not support input split on height";
- CHECK_EQ(layout.indexof('w'), -1)
- << "global_max_pool2d does not support input split on width";
-
- CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
-      << "Pool2D only supports 4-D input (e.g., NCHW)"
- << " or 5-D input (last dimension is a split of channel)";
-
- return Array<Tensor>{
- topi::nn::global_pool(inputs[0], topi::nn::kMaxPool, layout.name()) };
-})
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_support_level(2);
-
-
-NNVM_REGISTER_OP(global_avg_pool2d)
-.describe(R"code(Global average pooling operation for 2D data.
-
-- **data**: This depends on the `layout` parameter. Input is 4D array of shape
- (batch_size, channels, height, width) if `layout` is `NCHW`.
-- **out**: This depends on the `layout` parameter. Output is 4D array of shape
- (batch_size, channels, 1, 1) if `layout` is `NCHW`.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(GlobalPool2DParam::__FIELDS__())
-.set_attr_parser(ParamParser<GlobalPool2DParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<GlobalPool2DParam>)
-.set_attr<FInferShape>("FInferShape", GlobalPool2DInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", GlobalPool2DCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const GlobalPool2DParam& param = nnvm::get<GlobalPool2DParam>(attrs.parsed);
- Layout layout(param.layout);
- CHECK(layout.convertible(Layout("NCHW")))
- << "global_avg_pool2d currently only supports layouts that are convertible from NCHW";
- CHECK_EQ(layout.indexof('h'), -1)
- << "global_avg_pool2d does not support input split on height";
- CHECK_EQ(layout.indexof('w'), -1)
- << "global_avg_pool2d does not support input split on width";
-
- CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
- << "Pool2D only support 4-D input (e.g., NCHW)"
- << " or 5-D input (last dimension is a split of channel)";
-
- return Array<Tensor>{
- topi::nn::global_pool(inputs[0], topi::nn::kAvgPool, layout.name()) };
-})
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_support_level(2);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file upsampling.cc
- * \brief Property def of upsampling operators.
- */
-#include <tvm/operation.h>
-#include <tvm/expr.h>
-#include <nnvm/layout.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include "nn_common.h"
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/elemwise.h"
-#include "topi/transform.h"
-#include "topi/nn/upsampling.h"
-
-namespace nnvm {
-namespace top {
-using tvm::Expr;
-using tvm::Array;
-using tvm::Tensor;
-using nnvm::compiler::FTVMCompute;
-
-DMLC_REGISTER_PARAMETER(UpSamplingParam);
-
-inline bool UpSamplingInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- static const Layout kNCHW("NCHW");
- const UpSamplingParam& param = nnvm::get<UpSamplingParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- CHECK_EQ(out_shape->size(), 1U);
- TShape dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
-
- dshape = ConvertLayout(dshape, param.layout, kNCHW);
- TShape oshape = dshape;
- oshape[2] = oshape[2] * param.scale;
- oshape[3] = oshape[3] * param.scale;
- oshape = ConvertLayout(oshape, kNCHW, param.layout);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
-
- return true;
-}
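// Worked example (illustrative sketch, not part of the original source):
//   layout = "NCHW", scale = 2, input shape (1, 16, 32, 32)
//     -> H and W are doubled, output shape (1, 16, 64, 64).
//   layout = "NHWC", scale = 2, input shape (1, 32, 32, 16)
//     -> viewed as NCHW (1, 16, 32, 32), scaled to (1, 16, 64, 64),
//        then converted back to NHWC: (1, 64, 64, 16).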
-
-inline bool UpsamplingLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- const UpSamplingParam& param = nnvm::get<UpSamplingParam>(attrs.parsed);
- CHECK_EQ(in_layouts->size(), 1U);
- CHECK_EQ(out_layouts->size(), 1U);
- const Layout layout(param.layout);
- NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout);
- NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout);
- return true;
-}
-
-NNVM_REGISTER_OP(upsampling)
-.describe(R"(Perform upsampling to input array with nearest neighbour or bilinear interpolation.
-
-- **data**: data is 4D array of shape
- (batch_size, channels, in_height, in_width) for NCHW
- (batch_size, in_height, in_width, channels) for NHWC
-
-- **out**: Output is 4D array of shape
- for layout NCHW
- (batch_size, channels, in_height*scale, in_width*scale)
-
- for layout NHWC
- (batch_size, in_height*scale, in_width*scale, channels)
-
-)" NNVM_ADD_FILELINE)
-.add_argument("data", "4D Tensor", "Input data.")
-.add_arguments(UpSamplingParam::__FIELDS__())
-.set_attr_parser(ParamParser<UpSamplingParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<UpSamplingParam>)
-.set_attr<FInferShape>("FInferShape", UpSamplingInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", UpsamplingLayout)
-.set_num_outputs(1)
-.set_num_inputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const UpSamplingParam& param = nnvm::get<UpSamplingParam>(attrs.parsed);
- Array<Expr> oshape;
- if (param.layout == "NCHW") {
- oshape.push_back(out_info[0]->shape[2]);
- oshape.push_back(out_info[0]->shape[3]);
- } else {
- oshape.push_back(out_info[0]->shape[1]);
- oshape.push_back(out_info[0]->shape[2]);
- }
-
- return Array<Tensor>{ topi::nn::upsampling(inputs[0], oshape, param.layout, param.method)};
-})
-.set_support_level(2);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file op_common.h
- * \brief Common operator utilities
- */
-#ifndef NNVM_TOP_OP_COMMON_H_
-#define NNVM_TOP_OP_COMMON_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <nnvm/top/tensor.h>
-#include <string>
-#include <vector>
-#include <utility>
-#include <unordered_map>
-#include <unordered_set>
-
-namespace nnvm {
-namespace top {
-/*!
- * \brief Parse keyword arguments as PType arguments and save to parsed
- * \tparam PType the parameter type.
- * \param attrs The attributes.
- */
-template<typename PType>
-inline void ParamParser(nnvm::NodeAttrs* attrs) {
- PType param;
- try {
- param.Init(attrs->dict);
- } catch (const dmlc::ParamError& e) {
- std::ostringstream os;
- os << e.what();
- os << ", in operator " << attrs->op->name << "("
- << "name=\"" << attrs->name << "\"";
- for (const auto& k : attrs->dict) {
- os << ", " << k.first << "=\"" << k.second << "\"";
- }
- os << ")";
- throw dmlc::ParamError(os.str());
- }
- attrs->parsed = std::move(param);
-}
-
-/*!
- * \brief Build the attribute dictionary, including the parameters parsed as PType.
- * \tparam PType the parameter type.
- * \param attrs The attributes.
- * \return the attribute dictionary.
- */
-template<typename PType>
-inline std::unordered_map<std::string, std::string>
-ParamGetAttrDict(const nnvm::NodeAttrs& attrs) {
- std::unordered_map<std::string, std::string> dict = attrs.dict;
- nnvm::get<PType>(attrs.parsed).UpdateDict(&dict);
- return dict;
-}
-
-/*! \brief check if shape is empty or contains an unknown (0) dim. */
-inline bool shape_is_none(const TShape& x) {
- return x.ndim() == 0 || x.Size() == 0;
-}
-
-/*! \brief check if type is none (-1) */
-inline bool type_is_none(const int& x) {
- return x == -1;
-}
-
-/*! \brief check if shape is scalar({1}). */
-inline bool shape_is_scalar(const TShape& x) {
- return x.ndim() == 1 && x.Size() == 1;
-}
-
-/*! \brief get string representation of shape */
-inline std::string shape_string(const TShape& x) {
- std::ostringstream os;
- os << x;
- return os.str();
-}
-
-/*! \brief get string representation of type */
-inline std::string type_string(const int& x) {
- return std::to_string(x);
-}
-
-/*!
- * \brief Assign x to y. Checks for compatibility when y is not empty.
- * Allow missing dim in both x and y (as 0).
- * \param y target shape.
- * \param x source shape.
- * \return whether x and y are compatible.
- */
-inline bool shape_assign(TShape *y, const TShape& x) {
- if (y->ndim() == 0) {
- *y = x;
- return true;
- } else if (y->ndim() != x.ndim()) {
- return x.ndim() == 0;
- } else {
- for (size_t i = 0; i < y->ndim(); ++i) {
- if ((*y)[i] == 0) {
- (*y)[i] = x[i];
- } else if ((*y)[i] != x[i] && x[i] != 0) {
- return false;
- }
- }
- return true;
- }
-}
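// Worked example (illustrative sketch, not part of the original source):
// shape_assign merges partially known shapes, with 0 marking an unknown dim:
//   y = (0, 3, 0), x = (2, 3, 4)  ->  y becomes (2, 3, 4), returns true
//   y = (2, 3),    x = (2, 4)     ->  dim 1 conflicts, returns false
//   y = (2, 3),    x = ()         ->  x is unknown, y is kept, returns true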
-
-/*!
- * \brief Assign x to y. Checks for compatibility when y is not -1.
- * \param y target type.
- * \param x source type.
- * \return whether x and y are compatible.
- */
-inline bool type_assign(int *y, const int& x) {
- if (*y == -1) {
- *y = x;
- return true;
- } else if (*y != x && x != -1) {
- return false;
- }
- return true;
-}
-
-template<typename AttrType>
-inline std::string attr_assign_error_msg(const NodeAttrs& attrs,
- int index, bool is_input,
- const AttrType& expected,
- const AttrType& actual,
- const char* attr_name) {
- static const auto& flist_inputs = Op::GetAttr<FListInputNames>("FListInputNames");
- static const auto& flist_outputs = Op::GetAttr<FListOutputNames>("FListOutputNames");
- const auto& flist = is_input ? flist_inputs : flist_outputs;
- std::string name;
- if (flist.count(attrs.op)) {
- name = flist[attrs.op](attrs)[index];
- } else {
- name = (is_input ? "data" : "output") + std::to_string(index);
- }
- std::ostringstream msg;
- msg << "Operator " << attrs.op->name << "(";
- for (const auto& kv : attrs.dict) msg << kv.first << "=" << kv.second << ", ";
- msg << "name=" << attrs.name << ") expects " << name << "\'s " << attr_name
- << " to be " << expected << ", but got " << actual << ".";
- return msg.str();
-}
-
-/*!
- * \brief macro to assign shape to an input if it is unknown, otherwise check consistency
- * Use a macro so the error message points to the calling file
- * \param inputs the shape array to store the result
- * \param index the index in the array
- * \param shape the inferred shape
- */
-#define NNVM_ASSIGN_INPUT_SHAPE(attrs, inputs, index, shape) \
- { \
- if (!shape_assign(&(inputs)[index], TShape(shape))) { \
- LOG(FATAL) << attr_assign_error_msg(attrs, index, true, shape, \
- (inputs)[index], "shape"); \
- } \
- }
-
-/*!
- * \brief macro to assign shape to an output if it is unknown, otherwise check consistency
- * Use a macro so the error message points to the calling file
- * \param outputs the shape array to store the result
- * \param index the index in the array
- * \param shape the inferred shape
- */
-#define NNVM_ASSIGN_OUTPUT_SHAPE(attrs, outputs, index, shape) \
- { \
- if (!shape_assign(&(outputs)[index], TShape(shape))) { \
- LOG(FATAL) << attr_assign_error_msg(attrs, index, false, shape, \
- (outputs)[index], "shape"); \
- } \
- }
-
-/*!
- * \brief macro to assign type to an input if it is unknown (-1), otherwise check consistency
- * Use a macro so the error message points to the calling file
- * \param inputs the type array to store the result
- * \param index the index in the array
- * \param type the inferred type
- */
-#define NNVM_ASSIGN_INPUT_TYPE(attrs, inputs, index, type) \
- { \
- if (!type_assign(&(inputs)[index], type)) { \
- LOG(FATAL) << attr_assign_error_msg(attrs, index, true, type, \
- (inputs)[index], "type"); \
- } \
- }
-
-/*!
- * \brief macro to assign type to an output if it is unknown (-1), otherwise check consistency
- * Use a macro so the error message points to the calling file
- * \param outputs the type array to store the result
- * \param index the index in the array
- * \param type the inferred type
- */
-#define NNVM_ASSIGN_OUTPUT_TYPE(attrs, outputs, index, type) \
- { \
- if (!type_assign(&(outputs)[index], type)) { \
- LOG(FATAL) << attr_assign_error_msg(attrs, index, false, type, \
- (outputs)[index], "type"); \
- } \
- }
-
-#define NNVM_ASSIGN_LAYOUT(outputs, index, layout) \
- { \
- if (layout.defined()) { \
- (outputs)[index] = layout; \
- } \
- }
-
-/*!
- * \brief macro assign rhs shape to lhs
- * Use macro so we can see the error file more clearly
- * \param lhs lhs shape
- * \param rhs rhs shape
- */
-#define SHAPE_ASSIGN(lhs, rhs) \
- if ((lhs).ndim() == 0) (lhs) = (rhs); \
- else \
- CHECK_EQ(lhs, rhs) << "shape inference inconsistent"; \
-
-/*!
- * \brief macro assign rhs type to lhs
- * Use macro so we can see the error file more clearly
- * \param lhs lhs type
- * \param rhs rhs type
- */
-#define DTYPE_ASSIGN(lhs, rhs) \
- if ((lhs) == -1) (lhs) = (rhs); \
- else \
- CHECK_EQ(lhs, rhs) << "type inference inconsistent"; \
-
-// propagate the first input shape to all inputs and outputs
-inline bool SameShape(const NodeAttrs& attrs,
- std::vector<TShape> *ishape,
- std::vector<TShape> *oshape) {
- if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false;
- for (TShape& pshape : *oshape) {
- pshape = (*ishape)[0];
- }
- for (TShape& pshape : *ishape) {
- pshape = (*ishape)[0];
- }
- return true;
-}
-
-// return shape from node attrs
-template<typename PType>
-inline bool ZeroShape(const NodeAttrs& attrs,
- std::vector<TShape> *ishape,
- std::vector<TShape> *oshape) {
- const TShape& ts = dmlc::get<PType>(attrs.parsed).shape;
- if (ts.ndim() != 0) {
- SHAPE_ASSIGN(oshape->at(0), ts);
- return true;
- } else {
- return false;
- }
-}
-
-// do not infer layout
-inline bool ZeroLayout(const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- return true;
-}
-
-// simply assign output shape or type from input
-template<typename AttrType, int in_index, int out_index>
-inline bool AssignOutputAttr(const NodeAttrs& attrs,
- std::vector<AttrType> *in_attrs,
- std::vector<AttrType> *out_attrs) {
- CHECK_LT(in_index, in_attrs->size());
- CHECK_LT(out_index, out_attrs->size());
- const TShape &dshape = in_attrs->at(in_index);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, out_index, dshape);
- return true;
-}
-
-// return type from node attrs
-template<typename PType>
-inline bool ZeroType(const NodeAttrs& attrs,
- std::vector<int> *iattr,
- std::vector<int> *oattr) {
- int dtype = dmlc::get<PType>(attrs.parsed).dtype;
- DTYPE_ASSIGN(oattr->at(0), dtype);
- return true;
-}
-
-// Make zero grad node
-inline std::vector<NodeEntry> MakeZeroGradNodes(
- const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- std::vector<NodeEntry> ret;
- for (uint32_t i = 0; i < n->num_inputs(); ++i) {
- ret.push_back(MakeNode("zeros_like", n->attrs.name + "_zero_grad",
- {n->inputs[i]}));
- }
- return ret;
-}
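// Note (editorial, not part of the original source): for a node with two
// inputs, MakeZeroGradNodes returns two "zeros_like" nodes, one per input;
// this is how ops such as block_grad (registered in elemwise.cc below)
// report an all-zero gradient.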
-
-// Helper to make gradient node
-inline std::vector<NodeEntry> MakeGradNode(
- const char* op_name,
- const NodePtr& n,
- std::vector<NodeEntry> inputs,
- std::unordered_map<std::string, std::string> attr = {{}}) {
- NodePtr p = Node::Create();
- p->attrs.op = nnvm::Op::Get(op_name);
- p->attrs.name = n->attrs.name + "_grad";
- p->inputs = std::move(inputs);
- p->attrs.dict = std::move(attr);
- if (p->attrs.op->attr_parser) {
- p->attrs.op->attr_parser(&p->attrs);
- }
- std::vector<NodeEntry> ret;
- for (uint32_t i = 0; i < p->num_outputs(); ++i) {
- ret.emplace_back(NodeEntry{p, i, 0});
- }
- return ret;
-}
-
-
-} // namespace top
-} // namespace nnvm
-
-#endif // NNVM_TOP_OP_COMMON_H_
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file broadcast.cc
- * \brief broadcast operator.
- */
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/util.h>
-#include <nnvm/top/tensor.h>
-#include <nnvm/top/nn.h>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/broadcast.h"
-#include "topi/elemwise.h"
-
-namespace nnvm {
-namespace top {
-using namespace tvm;
-using namespace nnvm::compiler;
-
-// broadcast_to
-DMLC_REGISTER_PARAMETER(BroadcastToParam);
-
-inline bool BroadcastToInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 1U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& ishape = (*in_attrs)[0];
- if (ishape.ndim() == 0) return false;
-
- const BroadcastToParam& param = nnvm::get<BroadcastToParam>(attrs.parsed);
- CHECK_EQ(ishape.ndim(), param.shape.ndim())
- << "Operand of shape " << ishape
- << " cannot be broadcasted to " << param.shape;
- TShape oshape = param.shape;
- for (dim_t i = 0; i < ishape.ndim(); ++i) {
- if (oshape[i] != 0) {
- CHECK(ishape[i] == oshape[i] || ishape[i] == 1)
- << "Array cannot be broadcasted from " <<
- ishape << " to " << param.shape;
- } else {
- oshape[i] = ishape[i];
- }
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
- return true;
-}
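// Worked example (illustrative sketch, not part of the original source):
//   input shape (2, 1, 3), shape = (2, 8, 3)  ->  output (2, 8, 3)
//   input shape (2, 1, 3), shape = (2, 0, 3)  ->  the 0 copies the input
//                                                 dim, output (2, 1, 3)
//   input shape (2, 5, 3), shape = (2, 8, 3)  ->  CHECK fails, 5 can neither
//                                                 equal 8 nor be 1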
-
-NNVM_REGISTER_OP(broadcast_to)
-.describe(R"code(Broadcasts the input array to a new shape.
-
-Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations
-with arrays of different shapes efficiently without creating multiple copies of arrays.
-See also `Broadcasting <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more explanation.
-
-Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
-`(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
-
-For example::
-
- broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1., 2., 3.],
-                                          [ 1., 2., 3.]]
-
-The dimension which you do not want to change can also be kept as `0` which means copy the original value.
-So with `shape=(2,0)`, we will obtain the same result as in the above example.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(BroadcastToParam::__FIELDS__())
-.set_attr_parser(ParamParser<BroadcastToParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<BroadcastToParam>)
-.set_attr<FInferShape>("FInferShape", BroadcastToInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const BroadcastToParam& param = nnvm::get<BroadcastToParam>(attrs.parsed);
- auto shape = ShapeToArray(param.shape);
- return Array<Tensor>{ topi::broadcast_to(inputs[0], shape) };
- })
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_support_level(4);
-
-// binary broadcast op
-inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& lhs = (*in_attrs)[0];
- const TShape& rhs = (*in_attrs)[1];
-
-  // avoid premature shape inference.
- if (lhs.ndim() == 0 || rhs.ndim() == 0) return false;
-
- if (lhs == rhs) {
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *out_attrs, 0, lhs);
- return true;
- }
- TShape out(std::max(lhs.ndim(), rhs.ndim()));
- dim_t bl = out.ndim() - lhs.ndim();
- dim_t br = out.ndim() - rhs.ndim();
- for (dim_t i = 0; i < out.ndim(); ++i) {
- dim_t l = 1, r = 1;
- if (i >= bl) l = lhs[i - bl];
- if (i >= br) r = rhs[i - br];
- if (l != r) {
- if (l == 0 || r == 0) {
- out[i] = 0;
- } else {
- CHECK(l == 1 || r == 1)
- << "operands could not be broadcast together with shapes "
- << lhs << " " << rhs << ", l=" << l << ", r=" << r;
- out[i] = std::max(l, r);
- }
- } else {
- out[i] = l;
- }
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out);
- return true;
-}
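// Worked example (illustrative sketch, not part of the original source):
// the loop above applies the usual numpy-style broadcast rule, with 0
// standing for an unknown dimension:
//   lhs = (2, 1, 3), rhs = (4, 3)  ->  out = (2, 4, 3)
//   lhs = (2, 0, 3), rhs = (4, 3)  ->  out = (2, 0, 3)  (unknown propagates)
//   lhs = (2, 5, 3), rhs = (4, 3)  ->  CHECK fails, 5 and 4 cannot broadcast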
-
-inline bool BinaryBroadcastCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- CHECK_EQ(ilayouts->size(), 2U);
- CHECK_EQ(olayouts->size(), 1U);
- Layout lhs = (*ilayouts)[0];
- Layout rhs = (*ilayouts)[1];
- Layout out(Layout::Undef());
-
- if (lhs.defined() && rhs.defined()) {
- if (lhs == rhs) {
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, lhs);
- return true;
- }
- // For example, NCHW <-> CHW, N16nCH16cW <-> HCW16c, etc, are broadcast-convertible
-    // because, by definition, CHW can broadcast with NCHW.
-    // For the second case, we can convert HCW16c to CH16cW, which can then broadcast with N16nCH16cW.
-    // But CNHW <-> CHW and NCHW16n <-> CHW are not,
-    // because no matter how we adjust the layout of 'CHW',
-    // we can never have an 'N' between 'C' and 'HW'.
- size_t l_start = 0, r_start = 0;
- size_t l = 0, r = 0;
- bool find_first_match = false;
- while (l < lhs.ndim() && r < rhs.ndim()) {
- if (!rhs.contains(Layout::to_superdim(lhs[l]))) {
- CHECK(!find_first_match) << lhs << " and " << rhs << " are not broadcast-convertible";
- l_start = ++l;
- } else if (!lhs.contains(Layout::to_superdim(rhs[r]))) {
- CHECK(!find_first_match) << lhs << " and " << rhs << " are not broadcast-convertible";
- r_start = ++r;
- } else {
- find_first_match = true;
- ++l; ++r;
- }
- }
- if (l_start > 0 && r_start > 0) {
- LOG(FATAL) << lhs << " and " << rhs << " are not broadcast-convertible";
- } else if (l_start > 0) {
- rhs = lhs.sublayout(l_start, lhs.ndim()-l_start);
- out = lhs;
- } else if (r_start > 0) {
- lhs = rhs.sublayout(r_start, rhs.ndim()-r_start);
- out = rhs;
- } else {
-      // prefer to keep the left (lhs) layout
- rhs = lhs;
- out = lhs;
- }
- } else if (lhs.defined()) {
- const Layout& last_lhs = last_ilayouts->at(0);
- if (last_lhs.defined()) {
- CHECK(lhs.convertible(last_lhs)) << "current lhs layout " << lhs
- << " cannot be converted to the original one " << last_lhs;
- lhs = last_lhs;
- // cannot decide output layout
- }
- } else if (rhs.defined()) {
- const Layout& last_rhs = last_ilayouts->at(1);
- if (last_rhs.defined()) {
- CHECK(rhs.convertible(last_rhs)) << "current rhs layout " << rhs
- << " cannot be converted to the original one " << last_rhs;
- rhs = last_rhs;
- // cannot decide output layout
- }
- }
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, lhs);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, rhs);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, out);
- return true;
-}
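// Worked example (illustrative sketch, not part of the original source):
//   lhs layout NCHW, rhs layout CHW:
//     only 'N' is unmatched, and it appears before the first match, so
//     l_start = 1; rhs is rewritten to lhs.sublayout(1, 3) = CHW and the
//     output layout becomes NCHW.
//   lhs layout CNHW, rhs layout CHW:
//     'N' is unmatched but appears after the first match ('C'), so the
//     layouts are rejected as not broadcast-convertible.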
-
-#define NNVM_REGISTER_BINARY_BROADCAST_OP(name, TOPIOp) \
- NNVM_REGISTER_OP(name) \
- .set_num_inputs(2) \
- .set_num_outputs(1) \
- .set_attr<FInferShape>("FInferShape", BinaryBroadcastShape) \
- .set_attr<FInferType>("FInferType", ElemwiseType<2, 1>) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- BinaryBroadcastCorrectLayout) \
- .set_attr<FInplaceOption>("FInplaceOption", \
- [](const NodeAttrs& attrs) { \
- return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
- }) \
- .set_attr<FTVMCompute>( \
- "FTVMCompute", [](const NodeAttrs& attrs, \
- const Array<Tensor>& inputs, \
- const Array<Tensor>& out_info) { \
- return Array<Tensor>{ \
- topi::TOPIOp(inputs[0], inputs[1]) }; \
- }) \
- .add_argument("lhs", "Tensor", "first input") \
- .add_argument("rhs", "Tensor", "second input")
-
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_add, add)
-.add_alias("__add_symbol__")
-.describe(R"code(Returns element-wise sum of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 1., 1.],
- [ 1., 1., 1.]]
-
- y = [[ 0.],
- [ 1.]]
-
- broadcast_add(x, y) = [[ 1., 1., 1.],
- [ 2., 2., 2.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("collapse_sum", n->attrs.name + "_dlhs", { ograds[0], n->inputs[0] }),
- MakeNode("collapse_sum", n->attrs.name + "_drhs", { ograds[0], n->inputs[1] })
- };
-});
-
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_sub, subtract)
-.add_alias("__sub_symbol__")
-.describe(R"code(Returns element-wise difference of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 1., 1.],
- [ 1., 1., 1.]]
-
- y = [[ 0.],
- [ 1.]]
-
- broadcast_sub(x, y) = [[ 1., 1., 1.],
- [ 0., 0., 0.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("collapse_sum", n->attrs.name + "_dlhs", { ograds[0], n->inputs[0] }),
- MakeNode("collapse_sum", n->attrs.name + "_drhs", {
- MakeNode("negative", n->attrs.name + "_drhs_neg", {ograds[0]}),
- n->inputs[1]
- })
- };
-});
-
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_mul, multiply)
-.add_alias("__mul_symbol__")
-.describe(R"code(Returns element-wise product of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 1., 1.],
- [ 1., 1., 1.]]
-
- y = [[ 0.],
- [ 1.]]
-
- broadcast_mul(x, y) = [[ 0., 0., 0.],
- [ 1., 1., 1.]]
-)code" NNVM_ADD_FILELINE)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- NodeEntry dlhs = MakeNode("collapse_sum", n->attrs.name + "_dlhs_sum", {
- MakeNode("broadcast_mul", n->attrs.name + "_dlhs_mul",
- { n->inputs[1], ograds[0] }),
- n->inputs[0]
- });
- NodeEntry drhs = MakeNode("collapse_sum", n->attrs.name + "_drhs_sum", {
- MakeNode("broadcast_mul", n->attrs.name + "_drhs_mul",
- { n->inputs[0], ograds[0] }),
- n->inputs[1]
- });
- return std::vector<NodeEntry>{ dlhs, drhs };
-});
-
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_div, divide)
-.add_alias("__div_symbol__")
-.describe(R"code(Returns element-wise division of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 6., 6., 6.],
- [ 6., 6., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_div(x, y) = [[ 3., 3., 3.],
- [ 2., 2., 2.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- NodeEntry dlhs = MakeNode("collapse_sum", n->attrs.name + "_dlhs_sum", {
- MakeNode("broadcast_div", n->attrs.name + "_dlhs_div",
- { ograds[0], n->inputs[1] }),
- n->inputs[0]
- });
- NodeEntry dy = MakeNode("broadcast_div", n->attrs.name + "_drhs_div", {
- NodeEntry{n, 0, 0},
- MakeNode("negative", n->attrs.name + "_rhs_neg", {n->inputs[1]})
- });
- NodeEntry drhs = MakeNode("collapse_sum", n->attrs.name + "_drhs_sum", {
- MakeNode("broadcast_mul", n->attrs.name + "_drhs_mul", { dy, ograds[0] }),
- n->inputs[1]
- });
- return std::vector<NodeEntry>{ dlhs, drhs };
-});
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_mod, mod)
-.add_alias("__mod_symbol__")
-.describe(R"code(Returns element-wise mod of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_mod(x, y) = [[ 1., 0., 1.],
- [ 1., 2., 0.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_max, maximum)
-.add_alias("__max_symbol__")
-.describe(R"code(Returns element-wise max of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_max(x, y) = [[ 2., 2., 3.],
- [ 4., 5., 6.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_min, minimum)
-.add_alias("__min_symbol__")
-.describe(R"code(Returns element-wise minimum of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_min(x, y) = [[ 1., 2., 2.],
- [ 3., 3., 3.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_pow, power)
-.add_alias("__pow_symbol__")
-.describe(R"code(Returns element-wise x^y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 1.],
- [ 2.]]
-
- broadcast_pow(x, y) = [[ 1., 2., 3. ],
- [ 16., 25., 36.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_left_shift, left_shift)
-.add_alias("__left_shift_symbol__")
-.describe(R"code(Returns element-wise x << y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 1.]]
-
- broadcast_left_shift(x, y) = [[ 4., 8., 12.],
- [ 8., 10., 12.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_right_shift, right_shift)
-.add_alias("__right_shift_symbol__")
-.describe(R"code(Returns element-wise x >> y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 4., 8., 12.],
- [ 8., 10., 12.]]
-
- y = [[ 2.],
- [ 1.]]
-
- broadcast_right_shift(x, y) = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
-)code" NNVM_ADD_FILELINE);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_greater, greater)
-.add_alias("__greater_symbol__")
-.describe(R"code(Returns element-wise x > y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_greater(x, y) = [[ 0., 0., 1.],
- [ 1., 1., 1.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::greater(inputs[0], inputs[1]), out_info[0]->dtype) };
-}, 11);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_less, less)
-.add_alias("__less_symbol__")
-.describe(R"code(Returns element-wise x < y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 3.]]
-
- broadcast_less(x, y) = [[ 1., 0., 0.],
- [ 0., 0., 0.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::less(inputs[0], inputs[1]), out_info[0]->dtype) };
-}, 11);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_equal, equal)
-.add_alias("__equal_symbol__")
-.describe(R"code(Returns element-wise x == y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 5.]]
-
- broadcast_equal(x, y) = [[ 0., 1., 0.],
- [ 0., 1., 0.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::equal(inputs[0], inputs[1]), out_info[0]->dtype) };
-}, 11);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_not_equal, not_equal)
-.add_alias("__not_equal_symbol__")
-.describe(R"code(Returns element-wise x != y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 4.]]
-
- broadcast_not_equal(x, y) = [[ 1., 0., 1.],
- [ 0., 1., 1.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::not_equal(inputs[0],
- inputs[1]),
- out_info[0]->dtype) };
-}, 11);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_greater_equal, greater_equal)
-.add_alias("__greater_equal_symbol__")
-.describe(R"code(Returns element-wise x >= y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 2.],
- [ 6.]]
-
- broadcast_greater_equal(x, y) = [[ 0., 1., 1.],
- [ 0., 0., 1.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::greater_equal(inputs[0],
- inputs[1]),
- out_info[0]->dtype) };
-}, 11);
-
-NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_less_equal, less_equal)
-.add_alias("__less_equal_symbol__")
-.describe(R"code(Returns element-wise x <= y of the input arrays with broadcasting.
-
-Example::
-
- x = [[ 1., 2., 3.],
- [ 4., 5., 6.]]
-
- y = [[ 1.],
- [ 5.]]
-
- broadcast_less_equal(x, y) = [[ 1., 0., 0.],
- [ 1., 1., 0.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::less_equal(inputs[0],
- inputs[1]),
- out_info[0]->dtype) };
-}, 11);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file elemwise.cc
- * \brief Elementwise operators
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/util.h>
-#include <nnvm/top/tensor.h>
-#include <cmath>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/broadcast.h"
-#include "topi/elemwise.h"
-#include "topi/tags.h"
-#include "../../compiler/compile_engine.h"
-
-namespace nnvm {
-namespace top {
-
-using namespace tvm;
-using namespace nnvm::compiler;
-
-// undefined op
-NNVM_REGISTER_ELEMWISE_UNARY_OP(__undef__)
-.describe(R"code(undefined op.
-
-Used to produce invalid nodes during optimization.
-
-)code" NNVM_ADD_FILELINE)
-.set_num_outputs(1)
-.set_num_inputs(0);
-
-// floor
-NNVM_REGISTER_ELEMWISE_UNARY_OP(floor)
-.describe(R"code(Take floor input array, computed element-wise.
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::floor(inputs[0]) };
-});
-
-// ceil
-NNVM_REGISTER_ELEMWISE_UNARY_OP(ceil)
-.describe(R"code(Take ceil input array, computed element-wise.
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::ceil(inputs[0]) };
-});
-
-// trunc
-NNVM_REGISTER_ELEMWISE_UNARY_OP(trunc)
-.describe(R"code(Take truncated value of the input, element-wise.
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::trunc(inputs[0]) };
-});
-
-// round
-NNVM_REGISTER_ELEMWISE_UNARY_OP(round)
-.describe(R"code(Round elements of the input to nearest integer.
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::round(inputs[0]) };
-});
-
-// abs
-NNVM_REGISTER_ELEMWISE_UNARY_OP(abs)
-.describe(R"code(Take absolute value of elements of the input.
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::abs(inputs[0]) };
-});
-
-// sigmoid
-NNVM_REGISTER_ELEMWISE_UNARY_OP(sigmoid)
-.describe(R"code(Computes sigmoid.
-
-.. math::
- Y = 1 / (1 + exp(-X))
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::sigmoid(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = 1 / (1 + exp(-n0))
- // grad_0 = grad_y * y * (1 - y)
- NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
- {ograds[0], NodeEntry{n, 0, 0}});
- NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1",
- {NodeEntry{n, 0, 0}}, {{"scalar", "1"}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
- {sub0, sub1})
- };
-});
-
-// tanh
-NNVM_REGISTER_ELEMWISE_UNARY_OP(tanh)
-.describe(R"code(Computes hyperbolic tangent.
-
-.. math::
- Y = sinh(X) / cosh(X)
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::tanh(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = sinh(n0) / cosh(n0)
- // grad_0 = grad_y * (1 - y^2)
- NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
- {NodeEntry{n, 0, 0}, NodeEntry{n, 0, 0}});
- NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1",
- {sub0}, {{"scalar", "1"}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
- {ograds[0], sub1})
- };
-});
-
-// exp
-NNVM_REGISTER_ELEMWISE_UNARY_OP(exp)
-.describe(R"code(Returns the exp input array, computed element-wise.
-
-.. math::
- exp(x)
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::exp(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = exp(n0)
- // grad_0 = grad_y * y
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
- {ograds[0], NodeEntry{n, 0, 0}})
- };
-});
-
-// log
-NNVM_REGISTER_ELEMWISE_UNARY_OP(log)
-.describe(R"code(Returns the log input array, computed element-wise.
-
-.. math::
- log(x)
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::log(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = log(n0)
- // grad_0 = grad_y / n0
- return std::vector<NodeEntry>{
- MakeNode("elemwise_div", n->attrs.name + "_grad_0",
- {ograds[0], n->inputs[0]})
- };
-});
-
-// sqrt
-NNVM_REGISTER_ELEMWISE_UNARY_OP(sqrt)
-.describe(R"code(Returns the sqrt input array, computed element-wise.
-
-.. math::
- \sqrt(x)
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::sqrt(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // y = sqrt(n0)
- // grad_0 = grad_y / (2 * y)
- NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
- {NodeEntry{n, 0, 0}}, {{"scalar", "2"}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_div", n->attrs.name + "_grad_0",
- {ograds[0], sub0})
- };
-});
-
-// binary ops
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_add)
-.describe(R"code(Element-wise add
-
-)code")
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::add(inputs[0], inputs[1]) };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 + n1
- // grad_0 = grad_y
- // grad_1 = grad_y
- return std::vector<NodeEntry>{ MakeNode("copy", n->attrs.name + "_grad_0",
- {ograds[0]}),
-                                   MakeNode("copy", n->attrs.name + "_grad_1",
- {ograds[0]}) };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_sub)
-.describe(R"code(Element-wise substraction
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::subtract(inputs[0], inputs[1]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 - n1
- // grad_0 = grad_y
- // grad_1 = - grad_y
- return std::vector<NodeEntry>{
- ograds[0],
- MakeNode("negative", n->attrs.name + "_grad_1", {ograds[0]}),
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mul)
-.describe(R"code(Element-wise multiplication
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::multiply(inputs[0], inputs[1]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 * n1
- // grad_0 = grad_y * n1
- // grad_1 = grad_y * n0
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
- {ograds[0], n->inputs[1]}),
- MakeNode("elemwise_mul", n->attrs.name + "_grad_1",
- {ograds[0], n->inputs[0]})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_div)
-.describe(R"code(Element-wise division
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::divide(inputs[0], inputs[1]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 / n1
- // grad_0 = grad_y / n1
- // grad_1 = - grad_y * n0 / n1^2
- NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0",
- {ograds[0], n->inputs[0]});
- NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1",
- {sub0});
- NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2",
- {n->inputs[1], n->inputs[1]});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_div", n->attrs.name + "_grad_0",
- {ograds[0], n->inputs[1]}),
- MakeNode("elemwise_div", n->attrs.name + "_grad_1",
- {sub1, sub2})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mod)
- .describe(R"code(Element-wise modulo
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::mod(inputs[0], inputs[1]) };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_pow)
- .describe(R"code(Element-wise power
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::power(inputs[0], inputs[1]) };
-});
-
-// logical
-NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_and)
-.describe(R"code(Elementwise compute the logical AND
-
-)code")
-.set_support_level(4)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::logical_and(inputs[0], inputs[1]) };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_or)
-.describe(R"code(Elementwise compute the logical OR
-
-)code")
-.set_support_level(4)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::logical_or(inputs[0], inputs[1]) };
-});
-
-// negative
-NNVM_REGISTER_ELEMWISE_UNARY_OP(negative)
-.describe(R"code(Elemenwise numeric negative
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::negative(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = - n0
- // grad_0 = - grad_y
- return std::vector<NodeEntry>{
- MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}),
- };
-});
-
-// logical NOT
-NNVM_REGISTER_ELEMWISE_UNARY_OP(logical_not)
-.describe(R"code(Elementwise compute the logical NOT
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(4)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::logical_not(inputs[0]) };
-});
-
-// copy
-NNVM_REGISTER_ELEMWISE_UNARY_OP(copy)
-.describe(R"code(Copy tensor to another one.
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::identity(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = copy(n0)
- // grad_0 = grad_y
- return std::vector<NodeEntry>{ MakeNode("copy", n->attrs.name + "_grad_0",
- {ograds[0]}) };
-});
-
-DMLC_REGISTER_PARAMETER(InitOpParam);
-DMLC_REGISTER_PARAMETER(InitOpWithScalarParam);
-DMLC_REGISTER_PARAMETER(FillValueParam);
-
-// full
-NNVM_REGISTER_INIT_OP(full)
-.describe(R"code(Fill array with scalar value
-
-)code" NNVM_ADD_FILELINE)
-.set_attr_parser(ParamParser<InitOpWithScalarParam>)
-.set_attr<FGetAttrDict>(
- "FGetAttrDict", ParamGetAttrDict<InitOpWithScalarParam>)
-.add_arguments(InitOpWithScalarParam::__FIELDS__())
-.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpWithScalarParam>)
-.set_attr<FInferType>("FInferType", ZeroType<InitOpWithScalarParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ZeroLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const InitOpWithScalarParam& param = nnvm::get<InitOpWithScalarParam>(attrs.parsed);
- Array<Expr> shape = ShapeToArray(param.shape);
- DataType dtype = GetTVMType(param.dtype);
- Expr fill_value = tvm::make_const(dtype, param.fill_value);
- return Array<Tensor>{ topi::full(shape, dtype, fill_value) };
-})
-.set_support_level(4);
-
-NNVM_REGISTER_INIT_OP(zeros)
-.describe(R"code(Fill target with zeros
-
-)code" NNVM_ADD_FILELINE)
-.set_attr_parser(ParamParser<InitOpParam>)
-.set_attr<FGetAttrDict>(
- "FGetAttrDict", ParamGetAttrDict<InitOpParam>)
-.add_arguments(InitOpParam::__FIELDS__())
-.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
-.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ZeroLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const InitOpParam& param = nnvm::get<InitOpParam>(attrs.parsed);
- Array<Expr> shape = ShapeToArray(param.shape);
- DataType dtype = GetTVMType(param.dtype);
- Expr fill_value = tvm::make_const(dtype, 0);
- return Array<Tensor>{ topi::full(shape, dtype, fill_value) };
-})
-.set_support_level(4);
-
-NNVM_REGISTER_INIT_OP(ones)
-.describe(R"code(Fill target with ones
-
-)code" NNVM_ADD_FILELINE)
-.set_attr_parser(ParamParser<InitOpParam>)
-.set_attr<FGetAttrDict>(
- "FGetAttrDict", ParamGetAttrDict<InitOpParam>)
-.add_arguments(InitOpParam::__FIELDS__())
-.set_attr<FInferShape>("FInferShape", ZeroShape<InitOpParam>)
-.set_attr<FInferType>("FInferType", ZeroType<InitOpParam>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ZeroLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const InitOpParam& param = nnvm::get<InitOpParam>(attrs.parsed);
- Array<Expr> shape = ShapeToArray(param.shape);
- DataType dtype = GetTVMType(param.dtype);
- Expr fill_value = tvm::make_const(dtype, 1);
- return Array<Tensor>{ topi::full(shape, dtype, fill_value) };
-})
-.set_support_level(4);
-
-// full_like
-NNVM_REGISTER_INIT_LIKE_OP(full_like)
-.describe(R"code(Return an scalar value array with the same shape and type
-as the input array
-
-)code" NNVM_ADD_FILELINE)
-.add_arguments(FillValueParam::__FIELDS__())
-.set_attr_parser(ParamParser<FillValueParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<FillValueParam>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const FillValueParam& param = nnvm::get<FillValueParam>(attrs.parsed);
- const Expr fill_value = tvm::make_const(out_info[0]->dtype, param.fill_value);
- return Array<Tensor> { topi::full_like(inputs[0], fill_value) };
-})
-.set_support_level(4);
-
-NNVM_REGISTER_INIT_LIKE_OP(zeros_like)
-.describe(R"code(Return an array of zeros with the same shape and type
-as the input array.
-
-)code")
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor> { topi::full_like(inputs[0],
- tvm::make_const(out_info[0]->dtype, 0)) };
-})
-.set_support_level(4);
-
-NNVM_REGISTER_INIT_LIKE_OP(ones_like)
-.describe(R"code(Return an array of ones with the same shape and type
-as the input array.
-
-)code")
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor> { topi::full_like(inputs[0],
- tvm::make_const(out_info[0]->dtype, 1)) };
-})
-.set_support_level(4);
-
-// unary scalar op
-DMLC_REGISTER_PARAMETER(ScalarParam);
-
-#define NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(op) \
- NNVM_REGISTER_ELEMWISE_UNARY_OP(op) \
- .add_arguments(ScalarParam::__FIELDS__()) \
- .set_attr_parser(ParamParser<ScalarParam>) \
- .set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ScalarParam>)
-
-inline Tensor binary_scalar_op(const NodeAttrs& attrs,
- const Tensor& x,
- std::function<Expr(Expr, Expr)> f) {
- const ScalarParam& param = nnvm::get<ScalarParam>(attrs.parsed);
- auto scalar_val = static_cast<float>(param.scalar);
- return compute(x->shape, [&](const Array<Var>& i) {
- auto scalar_const = make_const(x->dtype, scalar_val);
- return f(x(i), scalar_const);
- }, "tensor", topi::kElementWise);
-}
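// Worked example (illustrative sketch, not part of the original source):
// binary_scalar_op folds the scalar in as a constant of the tensor's dtype,
// so __add_scalar__ with scalar=2 on a float32 tensor x builds, roughly,
//   out(i) = x(i) + 2.0f
// which TVM evaluates lazily when the graph is compiled.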
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__add_scalar__)
-.describe(R"code(Tensor add scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return x + y; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return std::vector<NodeEntry>{ MakeNode("copy", n->attrs.name + "_grad_0",
- {ograds[0]}) };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__sub_scalar__)
-.describe(R"code(Tensor substract scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return x - y; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return std::vector<NodeEntry>{ograds[0]};
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rsub_scalar__)
-.describe(R"code(scalar substract Tensor
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return y - x; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return std::vector<NodeEntry>{
- MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]})
- };
-});
-
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__lshift_scalar__)
-.describe(R"code(Tensor left shift by scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ScalarParam& param = nnvm::get<ScalarParam>(attrs.parsed);
- int scalar_val = static_cast<int>(param.scalar);
- return Array<Tensor>{
- topi::left_shift(inputs[0],
- make_const(inputs[0]->dtype, scalar_val))};
- });
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rshift_scalar__)
-.describe(R"code(Tensor right shift by scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ScalarParam& param = nnvm::get<ScalarParam>(attrs.parsed);
- int scalar_val = static_cast<int>(param.scalar);
- return Array<Tensor>{
- topi::right_shift(inputs[0],
- make_const(inputs[0]->dtype, scalar_val))};
- });
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__mul_scalar__)
-.describe(R"code(Tensor multiplies scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return x * y; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 * scalar
- // grad_0 = grad_y * scalar
- return std::vector<NodeEntry>{
- MakeNode("__mul_scalar__", n->attrs.name + "_grad_0",
- {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__div_scalar__)
-.describe(R"code(Tensor divides scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return x / y; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0 / scalar
- // grad_0 = grad_y / scalar
- return std::vector<NodeEntry>{
- MakeNode("__div_scalar__", n->attrs.name + "_grad_0",
- {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rdiv_scalar__)
-.describe(R"code(scalar divides Tensor
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return y / x; }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = scalar / n0
- // grad_0 = - grad_y * scalar / n0^2
- NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
- {ograds[0]},
- {{"scalar", n->attrs.dict["scalar"]}});
- NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1",
- {sub0});
- NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2",
- {n->inputs[0], n->inputs[0]});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_div", n->attrs.name + "_grad_0",
- {sub1, sub2})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__pow_scalar__)
-.describe(R"code(Tensor power scalar
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return tvm::pow(x, y); }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = n0^scalar
- // grad_0 = grad_y * scalar * n0^(scalar - 1)
- double scalar = std::stod(n->attrs.dict["scalar"]);
- NodeEntry sub0 = MakeNode("__pow_scalar__", n->attrs.name + "_grad_sub_0",
- {n->inputs[0]},
- {{"scalar", std::to_string(scalar - 1)}});
- NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1",
- {ograds[0]},
- {{"scalar", std::to_string(scalar)}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad_0",
- {sub0, sub1})
- };
-});
-
-NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__)
-.describe(R"code(scalar power Tensor
-
-)code" NNVM_ADD_FILELINE)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ binary_scalar_op(attrs, inputs[0],
- [](Expr x, Expr y) { return tvm::pow(y, x); }) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = scalar^n0
- // grad_0 = grad_y * scalar^n0 * log(scalar)
- double num = std::stod(n->attrs.dict["scalar"]);
- NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0",
- {NodeEntry{n, 0, 0}},
- {{"scalar", std::to_string(std::log(num))}});
- return std::vector<NodeEntry>{
- MakeNode("__mul_symbol__", n->attrs.name + "_grad_0",
- {ograds[0], sub0})
- };
-});
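
// Illustrative sketch (not part of the diff above): a standalone check, in plain
// C++ with no NNVM types, of the two gradient rules used by __pow_scalar__ and
// __rpow_scalar__ above, d/dx x^s = s*x^(s-1) and d/dx s^x = s^x*log(s),
// compared against central finite differences. Values below are arbitrary.
#include <cmath>
#include <cstdio>

int main() {
  const double s = 3.0, x = 1.7, h = 1e-6;
  double g_pow  = s * std::pow(x, s - 1.0);                 // analytic d/dx x^s
  double g_rpow = std::pow(s, x) * std::log(s);             // analytic d/dx s^x
  double fd_pow  = (std::pow(x + h, s) - std::pow(x - h, s)) / (2 * h);
  double fd_rpow = (std::pow(s, x + h) - std::pow(s, x - h)) / (2 * h);
  std::printf("pow:  analytic %.6f  finite-diff %.6f\n", g_pow, fd_pow);
  std::printf("rpow: analytic %.6f  finite-diff %.6f\n", g_rpow, fd_rpow);
  return 0;
}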
-
-DMLC_REGISTER_PARAMETER(ElementWiseReduceParam);
-
-NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum)
-.describe(R"code(Adds all input arguments element-wise.
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ElementWiseReduceParam& param = nnvm::get<ElementWiseReduceParam>(attrs.parsed);
-    CHECK_EQ(param.num_args, inputs.size()) << "Compute definition of elemwise sum";
- return Array<Tensor>{ topi::elemwise_sum(inputs) };
-})
-.set_attr<nnvm::FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- CHECK_EQ(ograds.size(), 1);
- std::vector<NodeEntry> ret;
- for (size_t i = 0; i < n->inputs.size(); i++) {
- ret.push_back(MakeNode("copy", n->attrs.name + "_grad_0", {ograds[0]}));
- }
- return ret;
- })
-.set_support_level(4);
-
-NNVM_REGISTER_ELEMWISE_UNARY_OP(block_grad)
-.describe(R"code(Blocks gradient computation for input.
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<nnvm::FInplaceIdentity>(
- "FInplaceIdentity", [](const NodeAttrs& attrs){
- return std::vector<bool>{true};
-})
-.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
-.set_support_level(4);
-
-DMLC_REGISTER_PARAMETER(IndicatorParam);
-
-// indicator function
-NNVM_REGISTER_INDICATOR_OP(greater)
-.describe(R"code(Greater function that returns a mask tensor
-with 1.0 if (left > right), otherwise 0.0 element-wise.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("lhs", "Tensor", "First input")
-.add_argument("rhs", "Tensor", "Second input")
-.set_num_inputs(2)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::greater(inputs[0], inputs[1]), out_info[0]->dtype) };
-})
-.set_support_level(4);
-
-
-NNVM_REGISTER_INDICATOR_OP(less)
- .describe(R"code(Less function that returns a mask tensor
-with 1.0 if (left < right), otherwise 0.0 element-wise.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("lhs", "Tensor", "First input")
-.add_argument("rhs", "Tensor", "Second input")
-.set_num_inputs(2)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::cast(topi::less(inputs[0], inputs[1]), out_info[0]->dtype) };
-})
-.set_support_level(4);
-
-NNVM_REGISTER_INDICATOR_OP(_max_mask)
- .describe(R"code(Function that returns a mask tensor
-with 1.0 if the value is maximum over given axes, otherwise 0.0 element-wise.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input")
-.set_num_inputs(1)
-.add_arguments(IndicatorParam::__FIELDS__())
-.set_attr_parser(ParamParser<IndicatorParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_support_level(1);
-
-NNVM_REGISTER_INDICATOR_OP(_min_mask)
- .describe(R"code(Function that returns a mask tensor
-with 1.0 if the value is minimum over given axes, otherwise 0.0 element-wise.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input")
-.set_num_inputs(1)
-.add_arguments(IndicatorParam::__FIELDS__())
-.set_attr_parser(ParamParser<IndicatorParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_support_level(1);
-
-
-DMLC_REGISTER_PARAMETER(ClipParam);
-
-NNVM_REGISTER_OP(clip)
-.describe(R"doc(Clips (limits) the values in an array.
-Given an interval, values outside the interval are clipped to the interval edges.
-Clipping ``x`` between `a_min` and `a_max` would be::
-   clip(x, a_min, a_max) = max(min(x, a_max), a_min)
-Example::
- x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
- clip(x,1,8) = [ 1., 1., 2., 3., 4., 5., 6., 7., 8., 8.]
-)doc" NNVM_ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<ClipParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ClipParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ClipParam params = get<ClipParam>(attrs.parsed);
- return Array<Tensor>{
- topi::clip(inputs[0], tvm::make_const(tvm::DataType::Float(32), params.a_min),
- tvm::make_const(tvm::DataType::Float(32), params.a_max)) };
- })
-.add_argument("data", "NDArray-or-Symbol", "Input array.")
-.add_arguments(ClipParam::__FIELDS__())
-.set_attr<nnvm::FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- // y = clip(x, a_min, a_max)
- // min_mask = greater_equal(x, a_min*ones_like(x))
- // => ones_like(x) - less(x, a_min)
- // max_mask = less_equal(x, a_max*ones_like(x))
- // => ones_like(x) - greater(x, a_max)
- // grad_x = min_mask * max_mask * grad_y
- CHECK_EQ(ograds.size(), 1);
-
- NodeEntry sub0 = MakeNode("ones_like", n->attrs.name + "_grad_sub_0",
- {n->inputs[0]});
- // min_mask
- NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1",
- {sub0}, {{"scalar", n->attrs.dict["a_min"]}});
- NodeEntry sub2 = MakeNode("less", n->attrs.name + "_grad_sub_2",
- {n->inputs[0], sub1});
- NodeEntry sub3 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_3",
- {sub0, sub2});
-
- // max_mask
- NodeEntry sub4 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_4",
- {sub0}, {{"scalar", n->attrs.dict["a_max"]}});
- NodeEntry sub5 = MakeNode("greater", n->attrs.name + "_grad_sub_5",
- {n->inputs[0], sub4});
- NodeEntry sub6 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_6",
- {sub0, sub5});
-
- // min_mask * max_mask
- NodeEntry sub7 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_7",
- {sub3, sub6});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad",
- {sub7, ograds[0]})
- };
- })
-.set_support_level(4);
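
// Illustrative sketch (not part of the diff above): the clip gradient rule from
// the FGradient comment above, written element-wise in plain C++. min_mask and
// max_mask follow the same "ones_like - less / ones_like - greater" construction.
#include <algorithm>
#include <cstdio>

int main() {
  const float a_min = 1.0f, a_max = 8.0f, grad_y = 1.0f;
  const float xs[] = {0.0f, 4.5f, 9.0f};
  for (float x : xs) {
    float y = std::max(std::min(x, a_max), a_min);          // clip(x, a_min, a_max)
    float min_mask = 1.0f - (x < a_min ? 1.0f : 0.0f);      // ones_like - less(x, a_min)
    float max_mask = 1.0f - (x > a_max ? 1.0f : 0.0f);      // ones_like - greater(x, a_max)
    float grad_x = min_mask * max_mask * grad_y;
    std::printf("x=%.1f  clip=%.1f  grad_x=%.1f\n", x, y, grad_x);
  }
  return 0;
}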
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file matrix_op.cc
- * \brief Matrix operators
- */
-#include <topi/transform.h>
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/top/tensor.h>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-
-namespace nnvm {
-namespace top {
-
-using namespace nnvm::compiler;
-
-DMLC_REGISTER_PARAMETER(MatMulParam);
-
-inline bool DotShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- const MatMulParam& param = nnvm::get<MatMulParam>(attrs.parsed);
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- TShape lshape = (*in_attrs)[0];
- TShape rshape = (*in_attrs)[1];
-
- if (lshape.ndim() == 1) lshape = TShape{1, lshape[0]};
- if (rshape.ndim() == 1) rshape = TShape{1, rshape[0]};
-
- if (param.transpose_a) std::reverse(lshape.begin(), lshape.end());
- if (param.transpose_b) std::reverse(rshape.begin(), rshape.end());
-
- CHECK_EQ(lshape[lshape.ndim() - 1], rshape[0])
- << "dot shape inconsistent: " << lshape << " X " << rshape;
-
- TShape oshape(lshape.ndim() + rshape.ndim() - 2);
- for (uint32_t i = 0; i < lshape.ndim() - 1; i++) oshape[i] = lshape[i];
- for (uint32_t i = 1; i < rshape.ndim(); i++) oshape[i + lshape.ndim() - 2] = rshape[i];
-
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
- return true;
-}
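
// Illustrative sketch (not part of the diff above): the shape composition done by
// DotShape for the non-transposed case, with std::vector standing in for TShape.
// lshape (n,m,k) combined with rshape (k,r,s) yields (n,m,r,s).
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> lshape = {2, 3, 4};   // (n, m, k)
  std::vector<int> rshape = {4, 5, 6};   // (k, r, s)
  std::vector<int> oshape;
  for (size_t i = 0; i + 1 < lshape.size(); ++i) oshape.push_back(lshape[i]);
  for (size_t i = 1; i < rshape.size(); ++i) oshape.push_back(rshape[i]);
  for (int d : oshape) std::printf("%d ", d);               // prints: 2 3 5 6
  std::printf("\n");
  return 0;
}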
-
-inline bool DotCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const MatMulParam& param = nnvm::get<MatMulParam>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), 2U);
- CHECK_EQ(olayouts->size(), 1U);
- const Layout& lhs = last_ilayouts->at(0).defined() ? last_ilayouts->at(0)
- : ilayouts->at(0);
- const Layout& rhs = last_ilayouts->at(1).defined() ? last_ilayouts->at(1)
- : ilayouts->at(1);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, lhs);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, rhs);
-
- if (lhs.ndim() > 1 && rhs.ndim() > 1) {
- // concat lhs and rhs layout
- const Layout& lhs_out = param.transpose_a ? lhs.reverse() : lhs;
- const Layout& rhs_out = param.transpose_b ? rhs.reverse() : rhs;
- Layout out = lhs_out.sublayout(0, lhs_out.ndim()-1) +
- rhs_out.sublayout(1, rhs_out.ndim()-1);
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, out);
- }
- return true;
-}
-
-NNVM_REGISTER_OP(matmul)
-.describe(R"doc(Matrix multiplication of two arrays.
-
-``matmul``'s behavior depends on the input array dimensions:
-
-- 1-D arrays: inner product of vectors
-- 2-D arrays: matrix multiplication
-- N-D arrays: a sum product over the last axis of the first input and the first
- axis of the second input
-
- For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
- result array will have shape `(n,m,r,s)`. It is computed by::
-
-    matmul(x,y) = sum(x[i,j,:]*y[:,a,b])
-
-)doc" NNVM_ADD_FILELINE)
-.set_support_level(1)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<MatMulParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MatMulParam>)
-.add_arguments(MatMulParam::__FIELDS__())
-.add_argument("lhs", "NDArray-or-Symbol", "The first input")
-.add_argument("rhs", "NDArray-or-Symbol", "The second input")
-.set_attr<FInferShape>("FInferShape", DotShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", DotCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const MatMulParam& param = nnvm::get<MatMulParam>(attrs.parsed);
- return Array<Tensor>{
- topi::matmul(inputs[0], inputs[1], param.transpose_a, param.transpose_b)
- };
- })
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- // z = x dot y
- // xshape (n,m,k), yshape (k,r,s)
- const MatMulParam& param = nnvm::get<MatMulParam>(n->attrs.parsed);
- bool Ta = param.transpose_a;
- bool Tb = param.transpose_b;
- // Ta = false, Tb = false
- // grad_x = grad_z dot y.T
- // grad_y = x.T dot grad_z
- if (!Ta && !Tb) {
- return std::vector<NodeEntry>{
- MakeNode("matmul", n->attrs.name + "_grad_0",
- {ograds[0], n->inputs[1]},
- {{"transpose_a", "false"},
- {"transpose_b", "true"}}),
- MakeNode("matmul", n->attrs.name + "_grad_1",
- {n->inputs[0], ograds[0]},
- {{"transpose_a", "true"},
- {"transpose_b", "false"}})
- };
- } else if (Ta && !Tb) {
- // Ta = true, Tb = false
- // grad_x = y dot grad_z.T
- // grad_y = x dot grad_z
- return std::vector<NodeEntry>{
- MakeNode("matmul", n->attrs.name + "_grad_0",
- {n->inputs[1], ograds[0]},
- {{"transpose_a", "false"},
- {"transpose_b", "true"}}),
- MakeNode("matmul", n->attrs.name + "_grad_1",
- {n->inputs[0], ograds[0]},
- {{"transpose_a", "false"},
- {"transpose_b", "false"}})
- };
- } else if (!Ta && Tb) {
- // Ta = false, Tb = true
- // grad_x = grad_z dot y
- // grad_y = grad_z.T dot x
- return std::vector<NodeEntry>{
- MakeNode("matmul", n->attrs.name + "_grad_0",
- {ograds[0], n->inputs[1]},
- {{"transpose_a", "false"},
- {"transpose_b", "false"}}),
- MakeNode("matmul", n->attrs.name + "_grad_1",
- {ograds[0], n->inputs[0]},
- {{"transpose_a", "true"},
- {"transpose_b", "false"}})
- };
- } else {
- // Ta = true, Tb = true
- // grad_x = y.T dot grad_z.T
- // grad_y = grad_z.T dot x.T
- return std::vector<NodeEntry>{
- MakeNode("matmul", n->attrs.name + "_grad_0",
- {n->inputs[1], ograds[0]},
- {{"transpose_a", "true"},
- {"transpose_b", "true"}}),
- MakeNode("matmul", n->attrs.name + "_grad_1",
- {ograds[0], n->inputs[0]},
- {{"transpose_a", "true"},
- {"transpose_b", "true"}})
- };
- }
-});
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file reduce.cc
- * \brief reduce operator.
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/util.h>
-#include <nnvm/top/tensor.h>
-#include <numeric>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/detail/constant_utils.h"
-#include "topi/elemwise.h"
-#include "topi/reduction.h"
-#include "topi/transform.h"
-
-namespace nnvm {
-namespace top {
-using namespace tvm;
-using namespace nnvm::compiler;
-
-
-// reduce
-DMLC_REGISTER_PARAMETER(ReduceParam);
-
-inline TShape GetReduceAxes(const uint32_t indim,
- const TShape& axis,
- bool exclude) {
- if (axis.ndim() == 0) {
- TShape r_axes(indim);
- std::iota(r_axes.begin(), r_axes.end(), 0);
- return r_axes;
- }
-
- CHECK_LT(axis[axis.ndim() - 1], indim)
- << "Reduction axis " << axis[axis.ndim() - 1]
- << " exceeds input dimensions " << indim;
-
- TShape in_axis = axis;
- for (auto& i : in_axis) {
- i = i < 0 ? i + indim : i;
- CHECK_GE(i, 0) << "axis out of bounds in reduce operator";
- CHECK_LT(i, indim) << "axis out of bounds in reduce operator";
- }
- std::sort(in_axis.begin(), in_axis.end());
- if (!exclude) return in_axis;
- TShape r_axis(indim - in_axis.ndim());
- for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) {
- if (j < in_axis.ndim() && i == in_axis[j]) {
- ++j;
- continue;
- }
- r_axis[k++] = i;
- }
- return r_axis;
-}
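
// Illustrative sketch (not part of the diff above): the exclude=true behaviour of
// GetReduceAxes, i.e. reducing over the complement of the given axes, using plain
// std::vector instead of TShape.
#include <cstdio>
#include <vector>

int main() {
  const unsigned indim = 4;
  std::vector<unsigned> axis = {1, 3};   // already sorted
  std::vector<unsigned> r_axis;
  for (unsigned i = 0, j = 0; i < indim; ++i) {
    if (j < axis.size() && i == axis[j]) { ++j; continue; }
    r_axis.push_back(i);                 // keep axes not listed in `axis`
  }
  for (unsigned a : r_axis) std::printf("%u ", a);          // prints: 0 2
  std::printf("\n");
  return 0;
}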
-
-inline TShape ReduceShapeImpl(const TShape& ishape,
- const TShape& axis,
- bool keepdims,
- bool exclude) {
- uint32_t indim = ishape.ndim();
- TShape r_axes = GetReduceAxes(indim, axis, exclude);
- if (!r_axes.ndim()) return ishape;
- if (r_axes.ndim() == indim)
- return TShape(keepdims ? indim : 1);
-
- CHECK(r_axes.ndim() < indim);
- if (keepdims) {
- TShape oshape(ishape);
- for (unsigned i = 0, j = 0; i < indim; ++i) {
- if (j >= r_axes.ndim() || i != r_axes[j]) continue;
- oshape[i] = 1;
- ++j;
- }
- return oshape;
- }
-
- TShape oshape(indim - r_axes.ndim());
- for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) {
- if (j < r_axes.ndim() && i == r_axes[j]) {
- ++j;
- continue;
- }
- oshape[k++] = ishape[i];
- }
- return oshape;
-}
-
-inline bool ReduceShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 1U);
- CHECK_EQ(out_attrs->size(), 1U);
- if ((*in_attrs)[0].ndim() == 0) return false;
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- NNVM_ASSIGN_OUTPUT_SHAPE(
- attrs, *out_attrs, 0,
- ReduceShapeImpl((*in_attrs)[0], param.axis,
- param.keepdims, param.exclude));
- return true;
-}
-
-inline bool CollapseShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- if ((*in_attrs)[0].ndim() == 1) return false;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, (*in_attrs)[1]);
- return true;
-}
-
-template<typename PType>
-inline void AxesParamParser(nnvm::NodeAttrs* attrs) {
- PType param;
- param.Init(attrs->dict);
-  std::sort(&param.axis[0], &param.axis[param.axis.ndim()]);
- attrs->parsed = std::move(param);
-}
-
-#define NNVM_REGISTER_BASE_REDUCE_OP(op) \
- NNVM_REGISTER_OP(op) \
- .add_arguments(ReduceParam::__FIELDS__()) \
- .set_attr_parser(AxesParamParser<ReduceParam>) \
- .set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ReduceParam>) \
- .set_num_outputs(1)
-
-#define NNVM_REGISTER_REDUCE_OP(op) \
- NNVM_REGISTER_BASE_REDUCE_OP(op) \
- .add_argument("data", "Tensor", "The input") \
- .set_attr<FInferShape>("FInferShape", ReduceShape) \
- .set_attr<FInferType>("FInferType", ElemwiseType<1, 1>) \
- .set_attr<FCorrectLayout>("FCorrectLayout", \
- ElemwiseFixedLayoutUnknownOut<1, 1>) \
- .set_num_inputs(1)
-
-NNVM_REGISTER_REDUCE_OP(sum)
-.describe(R"code(Computes the sum of array elements over given axes.
-
-Example::
-
- data = [[[1,2],[2,3],[1,3]],
- [[1,4],[4,3],[5,2]],
- [[7,1],[7,2],[7,3]]]
-
- sum(data, axis=1)
- [[ 4. 8.]
- [ 10. 9.]
- [ 21. 6.]]
-
- sum(data, axis=[1,2])
- [ 12. 19. 27.]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
- auto axis = ShapeToIntArray(r_axes);
- return Array<Tensor>{
- topi::sum(inputs[0], axis, param.keepdims, true) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
- bool exclude = param.exclude;
- TShape p_axis = param.axis;
- if (!param.exclude && param.axis.ndim() == 0) {
- exclude = true;
- p_axis = TShape();
- }
- std::ostringstream axis; axis << p_axis;
- return std::vector<NodeEntry>{
- MakeNode("expand_like", n->attrs.name + "_grad",
- {ograds[0], n->inputs[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(exclude)}})
- };
-});
-
-NNVM_REGISTER_REDUCE_OP(max)
-.describe(R"code(Computes the max of array elements over given axes.
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- auto axis = ShapeToIntArray(r_axes);
- return Array<Tensor>{
- topi::max(inputs[0], axis, param.keepdims, true) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
- std::ostringstream axis; axis << param.axis;
- NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
- {ograds[0], n->inputs[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(param.exclude)}});
- NodeEntry sub1 = MakeNode("_max_mask", n->attrs.name + "_grad_sub1",
- {ograds[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(param.exclude)}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
- };
-});
-
-NNVM_REGISTER_REDUCE_OP(min)
-.describe(R"code(Computes the min of array elements over given axes.
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- auto axis = ShapeToIntArray(r_axes);
- return Array<Tensor>{
- topi::min(inputs[0], axis, param.keepdims, true) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- const ReduceParam& param = nnvm::get<ReduceParam>(n->attrs.parsed);
- std::ostringstream axis; axis << param.axis;
- NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0",
- {ograds[0], n->inputs[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(param.exclude)}});
- NodeEntry sub1 = MakeNode("_min_mask", n->attrs.name + "_grad_sub1",
- {ograds[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(param.exclude)}});
- return std::vector<NodeEntry>{
- MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1})
- };
-});
-
-NNVM_REGISTER_BASE_REDUCE_OP(collapse_sum)
-.add_argument("data", "Tensor", "The input")
-.add_argument("as", "Tensor", "The reference")
-.set_attr<FInferShape>("FInferShape", CollapseShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<2, 1>)
-.set_num_inputs(2)
-.describe(R"code(Reduces lhs to the shape of rhs via sum)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::collapse_sum(inputs[0], inputs[1]->shape) };
-});
-
-inline bool InferFixedType(const NodeAttrs& attrs,
- std::vector<int>* in_attrs,
- std::vector<int>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 1U);
- CHECK_EQ(out_attrs->size(), 1U);
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype);
- return true;
-}
-
-NNVM_REGISTER_BASE_REDUCE_OP(argmax)
-.describe(R"code(Creates an operation that finds the indices of the maximum
-values over a given axis.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "The input")
-.set_attr<FInferShape>("FInferShape", ReduceShape)
-.set_attr<FInferType>("FInferType", InferFixedType)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- auto axis = ShapeToIntArray(r_axes);
- Tensor out = topi::argmax(inputs[0], axis, param.keepdims, true);
- if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype);
- return Array<Tensor>{out};
-});
-
-NNVM_REGISTER_BASE_REDUCE_OP(argmin)
-.describe(R"code(Creates an operation that finds the indices of the minimum
-values over a given axis.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "The input")
-.set_attr<FInferShape>("FInferShape", ReduceShape)
-.set_attr<FInferType>("FInferType", InferFixedType)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- auto axis = ShapeToIntArray(r_axes);
- Tensor out = topi::argmin(inputs[0], axis, param.keepdims, true);
- if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype);
- return Array<Tensor>{out};
-});
-
-NNVM_REGISTER_REDUCE_OP(mean)
- .describe(R"code(Computes the mean of array elements over given axes.
-
-Example::
-
- data = [[[1,2],[2,3],[1,3]],
- [[1,4],[4,3],[5,2]],
- [[7,1],[7,2],[7,3]]]
-
- mean(data)
- [3.22]
-
- mean(data, axis=[1,2])
- [ 2. 3.16666667 4.5]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
- auto axis = ShapeToIntArray(r_axes);
-
- Expr count = make_const(inputs[0]->dtype, 1);
- for (auto& i : r_axes) {
- count *= cast(inputs[0]->dtype, inputs[0]->shape[i]);
- }
-
- return Array<Tensor>{
- topi::divide(topi::sum(inputs[0], axis, param.keepdims, true), count) };
-});
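
// Illustrative sketch (not part of the diff above): how the mean kernel above
// derives its divisor, the product of the extents of the reduced axes, for an
// input of shape (3, 3, 2) reduced over axes {1, 2}.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> shape = {3, 3, 2};
  std::vector<int> r_axes = {1, 2};
  long count = 1;
  for (int ax : r_axes) count *= shape[ax];                 // 3 * 2 = 6
  std::printf("count = %ld\n", count);                      // mean = sum / count
  return 0;
}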
-
-NNVM_REGISTER_REDUCE_OP(prod)
- .describe(R"code(Computes the products of array elements over given axes.
-
-Example::
-
- data = [[[1,2],[2,3],[1,3]],
- [[1,4],[4,3],[5,2]],
- [[7,1],[7,2],[7,3]]]
-
-   prod(data)
- [35562240]
-
-   prod(data, axis=[1,2])
- [ 36 480 2058]
-
-)code" NNVM_ADD_FILELINE)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
- TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
- param.axis, param.exclude);
- if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
- auto axis = ShapeToIntArray(r_axes);
- return Array<Tensor>{
- topi::prod(inputs[0], axis, param.keepdims, true) };
-});
-
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file state_op.cc
- * \brief Experimental operators
- * Currently we only support assign
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/top/tensor.h>
-#include <topi/elemwise.h>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-
-namespace nnvm {
-namespace top {
-
-using namespace tvm;
-using namespace nnvm::compiler;
-
-NNVM_REGISTER_OP(_assign)
-.describe(R"doc(Assign rhs to the lhs.
-
-lhs must be a Variable.
-This is an experimental operator.
-
-)doc" NNVM_ADD_FILELINE)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr<FMutateInputs>(
- "FMutateInputs", [](const NodeAttrs& attrs) {
- return std::vector<uint32_t>{0};
-})
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- // This implementation is needed for the special
- // logic handling assign in the compiler
-    // It simply copies the result of rhs to the output.
-    // A later pass in the compiler will change the memory
-    // assignment of assign to tie the lhs to the output.
- return Array<Tensor>{ topi::identity(inputs[1]) };
-})
-.set_attr<FInferShape>("FInferShape", SameShape)
-.set_attr<FCorrectLayout>(
- "FCorrectLayout", [](const NodeAttrs& attrs,
- std::vector<Layout> *in_layouts,
- const std::vector<Layout> *last_in_layouts,
- std::vector<Layout> *out_layouts) {
- NNVM_ASSIGN_LAYOUT(*in_layouts, 1, (*in_layouts)[0]);
- NNVM_ASSIGN_LAYOUT(*out_layouts, 0, (*in_layouts)[0]);
- return true;
-})
-.set_attr<FInplaceOption>(
- "FInplaceOption", [](const NodeAttrs& attrs) {
- return std::vector<std::pair<int, int> >{{1, 0}};
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return std::vector<NodeEntry>{
- MakeNode("zeros_like", n->attrs.name + "_zero_grad",
- {n->inputs[0]}),
- ograds[0]
- };
-});
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file transform.cc
- * \brief Injective transformation of shape or type.
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include <nnvm/compiler/util.h>
-#include <nnvm/top/tensor.h>
-#include <cctype>
-#include <sstream>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-#include "topi/nn/flatten.h"
-#include "topi/transform.h"
-#include "topi/elemwise.h"
-#include "topi/detail/constant_utils.h"
-#include "../../compiler/compile_engine.h"
-
-namespace nnvm {
-namespace top {
-using namespace tvm;
-using namespace nnvm::compiler;
-
-// flatten
-inline bool FlattenInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape &dshape = (*in_attrs)[0];
- if (dshape.ndim() == 0) return false;
- uint32_t target_dim = 1;
- for (uint32_t i = 1; i < dshape.ndim(); ++i) {
- target_dim *= dshape[i];
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0,
- TShape({dshape[0], target_dim}));
- return true;
-}
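
// Illustrative sketch (not part of the diff above): the shape arithmetic in
// FlattenInferShape, where (d1, d2, ..., dk) becomes (d1, d2*...*dk).
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> dshape = {2, 3, 4, 5};
  unsigned target_dim = 1;
  for (size_t i = 1; i < dshape.size(); ++i) target_dim *= dshape[i];
  std::printf("(%u, %u)\n", dshape[0], target_dim);         // prints: (2, 60)
  return 0;
}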
-
-NNVM_REGISTER_OP(flatten)
-.describe(R"code(Flattens the input into a 2-D array.
-
-For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation reshapes
-the input array into an output array of shape ``(d1, d2*...*dk)``.
-
-Example::
-
- x = [[
- [1,2,3],
- [4,5,6],
- [7,8,9]
- ],
- [ [1,2,3],
- [4,5,6],
- [7,8,9]
- ]],
-
- flatten(x) = [[ 1., 2., 3., 4., 5., 6., 7., 8., 9.],
- [ 1., 2., 3., 4., 5., 6., 7., 8., 9.]]
-
-)code" NNVM_ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", FlattenInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.add_argument("data", "Tensor", "Input data.")
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::nn::flatten(inputs[0]) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return MakeGradNode("reshape_like", n,
- {ograds[0], n->inputs[0]});
-})
-.set_support_level(1);
-
-// concatenate
-DMLC_REGISTER_PARAMETER(ConcatenateParam);
-
-inline bool ConcatenateInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const ConcatenateParam& param = nnvm::get<ConcatenateParam>(attrs.parsed);
- TShape dshape;
- dim_t size = 0;
- bool has_zero = false;
- int axis = param.axis >= 0 ? param.axis : in_shape->at(0).ndim() + param.axis;
- for (size_t i = 0; i < in_shape->size(); ++i) {
- TShape tmp = (*in_shape)[i];
- if (tmp.ndim()) {
- CHECK_LT(static_cast<dim_t>(axis), tmp.ndim())
- << "concat dim " << axis << " out of range of input shape " << tmp;
- has_zero = tmp[axis] == 0 || has_zero;
- size += tmp[axis];
- tmp[axis] = 0;
- shape_assign(&dshape, tmp);
- }
- }
-
- TShape tmp = (*out_shape)[0];
- if (tmp.ndim()) {
- CHECK_LT(static_cast<dim_t>(axis), tmp.ndim())
- << "concat dim " << axis << " out of range of input shape " << tmp;
- tmp[axis] = 0;
- shape_assign(&dshape, tmp);
- }
-
- if (dshape.ndim() == 0) return false;
-
- for (size_t i = 0; i < in_shape->size(); ++i) {
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, i, dshape);
- }
-
- if (!has_zero) dshape[axis] = size;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, dshape);
- return dshape.Size() != 0;
-}
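
// Illustrative sketch (not part of the diff above): the core of
// ConcatenateInferShape. All non-concat dimensions must agree, and the concat
// dimension is the sum of the per-input extents along `axis`. The shapes below
// mirror the x, y, z example in the describe() further down.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int>> in_shapes = {{2, 2}, {3, 2}, {3, 2}};
  const int axis = 0;
  std::vector<int> oshape = in_shapes[0];
  for (size_t i = 1; i < in_shapes.size(); ++i) oshape[axis] += in_shapes[i][axis];
  std::printf("(%d, %d)\n", oshape[0], oshape[1]);          // prints: (8, 2)
  return 0;
}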
-
-inline bool ConcatenateCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const ConcatenateParam& param = nnvm::get<ConcatenateParam>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), last_ilayouts->size());
- CHECK_EQ(olayouts->size(), 1U);
-
- Layout layout;
- if (!ilayouts->at(0).defined()) {
- layout = last_ilayouts->at(0);
- } else if (param.axis >= static_cast<int>(ilayouts->at(0).ndim())) {
- CHECK(last_ilayouts->at(0).defined())
- << "Current input layout " << ilayouts->at(0)
- << " is invalid but last input layout is not "
- "defined for the first input.";
- layout = last_ilayouts->at(0);
- } else if (last_ilayouts->at(0).defined()
- && ilayouts->at(0)[param.axis]
- != last_ilayouts->at(0)[param.axis]) {
- layout = last_ilayouts->at(0);
- } else {
- layout = ilayouts->at(0);
- }
-
- for (size_t i = 0; i < ilayouts->size(); ++i) {
- NNVM_ASSIGN_LAYOUT(*ilayouts, i, layout);
- }
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout);
- return true;
-}
-
-NNVM_REGISTER_OP(concatenate)
-.describe(R"code(Joins input arrays along a given axis.
-
-The dimensions of the input arrays should be the same except the axis along
-which they will be concatenated.
-The dimension of the output array along the concatenated axis will be equal
-to the sum of the corresponding dimensions of the input arrays.
-
-Example::
-
- x = [[1,1],[2,2]]
- y = [[3,3],[4,4],[5,5]]
- z = [[6,6], [7,7],[8,8]]
-
- concatenate(x,y,z,axis=0) = [[ 1., 1.],
- [ 2., 2.],
- [ 3., 3.],
- [ 4., 4.],
- [ 5., 5.],
- [ 6., 6.],
- [ 7., 7.],
- [ 8., 8.]]
-
- Note that you cannot concat x,y,z along dimension 1 since dimension
- 0 is not the same for all the input arrays.
-
- concatenate(y,z,axis=1) = [[ 3., 3., 6., 6.],
- [ 4., 4., 7., 7.],
- [ 5., 5., 8., 8.]]
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor-or-Tensor[]", "List of arrays to concatenate")
-.add_arguments(ConcatenateParam::__FIELDS__())
-.set_attr_parser(ParamParser<ConcatenateParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ConcatenateParam>)
-.set_attr<FInferShape>("FInferShape", ConcatenateInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ConcatenateCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ConcatenateParam& param = nnvm::get<ConcatenateParam>(attrs.parsed);
- return Array<Tensor>{ topi::concatenate(inputs, param.axis) };
-})
-.set_num_outputs(1)
-.set_num_inputs(kVarg)
-.set_support_level(1);
-
-// expand_dims
-DMLC_REGISTER_PARAMETER(ExpandDimsParam);
-
-inline bool ExpandDimsInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const ExpandDimsParam& param = nnvm::get<ExpandDimsParam>(attrs.parsed);
- CHECK_EQ(in_shape->size(), 1U);
- const TShape& dshape = in_shape->at(0);
- int ndim = static_cast<int>(dshape.ndim());
- CHECK(param.axis >= -ndim - 1 && param.axis <= ndim)
- << "with axis = " << param.axis << " ndim = " << ndim;
- int axis = param.axis < 0 ? ndim + param.axis + 1 : param.axis;
- std::vector<dim_t> oshape;
- for (int i = 0; i < axis; ++i) {
- oshape.push_back(dshape[i]);
- }
- for (int i = 0; i < param.num_newaxis; ++i) {
- oshape.push_back(1);
- }
- for (int i = axis; i < ndim; ++i) {
- oshape.push_back(dshape[i]);
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0,
- TShape(oshape.begin(), oshape.end()));
- return true;
-}
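
// Illustrative sketch (not part of the diff above): the output-shape construction
// in ExpandDimsInferShape, i.e. num_newaxis ones inserted at `axis`, matching the
// (2,3,4) -> (2,1,1,1,1,1,3,4) example in the describe() below.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> dshape = {2, 3, 4};
  const int axis = 1, num_newaxis = 5;
  std::vector<int> oshape(dshape.begin(), dshape.begin() + axis);
  oshape.insert(oshape.end(), num_newaxis, 1);              // the inserted unit axes
  oshape.insert(oshape.end(), dshape.begin() + axis, dshape.end());
  for (int d : oshape) std::printf("%d ", d);               // prints: 2 1 1 1 1 1 3 4
  std::printf("\n");
  return 0;
}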
-
-NNVM_REGISTER_OP(expand_dims)
-.describe(R"code(Inserts a new axis of size 1 into the array shape
-
-For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1, num_newaxis=5)``
-will return a new array with shape ``(2,1,1,1,1,1,3,4)``.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input tensor")
-.add_arguments(ExpandDimsParam::__FIELDS__())
-.set_attr_parser(ParamParser<ExpandDimsParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ExpandDimsParam>)
-.set_attr<FInferShape>("FInferShape", ExpandDimsInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const ExpandDimsParam& param = nnvm::get<ExpandDimsParam>(attrs.parsed);
- return Array<Tensor>{ topi::expand_dims(inputs[0], param.axis, param.num_newaxis) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds){
- return std::vector<NodeEntry> {
- MakeNode("collapse_sum", n->attrs.name + "_grad", {ograds[0], n->inputs[0]})
- };
-})
-.set_support_level(1);
-
-NNVM_REGISTER_OP(expand_like)
- .describe(R"code(Expand an input array with the shape of second array.
-This operation can be thought of as a composition of expand_dims and broadcast_to.
-If the dimensions are already expanded then it just broadcasts.
-Examples::
- input = [ 12. 19. 27.]
- input.shape = (3,)
- new_shape_array = [[[1,2],[2,3],[1,3]],
- [[1,4],[4,3],[5,2]],
- [[7,1],[7,2],[7,3]]]
- new_shape_array.shape = (3, 3, 2)
- expand_like(input, [1,2], new_shape_array) =
- [[[12,12],[12,12],[12,12]],
- [[19,19],[19,19],[19,19]],
- [[27,27],[27,27],[27,27]]]
-)code" NNVM_ADD_FILELINE)
-.add_argument("input", "Tensor", "Source input")
-.add_argument("shape_like", "Tensor", "Input with new shape")
-.add_arguments(IndicatorParam::__FIELDS__())
-.set_attr_parser(ParamParser<IndicatorParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<IndicatorParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", AssignOutputAttr<TShape, 1, 0>)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
-// never transform layout of the second input array.
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- const IndicatorParam& param = nnvm::get<IndicatorParam>(n->attrs.parsed);
- std::ostringstream axis;
- axis << param.axis;
-
- if (param.axis.ndim() == 0 && !param.exclude) {
- // Special case needed because sum interprets axis=[] differently
- return std::vector<NodeEntry>{
- ograds[0],
- MakeNode("zeros_like", n->attrs.name + "_zero_grad", {n->inputs[1]})
- };
- }
-
- auto sum_node =
- MakeNode("sum", n->attrs.name + "_sum_grad",
- {ograds[0]},
- {{"axis", axis.str()},
- {"exclude", std::to_string(param.exclude)}});
-
- return std::vector<NodeEntry>{
- MakeNode("reshape_like", n->attrs.name + "_grad",
- {sum_node, n->inputs[0]}),
- MakeNode("zeros_like", n->attrs.name + "_zero_grad", {n->inputs[1]})
- };
- })
- .set_support_level(4);
-
-// split
-DMLC_REGISTER_PARAMETER(SplitParam);
-
-inline void SplitParamParser(nnvm::NodeAttrs* attrs) {
- SplitParam param;
- param.Init(attrs->dict);
- if (!std::isdigit(attrs->dict.at("indices_or_sections")[0])) {
- param.equal_split = false;
- } else {
- CHECK_EQ(param.indices_or_sections.ndim(), 1);
- param.equal_split = true;
- }
- attrs->parsed = std::move(param);
-}
-
-inline bool SplitInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed);
- const TShape& dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
-
- auto axis = param.axis;
- if (axis < 0) {
- axis += dshape.ndim();
- }
- CHECK_LT(axis, dshape.ndim())
- << "axis should be within input dimension range but got " << axis;
- CHECK_GT(axis, -1)
- << "axis should be within input dimension range but got " << axis;
-
- if (param.equal_split) {
- int num_outputs = param.indices_or_sections[0];
- CHECK_EQ(out_shape->size(), static_cast<size_t>(num_outputs));
- TShape oshape = dshape;
- CHECK_EQ(oshape[axis] % num_outputs, 0)
-        << "indices_or_sections needs to evenly divide input.shape[axis]; got sections "
- << num_outputs << " and dimension " << oshape[axis];
- oshape[axis] /= num_outputs;
-
- for (size_t i = 0; i < out_shape->size(); ++i) {
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape);
- }
- } else {
- dim_t num_outputs = param.indices_or_sections.ndim() + 1;
- CHECK_EQ(out_shape->size(), static_cast<size_t>(num_outputs));
- TShape oshape = dshape;
- dim_t begin = 0;
- for (dim_t i = 0; i < num_outputs - 1; ++i) {
- CHECK_GT(param.indices_or_sections[i], begin)
-          << "indices_or_sections needs to be a sorted ascending list; got "
- << param.indices_or_sections;
- oshape[axis] = param.indices_or_sections[i] - begin;
- begin = param.indices_or_sections[i];
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape);
- }
- CHECK_LT(begin, dshape[axis])
- << "The sum of sections must match the input.shape[axis]";
- oshape[axis] = dshape[axis] - begin;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, num_outputs - 1, oshape);
- }
- return true;
-}
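
// Illustrative sketch (not part of the diff above): the two branches of
// SplitInferShape, equal sections versus explicit split indices, reduced to the
// per-output extents along the split axis. Values below are arbitrary.
#include <cstdio>
#include <vector>

int main() {
  const int dim = 10;                                       // input.shape[axis]
  // equal_split: indices_or_sections = 5  ->  five outputs of extent 2
  std::printf("equal split: 5 outputs of extent %d\n", dim / 5);
  // explicit indices: indices_or_sections = (2, 7)  ->  extents 2, 5, 3
  std::vector<int> indices = {2, 7};
  int begin = 0;
  for (int idx : indices) { std::printf("%d ", idx - begin); begin = idx; }
  std::printf("%d\n", dim - begin);                         // prints: 2 5 3
  return 0;
}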
-
-inline uint32_t SplitNumOutputs(const NodeAttrs& attrs) {
- const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed);
- if (param.equal_split) {
- return static_cast<uint32_t>(param.indices_or_sections[0]);
- } else {
- return static_cast<uint32_t>(param.indices_or_sections.ndim()) + 1;
- }
-}
-
-// Intentionally not adding ParamGetAttrDict for indices_or_sections.
-NNVM_REGISTER_OP(split)
-.describe(R"code(Splits an array along a particular axis into multiple sub-arrays.
-
-**Note** that `indices_or_sections` should evenly divide the length of the axis
-along which to split the array.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Array to be splitted")
-.add_arguments(SplitParam::__FIELDS__())
-.set_attr_parser(SplitParamParser)
-.set_attr<FInferShape>("FInferShape", SplitInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, -1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, -1>)
-.set_num_inputs(1)
-.set_num_outputs(SplitNumOutputs)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed);
- if (param.equal_split) {
- return Array<Tensor>{
- topi::split_sections(inputs[0], param.indices_or_sections[0], param.axis) };
- } else {
- Array<Integer> indices;
- for (auto i : param.indices_or_sections) {
- indices.push_back(static_cast<int>(i));
- }
- return Array<Tensor>{ topi::split(inputs[0], indices, param.axis) };
- }
-})
-.set_support_level(3);
-
-// cast
-DMLC_REGISTER_PARAMETER(CastParam);
-
-inline bool CastInferType(const NodeAttrs& attrs,
- std::vector<int>* in_attrs,
- std::vector<int>* out_attrs) {
- const CastParam& param = nnvm::get<CastParam>(attrs.parsed);
- CHECK_EQ(out_attrs->size(), 1U);
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype);
- return true;
-}
-
-NNVM_REGISTER_OP(cast)
-.describe(R"code(Cast the content of input to dtype.
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data array")
-.add_arguments(CastParam::__FIELDS__())
-.set_attr_parser(ParamParser<CastParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<CastParam>)
-.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<FInferType>("FInferType", CastInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const CastParam& param = nnvm::get<CastParam>(attrs.parsed);
- DataType dtype = GetTVMType(param.dtype);
- return Array<Tensor>{ topi::cast(inputs[0], dtype) };
-})
-.set_support_level(1);
-
-
-// reshape
-DMLC_REGISTER_PARAMETER(ReshapeParam);
-
-inline bool ReshapeInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- const ReshapeParam& param = nnvm::get<ReshapeParam>(attrs.parsed);
- CHECK_GT(param.shape.ndim(), 0);
- CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
- CHECK_EQ(out_attrs->size(), 1U);
-
- const TShape &dshape = (*in_attrs)[0];
- if (dshape.ndim() == 0) return false;
-
- const Tuple<int64_t>& target_shape = param.shape;
- std::vector<int64_t> oshape;
- dim_t src_idx = 0;
- int infer_idx = -1;
-
- for (dim_t i = 0; i < target_shape.ndim(); ++i) {
- int svalue = target_shape[i];
- // special flag handling for shape inference.
- if (svalue > 0) {
- oshape.push_back(svalue);
- ++src_idx;
- } else if (svalue == 0) {
- // keep same
- CHECK_LT(src_idx, dshape.ndim());
- oshape.push_back(dshape[src_idx++]);
- } else if (svalue == -1) {
- // inference based on rest
- CHECK_LT(infer_idx, 0)
- << "One and only one dim can be inferred";
- infer_idx = i;
- oshape.push_back(1);
- ++src_idx;
- } else if (svalue == -2) {
- // copy all remaining dims from source
- while (src_idx < dshape.ndim()) {
- oshape.push_back(dshape[src_idx++]);
- }
- } else if (svalue == -3) {
- // merge two dims from source
- CHECK_LT(src_idx + 1, dshape.ndim());
- dim_t d1 = dshape[src_idx++];
- dim_t d2 = dshape[src_idx++];
- oshape.push_back(d1 * d2);
- } else if (svalue == -4) {
- // split the source dim s into two dims
- // read the left dim and then the right dim (either can be -1)
- CHECK_LT(i + 2, target_shape.ndim());
- CHECK_LT(src_idx, dshape.ndim());
- dim_t d0 = dshape[src_idx++];
- int d1 = target_shape[++i];
- int d2 = target_shape[++i];
- CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1.";
- if (d1 == -1) d1 = d0 / d2;
- if (d2 == -1) d2 = d0 / d1;
- CHECK_EQ(d1 * d2, static_cast<int>(d0)) <<
- "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0;
- oshape.push_back(d1);
- oshape.push_back(d2);
- }
- }
-
- if (infer_idx >= 0) {
- if (dshape.Size() > 0) {
- int new_size = 1;
- for (int x : oshape) {
- new_size *= x;
- }
- oshape[infer_idx] = dshape.Size() / new_size;
- } else {
- oshape[infer_idx] = 0;
- }
- }
- TShape out_shape(oshape.begin(), oshape.end());
- CHECK_EQ(out_shape.Size(), dshape.Size())
- << "Target shape size is different to source. "
- << "Target: " << out_shape
- << "\nSource: " << dshape;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape);
- return true;
-}
-
-NNVM_REGISTER_OP(reshape)
-.describe(R"code(Reshapes the input array.
-
-Given an array and a shape, this function returns a copy of the array in the new shape.
-The shape is a tuple of integers such as (2,3,4). The size of the new shape should be the same as the size of the input array.
-
-Example::
-
- reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]
-
-To give the user more convenience without doing manual shape inference,
-some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}.
-The significance of each is explained below:
-
-- ``0`` copy this dimension from the input to the output shape.
-
- Example::
-
- - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
- - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)
-
-- ``-1`` infers the dimension of the output shape by using the remainder of the input dimensions
- keeping the size of the new array same as that of the input array.
- At most one dimension of shape can be -1.
-
- Example::
-
- - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
- - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
- - input shape = (2,3,4), shape=(-1,), output shape = (24,)
-
-- ``-2`` copy all/remainder of the input dimensions to the output shape.
-
- Example::
-
- - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
- - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
- - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)
-
-- ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.
-
- Example::
-
- - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
- - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
- - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
- - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)
-
-- ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).
-
- Example::
-
-    - input shape = (2,3,4), shape = (-4,1,2,-2), output shape = (1,2,3,4)
- - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_arguments(ReshapeParam::__FIELDS__())
-.set_attr_parser(ParamParser<ReshapeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ReshapeParam>)
-.set_attr<FInferShape>("FInferShape", ReshapeInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{ topi::reshape(inputs[0], out_info[0]->shape) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("reshape_like", n->attrs.name + "_grad",
- {ograds[0], n->inputs[0]})
- };
-})
-.set_support_level(3);
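
// Illustrative sketch (not part of the diff above): the two most common special
// shape flags handled by ReshapeInferShape, 0 (copy the input dimension) and
// -1 (infer from the remaining size). The -2/-3/-4 flags are omitted here; the
// example reproduces the documented (2,3,4) with shape (4,0,-1) -> (4,3,2) case.
#include <cstdio>
#include <vector>

int main() {
  std::vector<long> dshape = {2, 3, 4};                     // input shape
  std::vector<long> target = {4, 0, -1};                    // requested shape
  long total = 1;
  for (long d : dshape) total *= d;

  std::vector<long> oshape;
  size_t src_idx = 0;
  int infer_idx = -1;
  for (long v : target) {
    if (v == 0) v = dshape[src_idx];                        // 0: copy this input dim
    if (v == -1) { infer_idx = static_cast<int>(oshape.size()); v = 1; }  // -1: infer later
    oshape.push_back(v);
    ++src_idx;
  }
  if (infer_idx >= 0) {
    long known = 1;
    for (long d : oshape) known *= d;
    oshape[infer_idx] = total / known;                      // 24 / 12 = 2
  }
  for (long d : oshape) std::printf("%ld ", d);             // prints: 4 3 2
  std::printf("\n");
  return 0;
}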
-
-inline bool ReshapeLikeInferType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]);
- return true;
-}
-
-NNVM_REGISTER_OP(reshape_like)
- .describe(R"code(Reshapes the input array by the size of another array.
-For an input array with shape ``(d1, d2, ..., dk)``, `reshape_like` operation reshapes
-the input array into an output array with the same shape as the second input array.
-.. note::
-    Sizes for both arrays should be compatible.
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_argument("shape_like", "Tensor", "Input data.")
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr<FInferShape>(
- "FInferShape", [](const NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->at(0).Size(), in_attrs->at(1).Size())
- << "Reshape inputs size should be compatible";
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, in_attrs->at(1));
- return true;
-})
-.set_attr<FInferType>("FInferType", ReshapeLikeInferType)
-// never transform layout of the second input array.
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("reshape_like", n->attrs.name + "_grad", {ograds[0], n->inputs[0]}),
- MakeNode("zeros_like", n->attrs.name + "_zero_grad", { n->inputs[1]})
- };
-})
-.set_support_level(4);
-
-// squeeze
-DMLC_REGISTER_PARAMETER(SqueezeParam);
-
-inline bool SqueezeShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- const SqueezeParam& param = nnvm::get<SqueezeParam>(attrs.parsed);
- CHECK_EQ(in_attrs->size(), 1U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& shp = (*in_attrs)[0];
- if (shp.ndim() == 0) return false;
-
- std::vector<int64_t> oshape;
- if (param.axis.ndim() == 0) {
- for (dim_t i = 0; i < shp.ndim(); ++i) {
- if (shp[i] != 1) {
- oshape.emplace_back(shp[i]);
- }
- }
- } else {
- std::unordered_set<dim_t> axis_checker;
- for (size_t i = 0; i < param.axis.ndim(); ++i) {
- int real_axis;
- if (param.axis[i] < 0) {
- real_axis = param.axis[i] + static_cast<int>(shp.ndim());
- } else {
- real_axis = param.axis[i];
- }
- CHECK(real_axis < static_cast<int>(shp.ndim()) && real_axis >= 0);
- axis_checker.insert(real_axis);
- }
- for (size_t i = 0; i < shp.ndim(); ++i) {
- if (axis_checker.find(i) == axis_checker.end()) {
- oshape.emplace_back(shp[i]);
- } else {
-        CHECK_EQ(shp[i], 1) << "The squeezed axis must have shape 1! "
-                            << "Want to squeeze " << i
-                            << ", which has shape " << shp[i];
- }
- }
- }
- if (oshape.size() == 0) {
- // Handles the case where all axes are squeezed.
- oshape.push_back(1);
- }
- TShape out_shape(oshape.begin(), oshape.end());
- CHECK_EQ(out_shape.Size(), shp.Size())
- << "Target shape size is different to source. "
- << "Target: " << out_shape
- << "\nSource: " << shp;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape);
- return true;
-}
-
-NNVM_REGISTER_OP(squeeze)
-.describe(R"code(Squeeze axises in the array.
-
-Examples::
-
- x = [[[0], [1], [2]]]
- x.shape = (1, 3, 1)
-
- squeeze(x) = [0, 1, 2]
-
- squeeze(x, 0) = [[0], [1], [2]]
-
- squeeze(x, (0, 2)) = [0, 1, 2]
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Source input")
-.add_arguments(SqueezeParam::__FIELDS__())
-.set_attr_parser(ParamParser<SqueezeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<SqueezeParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", SqueezeShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const SqueezeParam& param = nnvm::get<SqueezeParam>(attrs.parsed);
- auto axis = ShapeToIntArray(param.axis);
- return Array<Tensor>{ topi::squeeze(inputs[0], axis, true) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("reshape_like", n->attrs.name + "_grad",
- {ograds[0], n->inputs[0]})
- };
-})
-.set_support_level(1);
-
-// transpose
-DMLC_REGISTER_PARAMETER(TransposeParam);
-
-inline bool TransposeShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
- CHECK_EQ(in_attrs->size(), 1U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& shp = (*in_attrs)[0];
- if (shp.ndim() == 0) return false;
-
- TShape ret(shp.ndim());
- if (param.axes.ndim() == 0) {
- for (dim_t i = 0; i < shp.ndim(); ++i) {
- ret[i] = shp[shp.ndim() - 1 - i];
- }
- } else {
- CHECK_EQ(shp.ndim(), param.axes.ndim());
- for (size_t i = 0; i < shp.ndim(); ++i) {
- CHECK(param.axes[i] < shp.ndim());
- ret[i] = shp[param.axes[i]];
- }
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, ret);
- return true;
-}
-
-inline bool TransposeCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
- CHECK_EQ(ilayouts->size(), 1U);
- CHECK_EQ(olayouts->size(), 1U);
-
- const Layout& input = last_ilayouts->at(0).defined()
- ? last_ilayouts->at(0)
- : ilayouts->at(0);
-
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input);
-
- if (input.defined()) {
- std::ostringstream new_layout;
- if (param.axes.ndim() == 0) {
- for (size_t i = 0; i < input.ndim(); ++i) {
- new_layout << input.at(input.ndim() - 1 - i);
- }
- } else {
- CHECK_EQ(input.ndim(), param.axes.ndim());
- for (size_t i = 0; i < input.ndim(); ++i) {
- CHECK(param.axes[i] < static_cast<int>(input.ndim()));
- new_layout << input.at(param.axes[i]);
- }
- }
- NNVM_ASSIGN_LAYOUT(*olayouts, 0, Layout(new_layout.str()));
- }
-
- return true;
-}
-
-NNVM_REGISTER_OP(transpose)
-.describe(R"code(Permutes the dimensions of an array.
-
-Examples::
-
- x = [[ 1, 2],
- [ 3, 4]]
-
- transpose(x) = [[ 1., 3.],
- [ 2., 4.]]
-
- x = [[[ 1., 2.],
- [ 3., 4.]],
-
- [[ 5., 6.],
- [ 7., 8.]]]
-
- transpose(x) = [[[ 1., 5.],
- [ 3., 7.]],
-
- [[ 2., 6.],
- [ 4., 8.]]]
-
- transpose(x, axes=(1,0,2)) = [[[ 1., 2.],
- [ 5., 6.]],
-
- [[ 3., 4.],
- [ 7., 8.]]]
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Source input")
-.add_arguments(TransposeParam::__FIELDS__())
-.set_attr_parser(ParamParser<TransposeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<TransposeParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", TransposeShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", TransposeCorrectLayout)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_support_level(4)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
- auto axes = ShapeToIntArray(param.axes);
- return Array<Tensor>{ topi::transpose(inputs[0], axes) };
-})
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- const TransposeParam& param = nnvm::get<TransposeParam>(n->attrs.parsed);
- std::ostringstream oss; oss << param.axes;
- return std::vector<NodeEntry>{
- MakeNode("transpose", n->attrs.name + "_t", {ograds[0]}, {{"axes", oss.str()}})
- };
-});
-
-// strided_slice
-DMLC_REGISTER_PARAMETER(StridedSliceParam);
-
-inline void StridedSliceParamParser(nnvm::NodeAttrs* attrs) {
- StridedSliceParam param;
- param.Init(attrs->dict);
- attrs->parsed = std::move(param);
-}
-
-inline bool StridedSliceInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- const StridedSliceParam& param = nnvm::get<StridedSliceParam>(attrs.parsed);
- const TShape& dshape = (*in_shape)[0];
- if (dshape.ndim() == 0) return false;
- TShape oshape = dshape;
- dim_t num_axis = dshape.ndim();
-
- std::vector<int64_t> begin_vec;
- std::copy(param.begin.begin(), param.begin.end(), std::back_inserter(begin_vec));
- for (dim_t i = begin_vec.size(); i < num_axis; ++i) {
- begin_vec.push_back(0);
- }
-
- std::vector<int64_t> end_vec;
- std::copy(param.end.begin(), param.end.end(), std::back_inserter(end_vec));
- for (dim_t i = end_vec.size(); i < num_axis; ++i) {
- end_vec.push_back(dshape[i]);
- }
-
- std::vector<int64_t> stride_vec;
- std::copy(param.stride.begin(), param.stride.end(), std::back_inserter(stride_vec));
- for (dim_t i = stride_vec.size(); i < num_axis; ++i) {
- stride_vec.push_back(1);
- }
-
- for (dim_t i = 0; i < num_axis; ++i) {
- int64_t begin_range = stride_vec[i] < 0 ? -1 : 0;
- int64_t end_range = stride_vec[i] < 0 ? dshape[i] - 1 : dshape[i];
- int64_t begin = begin_vec[i] < 0 ? dshape[i] + begin_vec[i] : begin_vec[i];
- int64_t end = end_vec[i] < 0 ? dshape[i] + end_vec[i] : end_vec[i];
- begin = std::min(std::max(begin, begin_range), end_range);
- end = std::min(std::max(end, begin_range), end_range);
-
- int interval = std::abs(end - begin);
- int slice_size = static_cast<int>((interval
- + std::abs(stride_vec[i]) - 1) / std::abs(stride_vec[i]));
- CHECK(stride_vec[i] < 0 ? (end < begin) : (begin < end))
- << ": Input [Begin=" << begin_vec[i] << ", End=" << end_vec[i]
- << "] is invalid for axis=" << i;
- oshape[i] = slice_size;
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(strided_slice)
-.describe(R"code(Strided slice of an array.
-
-Examples::
-
- x = [[ 1., 4., 7., 10.],
- [ 2., 5., 8., 11.],
- [ 3., 6., 9., 12.]]
-
- strided_slice(x, begin=[0, 1], end=[2, 4], stride=[1, 1]) = [[ 4., 7., 10.],
- [ 5., 8., 11.]]
-
- x = [[[ 1., 2.],
- [ 3., 4.]],
-
- [[ 5., 6.],
- [ 7., 8.]]]
-
- strided_slice(x, begin=[0, 0], end=[2, 2]) = [[[ 1., 2.],
- [ 3., 4.]],
-
- [[ 5., 6.],
- [ 7., 8.]]]
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Array to be sliced")
-.add_arguments(StridedSliceParam::__FIELDS__())
-.set_attr_parser(StridedSliceParamParser)
-.set_attr<FInferShape>("FInferShape", StridedSliceInferShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const StridedSliceParam& param = nnvm::get<StridedSliceParam>(attrs.parsed);
- Array<Integer> begin;
- Array<Integer> end;
- Array<Integer> stride;
-
- for (int64_t i : param.begin) {
- begin.push_back(static_cast<int>(i));
- }
-
- for (int64_t i : param.end) {
- end.push_back(static_cast<int>(i));
- }
-
- for (int64_t i : param.stride) {
- stride.push_back(static_cast<int>(i));
- }
-
- return Array<Tensor>{
- topi::strided_slice(inputs[0], begin, end, stride)
- };
-})
-.set_support_level(1);
-
-// Flip
-DMLC_REGISTER_PARAMETER(FlipParam);
-
-NNVM_REGISTER_OP(flip)
-.describe(R"code(Reverse the elements of an array.
-
-Examples::
-
- x = [[ 1, 2],
- [ 3, 4]]
-
- flip(x) = [[ 3., 4.],
- [ 1., 2.]]
-
- x = [[[ 1., 2.],
- [ 3., 4.]],
-
- [[ 5., 6.],
- [ 7., 8.]]]
-
- flip(x) = [[[ 5., 6.],
- [ 7., 8.]],
-
- [[ 1., 2.],
- [ 3., 4.]]]
-
- flip(x, axis=1) = [[[ 3., 4.],
- [ 1., 2.]],
-
- [[ 7., 8.],
- [ 5., 6.]]]
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Source input")
-.add_arguments(FlipParam::__FIELDS__())
-.set_attr_parser(ParamParser<FlipParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<FlipParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_support_level(4)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const FlipParam& param = nnvm::get<FlipParam>(attrs.parsed);
- return Array<Tensor>{ topi::flip(inputs[0], param.axis) };
-});
-
-
-// take
-DMLC_REGISTER_PARAMETER(TakeParam);
-
-inline bool TakeInferShape(const NodeAttrs& attrs,
- std::vector<TShape>* in_shape,
- std::vector<TShape>* out_shape) {
- CHECK_EQ(in_shape->size(), 2U);
- CHECK_EQ(out_shape->size(), 1U);
- const TShape& dshape = (*in_shape)[0];
- const TShape& indicesshape = (*in_shape)[1];
- if (dshape.ndim() == 0) return false;
- if (indicesshape.ndim() == 0) return false;
-
- const TakeParam& param = nnvm::get<TakeParam>(attrs.parsed);
- TShape oshape((!param.axis ? 0: dshape.ndim() - 1) + indicesshape.ndim());
- if (!param.axis) {
- for (size_t j = 0; j < indicesshape.ndim(); ++j) {
- oshape[j] = indicesshape[j];
- }
- } else {
- int axis = param.axis.value();
- if (axis < 0) {
- axis += dshape.ndim();
- }
- CHECK_LT(axis, dshape.ndim());
-
- size_t posi = 0;
- for (size_t i = 0; i < dshape.ndim(); ++i) {
- if (static_cast<int>(i) == axis) {
- for (size_t j = 0; j < indicesshape.ndim(); ++j) {
- oshape[posi++] = indicesshape[j];
- }
- } else {
- oshape[posi++] = dshape[i];
- }
- }
- }
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape);
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 1, indicesshape);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return dshape.Size() != 0;
-}
-
-inline bool TakeInferType(const NodeAttrs& attrs,
- std::vector<int>* in_attrs,
- std::vector<int>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- CHECK_EQ((*in_attrs)[1], kInt32);
- NNVM_ASSIGN_INPUT_TYPE(attrs, *in_attrs, 0, (*in_attrs)[0]);
- NNVM_ASSIGN_INPUT_TYPE(attrs, *in_attrs, 1, static_cast<int>(kInt32));
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]);
- return true;
-}
-
-inline bool TakeCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- CHECK_EQ(ilayouts->size(), last_ilayouts->size());
- CHECK_EQ(olayouts->size(), 1U);
-
- for (size_t i = 0; i < ilayouts->size(); ++i) {
- const Layout& input = last_ilayouts->at(i).defined() ?
- last_ilayouts->at(i) : ilayouts->at(i);
- NNVM_ASSIGN_LAYOUT(*ilayouts, i, input);
- }
-
- return true;
-}
-
-NNVM_REGISTER_OP(take)
-.describe(R"code(Take elements from an array along an axis.
-
-When axis is not None, this function does the same thing as 'fancy' indexing
-(indexing arrays using arrays); however, it can be easier to use if you need
-elements along a given axis.
-
-**Note** that when axis is None, the flattened input array is used.
-
-Examples::
-
- a = [[ 1, 2],
- [ 3, 4]]
- indices = [3, 0, 2]
- take(a, indices) = [ 4, 1, 3]
-
- a = [[ 1., 2.],
- [ 3., 4.]]
- indices = [1, 0]
- take(a, indices, axis=1) = [[ 2., 1.],
- [ 4., 3.]]
-
- )code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Array to be indexed")
-.add_argument("indices", "Tensor", "The indices of the values to extract")
-.add_arguments(TakeParam::__FIELDS__())
-.set_attr_parser(ParamParser<TakeParam>)
-.set_attr<FInferShape>("FInferShape", TakeInferShape)
-.set_attr<FInferType>("FInferType", TakeInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", TakeCorrectLayout)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_support_level(3)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const TakeParam& param = nnvm::get<TakeParam>(attrs.parsed);
- if (!param.axis) {
- return Array<Tensor>{
- topi::take(inputs[0], inputs[1]) };
- } else {
- return Array<Tensor>{
- topi::take(inputs[0], inputs[1], param.axis.value()) };
- }
- });
-
-
-// SliceLike
-DMLC_REGISTER_PARAMETER(SliceLikeParam);
-
-inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- const SliceLikeParam& param = nnvm::get<SliceLikeParam>(attrs.parsed);
- const TShape& src_shape = in_attrs->at(0);
- const TShape& target_shape = in_attrs->at(1);
- Tuple<dim_t> end_idx;
- end_idx = Tuple<dim_t>(src_shape);
- if (param.axis.ndim() == 0) {
- for (size_t i = 0; i < src_shape.ndim(); ++i) {
- if (i < target_shape.ndim()) {
- end_idx[i] = target_shape[i];
- CHECK_LE(end_idx[i], src_shape[i])
- << "End index of axis " << i << " exceeds input shape: "
- << end_idx[i] << " vs " << src_shape[i];
- }
- }
- } else {
- for (auto i : param.axis) {
- if (i < 0) {
- i = src_shape.ndim() + i;
- }
- CHECK_LT(i, target_shape.ndim())
- << "Axis " << i << " exceeds dimension "
-        << target_shape.ndim() << " of target_shape.";
- end_idx[i] = target_shape[i];
- CHECK_LE(end_idx[i], src_shape[i])
- << "End index of axis " << i << " exceeds input shape: "
- << end_idx[i] << " vs " << src_shape[i];
- }
- }
- TShape out_shape = TShape(std::move(end_idx));
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape);
- return true;
-}
-
-// Adapter function to make int array.
-Array<Integer> GetIntArray(Array<Expr> arr) {
- for (size_t i = 0; i < arr.size(); ++i) {
- CHECK(!arr[i].defined() || arr[i].as<IntImm>())
- << "Expect an int array";
- }
- return Downcast<Array<Integer> >(arr);
-}
-
-NNVM_REGISTER_OP(slice_like)
-.describe(R"code(Slice the first input respect to the second input.
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data to be sliced.")
-.add_argument("slice_like", "Tensor", "Tensor with target shape")
-.set_num_inputs(2)
-.set_num_outputs(1)
-.add_arguments(SliceLikeParam::__FIELDS__())
-.set_attr_parser(ParamParser<SliceLikeParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<SliceLikeParam>)
-.set_attr<FInferShape>("FInferShape", SliceLikeShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", ElemwiseBinaryKeepLeftLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- const auto& param = nnvm::get<SliceLikeParam>(attrs.parsed);
- Array<Expr> src_shape = inputs[0]->shape;
- Array<Expr> target_shape = inputs[1]->shape;
- Array<Expr> begin_idx, end_idx, strides;
- for (size_t i = 0; i < src_shape.size(); ++i) {
- begin_idx.push_back(make_const(tvm::DataType::Int(32), 0));
- strides.push_back(make_const(tvm::DataType::Int(32), 1));
- }
- end_idx = Array<Expr>(src_shape);
- if (param.axis.ndim() == 0) {
- for (size_t i = 0; i < src_shape.size(); ++i) {
- if (i < target_shape.size()) {
- end_idx.Set(i, target_shape[i]);
- CHECK_LE(topi::GetConstInt(end_idx[i]),
- topi::GetConstInt(src_shape[i]))
- << "End index of axis " << i << " exceeds input shape: "
- << topi::GetConstInt(end_idx[i]) << " vs "
- << topi::GetConstInt(src_shape[i]);
- }
- }
- } else {
- for (int axis : param.axis) {
- if (axis < 0) {
- axis = static_cast<int>(src_shape.size()) + axis;
- }
- end_idx.Set(static_cast<size_t>(axis), target_shape[axis]);
- CHECK_LE(topi::GetConstInt(end_idx[axis]),
- topi::GetConstInt(src_shape[axis]))
- << "End index of axis " << axis << " exceeds input shape: "
- << topi::GetConstInt(end_idx[axis]) << " vs "
- << topi::GetConstInt(src_shape[axis]);
- }
- }
- return Array<Tensor>{
- topi::strided_slice(inputs[0],
- GetIntArray(begin_idx),
- GetIntArray(end_idx),
- GetIntArray(strides))
- };
-})
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"data", "slice_like"};
-})
-.set_support_level(4);
-
-// where
-inline bool WhereShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 3U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& cond_shape = in_attrs->at(0);
- const TShape& x_shape = in_attrs->at(1);
- const TShape& y_shape = in_attrs->at(2);
- CHECK_EQ(x_shape, y_shape) << "x and y must have the same shape: "
- << x_shape << " vs " << y_shape;
- if (cond_shape != x_shape) {
- CHECK_EQ(cond_shape.ndim(), 1)
- << "Shape of condition " << cond_shape
- << " must be either equal to x or has dimension of 1.";
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, x_shape);
- return true;
-}
-
-inline bool WhereInferType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(1));
- return true;
-}
-
-inline bool WhereCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- CHECK_EQ(ilayouts->size(), last_ilayouts->size());
- CHECK_EQ(olayouts->size(), 1U);
-
- for (size_t i = 0; i < ilayouts->size(); ++i) {
- const Layout& input = last_ilayouts->at(i).defined() ?
- last_ilayouts->at(i) : ilayouts->at(i);
- NNVM_ASSIGN_LAYOUT(*ilayouts, i, input);
- }
-
- return true;
-}
-
-NNVM_REGISTER_OP(where)
-.describe(R"code(
-Return the elements, either from x or y, depending on the condition.
-
-Given three ndarrays, condition, x, and y, return an ndarray with the elements
-from x or y, depending on whether the corresponding elements from condition are true or false.
-x and y must have the same shape. If condition has the same shape as x,
-each element in the output array is from x if the corresponding element
-in the condition is true, and from y if false.
-
-If condition does not have the same shape as x, it must be a 1D array whose
-size is the same as x’s first dimension size. Each row of the output array
-is from x’s row if the corresponding element from condition is true, and
-from y’s row if false.
-
-Note that all non-zero values are interpreted as True in condition.
-
-Examples::
-
- x = [[1, 2], [3, 4]]
- y = [[5, 6], [7, 8]]
- cond = [[0, 1], [-1, 0]]
- where(cond, x, y) = [[5, 2], [3, 8]]
-
-
- cond = [1, 0]
- where(cond, x, y) = [[1, 2], [7, 8]]
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("condition", "Tensor", "Condition array")
-.add_argument("x", "Tensor", "First array to be selected")
-.add_argument("y", "Tensor", "Second array to be selected")
-.set_num_inputs(3)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", WhereShape)
-.set_attr<FInferType>("FInferType", WhereInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", WhereCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{
- topi::where(inputs[0], inputs[1], inputs[2])
- };
- })
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"condition", "x", "y"};
-})
-.set_support_level(4);
-
-// gather_nd
-inline bool GatherNDInferShape(const nnvm::NodeAttrs& attrs,
- std::vector<TShape>* in_attrs,
- std::vector<TShape>* out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- const TShape& data_shape = in_attrs->at(0);
- const TShape& indices_shape = in_attrs->at(1);
- CHECK_GT(indices_shape.ndim(), 1) << "indices must have at least 2 dimensions";
- CHECK_LE(indices_shape[0], data_shape.ndim()) <<
- "dim 0 of indices must be no more than rank of data";
- std::vector<dim_t> oshape;
- for (size_t i = 1; i < indices_shape.ndim(); ++i) {
- oshape.push_back(indices_shape[i]);
- }
- for (size_t i = indices_shape[0]; i < data_shape.ndim(); ++i) {
- oshape.push_back(data_shape[i]);
- }
- if (oshape.size() == 0) {
- oshape.push_back(1);
- }
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0,
- TShape(oshape.begin(), oshape.end()));
- return true;
-}
-
-inline bool GatherNDInferType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- CHECK_EQ(in_attrs->size(), 2U);
- CHECK_EQ(out_attrs->size(), 1U);
- NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]);
- return true;
-}
-
-inline bool GatherNDCorrectLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- CHECK_EQ(ilayouts->size(), last_ilayouts->size());
- CHECK_EQ(olayouts->size(), 1U);
-
- for (size_t i = 0; i < ilayouts->size(); ++i) {
- const Layout& input = last_ilayouts->at(i).defined() ?
- last_ilayouts->at(i) : ilayouts->at(i);
- NNVM_ASSIGN_LAYOUT(*ilayouts, i, input);
- }
-
- return true;
-}
-
-NNVM_REGISTER_OP(gather_nd)
-.describe(R"code(
-Gather elements or slices from ``data`` into a tensor specified by ``indices``.
-
-The shape of output tensor is inferred from ``indices``. Given ``data`` with
-shape ``(X0, X1, ..., X_{N-1})`` and ``indices`` with shape ``(Y_0, ...,
-Y_{M-1})``, the output will have shape ``(Y_1, ..., Y_{M-1}, X_{Y_0}, ...,
-X_{N-1})`` when ``Y_0 < N``, or ``(Y_1, ..., Y_{M-1})`` when ``Y_0 == N``. The
-operator is invalid when ``Y_0 > N``.
-
-The element in output is defined as follows::
-
- output[y_1, ..., y_{M-1}, x_{Y_0}, ..., x_{N-1}] = data[indices[0, y_1, ..., y_{M-1}],
- ...,
- indices[Y_0-1, y_1, ..., y_{M-1}],
- x_{Y_0}, ..., x_{N-1}]
-
-Examples::
-
- data = [[0, 1], [2, 3]]
- indices = [[1], [0]]
- gather_nd(data, indices) = [2]
-
- data = [[0, 1], [2, 3]]
- indices = [[1, 1, 0], [0, 1, 0]]
- gather_nd(data, indices) = [2, 3, 0]
-
- data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
- indices = [[0, 1], [1, 0]]
- gather_nd(data, indices) = [[3, 4], [5, 6]]
-
-)code" NNVM_ADD_FILELINE)
-.add_argument("data", "Tensor", "Input data.")
-.add_argument("indices", "Tensor", "Indices of data")
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr<FInferShape>("FInferShape", GatherNDInferShape)
-.set_attr<FInferType>("FInferType", GatherNDInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", GatherNDCorrectLayout)
-.set_attr<FTVMCompute>(
- "FTVMCompute", [](const NodeAttrs& attrs,
- const Array<Tensor>& inputs,
- const Array<Tensor>& out_info) {
- return Array<Tensor>{
- topi::gather_nd(inputs[0], inputs[1]) };
- })
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"data", "indices"};
-})
-.set_support_level(3);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file nms.cc
- * \brief Property def of SSD non-maximum suppression operator.
- */
-
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/op.h>
-#include <nnvm/top/nn.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include "../op_common.h"
-#include "../elemwise_op_common.h"
-
-namespace nnvm {
-namespace top {
-using compiler::FTVMCompute;
-using tvm::Tensor;
-using tvm::Array;
-
-DMLC_REGISTER_PARAMETER(NonMaximumSuppressionParam);
-
-bool NMSShape(const NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- const NonMaximumSuppressionParam& param =
- nnvm::get<NonMaximumSuppressionParam>(attrs.parsed);
- CHECK_EQ(in_attrs->size(), 2U) << "Inputs: [data, valid_count]";
- TShape dshape = in_attrs->at(0);
- TShape vshape = in_attrs->at(1);
- CHECK_EQ(dshape.ndim(), 3U) << "Input data should be 3-D.";
- CHECK_EQ(vshape.ndim(), 1U) << "Input valid count should be 1-D.";
- CHECK_EQ(dshape[2], 6U) << "Data input should have shape "
- "(batch_size, num_anchors, 6).";
- CHECK_EQ(dshape[0], vshape[0]) << "batch_size mismatch.";
- out_attrs->clear();
- if (param.return_indices) {
- TShape oshape = TShape(2);
- oshape[0] = dshape[0];
- oshape[1] = dshape[1];
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
- } else {
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, dshape);
- }
- return true;
-}
-
-inline bool NMSInferType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0));
- return true;
-}
-
-inline bool NMSInferLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- static const Layout kNCHW("NCHW");
- CHECK_EQ(ilayouts->size(), 2U);
- CHECK_EQ(olayouts->size(), 1U);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kNCHW);
- return true;
-}
-
-NNVM_REGISTER_OP(non_max_suppression)
- .describe(R"doc("Non-maximum suppression."
-)doc" NNVM_ADD_FILELINE)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<NonMaximumSuppressionParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict",
- ParamGetAttrDict<NonMaximumSuppressionParam>)
-.add_arguments(NonMaximumSuppressionParam::__FIELDS__())
-.add_argument("data", "Tensor", "Input data.")
-.add_argument("valid_count", "Tensor", "Number of valid anchor boxes.")
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"data", "valid_count"};
-})
-.set_attr<FInferShape>("FInferShape", NMSShape)
-.set_attr<FInferType>("FInferType", NMSInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", NMSInferLayout)
-.set_support_level(4);
-
-} // namespace top
-} // namespace nnvm
-
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file multibox_op.cc
- * \brief Property def of SSD multibox related operators.
- */
-
-#include <tvm/expr.h>
-#include <tvm/packed_func_ext.h>
-#include <nnvm/op.h>
-#include <nnvm/top/nn.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/compiler/op_attr_types.h>
-#include "../../op_common.h"
-#include "../../elemwise_op_common.h"
-
-namespace nnvm {
-namespace top {
-using compiler::FTVMCompute;
-using tvm::Tensor;
-using tvm::Array;
-
-DMLC_REGISTER_PARAMETER(MultiBoxPriorParam);
-
-bool MultiBoxPriorShape(const NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- const MultiBoxPriorParam& param = nnvm::get<MultiBoxPriorParam>(attrs.parsed);
- CHECK_EQ(in_attrs->size(), 1U) << "Inputs: [data]" << in_attrs->size();
- TShape dshape = in_attrs->at(0);
- CHECK_GE(dshape.ndim(), 4U) << "Input data should be 4D: "
- "[batch, channel, height, width]";
- int in_height = dshape[2];
-  CHECK_GT(in_height, 0) << "Input height should be > 0";
-  int in_width = dshape[3];
-  CHECK_GT(in_width, 0) << "Input width should be > 0";
- // since input sizes are same in each batch, we could share MultiBoxPrior
- TShape oshape = TShape(3);
- int num_sizes = param.sizes.ndim();
- int num_ratios = param.ratios.ndim();
- oshape[0] = 1;
- oshape[1] = in_height * in_width * (num_sizes + num_ratios - 1);
- oshape[2] = 4;
- CHECK_EQ(param.steps.ndim(), 2) << "Step ndim must be 2: (step_y, step_x)";
- CHECK_GE(param.steps[0] * param.steps[1], 0) << "Must specify both "
- "step_y and step_x";
- out_attrs->clear();
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape);
- return true;
-}
-
-inline bool MultiBoxPriorLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- static const Layout kNCHW("NCHW");
- CHECK_EQ(ilayouts->size(), 1U);
- CHECK_EQ(olayouts->size(), 1U);
- NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW);
- return true;
-}
-
-NNVM_REGISTER_OP(multibox_prior)
- .describe(R"doc("Generate prior(anchor) boxes from data, sizes and ratios."
-)doc" NNVM_ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<MultiBoxPriorParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<MultiBoxPriorParam>)
-.add_arguments(MultiBoxPriorParam::__FIELDS__())
-.add_argument("data", "Tensor", "Input data")
-.set_attr<FInferShape>("FInferShape", MultiBoxPriorShape)
-.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCorrectLayout>("FCorrectLayout", MultiBoxPriorLayout)
-.set_attr<FGradient>(
- "FGradient", [](const NodePtr& n,
- const std::vector<NodeEntry>& ograds) {
- return std::vector<NodeEntry>{
- MakeNode("zeros_like", n->attrs.name + "_zero_grad",
- {n->inputs[0]}),
- ograds[0]
- };
-})
-.set_support_level(4);
-
-DMLC_REGISTER_PARAMETER(MultiBoxTransformLocParam);
-
-bool MultiBoxTransformLocShape(const NodeAttrs& attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- CHECK_EQ(in_attrs->size(), 3U) << "Inputs: [cls_prob, loc_pred, anchor]";
- TShape cshape = in_attrs->at(0);
- TShape lshape = in_attrs->at(1);
- TShape ashape = in_attrs->at(2);
- CHECK_EQ(cshape.ndim(), 3U) << "Class probability should be 3-D.";
- CHECK_EQ(lshape.ndim(), 2U) << "Location prediction should be 2-D.";
- CHECK_EQ(ashape.ndim(), 3U) << "Anchor should be 3-D.";
- CHECK_EQ(cshape[2], ashape[1]) << "Number of anchors mismatch.";
- CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc.";
-  CHECK_GT(ashape[1], 0U) << "Number of anchors must be > 0.";
- CHECK_EQ(ashape[2], 4U);
- TShape oshape0 = TShape(3);
- oshape0[0] = cshape[0];
- oshape0[1] = ashape[1];
- oshape0[2] = 6; // [id, prob, xmin, ymin, xmax, ymax]
- TShape oshape1 = TShape(1);
- oshape1[0] = cshape[0];
- out_attrs->clear();
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape0);
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 1, oshape1);
- return true;
-}
-
-inline bool MultiBoxTransformLocLayout(const NodeAttrs& attrs,
- std::vector<Layout> *ilayouts,
- const std::vector<Layout> *last_ilayouts,
- std::vector<Layout> *olayouts) {
- CHECK_EQ(ilayouts->size(), 3U);
- CHECK_EQ(last_ilayouts->size(), 3U);
- CHECK_EQ(olayouts->size(), 2U);
- for (size_t i = 0; i < last_ilayouts->size(); ++i) {
- const Layout& last_layout = last_ilayouts->at(i);
- if (last_layout.defined()) {
- NNVM_ASSIGN_LAYOUT(*ilayouts, i, last_layout);
- }
- }
- return true;
-}
-
-inline bool MultiBoxTransformLocInferType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0));
- DTYPE_ASSIGN(out_attrs->at(1), 4U);
- return true;
-}
-
-NNVM_REGISTER_OP(multibox_transform_loc)
- .describe(R"doc("Location transformation for multibox detection."
-)doc" NNVM_ADD_FILELINE)
-.set_num_inputs(3)
-.set_num_outputs(2)
-.set_attr_parser(ParamParser<MultiBoxTransformLocParam>)
-.set_attr<FGetAttrDict>("FGetAttrDict",
- ParamGetAttrDict<MultiBoxTransformLocParam>)
-.add_arguments(MultiBoxTransformLocParam::__FIELDS__())
-.add_argument("cls_prob", "Tensor", "Class probabilities.")
-.add_argument("loc_pred", "Tensor", "Location regression predictions.")
-.add_argument("anchor", "Tensor", "Multibox prior anchor boxes")
-.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
- return std::vector<std::string>{"cls_prob", "loc_pred", "anchor"};
-})
-.set_attr<FInferShape>("FInferShape", MultiBoxTransformLocShape)
-.set_attr<FInferType>("FInferType", MultiBoxTransformLocInferType)
-.set_attr<FCorrectLayout>("FCorrectLayout", MultiBoxTransformLocLayout)
-.set_support_level(4);
-
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file reorg.cc
- */
-#include <nnvm/op.h>
-#include <nnvm/node.h>
-#include <nnvm/op_attr_types.h>
-#include <nnvm/top/nn.h>
-#include "../../op_common.h"
-#include "../../elemwise_op_common.h"
-#include "reorg.h"
-
-namespace nnvm {
-namespace top {
-
-// reorg
-DMLC_REGISTER_PARAMETER(ReorgParam);
-
-inline bool ReorgInferShape(const nnvm::NodeAttrs &attrs,
- std::vector<TShape> *in_shape,
- std::vector<TShape> *out_shape) {
-  const ReorgParam &param = nnvm::get<ReorgParam>(attrs.parsed);
- TShape dshape = in_shape->at(0);
- if (dshape.ndim() == 0)
- return false;
- NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape);
- CHECK_EQ(dshape.ndim(), 4) << "Input data should be 4D";
- CHECK_GT(param.stride, 0U) << "Stride value cannot be 0";
- TShape oshape({dshape[0], 0, 0, 0});
- oshape[1] = dshape[1] * param.stride * param.stride;
- oshape[2] = dshape[2] / param.stride;
- oshape[3] = dshape[3] / param.stride;
- NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
- return true;
-}
-
-NNVM_REGISTER_OP(yolo_reorg)
-.describe(R"(Perform reorg operation on input array based on the stride value.
-- **data**: Input is 4D array of shape (batch_size, channels, in_height, in_width).
-- **out**: Output is 4D array of shape (batch_size, channels*(stride*stride), in_height/stride, in_width/stride).
-)" NNVM_ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_support_level(5)
-.add_argument("data", "Tensor", "Data input to reorganize")
-.set_attr_parser(ParamParser<ReorgParam>)
-.add_arguments(ReorgParam::__FIELDS__())
-.set_attr<FGetAttrDict>("FGetAttrDict", ParamGetAttrDict<ReorgParam>)
-.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
-.set_attr<FInferShape>("FInferShape", ReorgInferShape);
-} // namespace top
-} // namespace nnvm
+++ /dev/null
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file reorg.h
- */
-#ifndef NNVM_TOP_VISION_YOLO_REORG_H_
-#define NNVM_TOP_VISION_YOLO_REORG_H_
-
-#include <string>
-#include <vector>
-#include <utility>
-#include <iostream>
-#include <sstream>
-
-namespace nnvm {
-namespace top {
-
-template <typename AttrType,
- bool (*is_none)(const AttrType &),
- bool (*assign)(AttrType *,
- const AttrType &),
- bool reverse_infer,
- std::string (*attr_string)(const AttrType &),
- int n_in = -1,
- int n_out = -1>
-inline bool ReorgAttr(const nnvm::NodeAttrs &attrs,
- std::vector<AttrType> *in_attrs,
- std::vector<AttrType> *out_attrs,
- const AttrType &none) {
- AttrType dattr = none;
- size_t in_size = in_attrs->size();
- size_t out_size = out_attrs->size();
- if (n_in != -1) {
- in_size = static_cast<size_t>(n_in);
- }
- if (n_out != -1) {
- out_size = static_cast<size_t>(n_out);
- }
-
- auto deduce = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
- for (size_t i = 0; i < size; ++i) {
- if (i == 0) {
- CHECK(assign(&dattr, (*vec)[i]))
- << "Incompatible attr in node " << attrs.name << " at " << i
- << "-th " << name << ": "
- << "expected " << attr_string(dattr) << ", got "
- << attr_string((*vec)[i]);
- }
- }
- };
- deduce(in_attrs, in_size, "input");
-
- auto write = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
- for (size_t i = 0; i < size; ++i) {
- CHECK(assign(&(*vec)[i], dattr))
- << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
- << name << ": "
- << "expected " << attr_string(dattr) << ", got "
- << attr_string((*vec)[i]);
- }
- };
- write(out_attrs, out_size, "output");
-
- if (is_none(dattr)) {
- return false;
- }
- return true;
-}
-
-template <int n_in, int n_out>
-inline bool ReorgShape(const NodeAttrs &attrs,
- std::vector<TShape> *in_attrs,
- std::vector<TShape> *out_attrs) {
- if (n_in != -1) {
- CHECK_EQ(in_attrs->size(), static_cast<size_t>(n_in))
- << " in operator " << attrs.name;
- }
- if (n_out != -1) {
- CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out))
- << " in operator " << attrs.name;
- }
- return ReorgAttr<TShape, shape_is_none, shape_assign, true, shape_string>(
- attrs, in_attrs, out_attrs, TShape());
-}
-
-template <int n_in, int n_out>
-inline bool ReorgType(const NodeAttrs &attrs,
- std::vector<int> *in_attrs,
- std::vector<int> *out_attrs) {
- if (n_in != -1) {
- CHECK_EQ(in_attrs->size(), static_cast<size_t>(n_in))
- << " in operator " << attrs.name;
- }
- if (n_out != -1) {
- CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out))
- << " in operator " << attrs.name;
- }
- return ReorgAttr<int, type_is_none, type_assign, true, type_string>(
- attrs, in_attrs, out_attrs, -1);
-}
-
-struct ReorgParam : public dmlc::Parameter<ReorgParam> {
- int stride;
-
- DMLC_DECLARE_PARAMETER(ReorgParam) {
- DMLC_DECLARE_FIELD(stride).set_default(1).describe("Stride value");
- }
-};
-} // namespace top
-} // namespace nnvm
-#endif // NNVM_TOP_VISION_YOLO_REORG_H_
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unittest cases for AlterOpLayout pass"""
-from nnvm import symbol as sym
-from nnvm.compiler import graph_attr
-from nnvm.top import registry as reg
-import nnvm.graph as graph
-
-def get_layouts(g):
- ldict = {}
- vlayout = g.json_attr("layout")
- entry_ptr = g.index.entry_ptr
- for i, n in enumerate(g.index.nodes):
- begin, end = entry_ptr[i], entry_ptr[i + 1]
- ldict[n["name"]] = vlayout[begin:end]
- return ldict
-
-
-def test_alter_conv2d_layout():
- data = sym.Variable("data", shape=(1, 32, 512, 512))
- conv = sym.conv2d(data, name="conv", channels=16,
- kernel_size=(3,3), padding=(1,1),
- use_bias=False, layout="NCHW")
- # split here
- convs = sym.split(conv, indices_or_sections=2)
- relus = [sym.relu(x, name="relu") for x in convs]
- relu = sym.concatenate(*relus)
- flatten = sym.flatten(relu, name="flatten")
- softmax = sym.softmax(flatten, name="softmax")
- g = graph.create(softmax)
-
- g = g.apply("CorrectLayout")
- g = graph_attr.set_dtype_inputs(g, "float32")
- g = g.apply(["InferShape", "InferType"])
- layouts_origin = get_layouts(g)
-
- @reg.register_alter_op_layout("conv2d", level=100)
- def alter_conv2d_layout(attrs, inputs, tinfos):
- new_attrs = {k : attrs[k] for k in attrs.keys()}
- new_attrs["layout"] = "NCHW16c"
- new_attrs["kernel_layout"] = "NCHW16c"
- new_attrs["name"] = "conv_alter"
- return sym.conv2d(inputs[0], inputs[1], **new_attrs)
-
- g = g.apply("AlterOpLayout")
- layouts = get_layouts(g)
-
- # check copy layouts
- for node in ["data", "relu", "flatten", "softmax", "conv_weight"]:
- assert layouts[node] == layouts_origin[node]
- assert layouts["conv_alter"] == layouts_origin["conv"]
-
-
-def test_consecutive_alter_layout():
- data = sym.Variable("data", shape=(1, 32, 512, 512))
- pool1 = sym.global_avg_pool2d(data, name="global_avg_pool2d_1", layout="NCHW")
- pool2 = sym.global_avg_pool2d(pool1, name="global_avg_pool2d_2", layout="NCHW")
- relu = sym.relu(pool2, name="relu")
-
- g = graph.create(relu)
- g = g.apply("CorrectLayout")
- g = graph_attr.set_dtype_inputs(g, "float32")
- g = g.apply(["InferShape", "InferType"])
- assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']
-
- @reg.register_alter_op_layout("global_avg_pool2d", level=100)
- def alter_global_avg_pool2d_layout(attrs, inputs, tinfos):
- new_attrs = {k : attrs[k] for k in attrs.keys()}
- new_attrs["layout"] = "NCHW16c"
- return sym.global_avg_pool2d(inputs[0], **new_attrs)
-
- g = g.apply("AlterOpLayout")
-
-    # pool1 gets replaced - output layout of pool1 is not recorded
-    # pool2 gets replaced - input layout of pool2 is not recorded
- # thus the second entry must be undefined - it can neither recover from pool1's output,
- # nor from pool2's input.
- assert g.json_attr("layout") == ['NCHW', '__undef__', 'NCHW', 'NCHW']
-
-
-def test_alter_func_return_none():
- data = sym.Variable("data", shape=(1, 32, 512, 512))
- pool1 = sym.global_max_pool2d(data, name="pool1", layout="NCHW")
- pool2 = sym.global_max_pool2d(pool1, name="pool2", layout="NCHW")
- relu = sym.relu(pool2, name="relu")
-
- g = graph.create(relu)
- g = g.apply("CorrectLayout")
- g = graph_attr.set_dtype_inputs(g, "float32")
- g = g.apply(["InferShape", "InferType"])
- assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']
-
- @reg.register_alter_op_layout("global_max_pool2d", level=100)
- def alter_global_max_pool2d_layout(attrs, inputs, tinfos):
- return None
-
- g = g.apply("AlterOpLayout")
-
-    # alter func returns None, nothing gets replaced,
- # the layouts should remain the same
- assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']
-
-
-if __name__ == "__main__":
- test_alter_conv2d_layout()
- test_consecutive_alter_layout()
- test_alter_func_return_none()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test task extraction for autotvm"""
-
-import nnvm.testing
-import nnvm.compiler
-from tvm import autotvm
-
-def get_network(name, batch_size):
- """Get the symbol definition and random weight of a network"""
- input_shape = (batch_size, 3, 224, 224)
- output_shape = (batch_size, 1000)
-
- if name == 'resnet-18':
- net, params = nnvm.testing.resnet.get_workload(num_layers=18, batch_size=batch_size)
- elif name == 'mobilenet':
- net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
- elif name == 'squeezenet v1.1':
- net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
- elif name == 'vgg-16':
- net, params = nnvm.testing.vgg.get_workload(num_layers=16, batch_size=batch_size)
- elif name == 'dcgan':
- net, params = nnvm.testing.dcgan.get_workload(batch_size=batch_size)
- input_shape = (batch_size, 100)
- else:
- raise ValueError("Unsupported network: " + name)
-
- return net, params, input_shape, output_shape
-
-def test_task_extraction():
- target = 'llvm'
- dtype = 'float32'
-
- net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d,))
- assert len(tasks) == 12
-
- net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.dense,))
- assert len(tasks) == 1
-
- net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d, nnvm.sym.dense))
- assert len(tasks) == 13
-
- net, params, input_shape, out_shape = get_network('mobilenet', batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d, nnvm.sym.dense))
- assert len(tasks) == 20
-
- net, params, input_shape, out_shape = get_network('dcgan', batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d_transpose,))
- assert len(tasks) == 4
-
-if __name__ == '__main__':
- test_task_extraction()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-import tvm
-from tvm.contrib import graph_runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.compiler.build_module import _run_graph, precompute_prune
-
-def test_compile():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.exp(y + x)
- shape = (10, 128)
- dtype = tvm.float32
- shape_dict = {"x": shape, "y": shape}
- def verify(graph, lib):
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- # get member functions
- set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
- na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- # set inputs
- set_input("x", na)
- set_input("y", nb)
- # execute
- run()
- # get outputs
- out = tvm.nd.empty(shape, dtype)
- get_output(0, out)
- tvm.testing.assert_allclose(
- out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
-
- graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
- assert graph.index.num_nodes == 3
- verify(graph, lib)
-
- with nnvm.compiler.build_config(opt_level=0):
- graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
- # print(graph.ir())
- assert graph.index.num_nodes == 4
- verify(graph, lib)
-
-def test_run():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.exp(y + x)
- shape = (10, 10)
- dtype = tvm.float32
- nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- res = _run_graph(z, {"x": nx, "y": ny})
- tvm.testing.assert_allclose(
- res[0].asnumpy(), np.exp(nx.asnumpy() + ny.asnumpy()))
-
-
-def test_precompute_prune():
- x = sym.Variable("x") + 1
- a = sym.Variable("a")
- y = sym.Variable("y")
- z = y + x + a
- shape = (10, 10)
- dtype = tvm.float32
- nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- params = {"x": nx, "a": na}
- graph, lib, params = nnvm.compiler.build(
- z, "llvm", shape={"y": ny.shape}, params=params)
- assert graph.index.num_nodes == 4
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- params["y"] = ny
- res = tvm.nd.empty(shape)
- m["load_params"](nnvm.compiler.save_param_dict(params))
- m.run()
- out = m.get_output(0, out=res)
- tvm.testing.assert_allclose(
- res.asnumpy(), nx.asnumpy() + 1 + ny.asnumpy() + na.asnumpy())
-
-
-def test_dtypes():
- x = sym.Variable("x")
- y = sym.relu(x)
- dshape = (1, 3, 32, 32)
- oshape = dshape
- for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']:
- graph, lib, _ = nnvm.compiler.build(y, 'llvm', {"x": dshape}, dtype=dtype)
- m = graph_runtime.create(graph, lib, tvm.cpu())
- if 'float' in dtype:
- data = np.random.uniform(size=dshape).astype(dtype)
- elif 'int' in dtype:
- data = np.random.randint(-127, 127, dshape).astype(dtype)
- m.run(x=data)
- data = (data > 0) * data
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), data, atol=1e-5, rtol=1e-5)
-
-def test_ndarray_output():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = x + y
- shape = (10, 10)
- dtype = tvm.float32
- nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- params = {"x": nx, "ny": ny}
- graph, lib, params = nnvm.compiler.build(
- z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params)
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- m.set_input("x", nx)
- m.set_input("y", ny)
- m.run()
- out = m.get_output(0)
- tvm.testing.assert_allclose(
- out.asnumpy(), nx.asnumpy() + ny.asnumpy())
-
-def test_ndarray_input():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = x + y
- shape = (10, 10)
- dtype = tvm.float32
- nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- params = {"x": nx, "ny": ny}
- graph, lib, params = nnvm.compiler.build(
- z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params)
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- m.set_input("x", nx)
- m.set_input("y", ny)
- in_x = tvm.nd.empty(shape, dtype)
- in_y = tvm.nd.empty(shape, dtype)
- m.get_input("x", in_x)
- m.get_input("y", in_y)
- tvm.testing.assert_allclose(nx.asnumpy(), in_x.asnumpy())
- tvm.testing.assert_allclose(ny.asnumpy(), in_y.asnumpy())
- in_nx = m.get_input("x")
- in_ny = m.get_input("y")
- tvm.testing.assert_allclose(nx.asnumpy(), in_nx.asnumpy())
- tvm.testing.assert_allclose(ny.asnumpy(), in_ny.asnumpy())
-
-def test_num_outputs():
- x = sym.Variable('x')
- z = sym.split(x, indices_or_sections=5, axis=1)
- shape = (10, 10)
- dtype = tvm.float32
- nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- params = {"x": nx}
- graph, lib, params = nnvm.compiler.build(
- z, "llvm", shape={"x": nx.shape}, params=params)
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- assert m.get_num_outputs() == 5
-
-if __name__ == "__main__":
- test_precompute_prune()
- test_compile()
- test_run()
- test_dtypes()
- test_ndarray_output()
- test_ndarray_input()
- test_num_outputs()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-
-def test_compile_cache():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.exp(y + x)
- shape = (10, 1)
- dtype = tvm.float32
- shape_dict = {"x": shape, "y": shape}
- def verify(graph, lib):
- m = graph_runtime.create(graph, lib, tvm.cpu(0))
- # get member functions
- na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
- m.run(x=na, y=nb)
- # get outputs
- out = m.get_output(0, tvm.nd.empty(shape, dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
-
- engine = nnvm.compiler.engine
- graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
- inputs = [tvm.placeholder((10,)), tvm.placeholder((10,))]
-
- gkey = nnvm.compiler.graph_key(nnvm.graph.create(z), inputs, "llvm")
- gkey2 = nnvm.compiler.graph_key(nnvm.graph.create(z), inputs + inputs, "llvm")
- gf = engine[gkey]
- assert gf is not None
- assert engine[gkey2] is None
- graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
- assert graph.index.num_nodes == 3
- verify(graph, lib)
- # Test various set external cache
- engine.clear_cache()
- engine[gkey] = gf
-
-if __name__ == "__main__":
- test_compile_cache()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unittest cases for fold_axis"""
-import tvm
-import nnvm
-import nnvm.testing.resnet
-import numpy as np
-from nnvm import symbol as sym
-from nnvm.compiler import graph_util, graph_attr
-
-def test_fold_axis_conv():
- # Before simplify
- def before(x, conv_weight, conv_bias, in_scale, out_scale, channels):
- x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
- y = sym.conv2d(x, conv_weight, conv_bias,
- channels=channels,
- kernel_size=(3, 3),
- padding=(1, 1),
- name="conv")
- y = sym.relu(y)
- y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2)
- return y
-
- def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
- conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
- conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
- conv_bias = conv_bias * out_scale
- y = sym.conv2d(x,
- conv_weight,
- conv_bias,
- channels=channels,
- kernel_size=(3, 3),
- padding=(1, 1),
- name="conv")
- y = sym.relu(y)
- return y
-
- def check(shape, channels):
- x = sym.Variable("x") + 1
- weight = sym.Variable("weight")
- bias = sym.Variable("bias")
- in_scale = sym.Variable("in_scale")
- out_scale = sym.Variable("out_scale")
- y1 = before(x, weight, bias, in_scale, out_scale, channels)
- y2 = expected(x, weight, bias, in_scale, out_scale, channels)
- ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)}
- g1 = nnvm.graph.create(y1)
- g2 = nnvm.graph.create(y2)
- graph_attr.set_shape_inputs(g1, ishape)
- g1 = g1.apply("InferShape").apply("FoldScaleAxis")
- # assert graph equals as expected
- graph_util.check_graph_equal(g1, g2)
-
- check((2, 4, 10, 10), 2)
-
-def test_fold_axis_depthwise_conv():
- # Before simplify
- def before(x, conv_weight, conv_bias, in_scale, out_scale, channels):
- x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
- y = sym.conv2d(x, conv_weight, conv_bias,
- channels=channels,
- kernel_size=(3, 3),
- padding=(1, 1),
- groups=54,
- name="depthiwise_conv")
- y = sym.relu(y)
- y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2)
- return y
-
- def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
- conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
- conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=3)
- conv_bias = conv_bias * out_scale
- y = sym.conv2d(x,
- conv_weight,
- conv_bias,
- channels=channels,
- kernel_size=(3, 3),
- padding=(1, 1),
- groups=54,
- name="depthwise_conv")
- y = sym.relu(y)
- return y
-
- def check(shape, channels):
- x = sym.Variable("x") + 1
- weight = sym.Variable("weight")
- bias = sym.Variable("bias")
- in_scale = sym.Variable("in_scale")
- out_scale = sym.Variable("out_scale")
- y1 = before(x, weight, bias, in_scale, out_scale, channels)
- y2 = expected(x, weight, bias, in_scale, out_scale, channels)
- ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)}
- g1 = nnvm.graph.create(y1)
- g2 = nnvm.graph.create(y2)
- graph_attr.set_shape_inputs(g1, ishape)
- g1 = g1.apply("InferShape").apply("FoldScaleAxis")
- # assert the transformed graph matches the expected one
- graph_util.check_graph_equal(g1, g2)
-
- check((1, 54, 63, 127), 54)
-
-def test_fold_fail():
- # Before simplify
- def before(x, scale, channels):
- y = sym.conv2d(x,
- channels=channels,
- kernel_size=(3, 3),
- padding=(1, 1),
- name="conv")
- y = y * sym.expand_dims(scale, axis=1, num_newaxis=1)
- return y
-
- def check(shape, channels):
- x = sym.Variable("x")
- bias = sym.Variable("bias")
- scale = sym.Variable("scale")
- y1 = before(x, scale, channels)
- ishape = {"x": shape, "scale": (channels,), "bias": (channels,)}
- g1 = nnvm.graph.create(y1)
- graph_attr.set_shape_inputs(g1, ishape)
- g2 = g1.apply("InferShape").apply("FoldScaleAxis")
- # fold should fail, so the graph must stay unchanged
- graph_util.check_graph_equal(g1, g2)
-
- check((2, 10, 10, 10), 10)
-
-
-def test_fold_resnet():
- batch_size = 1
- num_classes = 1000
- image_shape = (3, 224, 224)
- data_shape = (batch_size,) + image_shape
- net, params = nnvm.testing.resnet.get_workload(
- batch_size=1, image_shape=image_shape)
- ishape = {"data" : data_shape}
- graph = nnvm.graph.create(net)
- data = np.random.uniform(size=data_shape).astype("float32")
- # Initial pass to do shape/type inference
- shape, _ = graph_util.infer_shape(graph, **ishape)
- ishape.update(zip(graph.index.input_names, shape))
-
- def run_prune(graph, params, opt_level):
- # Apply optimization
- with nnvm.compiler.build_config(opt_level=opt_level):
- graph = nnvm.compiler.optimize(graph, ishape)
- graph, params = nnvm.compiler.build_module.precompute_prune(graph, params)
- params["data"] = data
- return nnvm.compiler.build_module._run_graph(graph, params)
-
- x = run_prune(graph, params, 0)
- y = run_prune(graph, params, 3)
- tvm.testing.assert_allclose(y[0].asnumpy(), x[0].asnumpy())
-
-
-if __name__ == "__main__":
- test_fold_resnet()
- test_fold_axis_conv()
- test_fold_fail()
- test_fold_axis_depthwise_conv()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unittest cases for graph pass"""
-import nnvm
-import nnvm.compiler
-from nnvm import symbol as sym
-from nnvm.compiler import graph_util, graph_attr
-
-def test_infer_attr():
- x = sym.Variable("x")
- y = x * 2
- g = nnvm.graph.create(y)
- ishape, oshape = graph_util.infer_shape(g, x=(10,20))
- assert tuple(oshape[0]) == (10, 20)
-
- itype, otype = graph_util.infer_dtype(g, x="float32")
- assert otype[0] == "float32"
-
-if __name__ == "__main__":
- test_infer_attr()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime as runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-
-def get_sym(layout, kernel_layout, channels):
- data = sym.Variable(name="data")
- data = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
- layout=layout, kernel_layout=kernel_layout, use_bias=True)
- data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2), layout=layout)
- data = sym.upsampling(data=data, scale=2, layout=layout)
- softmax_axis = 1
- if layout == "NHWC":
- softmax_axis = 3
- data = sym.softmax(data=data, axis=softmax_axis)
- return data
-
-
-def build_and_run(sym, params, data, out_shape):
- ctx = tvm.cpu(0)
- graph, lib, params = nnvm.compiler.build(sym, "llvm", shape={"data":data.shape}, params=params)
- module = runtime.create(graph, lib, ctx)
- module.set_input(**params)
- module.set_input("data", data)
- module.run()
- out = module.get_output(0, tvm.nd.empty(out_shape))
- return out.asnumpy()
-
-
-def test_nhwc():
- data_shape = (1, 3, 224, 224)
- out_channel = 8
- nchw_sym = get_sym("NCHW", "OIHW", out_channel)
- nhwc_sym = get_sym("NHWC", "HWIO", out_channel)
- conv_weight = np.random.uniform(-1, 1, (out_channel, 3, 3, 3)).astype(np.float32)
- conv_bias = np.random.uniform(-1, 1, (out_channel)).astype(np.float32)
- nchw_params = {
- "conv2d0_weight" : tvm.nd.array(conv_weight, ctx=tvm.cpu(0)),
- "conv2d0_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0))
- }
- nhwc_params = {
- "conv2d1_weight" : tvm.nd.array(conv_weight.transpose(2, 3, 1, 0), ctx=tvm.cpu(0)),
- "conv2d1_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0))
- }
-
- data = np.random.uniform(-1, 1, data_shape).astype(np.float32)
- oshape = (1, out_channel, 224, 224)
- oshape_nhwc = (1, 224, 224, out_channel)
- nchw_output = build_and_run(nchw_sym, nchw_params, data, oshape)
- nhwc_output = build_and_run(nhwc_sym, nhwc_params, data.transpose(0, 2, 3, 1), oshape_nhwc)
- tvm.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5)
-
-
-if __name__ == "__main__":
- test_nhwc()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm
-import numpy as np
-import tvm
-import topi.testing
-from tvm.contrib import graph_runtime
-from nnvm import symbol as sym
-from nnvm.compiler import graph_util, graph_attr
-from nnvm.testing import ctx_list, utils
-
-def test_ewise_injective():
- x = sym.Variable("x")
- y = x * 2
- y = sym.flatten(y) + 1
- dshape = (10, 2, 3)
- shape_dict = {"x": dshape}
- dtype = "float32"
- target = "llvm"
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- assert graph.index.num_nodes == 2
- m = graph_runtime.create(graph, lib, ctx)
- x_np = np.random.uniform(size=dshape).astype(dtype)
- m.run(x=x_np)
- out = m.get_output(0, tvm.nd.empty((10, 6)))
- tvm.testing.assert_allclose(
- out.asnumpy(), x_np.reshape(out.shape) * 2 + 1,
- atol=1e-5, rtol=1e-5)
-
-
-def test_conv_ewise_injective():
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32,
- name="y", padding=(1,1))
- y = sym.flatten(y + 1) + 1
- dtype = "float32"
- dshape = (1, 32, 18, 18)
- kshape = (32, 1, 3, 3)
- oshape = (1, 32 * 18 * 18)
- shape_dict = {"x": dshape}
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- # print(graph.ir(join_entry_attrs=["shape"]))
- assert graph.index.num_nodes == 5
- # set input
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- # get output
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.depthwise_conv2d_python_nchw(
- data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
- c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1
- c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_injective_reduce_injective():
- x = sym.Variable("x")
- x = sym.flatten(x) + 1
- y = sym.sum(x, axis=1)
- dtype = "float32"
- dshape = (32, 1, 18, 18)
- shape_dict = {"x": dshape}
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- assert graph.index.num_nodes == 2
- data = np.random.uniform(size=dshape).astype(dtype)
- m.run(x=data)
- c_np = np.sum(data.reshape(32, 18 * 18) + 1, axis=1)
- # get output
- out = m.get_output(0, tvm.nd.empty(c_np.shape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_injective_conv2d():
- channels = 16
- data = sym.Variable(name="data")
- pool = sym.global_avg_pool2d(data=data)
- weight = sym.reshape(pool, shape=[1, channels, 1, 1])
- residual = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
- layout="NCHW", kernel_layout="OIHW", use_bias=False, name="conv")
- net = weight * data + residual
- size = 56
- dtype="float32"
- dshape = (1, channels, size, size)
- kshape = (channels, channels, 3, 3)
- oshape = dshape
- shape_dict = {"data": dshape}
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
- # data, global_avg_pool, conv weight, conv op, fused elemwise add
- assert graph.index.num_nodes == 5
-
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=data, conv_weight=kernel)
- # get output
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- residual = topi.testing.conv2d_nchw_python(
- data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
- weight = np.mean(data.asnumpy(), axis=(2, 3))
- c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_concatenate_conv2d():
- ch = 3
- size = 8
- data = sym.Variable(name="data")
- concat = sym.concatenate(data, data, axis=1)
- conv = sym.conv2d(data=concat, kernel_size=(1,1), channels=ch*2, use_bias=False, name="conv")
- net = sym.elemwise_add(concat, conv)
-
- dtype="float32"
- dshape = (1, ch, size, size)
- kshape = (ch*2, ch*2, 1, 1)
- oshape = (1, ch*2, size, size)
- shape_dict = {"data": dshape}
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
- # data, conv weight, conv op, concat
- assert graph.index.num_nodes == 4
-
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=data, conv_weight=kernel)
- # get output
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
-
- concat = np.concatenate((data.asnumpy(), data.asnumpy()), axis=1)
- conv = topi.testing.conv2d_nchw_python(
- concat, kernel.asnumpy(), (1,1), 'SAME')
- ref = concat + conv
- tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
-
-
-def test_residual_block_layout_transform():
- ch = 16
- size = 32
- data = sym.Variable(name="data")
- conv1 = sym.conv2d(data=data, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv1")
- layout_transform1 = sym.__layout_transform__(data=conv1, src_layout="NCHW", dst_layout="NCHW8c")
- layout_transform2 = sym.__layout_transform__(data=layout_transform1, src_layout="NCHW8c", dst_layout="NCHW")
- conv2 = sym.conv2d(data=conv1, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv2")
- elemwise_sum = sym.elemwise_add(layout_transform2, conv2)
- out = sym.relu(elemwise_sum)
-
- dtype="float32"
- dshape = (1, ch, size, size)
- kshape = (ch, ch, 3, 3)
- oshape = (1, ch, size, size)
- shape_dict = {"data": dshape}
-
- target = "llvm" # only test on llvm since it involves NCHW8c layout
- ctx = tvm.context(target, 0)
- graph, lib, _ = nnvm.compiler.build(out, target, shape_dict)
- # data, conv1 weight, conv1, layout transform + elemwise add + relu, conv2 weight, conv2 op
- assert graph.index.num_nodes == 6
-
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel1 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- kernel2 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=data, conv1_weight=kernel1, conv2_weight=kernel2)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
-
- conv1 = topi.testing.conv2d_nchw_python(
- data.asnumpy(), kernel1.asnumpy(), (1,1), 'SAME')
- conv2 = topi.testing.conv2d_nchw_python(
- conv1, kernel2.asnumpy(), (1,1), 'SAME')
- ref = np.maximum(conv1 + conv2, 0)
- tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
-
-
-def build_and_run(sym, params, data, out_shape, target, ctx, opt_level=2):
- with nnvm.compiler.build_config(opt_level=opt_level):
- graph, lib, params = nnvm.compiler.build(sym, target, shape={"data":data.shape}, params=params)
- module = graph_runtime.create(graph, lib, ctx)
- module.set_input(**params)
- module.set_input("data", data)
- module.run()
- out = module.get_output(0, tvm.nd.empty(out_shape))
- return out.asnumpy(), graph
-
-
-def test_fuse_conv2d_elu():
- def elu(data):
- return -0.5 * sym.relu(1 - sym.exp(data)) + sym.relu(data)
-
- def get_sym(out_channel):
- data = sym.Variable(name="data")
- data = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channel, padding=(1, 1),
- layout="NCHW", kernel_layout="OIHW", use_bias=True)
- data = sym.batch_norm(data)
- data = elu(data)
- return data
-
- in_channel = 8
- out_channel = 16
- size = 64
- dshape = (1, in_channel, size, size)
- oshape = (1, out_channel, size, size)
- data = np.random.uniform(-1, 1, dshape).astype(np.float32)
-
- for target, ctx in ctx_list():
- sym1 = get_sym(out_channel)
- sym2 = get_sym(out_channel)
- _, params1 = utils.create_workload(sym1, 1, dshape[1:], seed=0)
- _, params2 = utils.create_workload(sym2, 1, dshape[1:], seed=0)
- output1, g1 = build_and_run(sym1, params1, data, oshape, target, ctx, opt_level=2)
- output2, g2 = build_and_run(sym2, params2, data, oshape, target, ctx, opt_level=0)
- tvm.testing.assert_allclose(output1, output2, rtol=1e-5, atol=1e-5)
- # data, conv weight, bias, batch norm gamma, batch norm beta, conv op
- assert g1.index.num_nodes == 6
-
-if __name__ == "__main__":
- test_injective_reduce_injective()
- test_ewise_injective()
- test_conv_ewise_injective()
- test_fuse_conv2d_elu()
- test_injective_conv2d()
- test_concatenate_conv2d()
- test_residual_block_layout_transform()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import tvm
-import nnvm
-import nnvm.compiler.optimizer as optimizer
-import nnvm.compiler.lr_scheduler as lr_scheduler
-
-from nnvm.testing.config import ctx_list
-from tvm.contrib import graph_runtime
-
-
-def helper(symbol, inputs, params, update_func, run_times, target, ctx, dtype="float32"):
- ishapes = {}
- np_inputs = {}
- params_dict = {}
- for (name, shape, s) in inputs:
- ishapes.update({name: shape})
- np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)})
- for (name, shape, s) in params:
- np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)})
- params_dict.update({name: np_inputs[name]})
-
- graph, lib, rt_params = nnvm.compiler.build(symbol, target, shape=ishapes)
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**np_inputs)
- m.set_input(**rt_params)
- for _ in range(run_times):
- m.run()
- y_np = update_func(**np_inputs)
- out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)
-
-
-def test_sgd():
- for target, ctx in ctx_list():
- data = nnvm.sym.Variable("data")
- weight = nnvm.sym.Variable("weight")
- out = nnvm.sym.elemwise_mul(data, weight ** 2)
-
- dshape = (1, 2, 3)
- wshape = dshape
-
- base_lr = 0.1
- lr_factor = 0.5
- rescale_grad = 0.2
- wd = 0.1
- clip_gradient = 0.25
-
- scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor)
- opt = optimizer.SGD(learning_rate=base_lr, lr_scheduler=scheduler,
- rescale_grad=rescale_grad, clip_gradient=clip_gradient,
- wd=wd)
- opt_sym = opt.minimize(out, var=weight)
-
- inputs = [("data", dshape, data)]
- params = [("weight", wshape, weight)]
-
- def update_func(data, weight):
- gradient_0 = data * 2 * weight * rescale_grad
- gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient)
- weight_0 = weight - base_lr * lr_factor * (gradient_0 + wd * weight)
- gradient_1 = data * 2 * weight_0 * rescale_grad
- gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient)
- weight_1 = weight_0 - base_lr * (lr_factor ** 2) * (gradient_1 + wd * weight_0)
- return weight_1
-
- helper(opt_sym, inputs, params, update_func, 2, target, ctx)
-
-
-
-def test_adam():
- for target, ctx in ctx_list():
- data = nnvm.sym.Variable("data")
- weight = nnvm.sym.Variable("weight")
- out = nnvm.sym.elemwise_mul(data, weight ** 2)
-
- dshape = (1, 2, 3)
- wshape = dshape
-
- base_lr = 0.1
- beta1 = 0.9
- beta2 = 0.999
- epsilon = 1e-8
- lr_factor = 0.5
- rescale_grad = 0.2
- wd = 0.1
- clip_gradient = 0.25
-
- scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor)
- opt = optimizer.Adam(learning_rate=base_lr, beta1=beta1, beta2=beta2, epsilon=epsilon,
- lr_scheduler=scheduler, rescale_grad=rescale_grad,
- clip_gradient=clip_gradient, wd=wd)
- opt_sym = opt.minimize(out, var=weight)
-
- inputs = [("data", dshape, data)]
- params = [("weight", wshape, weight)]
-
- def update_func(data, weight):
- rate_0 = np.sqrt(1 - beta2) / (1 - beta1)
- lr_0 = base_lr * lr_factor * rate_0
- gradient_0 = data * 2 * weight * rescale_grad
- gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient)
- m_0 = (1 - beta1) * gradient_0
- v_0 = (1 - beta2) * (gradient_0 ** 2)
- weight_0 = weight - lr_0 * (m_0 / (np.sqrt(v_0) + epsilon) + wd * weight)
- rate_1 = np.sqrt(1 - beta2 ** 2) / (1 - beta1 ** 2)
- lr_1 = base_lr * (lr_factor ** 2) * rate_1
- gradient_1 = data * 2 * weight_0 * rescale_grad
- gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient)
- m_1 = beta1 * m_0 + (1 - beta1) * gradient_1
- v_1 = beta2 * v_0 + (1 - beta2) * (gradient_1 ** 2)
- weight_1 = weight_0 - lr_1 * (m_1 / (np.sqrt(v_1) + epsilon) + wd * weight_0)
- return weight_1
-
- helper(opt_sym, inputs, params, update_func, 2, target, ctx)
-
-if __name__ == "__main__":
- test_sgd()
- test_adam()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import os
-import numpy as np
-import nnvm.compiler
-import tvm
-import json
-import base64
-from tvm._ffi.base import py_str
-from tvm import rpc
-from tvm.contrib import util, graph_runtime
-
-
-def test_save_load():
- x = np.random.uniform(size=(10, 2)).astype("float32")
- y = np.random.uniform(size=(1, 2, 3)).astype("float32")
- x[:] = 1
- y[:] = 1
- params = {"x": x, "y": y}
- param_bytes = nnvm.compiler.save_param_dict(params)
- assert isinstance(param_bytes, bytearray)
- param2 = nnvm.compiler.load_param_dict(param_bytes)
- assert len(param2) == 2
- np.testing.assert_equal(param2["x"].asnumpy(), x)
- np.testing.assert_equal(param2["y"].asnumpy(), y)
-
-
-def test_ndarray_reflection():
- x = np.random.uniform(size=(10, 2)).astype("float32")
- xx = tvm.nd.array(x)
- xnode = tvm.make.node("NDArrayWrapper", name="xx", array=xx)
- xnode2 = tvm.make.node("NDArrayWrapper", name="x2", array=xx)
- assert xnode.array.same_as(xx)
- json_str = tvm.save_json([xnode, xnode2])
- json_dict = json.loads(json_str)
- b64_str = json_dict["b64ndarrays"][0]
- decoded = py_str(base64.b64encode(base64.b64decode(b64_str)))
- assert b64_str == decoded
- xlist = tvm.load_json(json_str)
- np.testing.assert_equal(xlist[0].array.asnumpy(), xx.asnumpy())
- assert xlist[1].array == xlist[0].array
-
-
-def test_bigendian_rpc_param():
- """Test big endian rpc when there is a PowerPC RPC server available"""
- host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
- port = os.environ.get("TVM_POWERPC_TEST_PORT", 9090)
- if host is None:
- return
-
- def verify_nnvm(remote, target, shape, dtype):
- x = nnvm.sym.Variable("x")
- y = x + 1
- graph, lib, _ = nnvm.compiler.build(
- y, target,
- shape={"x": shape},
- dtype={"x": dtype})
-
- temp = util.tempdir()
- path_dso = temp.relpath("dev_lib.o")
- lib.save(path_dso)
- remote.upload(path_dso)
- lib = remote.load_module("dev_lib.o")
- a = np.random.randint(0, 256, size=shape).astype(dtype)
- a[:] = 1
- params = {"x" : a}
- ctx = remote.cpu(0)
- m = graph_runtime.create(graph, lib, ctx)
- # uses save_param_dict
- m.load_params(nnvm.compiler.save_param_dict(params))
- m.run()
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
- tvm.testing.assert_allclose(a + 1, out.asnumpy())
-
- print("Test RPC connection to PowerPC...")
- remote = rpc.connect(host, port)
- target = "llvm -mtriple=powerpc-linux-gnu"
- for dtype in ["float32", "float64", "int32", "int8"]:
- verify_nnvm(remote, target, (10,), dtype)
-
-
-
-if __name__ == "__main__":
- test_ndarray_reflection()
- test_save_load()
- test_bigendian_rpc_param()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import tvm
-from tvm import rpc
-from tvm.contrib import util, graph_runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-import numpy as np
-import time
-
-def test_rpc_executor():
- host = "localhost"
- port = 9021
- server = rpc.Server(host, port, use_popen=True)
- time.sleep(1)
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.exp(y + x)
- shape = (10, 128)
- dtype = tvm.float32
- shape_dict = {"x": shape, "y": shape}
- tmp = util.tempdir()
- lib_name = tmp.relpath("net.o")
-
- graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
- # save module
- lib.save(lib_name)
- remote = rpc.connect(host, port)
- remote.upload(lib_name)
- ctx = remote.cpu(0)
- # load remote
- rlib = remote.load_module("net.o")
-
- # Create remote module
- m = graph_runtime.create(graph, rlib, remote.cpu(0))
- # get member functions
- set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
- na = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
- nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
- # set inputs
- set_input("x", na)
- set_input("y", nb)
- # execute
- run()
- # get outputs
- out = tvm.nd.empty(shape, dtype, ctx)
- get_output(0, out)
- tvm.testing.assert_allclose(
- out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
- server.terminate()
-
-if __name__ == "__main__":
- test_rpc_executor()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Unittest cases for simplify batch_norm"""
-import nnvm
-from nnvm import symbol as sym
-from nnvm.compiler import graph_util, graph_attr
-
-def test_simplify_batchnorm():
- def simple_bn(x, gamma, beta, moving_mean, moving_var,
- axis=1, epsilon=1e-5, shape=None):
- # expect = (x - moving_mean) / sym.sqrt(moving_var + eps) * gamma + beta
- scale = sym.elemwise_mul(1 / sym.sqrt(moving_var + epsilon), gamma)
- shift = sym.elemwise_add(
- sym.elemwise_mul(sym.negative(moving_mean), scale), beta)
- # for 2D
- num_newaxis=len(shape) - axis - 1
- if num_newaxis:
- scale = sym.expand_dims(scale, axis=1, num_newaxis=num_newaxis)
- shift = sym.expand_dims(shift, axis=1, num_newaxis=num_newaxis)
- return x * scale + shift
-
-
- # Before simplify
- def check(dim, axis, nstep):
- eps = 0.01
- x = sym.Variable("x") + 1
- beta = sym.Variable("beta")
- gamma = sym.Variable("gamma")
- moving_var = sym.Variable("moving_var")
- moving_mean = sym.Variable("moving_mean")
- y1, y2 = x, sym.Variable("xx") + 1
- ishape = {"x": tuple(10 for i in range(dim))}
- for i in range(nstep):
- y1 = sym.batch_norm(
- y1 + 1, gamma, beta, moving_mean, moving_var, epsilon=eps, axis=axis)
- y1 = sym.dropout(y1)
- y2 = simple_bn(y2 + 1, gamma, beta, moving_mean, moving_var,
- epsilon=eps, axis=axis, shape=ishape["x"])
- g = nnvm.graph.create(y1)
- g2 = nnvm.graph.create(y2)
- graph_attr.set_shape_inputs(g, ishape)
- g1 = g.apply("InferShape").apply("SimplifyInference")
- # assert the simplified graph matches the expected one
- graph_util.check_graph_equal(g1, g2)
-
- check(2, 1, 1)
- check(4, 0, 3)
- check(4, 1, 2)
-
-if __name__ == "__main__":
- test_simplify_batchnorm()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm
-from nnvm import testing
-from nnvm import to_relay
-import tvm
-from tvm.relay import transform
-from tvm.relay import create_executor
-from tvm.contrib import graph_runtime
-import numpy as np
-
-def check_model(sym, shapes, dtypes, params):
- net = nnvm.graph.create(sym)
- graph_json, mod, params = nnvm.compiler.build(
- net,
- 'llvm',
- shape=shapes,
- dtype=dtypes,
- params=params)
- nnvm_rts = graph_runtime.create(graph_json, mod, tvm.cpu(0))
- inputs = {}
- for name in shapes:
- np_array = np.random.rand(*shapes[name]).astype('float32')
- inputs[name] = tvm.nd.array(np_array)
-
- nnvm_rts.set_input(**params)
- nnvm_rts.run(**inputs)
- nnvm_out = nnvm_rts.get_output(0)
- relay_model, params = to_relay.to_relay(net, shapes, dtypes, params)
- mod = tvm.relay.Module.from_expr(relay_model)
- mod = transform.InferType()(mod)
- relay_rts = create_executor(kind='graph', mod=mod, ctx=tvm.cpu(0), target='llvm')
- inputs.update(params)
- relay_out = relay_rts.evaluate()(*list(inputs.values()))
- np.testing.assert_allclose(nnvm_out.asnumpy(), relay_out.asnumpy())
-
-# def test_mlp():
-# mlp, params = testing.mlp.get_workload(1)
-# shapes = { "data": (10, 3, 224, 224) }
-# dtypes = { "data": 'float32' }
-# check_model(mlp, shapes, dtypes, params)
-
-if __name__ == "__main__":
- pass  # test_mlp is commented out above, so there is nothing to run
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-import tvm
-from tvm.contrib import graph_runtime
-
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-
-
-def test_update():
- w = sym.Variable("w")
- w2 = sym.Variable("w2")
- w = sym._assign(w, w + 1)
- w2 = sym._assign(w2, w + 1)
-
- dshape = (5, 3, 18, 18)
- shape_dict = {"w": dshape, "w2":dshape}
- dtype = "float32"
-
- def check(target, ctx):
- graph, lib, _ = nnvm.compiler.build(w2, target, shape_dict)
-
- m = graph_runtime.create(graph, lib, ctx)
-
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.set_input("w", data)
- m.run()
- out = m.get_input("w2", tvm.nd.empty(dshape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 2, rtol=1e-5)
-
- m.run()
- out = m.get_input("w2", tvm.nd.empty(dshape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 3, rtol=1e-5)
-
- for target, ctx in ctx_list():
- check(target, ctx)
-
-
-if __name__ == "__main__":
- test_update()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-import topi.testing
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-from nnvm.testing.check_computation import check_function
-
-def test_check_function():
- # test the testing function
-
- x = sym.Variable("x")
- y = sym.Variable("y")
-
- # different styles of returning gradients from the backward function
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: [head_grads, 2*head_grads],
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: (head_grads, 2*head_grads),
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads},
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: {'y': 2*head_grads},
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: [2*head_grads],
- grad_input_vars=[y],
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: 2*head_grads,
- grad_input_vars=[y],
- shape={'x': (1, 2), y: (1, 2)}, dtype='float32')
- check_function(x + 2*y, lambda x, y: x + 2*y,
- lambda x, y, head_grads: 2*head_grads,
- grad_input_vars=[y],
- shape={'x': (1, 2), y: (1, 2)}, dtype='float64')
-
- # test just numerical gradients
- # different styles of shape and dtype passing
- check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)},
- numerical_grads=True)
- check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype='float32',
- numerical_grads=True)
- check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype={x: 'float32', 'y': 'float32'},
- numerical_grads=True)
- check_function(x + 2*y, shape=(1, 2), dtype='float32',
- numerical_grads=True)
-
- # specifying variable attributes on variable creation
- # (in this case type codes must be used)
- x = sym.Variable("x", dtype=0, shape=(1, 2))
- check_function(x + 2*y, shape={y: (1, 2)}, dtype={'y': 'float32'}, numerical_grads=True)
- y = sym.Variable("y", dtype=0, shape=(1, 2))
-
- # shape overriding
- def _fwd1(x, y):
- assert x.shape == (1, 1)
- assert y.shape == (1, 2)
- return x + 2*y
- check_function(x + 2*y, _fwd1, shape={x: (1, 1)})
-
- # in_range
- def _fwd2(x, y):
- assert x.shape == (100,)
- assert (x <= 0.9).all()
- assert (x >= 0.8).all()
- return x + 2*y
- check_function(x + 2*y, _fwd2, shape=(100,), in_range=(0.8, 0.9), numerical_grads=False)
- check_function(x + 2*y, _fwd2, shape=(100,), in_range={'x': (0.8, 0.9)}, numerical_grads=False)
- check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0],
- in_range={'head_grads_0': (1.0, 1.0)})
- # explicit passing of values
- check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0],
- values={'head_grads_0': np.full((1, 2), 1.0)})
-
- # check that the function reports errors
- def _check_function_must_fail(*args, **kwargs):
- error = AssertionError
- if 'error' in kwargs:
- error = kwargs['error']
- del kwargs['error']
- try:
- check_function(*args, quiet=True, **kwargs)
- except error:
- pass
- else:
- raise AssertionError("check_function didn't raise an exception")
-
- _check_function_must_fail(x + 2*y, error=ValueError)
- _check_function_must_fail(x + 2*y, lambda x, y: x + y)
- _check_function_must_fail(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0])
- _check_function_must_fail(sym.block_grad(x + 2*y), numerical_grads=True)
- _check_function_must_fail(x*x, numerical_grads=True,
- numerical_grads_params={'atol': 0.0, 'rtol': 0.0})
- _check_function_must_fail(sym.log(-x*x), numerical_grads=True, error=ValueError)
-
- # different styles of returning results from the forward function
- check_function(x + 2*y, lambda x, y: [x + 2*y], numerical_grads=False)
- _check_function_must_fail(x + 2*y, lambda x, y: [x + 2*y, x], numerical_grads=False,
- error=ValueError)
- _check_function_must_fail(x + 2*y, lambda x, y: [], numerical_grads=False,
- error=ValueError)
-
- # multiple outputs
- z = sym.Group([2*x + y, x + 2*y])
- check_function(z, lambda x, y: [2*x + y, x + 2*y])
- check_function(z, lambda x, y: (2*x + y, x + 2*y))
- check_function(z, backward=lambda x, y, head_grads: [2*head_grads[0] + head_grads[1],
- head_grads[0] + 2*head_grads[1]])
- _check_function_must_fail(z, backward=lambda x, y, head_grads: [2*head_grads[0],
- 2*head_grads[1]])
- check_function(z, backward=lambda x, y, head_grads: [head_grads[1], 2*head_grads[1]],
- in_range={'head_grads_0': (0, 0)})
- check_function(z, numerical_grads=True)
-
- z = sym.Group([sym.block_grad(2*x + y), x + 2*y])
- check_function(z, lambda x, y: [2*x + y, x + 2*y], numerical_grads=False)
- _check_function_must_fail(z, lambda x, y: [2*x + y, x + 2*y])
- _check_function_must_fail(z, numerical_grads=True)
-
- z = sym.Group([2*x + y, sym.block_grad(x + 2*y)])
- _check_function_must_fail(z, numerical_grads=True)
-
- z = sym.Group([2*x + y, x + 2*y, x, y, sym.sum(x)])
- check_function(z, lambda x, y: [2*x + y, x + 2*y, x, y, np.sum(x)])
-
- # passing additional parameters to forward and backward
- def _fwd3(x, p):
- assert p == 'v'
- return x + 1
- def _bwd3(x, p, head_grads):
- assert p == 'v'
- return head_grads
- check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'})
-
- # implicitly created variables and shape/dtype inference for inputs
- x = sym.Variable("x", shape=(2, 3), dtype=0)
- b = sym.Variable("b")
- y = sym.dense(data=x, bias=b, units=4)
- # Don't check gradients on cuda because it doesn't yet support ewise after reduce
- check_function(y, exclude_targets={'cuda'}, numerical_grads=True)
- check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'}, numerical_grads=True)
- check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'}, numerical_grads=True)
-
- x = sym.Variable("x")
- b = sym.Variable("b")
- w = sym.Variable("w")
- y = sym.dense(data=x, bias=b, weight=w, units=4)
- def _fwd_dense(x, w, b):
- return np.dot(x, w.T) + b
- check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'x': 'float32'}, numerical_grads=False)
- check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'w': 'float64'}, numerical_grads=False)
- _check_function_must_fail(y, _fwd_dense, shape={'x': (1,2)},
- dtype={'w': 'float64', 'b': 'float32'},
- numerical_grads=False,
- error=nnvm._base.NNVMError)
- # fails because no shape
- _check_function_must_fail(y, _fwd_dense, numerical_grads=False, error=ValueError)
- # ok because type is float32 by default
- check_function(y, _fwd_dense, shape={'x': (1,2)}, numerical_grads=False)
-
-def test_relu():
- x = sym.Variable("x")
- y = sym.relu(sym.leaky_relu(x, alpha=0.3) - 0.2)
-
- def forward(x):
- x = (x < 0) * x * 0.3 + (x > 0) * x - 0.2
- return (x > 0) * x
-
- def backward(head_grads, x):
- sub = (x < 0) * x * 0.3 + (x > 0) * x - 0.2
- return [(sub > 0).astype("float") * \
- ((x > 0).astype("float") + 0.3 * (x < 0).astype("float")) * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-def test_prelu_nchw():
- x = sym.Variable("x")
- a = sym.Variable("a")
- y = sym.prelu(data=x, alpha=a)
-
- def forward(x, a):
- return (x < 0) * (x * a.reshape(3, 1, 1)) + (x>=0) * x
-
- shape = {'x': (1, 3, 32, 32), 'a': (3,)}
- check_function(y, forward, shape=shape)
-
-def test_prelu_nhwc():
- x = sym.Variable("x")
- a = sym.Variable("a")
- y = sym.prelu(data=x, alpha=a, axis=3)
-
- def forward(x, a):
- return (x < 0) * (x * a.reshape(1, 1, 3)) + (x>=0) * x
-
- shape = {'x': (1, 32, 32, 3), 'a': (3,)}
- check_function(y, forward, shape=shape)
-
-def test_sym_scalar_pow():
- scalar = 3
- x = sym.Variable("x")
- y = x**scalar
-
- def forward(x):
- return x**scalar
-
- def backward(head_grads, x):
- return [scalar * x**(scalar - 1) * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_scalar_sym_pow():
- scalar = 3
- x = sym.Variable("x")
- y = scalar**x
-
- def forward(x):
- return scalar**x
-
- def backward(head_grads, x):
- return [np.log(scalar) * scalar**x * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_exp():
- x = sym.Variable("x")
- y = sym.exp(x)
-
- def forward(x):
- return np.exp(x)
-
- def backward(head_grads, x):
- return [np.exp(x) * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_log():
- x = sym.Variable("x")
- y = sym.log(x)
-
- def forward(x):
- return np.log(x)
-
- def backward(head_grads, x):
- return [1. / x * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, in_range=(0.002, 2.0), shape=shape)
-
-
-def test_tanh():
- x = sym.Variable("x")
- y = sym.tanh(x)
-
- def forward(x):
- return np.sinh(x) / np.cosh(x)
-
- def backward(head_grads, x):
- y_np = forward(x)
- return [(1 - y_np**2) * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_sigmoid():
- x = sym.Variable("x")
- y = sym.sigmoid(x)
-
- def forward(x):
- return 1.0 / (1.0 + np.exp(-x))
-
- def backward(head_grads, x):
- y_np = forward(x)
- return [y_np *(1 - y_np) * head_grads]
-
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_softmax():
- x = sym.Variable("x")
- y = sym.softmax(x)
-
- def forward(x):
- return topi.testing.softmax_python(x)
-
- def backward(head_grads, x):
- y = topi.testing.softmax_python(x)
- grad = y * (head_grads - np.sum(y * head_grads, axis=1, keepdims=True))
- return [grad]
-
- check_function(y, forward, backward,
- shape={'x': (10, 1000)}, numerical_grads=False)
- check_function(y, forward, backward,
- shape={'x': (2, 10)})
-
-
-def test_log_softmax():
- x = sym.Variable("x")
- y = sym.log_softmax(x)
-
- def forward(x):
- return topi.testing.log_softmax_python(x)
-
- def backward(head_grads, x):
- y = topi.testing.log_softmax_python(x)
- grad = head_grads - np.exp(y) * np.sum(head_grads, axis=1, keepdims=True)
- return [grad]
-
- check_function(y, forward, backward,
- shape={'x': (10, 1000)}, numerical_grads=False)
- check_function(y, forward, backward,
- shape={'x': (2, 10)})
-
-
-def test_dense():
- x = sym.Variable("x", shape=(10, 100))
- w = sym.Variable("dense_weight", shape=(3, 100))
- b = sym.Variable("dense_bias", shape=(3,))
- y = sym.dense(x, w, b, use_bias=True, units=3, name="dense")
- y = sym.flatten(y)
-
- def forward(x, dense_weight, dense_bias):
- return np.dot(x, dense_weight.T) + dense_bias
- shape = {
- 'x': (10, 100),
- 'w': (3, 100),
- 'b': (3,)
- }
- # Don't check gradients on cuda because it doesn't yet support ewise after reduce
- check_function(y, forward, shape=shape,
- exclude_targets={'cuda'}, numerical_grads=True)
- check_function(y, forward, shape=shape,
- only_targets={'cuda'}, numerical_grads=False)
-
-
-def test_batchnorm():
- x = sym.Variable("x")
- beta = sym.Variable("beta")
- gamma = sym.Variable("gamma")
- moving_var = sym.Variable("moving_var")
- moving_mean = sym.Variable("moving_mean")
- eps = 1e-5
- y = sym.batch_norm(
- x, gamma, beta, moving_mean, moving_var, epsilon=eps)
-
- def forward(x, gamma, beta, moving_mean, moving_var):
- return (x - moving_mean) / np.sqrt(moving_var + eps) * gamma + beta
-
- shape = {
- 'x': (10, 20),
- 'gamma': (20,),
- 'beta': (20,),
- 'moving_mean': (20,),
- 'moving_var': (20,)
- }
-
- check_function(y, forward, in_range=(0.001, 1.0), shape=shape)
-
-
-def verify_concatenate(ishape, axis):
- x = [sym.Variable("x%d" % i, shape=ishape[i]) for i in range(len(ishape))]
- y = sym.concatenate(*x, axis=axis) + 1
-
- def forward(**kwargs):
- return np.concatenate(list(kwargs.values()), axis=axis) + 1
-
- check_function(y, forward)
-
-
-def test_concatenate():
- verify_concatenate([(2, 3, 4), (1, 3, 4)], axis=0)
- verify_concatenate([(2, 4), (2, 7)], axis=1)
-
-
-def verify_split(ishape, indices_or_sections, axis):
- x = sym.Variable("x", shape=ishape)
- y = sym.split(x, indices_or_sections=indices_or_sections, axis=axis)
-
- def forward(x):
- return np.split(x, indices_or_sections, axis=axis)
-
- check_function(y, forward)
-
-
-def test_split():
- verify_split((2, 3), 2, axis=0)
- verify_split((5, 3), [3], axis=0)
- verify_split((5, 9, 3), [3, 4], axis=1)
-
-def verify_strided_slice(ishape, begin, end, strideinp=None):
- stride = strideinp if strideinp else [1, 1, 1]
- x = sym.Variable("x", shape=ishape)
- if strideinp:
- y = sym.strided_slice(x, begin = begin, end = end, stride = stride) + 1
- else:
- y = sym.strided_slice(x, begin = begin, end = end) + 1
-
- for i in range(len(begin), 3):
- begin.append(0)
- for i in range(len(end), 3):
- end.append(ishape[i])
-
- def test_forward(x):
- return x[begin[0]:end[0]:stride[0],
- begin[1]:end[1]:stride[1], begin[2]:end[2]:stride[2]] + 1
-
- check_function(y, test_forward)
-
-def test_strided_slice():
- verify_strided_slice((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2])
- verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1])
- verify_strided_slice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1])
- verify_strided_slice((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2])
- verify_strided_slice((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1])
- verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3])
- verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 1000, 3])
- verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4])
- verify_strided_slice((3, 4, 3), [1, 1], [4, 4, 3])
-
-def verify_take(src_shape, indices_src, axis=None):
- src_dtype = "float32"
- indices_dtype = "int32"
- indices_src = np.array(indices_src, dtype=indices_dtype)
- a = sym.Variable("a", shape=src_shape)
- indices = sym.Variable("indices", shape=indices_src.shape)
- y = sym.take(a, indices, axis=axis)
-
- def forward(a, indices):
- return np.take(a, indices=indices, axis=axis)
-
- a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape)
-
- check_function(y, forward,
- dtype={'a': src_dtype, 'indices': indices_dtype},
- values={'a': a_src, 'indices': indices_src})
-
-def test_take():
- verify_take((4,), [1])
- verify_take((4,), [[0,1,2,3]])
- verify_take((3,3,3), [[11,25]])
- verify_take((4,), [[0,1],[2,3]])
- verify_take((4,), [1], 0)
- verify_take((2,2), [[[1,0],[0,1]]], 0)
- verify_take((2,2), [[[1,0],[0,1]]], 1)
- verify_take((4,3,5,6), [[2,1,0,0]], -2)
-
-
-def verify_squeeze(shape, axis):
- x = sym.Variable("x")
- if axis is not None:
- y = sym.squeeze(x, axis=axis)
- else:
- y = sym.squeeze(x)
- y = y + 1
-
- def forward(x):
- return np.squeeze(x, axis=axis) + 1
-
- def backward(head_grads, x):
- return [np.reshape(head_grads, x.shape)]
-
- check_function(y, forward, backward, shape=shape)
-
-
-def test_squeeze():
- verify_squeeze((1, 3, 2, 5), None)
- verify_squeeze((1, 3, 1), axis=0)
- verify_squeeze((1, 3, 2, 5, 1), axis=-1)
-
-
-def test_pad():
- x = sym.Variable("x")
- y = sym.pad(x, pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), pad_value=1.)
-
- def forward(x):
- return np.pad(x,
- pad_width=((0, 0), (0, 0), (0, 1), (2, 3)),
- mode='constant', constant_values=1.)
-
- shape = {'x': (1, 3, 28, 28)}
- check_function(y, forward, shape=shape)
-
-def verify_lrn(ishape, size, axis, bias, alpha, beta):
- x = sym.Variable("x", shape=ishape)
- y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta)
-
- def forward1(x):
- return topi.testing.lrn_python(x, size, axis, bias, alpha, beta)
-
- check_function(y, forward1)
-
- def forward2(x):
- y = forward1(x)
- return (y > 0)*y
-
- # Check the LRN op followed by an elementwise relu
- check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)})
-
-def verify_l2_normalize(ishape, eps, axis):
- x = sym.Variable("x", shape=ishape)
- y = sym.l2_normalize(x, eps=eps, axis=axis)
-
- def forward1(x):
- return topi.testing.l2_normalize_python(x, eps, axis)
-
- check_function(y, forward1)
-
- def forward2(x):
- y = forward1(x)
- return (y > 0)*y
-
- # Check the L2 normalization op followed by an elementwise relu
- check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)})
-
-def test_lrn():
- verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5)
- verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75)
-
-def test_l2_normalize():
- verify_l2_normalize((1, 3, 20, 20), 0.001, (1,))
- verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2))
-
-def verify_gather_nd(src_shape, indices_src):
- src_dtype = "float32"
- indices_dtype = "int32"
- indices_src = np.array(indices_src, dtype=indices_dtype)
- a = sym.Variable("a", shape=src_shape)
- indices = sym.Variable("indices", shape=indices_src.shape)
- y = sym.gather_nd(a, indices)
-
- def forward(a, indices):
- return topi.testing.gather_nd_python(a, indices)
-
- a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape)
-
- check_function(y, forward,
- dtype={'a': src_dtype, 'indices': indices_dtype},
- values={'a': a_src, 'indices': indices_src})
-
-def test_gather_nd():
- verify_gather_nd((4,), [[1]])
- verify_gather_nd((4,), [[1, 3, 2]])
- verify_gather_nd((2, 3), [[1]])
- verify_gather_nd((2, 3), [[1], [0]])
- verify_gather_nd((2, 3), [[1, 0], [0, 2]])
- verify_gather_nd((2, 3, 4), [[1, 0], [0, 2]])
- verify_gather_nd((2, 3, 4), [[1, 0], [0, 2], [3, 1]])
- verify_gather_nd((2, 3, 4), [[[1, 0], [0, 1]], [[0, 2], [1, 2]],
- [[3, 1], [0, 2]]])
- verify_gather_nd((2, 3, 4, 5), [[1, 0], [0, 2]])
- verify_gather_nd((2, 3, 4, 5), [[1, 0], [2, 1], [3, 2], [4, 2]])
-
-if __name__ == "__main__":
- test_check_function()
- test_split()
- test_concatenate()
- test_log_softmax()
- test_batchnorm()
- test_dense()
- test_relu()
- test_prelu_nchw()
- test_prelu_nhwc()
- test_sym_scalar_pow()
- test_scalar_sym_pow()
- test_exp()
- test_log()
- test_tanh()
- test_sigmoid()
- test_softmax()
- test_squeeze()
- test_pad()
- test_take()
- test_lrn()
- test_l2_normalize()
- test_strided_slice()
- test_gather_nd()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-import tvm
-from tvm.contrib import graph_runtime
-import topi
-import topi.testing
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-
-
-def test_conv2d():
- def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict, padding):
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.conv2d_nchw_python(
- data.asnumpy(), kernel.asnumpy(), 1, padding)
- c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=10, kernel_size=(3,3),
- name="y", padding=(1,1))
- dtype = "float32"
- dshape = (1, 3, 18, 18)
- kshape = (10, 3, 3, 3)
- oshape = (1, 10, 18, 18)
- shape_dict = {"x": dshape}
- run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1,1))
-
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=10, kernel_size=(1,3),
- name="y", padding=(0,1))
- dtype = "float32"
- dshape = (1, 3, 224, 224)
- kshape = (10, 3, 1, 3)
- oshape = (1, 10, 224, 224)
- shape_dict = {"x": dshape}
- run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0,1))
-
-
-def test_mixed_precision():
- x = sym.Variable("x")
- dtype = "int8"
- out_dtype="int32"
- y = sym.conv2d(x,
- channels=10,
- kernel_size=(3,3),
- name="y",
- padding=(1,1),
- use_bias=False,
- out_dtype="int32")
- dshape = (1, 3, 18, 18)
- kshape = (10, 3, 3, 3)
- oshape = (1, 10, 18, 18)
- shape_dict = {"x": dshape}
- dtype_dict = {"x": dtype}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(-127, 127, size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(-127, 127, size=kshape).astype(dtype))
- m.run(x=data, y_weight=kernel)
- out = m.get_output(0, tvm.nd.empty(oshape, out_dtype))
- c_np = topi.testing.conv2d_nchw_python(
- data.asnumpy().astype(out_dtype),
- kernel.asnumpy().astype(out_dtype), 1, 1)
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_dilated_conv2d():
- dilation = 3
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=10, kernel_size=(3, 3), dilation=(dilation, dilation),
- name="y", padding=(1, 1))
- dtype = "float32"
- dshape = (1, 3, 18, 18)
- kshape = (10, 3, 3, 3)
- oshape = (1, 10, 14, 14)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
- kernel_np = np.random.uniform(size=kshape).astype(dtype)
- kernel = tvm.nd.array(kernel_np)
- dkernel_np = topi.testing.dilate_python(kernel_np, (1, 1, dilation, dilation))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.conv2d_nchw_python(
- data.asnumpy(), dkernel_np, 1, 1)
- c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_grouped_conv2d_nchw():
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32,
- name="y", padding=(1,1))
- dtype = "float32"
- dshape = (1, 32, 18, 18)
- kshape = (32, 1, 3, 3)
- oshape = (1, 32, 18, 18)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.depthwise_conv2d_python_nchw(
- data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
- c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-def test_grouped_conv2d_nhwc():
- x = sym.Variable("x")
- y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32,
- name="y", padding=(1,1), layout="NHWC", kernel_layout ='HWOI')
- dtype = "float32"
- dshape = (1, 18, 18, 32)
- kshape = (3, 3, 32, 1)
- oshape = (1, 18, 18, 32)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[2]).astype(dtype))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.depthwise_conv2d_python_nhwc(
- data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
- c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2])
- tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
-
-
-def test_conv2d_transpose():
- x = sym.Variable("x")
- y = sym.conv2d_transpose(x, channels=10, kernel_size=(3,3), strides=(2,2),
- name="y", padding=(1,1), output_padding=(2,2))
- dtype = "float32"
- dshape = (1, 3, 18, 18)
- kshape = (3, 10, 3, 3)
- oshape = (1, 10, 37, 37)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
- bias = tvm.nd.array(np.random.uniform(size=kshape[1]).astype(dtype))
- m.run(x=data, y_weight=kernel, y_bias=bias)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- c_np = topi.testing.conv2d_transpose_nchw_python(
- data.asnumpy(), kernel.asnumpy(), 2, 1)
- c_np = c_np + bias.asnumpy().reshape(kshape[1], 1, 1)
- d_np = np.zeros(shape=oshape)
- d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np
- tvm.testing.assert_allclose(out.asnumpy(), d_np, rtol=1e-5)
-
-
-def test_max_pool2d():
- x = sym.Variable("x")
- y = sym.max_pool2d(x, pool_size=(2,2), strides=(2,2),
- padding=(0,0), name="y", ceil_mode=True)
- dtype = "float32"
- dshape = (1, 3, 28, 28)
- oshape = (1, 3, 14, 14)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = np.max(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5))
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-
-def test_avg_pool2d():
- x = sym.Variable("x")
- y = sym.avg_pool2d(x, pool_size=(2,2), strides=(2,2), padding=(0,0), name="y")
- dtype = "float32"
- dshape = (1, 3, 28, 28)
- oshape = (1, 3, 14, 14)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = np.mean(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5))
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-
-def test_avg_pool2d_no_count_pad():
- kh, kw = (4, 4)
- sh, sw = (2, 2)
- ph, pw = (2, 2)
-
- x = sym.Variable("x")
- y = sym.avg_pool2d(x, pool_size=(kh, kw), strides=(sh, sw), padding=(ph, pw),
- name="y", count_include_pad=False)
- dtype = "float32"
- n = 1
- (ic, ih, iw) = (3, 28, 28)
- (oc, oh, ow) = (3, 15, 15)
-
- a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype)
- pad_np = np.zeros(shape=(n, ic, ih+2*ph, iw+2*pw)).astype(dtype)
- no_zero = (range(n), range(ic), (range(ph, ih+ph)), (range(pw, iw+pw)))
- pad_np[np.ix_(*no_zero)] = a_np
- b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype)
-
- for i in range(oh):
- for j in range(ow):
- pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3))
- b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw],
- axis=(2,3)) / np.maximum(pad_count, 1)
- b_np = np.maximum(b_np, 0.0)
- shape_dict = {"x": (n, ic, ih, iw)}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(a_np)
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty((n, oc, oh, ow), dtype))
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-
-def test_global_max_pool2d():
- x = sym.Variable("x")
- y = sym.global_max_pool2d(x, name="y")
- dtype = "float32"
- dshape = (1, 1024, 7, 7)
- oshape = (1, 1024, 1, 1)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = np.max(data.asnumpy(), axis=(2,3), keepdims=True)
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-
-def test_global_avg_pool2d():
- x = sym.Variable("x")
- y = sym.global_avg_pool2d(x, name="y")
- dtype = "float32"
- dshape = (1, 1024, 7, 7)
- oshape = (1, 1024, 1, 1)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = np.mean(data.asnumpy(), axis=(2,3), keepdims=True)
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-
-def test_upsampling_nearest_neighbor():
- x = sym.Variable("x")
- scale = 2
- y = sym.upsampling(x, scale=scale, name="y")
- dtype = "float32"
- dshape = (1, 16, 32, 32)
- oshape = (1, 16, 32*scale, 32*scale)
- shape_dict = {"x": dshape}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
- m = graph_runtime.create(graph, lib, ctx)
- a_np = np.random.uniform(size=dshape).astype(dtype)
- data = tvm.nd.array(a_np)
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = topi.testing.upsampling_python(a_np, (scale, scale), "NCHW")
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
-
-def test_upsampling_bilinear():
- x = sym.Variable("x")
- scale = 2
- y = sym.upsampling(x, scale=scale, method="BILINEAR", name="y", layout="NCHW")
- dtype = "float32"
- dshape = (1, 4, 32, 32)
- oshape = (1, 4, 32*scale, 32*scale)
- shape_dict = {"x": dshape}
- dtype_dict = {"x": dtype}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict)
- m = graph_runtime.create(graph, lib, ctx)
- a_np = np.random.uniform(size=dshape).astype(dtype)
- data = tvm.nd.array(a_np)
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = topi.testing.bilinear_resize_python(a_np, (32*scale, 32*scale), "NCHW", align_corners=False)
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
-
-def test_resize_bilinear():
- x = sym.Variable("x")
- y = sym.resize(x, size=(60, 60), method="BILINEAR", name="y", layout="NHWC", align_corners=True)
- dtype = "float32"
- dshape = (1, 32, 32, 4)
- oshape = (1, 60, 60, 4)
- shape_dict = {"x": dshape}
- dtype_dict = {"x": dtype}
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict)
- m = graph_runtime.create(graph, lib, ctx)
- a_np = np.random.uniform(size=dshape).astype(dtype)
- data = tvm.nd.array(a_np)
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(oshape, dtype))
- b_np = topi.testing.bilinear_resize_python(a_np, (60, 60), "NHWC")
- tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
-
-if __name__ == "__main__":
- test_mixed_precision()
- test_conv2d()
- test_dilated_conv2d()
- test_grouped_conv2d_nchw()
- test_grouped_conv2d_nhwc()
- test_conv2d_transpose()
- test_max_pool2d()
- test_avg_pool2d()
- test_avg_pool2d_no_count_pad()
- test_global_max_pool2d()
- test_global_avg_pool2d()
- test_upsampling_nearest_neighbor()
- test_upsampling_bilinear()
- test_resize_bilinear()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-import topi.testing
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-from nnvm.testing.check_computation import check_function
-
-def check_map(symfunc, np_func, np_backward=None, dtype="float32", rnd_min=-1, rnd_max=1):
- x = sym.Variable("x")
- y = symfunc(x)
- shape = {'x': (1, 3, 32, 32)}
- check_function(y, lambda x: np_func(x), np_backward,
- dtype=dtype, shape=shape, in_range=(rnd_min, rnd_max))
-
-
-def test_floor():
- check_map(sym.floor, np.floor)
-
-def test_ceil():
- check_map(sym.ceil, np.ceil)
-
-def test_trunc():
- check_map(sym.trunc, np.trunc)
-
-def test_round():
- check_map(sym.round, np.round)
-
-def test_abs():
- check_map(sym.abs, np.abs)
- check_map(sym.abs, np.abs, dtype = "int32")
- check_map(sym.abs, np.abs, dtype = "int8")
-
-def test_shift():
- n = 3
- for dtype in ["int32", "int8"]:
- check_map(lambda x : x >> n, lambda x: x >> n, dtype=dtype, rnd_min=-100, rnd_max=100)
- check_map(lambda x : x << n, lambda x: x << n, dtype=dtype, rnd_min=-100, rnd_max=100)
-
-if __name__ == "__main__":
- test_shift()
- test_floor()
- test_ceil()
- test_round()
- test_abs()
- test_trunc()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import math
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-import topi
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-from nnvm.testing.check_computation import check_function
-
-def verify_transpose(dshape, axes):
- x = sym.Variable("x")
- if axes:
- y = sym.transpose(x, axes=axes)
- else:
- y = sym.transpose(x)
- y = y + 1
- dtype = "float32"
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
- m = graph_runtime.create(graph, lib, ctx)
- # set input
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out_np = np.transpose(data.asnumpy(), axes=axes) + 1
- out = m.get_output(0, tvm.nd.empty(out_np.shape))
- tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
-
-def verify_reduce_explicit(dshape, data, result, fsym, oshape=None, otype='float32', **kwargs):
- """ Verify reduce operations by comparign its result with `result` """
- x = sym.Variable("x")
- y = fsym(x + 0, **kwargs)
- for target, ctx in ctx_list():
- # TODO(yuruofei): remove when cuda reduce schedule is done
- if target == 'cuda' and fsym == sym.mean:
- continue
- graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
- m = graph_runtime.create(graph, lib, ctx)
- # set input
- m.run(x=data)
- # oshape set to None means do not test the shape-correctness
- oshape = result.shape if isinstance(result, np.ndarray) else (1,) if oshape is None else oshape
- out = m.get_output(0, tvm.nd.empty(oshape, dtype=otype))
- if isinstance(result, np.ndarray):
- np.testing.assert_equal(out.asnumpy().shape, result.shape)
- tvm.testing.assert_allclose(out.asnumpy(), result, atol=1e-5, rtol=1e-5)
- else:
- tvm_out = out.asnumpy()
- assert abs(result - tvm_out) <= (1e-5 + 1e-5 * abs(tvm_out))
-
-def verify_reduce(dshape, fnp, fsym, oshape=None, otype='float32', **kwargs):
- """ Verify reduce operations by generating data at random and calling numpy
- version as reference """
- data = np.random.uniform(size=dshape).astype(otype)
- result = fnp(data + 0, **kwargs)
- verify_reduce_explicit(dshape, data, result, fsym, oshape=oshape, otype=otype, **kwargs)
-
-def verify_collapse(dshape, target_shape, fnp):
- x = sym.Variable("x", shape=dshape)
- t = sym.Variable("t", shape=target_shape)
- y = sym.collapse_sum(x, t)
- dtype = "float32"
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target,
- {"x": dshape, "t": target_shape})
- m = graph_runtime.create(graph, lib, ctx)
- data = np.random.uniform(size=dshape).astype(dtype)
- m.run(x=data)
- out = m.get_output(0, tvm.nd.empty(target_shape))
- out_np = fnp(data)
- tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
-
-
-def test_transpose():
- verify_transpose((2, 3, 4), (0, 2, 1))
- verify_transpose((2, 3, 4), None)
-
-
-def test_reduce():
-
- def _with_keepdims(func):
- """ Wrapper around numpy's argmax/argmin with `keepdims` argument supported """
- def wrapper(data, axis=None, keepdims=False):
- if not keepdims:
- return func(data, axis=axis)
- else:
- if axis is not None:
- out_shape = list(data.shape)
- out_shape[axis] = 1
- else:
- out_shape = [1 for _ in range(len(data.shape))]
- return func(data, axis=axis).reshape(out_shape)
- return wrapper
-
- verify_reduce((2, 3, 4), np.max, sym.max, axis=1, keepdims=True)
- verify_reduce((4, 4, 3), np.min, sym.min, keepdims=True)
- verify_reduce((4, 4, 3), np.sum, sym.sum, axis=(0, 2))
- verify_reduce((4, 4, 3), np.sum, sym.sum)
- verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=False)
- verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=False)
- verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=True)
- verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=True)
- verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=True)
- verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=False)
- verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1, 2), keepdims=True)
-
- data = np.array([[[1,2],[3,4]],[[3,44],[5,6]]], dtype=np.float32)
- verify_reduce_explicit([2,2,2], data, np.array([[1,1],[1,0]]), sym.argmax, otype='int32', axis=[0,2], exclude=True)
- verify_reduce_explicit([2,2,2], data, np.array([[0,0],[0,1]]), sym.argmin, otype='int32', axis=[0,2], exclude=True)
- shape = [4, 4, 3]
- for axis in [None, 0, 1, 2]:
- for keepdims in [True,False]:
- kwargs = { 'keepdims':keepdims }
- if axis is None:
- # FIXME: NNVM doesn't support setting `axis=None` explicitly.
- kwargs.update({'oshape': [1,1,1] if keepdims else [1] })
- else:
- kwargs.update({'axis': axis})
- kwargs.update({'oshape': shape[:axis]+[1]+shape[axis+1:] if keepdims else shape[:axis]+shape[axis+1:]})
-
- verify_reduce(shape, _with_keepdims(np.argmax), sym.argmax, otype='int32', **kwargs)
- verify_reduce(shape, _with_keepdims(np.argmin), sym.argmin, otype='int32', **kwargs)
-
-
-def test_collapse():
- verify_collapse((2, 3, 4), (1,), lambda x: x.sum())
- verify_collapse((2, 3, 4), (1, 1, 1), lambda x: x.sum(keepdims=True))
- verify_collapse((2, 3, 4), (1, 1), lambda x: x.sum().reshape(1, 1))
- verify_collapse((2, 3, 4), (1, 4), lambda x: x.reshape(-1, 4).sum(0, keepdims=True))
- verify_collapse((2, 3, 4), (3, 4), lambda x: x.sum(0))
- verify_collapse((2, 3, 4), (1, 3, 4), lambda x: x.sum(0, keepdims=True))
- verify_collapse((2, 3, 4), (1, 1, 4), lambda x: x.sum((0, 1), keepdims=True))
- verify_collapse((2, 3, 4), (2, 1, 4), lambda x: x.sum(1, keepdims=True))
- verify_collapse((2, 3, 4), (2, 1, 1), lambda x: x.sum((1, 2), keepdims=True))
- verify_collapse((2, 3, 4), (2, 3, 1), lambda x: x.sum(2, keepdims=True))
- verify_collapse((2, 3, 4), (2, 3, 4), lambda x: x)
-
-
-def verify_flip(ishape, axis):
- x = sym.Variable("x")
- y = sym.flip(x, axis=axis) + 1
- dtype = "float32"
- x_np = np.random.uniform(size=ishape).astype(dtype)
- res = np.flip(x_np, axis) + 1
-
- for target, ctx in ctx_list():
- # set input
- graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape})
- m = graph_runtime.create(graph, lib, ctx)
- m.run(x=x_np)
- out = m.get_output(0, tvm.nd.empty(res.shape))
- tvm.testing.assert_allclose(out.asnumpy(), res, atol=1e-5, rtol=1e-5)
-
-
-def test_flip():
- verify_flip((3, 4, 3), 1)
- verify_flip((3, 4, 3), 0)
- verify_flip((3, 4, 3), 2)
- verify_flip((3, 4, 3), -1)
- verify_flip((3, 4, 3), -3)
- verify_flip((3, 4, 3), -2)
-
-
-def verify_reshape(dshape, oshape):
- x = sym.Variable("x")
- y = sym.reshape(x, shape=oshape)
- y = y + 1
- dtype = "float32"
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
- m = graph_runtime.create(graph, lib, ctx)
- # set input
- data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
- m.run(x=data)
- out_np = data.asnumpy().reshape(oshape) + 1
- out = m.get_output(0, tvm.nd.empty(out_np.shape))
- tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
-
-
-def test_reshape():
- verify_reshape((2, 3, 4), (-1, 2, 1))
- verify_reshape((2, 3, 4), (8, 3))
- verify_reshape((4, 7), (2, 7, 2))
-
-
-def test_clip():
- x = sym.Variable("x")
- a_min = 0.2
- a_max = 0.75
- y = sym.clip(x, a_min=a_min, a_max=a_max)
-
- def forward(x):
- return np.clip(x, a_min=a_min, a_max=a_max)
-
- def backward(head_grads, x):
- mask1 = np.greater_equal(x, a_min).astype("float")
- mask2 = np.less_equal(x, a_max).astype("float")
- return [head_grads * mask1 * mask2]
-
- shape = {'x': (3, 4, 5)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_broadcast():
- a = sym.Variable("a")
- b = sym.Variable("b")
- shape = {'a': (3, 4, 5), 'b': (1, 5)}
-
- def _collapse(g):
- return g.reshape(-1, shape['b'][-1]).sum(0, keepdims=True)
-
- y = sym.broadcast_add(a, b)
- def _backward_add(head_grads, a, b):
- da = head_grads
- db = _collapse(head_grads)
- return da, db
- check_function(y, lambda a, b: a + b, _backward_add, shape=shape)
-
- y = sym.broadcast_sub(a, b)
- def _backward_sub(head_grads, a, b):
- da = head_grads
- db = -_collapse(head_grads)
- return da, db
- check_function(y, lambda a, b: a - b, _backward_sub, shape=shape)
-
- y = sym.broadcast_mul(a, b)
- def _backward_mul(head_grads, a, b):
- da = head_grads * b
- db = _collapse(head_grads * a)
- return da, db
- check_function(y, lambda a, b: a * b, _backward_mul, shape=shape)
-
- y = sym.broadcast_div(a, b)
- def _backward_div(head_grads, a, b):
- da = head_grads / b
- db = _collapse(- head_grads * a / b**2)
- return da, db
- # We avoid computing numerical derivatives too close to zero here
- check_function(y, lambda a, b: a / b, _backward_div, shape=shape, numerical_grads=False)
- check_function(y, lambda a, b: a / b, _backward_div, shape=shape,
- in_range={'b': (0.1, 20)})
-
- y = sym.broadcast_mod(a, b)
- check_function(y,
- lambda a, b: np.mod(a, b),
- in_range={'a': (0.001, 100), 'b': (1, 100)}, dtype='int32', shape=shape)
-
- y = sym.broadcast_max(a, b)
- check_function(y, lambda a, b: np.maximum(a, b), shape=shape)
-
- y = sym.broadcast_min(a, b)
- check_function(y, lambda a, b: np.minimum(a, b), shape=shape)
-
- y = sym.broadcast_pow(a, b)
- check_function(y,
- lambda a, b: np.power(a, b),
- in_range={'a': (0.001, 100), 'b': (0.001, 2)}, shape=shape)
-
- y = sym.broadcast_left_shift(a, b)
- check_function(y, lambda a, b: a << b, dtype='int32', shape=shape)
-
- y = sym.broadcast_right_shift(a, b)
- check_function(y, lambda a, b: a >> b, dtype='int32', shape=shape)
-
- y = sym.broadcast_greater(a, b)
- check_function(y, lambda a, b: np.greater(a, b), shape=shape)
-
- y = sym.broadcast_less(a, b)
- check_function(y, lambda a, b: np.less(a, b), shape=shape)
-
- y = sym.broadcast_equal(a, b)
- check_function(y, lambda a, b: np.equal(a, b),
- in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape)
-
- y = sym.broadcast_not_equal(a, b)
- check_function(y, lambda a, b: np.not_equal(a, b),
- in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape)
-
- y = sym.broadcast_greater_equal(a, b)
- check_function(y, lambda a, b: np.greater_equal(a, b),
- in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape)
-
- y = sym.broadcast_less_equal(a, b)
- check_function(y, lambda a, b: np.less_equal(a, b),
- in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape)
-
-def test_greater():
- l = sym.Variable("l")
- r = sym.Variable("r")
- y = sym.greater(l, r)
-
- def forward(l, r):
- return np.greater(l, r).astype("float32")
-
- def backward(head_grads, l, r):
- return {'l': np.zeros_like(l)}
-
- shape = {'l': (3, 4, 5), 'r': (3, 4, 5)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_less():
- l = sym.Variable("l")
- r = sym.Variable("r")
- y = sym.less(l, r)
-
- def forward(l, r):
- return np.less(l, r).astype("float32")
-
- def backward(head_grads, l, r):
- return {'l': np.zeros_like(l)}
-
- shape = {'l': (3, 4, 5), 'r': (3, 4, 5)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_reshape_like():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.reshape_like(x, y)
-
- def forward(x, y):
- return np.reshape(x, y.shape)
-
- def backward(head_grads, x, y):
- return [np.reshape(head_grads, x.shape),
- np.zeros_like(y)]
-
- shape = {'x': (3, 4, 5), 'y': (5, 4, 3)}
- check_function(z, forward, backward, shape=shape)
-
-
-def verify_expand_like(in_shape, out_shape, axis, exclude):
- x = sym.Variable("x")
- y = sym.Variable("y")
- z = sym.expand_like(x, y, axis=axis, exclude=exclude)
-
- def forward(x, y):
- odim = len(out_shape)
-
- if len(x.shape) == len(y.shape):
- return np.broadcast_to(x, y.shape)
-
- if x.shape == (1,) and len(y.shape) == odim:
- x = np.reshape(x, ())
-
- real_axis = [i if i >= 0 else i + odim for i in axis]
- real_axis = sorted(real_axis)
- if exclude:
- real_axis = list(set(range(odim)) - set(real_axis))
- for i in real_axis:
- x = np.expand_dims(x, i).astype(x.dtype)
- for i in real_axis:
- x = np.concatenate([x]*out_shape[i], axis=i).astype(x.dtype)
-
- return x
-
- def backward(head_grads, x, y):
- odim = len(out_shape)
-
- keepdims = len(x.shape) == len(y.shape)
-
- if x.shape == (1,) and len(y.shape) == odim:
- x = np.reshape(x, ())
-
- real_axis = [i if i >= 0 else i + odim for i in axis]
- real_axis = sorted(real_axis)
- if exclude:
- real_axis = list(set(range(odim)) - set(real_axis))
- return [np.sum(head_grads, axis=tuple(real_axis), keepdims=keepdims),
- np.zeros_like(y)]
-
-
- shape = {'x': in_shape, 'y': out_shape}
- check_function(z, forward, backward, shape=shape)
-
-
-def test_expand_like():
- verify_expand_like((3,), (3, 2), [1], False)
- verify_expand_like((2,), (2, 3), [1], False)
- verify_expand_like((3, 4), (3, 5, 4), [1], False)
- verify_expand_like((5, 7), (5, 6, 7, 8), [0, 2], True)
- verify_expand_like((2, 3), (2, 3), [], False)
- verify_expand_like((1,), (2, 3), [0, 1], False)
- verify_expand_like((1, 1), (2, 3), [0, 1], False)
- verify_expand_like((2, 1), (2, 3), [1], False)
- verify_expand_like((1, 3), (2, 3), [0], False)
-
-
-def verify_elemwise_sum(num_args):
- s = [sym.Variable("input" + str(i)) for i in range(num_args)]
- y = sym.elemwise_sum(*s, num_args=num_args)
-
- def forward(**inputs):
- return np.sum(np.array(list(inputs.values())), axis=0)
-
- def backward(head_grads, **inputs):
- return [head_grads] * num_args
-
- shape = {s[i]: (3, 4, 5) for i in range(num_args)}
- check_function(y, forward, backward, shape=shape)
-
-
-def test_elemwise_sum():
- verify_elemwise_sum(1)
- verify_elemwise_sum(5)
- verify_elemwise_sum(7)
-
-
-def test_block_grad():
- x = sym.Variable("x")
- y = sym.block_grad(x)
-
- def forward(x):
- return x
-
- def backward(head_grads, x):
- return [np.zeros_like(head_grads)]
-
-
- shape = {'x': (3, 4, 5)}
- # Numerical grad checking would fail for this function
- check_function(y, forward, backward, shape=shape, numerical_grads=False)
-
-
-def test_full():
- shape = (3, 4, 5)
- value = 7
- dtype = "float32"
- for target, ctx in ctx_list():
- data = sym.Variable("data", dtype=dtype)
- # full_like
- s = sym.full_like(data=data, fill_value=value, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape})
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=np.random.uniform(size=shape).astype(dtype))
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=value, dtype=dtype),
- atol=1e-5, rtol=1e-5)
- # ones_like
- s = sym.ones_like(data=data, fill_value=value, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape})
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=np.random.uniform(size=shape).astype(dtype))
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=1, dtype=dtype),
- atol=1e-5, rtol=1e-5)
- # zeros_like
- s = sym.zeros_like(data=data, fill_value=value, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape})
- m = graph_runtime.create(graph, lib, ctx)
- m.run(data=np.random.uniform(size=shape).astype(dtype))
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=0, dtype=dtype),
- atol=1e-5, rtol=1e-5)
- # full
- s = sym.full(shape=shape, dtype=dtype, fill_value=value, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target)
- m = graph_runtime.create(graph, lib, ctx)
- m.run()
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=value, dtype=dtype),
- atol=1e-5, rtol=1e-5)
- # ones
- s = sym.ones(shape=shape, dtype=dtype, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target)
- m = graph_runtime.create(graph, lib, ctx)
- m.run()
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=1, dtype=dtype),
- atol=1e-5, rtol=1e-5)
- # zeros
- s = sym.zeros(shape=shape, dtype=dtype, name="s")
- graph, lib, _ = nnvm.compiler.build(s, target)
- m = graph_runtime.create(graph, lib, ctx)
- m.run()
- out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
- tvm.testing.assert_allclose(
- out.asnumpy(),
- np.full(shape, fill_value=0, dtype=dtype),
- atol=1e-5, rtol=1e-5)
-
-def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1),
- offsets=(0.5, 0.5), clip=False):
- data = sym.Variable("data")
- out = sym.multibox_prior(data=data, sizes=sizes, ratios=ratios, steps=steps,
- offsets=offsets, clip=clip)
-
- in_height = dshape[2]
- in_width = dshape[3]
- num_sizes = len(sizes)
- num_ratios = len(ratios)
- size_ratio_concat = sizes + ratios
- steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height
- steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width
- offset_h = offsets[0]
- offset_w = offsets[1]
-
- oshape = (1, in_height * in_width * (num_sizes + num_ratios - 1), 4)
- dtype = "float32"
- np_out = np.zeros(oshape).astype(dtype)
-
- for i in range(in_height):
- center_h = (i + offset_h) * steps_h
- for j in range(in_width):
- center_w = (j + offset_w) * steps_w
- for k in range(num_sizes + num_ratios - 1):
- w = size_ratio_concat[k] * in_height / in_width / 2.0 if k < num_sizes else \
- size_ratio_concat[0] * in_height / in_width * math.sqrt(size_ratio_concat[k + 1]) / 2.0
- h = size_ratio_concat[k] / 2.0 if k < num_sizes else \
- size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0
- count = i * in_width * (num_sizes + num_ratios - 1) + j * (num_sizes + num_ratios - 1) + k
- np_out[0][count][0] = center_w - w
- np_out[0][count][1] = center_h - h
- np_out[0][count][2] = center_w + w
- np_out[0][count][3] = center_h + h
- if clip:
- np_out = np.clip(np_out, 0, 1)
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape})
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input("data", np.random.uniform(size=dshape).astype(dtype))
- m.run()
- tvm_out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype))
- tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)
-
-def test_multibox_prior():
- verify_multibox_prior((1, 3, 50, 50))
- verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5))
- verify_multibox_prior((1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True)
-
-def test_multibox_transform_loc():
- batch_size = 1
- num_anchors = 3
- num_classes = 3
- cls_prob = sym.Variable("cls_prob")
- loc_preds = sym.Variable("loc_preds")
- anchors = sym.Variable("anchors")
- transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds,
- anchor=anchors)
- out = sym.non_max_suppression(data=transform_loc_data, valid_count=valid_count, return_indices=False)
-
- # Manually create test case
- np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]])
- np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]])
- np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]])
-
- expected_np_out = np.array([[[1, 0.69999999, 0, 0, 0.10818365, 0.10008108],
- [0, 0.44999999, 1, 1, 1, 1],
- [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]])
-
- dtype = "float32"
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes),
- "loc_preds": (batch_size, num_anchors * 4),
- "anchors": (1, num_anchors, 4)})
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)})
- m.run()
- tvm_out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype))
- tvm.testing.assert_allclose(tvm_out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5)
-
-def test_non_max_suppression():
- dshape = (1, 5, 6)
- data = sym.Variable("data")
- valid_count = sym.Variable("valid_count", dtype="int32")
- iou_threshold = 0.7
- force_suppress = True
- top_k = 2
- out = sym.non_max_suppression(data=data, valid_count=valid_count, return_indices=False,
- iou_threshold=iou_threshold, force_suppress=force_suppress, top_k=top_k)
-
- np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80],
- [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79],
- [1, 0.5, 100, 60, 70, 110]]]).astype("float32")
- np_valid_count = np.array([4]).astype("int32")
- np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45],
- [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1],
- [-1, -1, -1, -1, -1, -1]]])
-
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)},
- dtype={"data": "float32", "valid_count": "int32"})
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**{"data": np_data, "valid_count": np_valid_count})
- m.run()
- tvm_out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32"))
- tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
-
-def np_slice_like(np_data, np_shape_like, axis=[]):
- begin_idx = [0 for _ in np_data.shape]
- end_idx = list(np_data.shape)
- if len(axis) > 0:
- for i in axis:
- if i < 0:
- i = len(np_data.shape) + i
- end_idx[i] = np_shape_like.shape[i]
- else:
- for i in range(len(np_data.shape)):
- if i < len(np_shape_like.shape):
- end_idx[i] = np_shape_like.shape[i]
- slice_idx = []
- for b, e in zip(begin_idx, end_idx):
- slice_idx.append(slice(b, e))
- np_result = np_data[tuple(slice_idx)]
- return np_result
-
-def verify_slice_like(np_data, np_shape_like, axis=[]):
- dtype = "float32"
- np_data = np_data.astype(dtype)
- np_shape_like = np_shape_like.astype(dtype)
- np_result = np_slice_like(np_data, np_shape_like, axis)
- data1 = sym.Variable("data1")
- data2 = sym.Variable("data2")
- net = sym.slice_like(data=data1, slice_like=data2, axis=axis)
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(net, target, {"data1": np_data.shape,
- "data2": np_shape_like.shape})
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**{"data1": np_data, "data2": np_shape_like})
- m.run()
- out = m.get_output(0, tvm.nd.empty(np_result.shape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
-
-def test_slice_like():
- np_data = np.random.uniform(size=(3, 4, 5))
- np_shape_like = np.random.uniform(size=(1, 2, 3))
- verify_slice_like(np_data, np_shape_like)
- np_data = np.random.uniform(size=(3, 4, 5))
- np_shape_like = np.random.uniform(size=(1, 2))
- verify_slice_like(np_data, np_shape_like)
- np_data = np.random.uniform(size=(3, 4, 5))
- np_shape_like = np.random.uniform(size=(1, 2, 3))
- axis = (1, 2)
- verify_slice_like(np_data, np_shape_like, axis)
- np_data = np.random.uniform(size=(3, 4, 5))
- np_shape_like = np.random.uniform(size=(1, 2, 3))
- axis = (-1, -3)
- verify_slice_like(np_data, np_shape_like, axis)
- np_data = np.random.uniform(size=(1, 3, 224, 224))
- np_shape_like = np.random.uniform(size=(1, 3, 112, 112))
- axis = (2, 3)
- verify_slice_like(np_data, np_shape_like, axis)
-
-def verify_where(condition, x, y):
- dtype = "float32"
- if len(condition.shape) == 1:
- np_out = np.array([xv if c else yv for (c,xv,yv) in zip(condition,x,y)])
- else:
- np_out = np.where(condition, x, y)
- cond_var = sym.Variable("condition")
- x_var = sym.Variable("x")
- y_var = sym.Variable("y")
- net = sym.where(cond_var, x_var, y_var)
- for target, ctx in ctx_list():
- graph, lib, _ = nnvm.compiler.build(net, target, {"condition": condition.shape,
- "x": x.shape, "y": y.shape})
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input(**{"condition": condition, "x": x, "y": y})
- m.run()
- out = m.get_output(0, tvm.nd.empty(x.shape, dtype))
- tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)
-
-def test_where():
- shape = (13, 8, 224, 224, 6)
- condition = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
- x = np.random.uniform(size=shape).astype("float32")
- y = np.random.uniform(size=shape).astype("float32")
- verify_where(condition, x, y)
- condition = np.random.uniform(low=-1, high=1, size=(shape[0],)).astype("float32")
- x = np.random.uniform(size=shape).astype("float32")
- y = np.random.uniform(size=shape).astype("float32")
- verify_where(condition, x, y)
-
-def test_argmax():
- dshape = (204800, 2)
- oshape = (1, 320, 640)
-
- dtype = "float32"
- x = sym.Variable("x", shape=dshape, dtype=dtype)
- x = sym.reshape(x, shape=(1, 320, 640, 2))
- x = sym.transpose(x, axes=(0, 3, 1, 2))
- y = sym.argmax(x, axis=1)
- target_str = "llvm"
- target = tvm.target.create(target_str)
- ctx = tvm.context(target_str, 0)
- with nnvm.compiler.build_config(opt_level=2):
- graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
- m = graph_runtime.create(graph, lib, ctx)
- data = np.random.uniform(size=dshape).astype(dtype)
- m.run(x=data)
- np_reshape = np.reshape(data, (1, 320, 640, 2))
- np_transpose = np.transpose(np_reshape, axes=(0, 3, 1, 2))
- np_argmax = np.argmax(np_transpose, axis=1)
- out = m.get_output(0)
- np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5)
-
-if __name__ == "__main__":
- test_reshape()
- test_broadcast()
- test_reduce()
- test_collapse()
- test_transpose()
- test_clip()
- test_greater()
- test_less()
- test_reshape_like()
- test_expand_like()
- test_elemwise_sum()
- test_block_grad()
- test_full()
- test_flip()
- test_multibox_prior()
- test_multibox_transform_loc()
- test_non_max_suppression()
- test_slice_like()
- test_where()
- test_argmax()
- print(nnvm.compiler.engine.dump())
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Store for caffe2 examples and common models."""
-from __future__ import absolute_import as _abs
-import os
-import importlib
-
-models = [
- 'squeezenet',
- 'resnet50',
- 'vgg19',
-]
-
-# skip download if the model already exists
-for model in models:
- try:
- locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model)
- except ImportError:
- os.system("python -m caffe2.python.models.download -i -f " + model)
- locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=unused-argument
-
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-from nnvm import symbol as sym
-from nnvm.testing.utils import create_workload
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
- net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
- left = _make_fire_conv(net, expand1x1_channels, 1, 0)
- right = _make_fire_conv(net, expand3x3_channels, 3, 1)
- # NOTE : Assume NCHW layout here
- net = sym.concatenate(left, right, axis=1)
-
- return net
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
- net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size),
- padding=(padding, padding))
- net = sym.relu(net)
- return net
-
-# Net
-def get_symbol(num_classes, version, **kwargs):
- """Get symbol of SqueezeNet
-
- Parameters
- ----------
- num_classes: int
- The number of classification results
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
- """
- assert version == '1.1', ("Unsupported SqueezeNet version {version}:"
- "1.1 expected".format(version=version))
- net = sym.Variable("data")
-
- net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2))
- net = sym.relu(net)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 32, 128, 128)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = _make_fire(net, 64, 256, 256)
-
- net = sym.dropout(net, rate=0.5)
- net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
- net = sym.relu(net)
- net = sym.global_avg_pool2d(net)
- return sym.softmax(net, axis=1)
-
-def get_workload(batch_size=1, num_classes=1000, version='1.1',
- image_shape=(3, 224, 224), dtype="float32", **kwargs):
- """Get benchmark workload for SqueezeNet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, version=version, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import nnvm
-import tvm
-from tvm.contrib import graph_runtime
-from nnvm.testing.config import ctx_list
-from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19
-
-from caffe2.python import workspace
-
-
-def get_tvm_output(model,
- input_data,
- target,
- ctx,
- output_shape,
- output_dtype='float32'):
- """ Generic function to execute and get tvm output"""
- sym, params = nnvm.frontend.from_caffe2(model.init_net, model.predict_net)
-
- # Supporting multiple inputs in caffe2 is a bit tricky,
- # because the input names can appear at the beginning or end of model.predict_net.external_input
- assert isinstance(input_data, np.ndarray)
-
- # here we use the first input blob to the first op to get the input name
- input_names = model.predict_net.op[0].input[0]
- shape_dict = {input_names: input_data.shape}
- dtype_dict = {input_names: input_data.dtype}
-
- graph, lib, params = nnvm.compiler.build(
- sym, target, shape=shape_dict, dtype=dtype_dict, params=params)
-
- m = graph_runtime.create(graph, lib, ctx)
-
- # set inputs
- m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
- m.set_input(**params)
-
- # execute
- m.run()
-
- # get outputs
- if isinstance(output_shape, list) and isinstance(output_dtype, list):
- tvm_output_list = []
- for i, s in enumerate(output_shape):
- tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
- tvm_output_list.append(tvm_output.asnumpy())
- return tvm_output_list
- else:
- tvm_output = m.get_output(0, tvm.nd.empty((output_shape),
- output_dtype))
- return tvm_output.asnumpy()
-
-
-def get_caffe2_output(model, x, dtype='float32'):
- workspace.RunNetOnce(model.init_net)
-
- input_blob = model.predict_net.op[0].input[0]
- workspace.FeedBlob(input_blob, x.astype(dtype))
- workspace.RunNetOnce(model.predict_net)
-
- output_blob = model.predict_net.external_output[0]
- c2_output = workspace.FetchBlob(output_blob)
- return c2_output
-
-
-def verify_caffe2_forward_impl(model, data_shape, out_shape):
- dtype = 'float32'
- data = np.random.uniform(size=data_shape).astype(dtype)
- c2_out = get_caffe2_output(model, data, dtype)
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, data, target, ctx, out_shape, dtype)
- tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-def test_squeezenet1_1():
- verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224),
- (1, 1000, 1, 1))
-
-
-def test_resnet50():
- verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224),
- (1, 1000))
-
-
-def test_vgg19():
- verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000))
-
-
-if __name__ == '__main__':
- test_squeezenet1_1()
- test_resnet50()
- test_vgg19()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test graph equality of caffe2 models."""
-import nnvm
-from nnvm.compiler import graph_util, graph_attr
-from model_zoo import c2_squeezenet, squeezenet
-
-def compare_graph(init, predict, nnvm_sym, ishape):
- caffe2_sym, params = nnvm.frontend.from_caffe2(init, predict)
- g1 = nnvm.graph.create(caffe2_sym)
- g2 = nnvm.graph.create(nnvm_sym)
- input_name = predict.external_input[0]
- ishapes = {input_name: ishape}
- graph_attr.set_shape_inputs(g1, ishapes)
- graph_attr.set_shape_inputs(g2, ishapes)
- g1 = g1.apply("InferShape").apply("SimplifyInference")
- g2 = g2.apply("InferShape").apply("SimplifyInference")
- graph_util.check_graph_equal(g1, g2)
-
-def test_squeeze_net():
- symbol, params = squeezenet.get_workload(version='1.1')
- compare_graph(c2_squeezenet.init_net, c2_squeezenet.predict_net, symbol, ishape=(1, 3, 224, 224))
-
-
-if __name__ == '__main__':
- test_squeeze_net()
+++ /dev/null
-*.mlmodel
-*.jpg
-*.png
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-from PIL import Image
-import numpy as np
-from tvm.contrib.download import download_testdata
-
-def get_mobilenet():
- url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
- dst = 'mobilenet.mlmodel'
- real_dst = download_testdata(url, dst, module='coreml')
- return real_dst
-
-def get_resnet50():
- url = 'https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel'
- dst = 'resnet50.mlmodel'
- real_dst = download_testdata(url, dst, module='coreml')
- return real_dst
-
-def get_cat_image():
- url = 'https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png'
- dst = 'cat.png'
- real_dst = download_testdata(url, dst, module='data')
- img = Image.open(real_dst).resize((224, 224))
- img = np.transpose(img, (2, 0, 1))[np.newaxis, :]
- return np.asarray(img)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-from coremltools.models.neural_network import NeuralNetworkBuilder
-from coremltools.models import datatypes
-
-import tvm
-from tvm.contrib import graph_runtime
-import topi
-import topi.testing
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-from nnvm import frontend
-import coremltools as cm
-import model_zoo
-
-def get_tvm_output(symbol, x, params, target, ctx,
- out_shape=(1, 1000), input_name='image', dtype='float32'):
- shape_dict = {input_name : x.shape}
- with nnvm.compiler.build_config(opt_level=2):
- graph, lib, params = nnvm.compiler.build(symbol, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
- m.set_input(**params)
- m.run()
- # get outputs
- out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
- return out.asnumpy()
-
-def run_model_checkonly(model_file, model_name=''):
- model = cm.models.MLModel(model_file)
- sym, params = nnvm.frontend.from_coreml(model)
- x = model_zoo.get_cat_image()
- for target, ctx in ctx_list():
- tvm_output = get_tvm_output(sym, x, params, target, ctx)
- print(target, ctx, model_name, 'prediction id: ', np.argmax(tvm_output.flat))
-
-def test_mobilenet_checkonly():
- model_file = model_zoo.get_mobilenet()
- run_model_checkonly(model_file, 'mobilenet')
-
-def test_resnet50_checkonly():
- model_file = model_zoo.get_resnet50()
- run_model_checkonly(model_file, 'resnet50')
-
-def run_tvm_graph(graph_def, input_data, input_name, output_shape, output_dtype='float32'):
- """ Generic function to compile on nnvm and execute on tvm """
-
- sym, params = nnvm.frontend.from_coreml(graph_def)
- target = 'llvm'
- if isinstance(input_data, list):
- shape_dict = {}
- dtype_dict = {}
- for i, e in enumerate(input_name):
- shape_dict[e] = input_data[i].shape
- dtype_dict[e] = input_data[i].dtype
- else:
- shape_dict = {input_name: input_data.shape}
- dtype_dict = {input_name: input_data.dtype}
-
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict,
- dtype=dtype_dict, params=params)
-
- ctx = tvm.cpu(0)
- from tvm.contrib import graph_runtime
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- if isinstance(input_data, list):
- for i, e in enumerate(input_name):
- m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
- else:
- m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))
-
- m.set_input(**params)
- # execute
- m.run()
- # get outputs
- if isinstance(output_shape, list) and isinstance(output_dtype, list):
- tvm_output_list = []
- for i, s in enumerate(output_shape):
- tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
- tvm_output_list.append(tvm_output.asnumpy())
- return tvm_output_list
- else:
- tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
- return tvm_output.asnumpy()
-
-def verify_AddLayerParams(input_dim, alpha=2):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.add(a_np1, a_np2) + alpha
- inputs = [('input1', datatypes.Array(*input_dim)),
- ('input2', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='Add',
- alpha=alpha,
- input_names=['input1', 'input2'],
- output_name='output',
- mode='ADD')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2],
- ['input1', 'input2'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_AddLayerParams():
- verify_AddLayerParams((1, 2, 2), 0)
- verify_AddLayerParams((1, 2, 2), 1)
- verify_AddLayerParams((1, 3, 3), 2)
-
-def verify_MultiplyLayerParams(input_dim, alpha):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.multiply(a_np1, a_np2) * alpha
- inputs = [('input1', datatypes.Array(*input_dim)),
- ('input2', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='Mul',
- alpha=alpha,
- input_names=['input1', 'input2'],
- output_name='output',
- mode='MULTIPLY')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2],
- ['input1', 'input2'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_MultiplyLayerParams():
- verify_MultiplyLayerParams((1, 2, 2), 0)
- verify_MultiplyLayerParams((1, 2, 2), 1)
- verify_MultiplyLayerParams((1, 3, 3), 2)
-
-def verify_ConcatLayerParams(input1_dim, input2_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input1_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input2_dim).astype(dtype)
-
- b_np = np.concatenate((a_np1, a_np2), axis=1)
- inputs = [('input1', datatypes.Array(*input1_dim)),
- ('input2', datatypes.Array(*input2_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='Concate',
- input_names=['input1', 'input2'],
- output_name='output',
- mode='CONCAT')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2],
- ['input1', 'input2'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_ConcatLayerParams():
- verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2))
- verify_ConcatLayerParams((1, 2, 4, 4), (1, 3, 4, 4))
-
-def verify_UpsampleLayerParams(input_dim, scale, mode):
- dtype = "float32"
-
- a_np = np.full(input_dim, 1, dtype=dtype)
- if mode == 'NN':
- b_np = topi.testing.upsampling_python(a_np, (scale, scale))
- else:
- new_h = input_dim[2] * scale
- new_w = input_dim[3] * scale
- b_np = topi.testing.bilinear_resize_python(a_np, (new_h, new_w), 'NCHW')
-
- input = [('input', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(input, output)
- builder.add_upsample(name='Upsample',
- scaling_factor_h=scale,
- scaling_factor_w=scale,
- mode=mode,
- input_name='input',
- output_name='output')
-
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_UpsampleLayerParams():
- verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN')
- verify_UpsampleLayerParams((1, 4, 6, 6), 3, 'BILINEAR')
-
-def verify_l2_normalize(input_dim, eps):
- dtype = "float32"
-
- a_np = np.random.uniform(size=input_dim).astype(dtype)
- b_np = topi.testing.l2_normalize_python(a_np, eps, 1)
-
- input = [('input', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(input, output)
- builder.add_l2_normalize(name='L2', epsilon=eps, input_name='input', output_name='output')
-
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_l2_normalize():
- verify_l2_normalize((1, 3, 20, 20), 0.001)
-
-def verify_lrn(input_dim, size, bias, alpha, beta):
- dtype = "float32"
-    axis = 1
- a_np = np.random.uniform(size=input_dim).astype(dtype)
- b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
-
- input = [('input', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(input, output)
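-    # Note: CoreML's add_lrn names the bias term k, and local_size maps to
-    # topi's size parameter.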
- builder.add_lrn(name='LRN',
- input_name='input',
- output_name='output',
- alpha=alpha,
- beta=beta,
- k=bias,
- local_size=size)
-
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_lrn():
- verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
-
-def verify_average(input_dim1, input_dim2, axis=0):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim1).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim2).astype(dtype)
-
- b_np = np.mean((a_np1, a_np2), axis=axis)
-
- inputs = [('input1', datatypes.Array(*input_dim1)),
- ('input2', datatypes.Array(*input_dim2))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='MEAN',
- input_names=['input1', 'input2'],
- output_name='output',
- mode='AVE')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2],
- ['input1', 'input2'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_average():
- verify_average((1, 3, 20, 20), (1, 3, 20, 20))
- verify_average((3, 20, 20), (1, 3, 20, 20))
- verify_average((20, 20), (1, 3, 20, 20))
-
-def verify_max(input_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
- a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.max((a_np1, a_np2, a_np3), axis=0)
-
- inputs = [('input1', datatypes.Array(*input_dim)),
- ('input2', datatypes.Array(*input_dim)),
- ('input3', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='Max',
- input_names=['input1', 'input2', 'input3'],
- output_name='output',
- mode='MAX')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2, a_np3],
- ['input1', 'input2', 'input3'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_max():
- verify_max((1, 3, 20, 20))
- verify_max((20, 20))
-
-def verify_min(input_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
- a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.min((a_np1, a_np2, a_np3), axis=0)
-
- inputs = [('input1', datatypes.Array(*input_dim)),
- ('input2', datatypes.Array(*input_dim)),
- ('input3', datatypes.Array(*input_dim))]
- output = [('output', datatypes.Array(*b_np.shape))]
- builder = NeuralNetworkBuilder(inputs, output)
- builder.add_elementwise(name='Min',
- input_names=['input1', 'input2', 'input3'],
- output_name='output',
- mode='MIN')
- model = cm.models.MLModel(builder.spec)
- for target, ctx in ctx_list():
- out = run_tvm_graph(model,
- [a_np1, a_np2, a_np3],
- ['input1', 'input2', 'input3'],
- b_np.shape,
- dtype)
- tvm.testing.assert_allclose(out, b_np, rtol=1e-5)
-
-def test_forward_min():
- verify_min((1, 3, 20, 20))
- verify_min((20, 20))
-
-if __name__ == '__main__':
- test_mobilenet_checkonly()
- test_resnet50_checkonly()
- test_forward_AddLayerParams()
- test_forward_ConcatLayerParams()
- test_forward_MultiplyLayerParams()
- test_forward_UpsampleLayerParams()
- test_forward_l2_normalize()
- test_forward_lrn()
- test_forward_average()
- test_forward_max()
- test_forward_min()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile Darknet Models
-======================
-This script tests darknet models with the NNVM frontend.
-All the required models and libraries are downloaded from the internet
-by the script.
-"""
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-from tvm.contrib.download import download_testdata
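-# Setting __test__ = False keeps test collectors such as nose from picking up
-# download_testdata as a test just because its name contains "test".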
-download_testdata.__test__ = False
-from nnvm import frontend
-from tvm.relay.testing.darknet import LAYERTYPE
-from tvm.relay.testing.darknet import __darknetffi__
-import nnvm.compiler
-
-DARKNET_LIB = 'libdarknet2.0.so'
-DARKNETLIB_URL = 'https://github.com/siju-samuel/darknet/blob/master/lib/' \
- + DARKNET_LIB + '?raw=true'
-LIB = __darknetffi__.dlopen(download_testdata(DARKNETLIB_URL, DARKNET_LIB, module='darknet'))
-
-DARKNET_TEST_IMAGE_NAME = 'dog.jpg'
-DARKNET_TEST_IMAGE_URL = 'https://github.com/siju-samuel/darknet/blob/master/data/' + DARKNET_TEST_IMAGE_NAME +'?raw=true'
-DARKNET_TEST_IMAGE_PATH = download_testdata(DARKNET_TEST_IMAGE_URL, DARKNET_TEST_IMAGE_NAME, module='data')
-
-def _read_memory_buffer(shape, data, dtype='float32'):
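-    # Copy a cffi-owned C float buffer into a numpy array of the given shape,
-    # element by element (ffi.buffer plus np.frombuffer could avoid the Python
-    # loop, at the cost of extra cffi handling).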
- length = 1
- for x in shape:
- length *= x
- data_np = np.zeros(length, dtype=dtype)
- for i in range(length):
- data_np[i] = data[i]
- return data_np.reshape(shape)
-
-def _get_tvm_output(net, data, build_dtype='float32'):
- '''Compute TVM output'''
- dtype = 'float32'
- sym, params = frontend.darknet.from_darknet(net, dtype)
-
- target = 'llvm'
- shape_dict = {'data': data.shape}
- graph, library, params = nnvm.compiler.build(sym, target, shape_dict,
- build_dtype, params=params)
- # Execute on TVM
- ctx = tvm.cpu(0)
- m = graph_runtime.create(graph, library, ctx)
- # set inputs
- m.set_input('data', tvm.nd.array(data.astype(dtype)))
- m.set_input(**params)
- m.run()
- # get outputs
- tvm_out = []
- for i in range(m.get_num_outputs()):
- tvm_out.append(m.get_output(i).asnumpy())
- return tvm_out
-
-def _load_net(cfg_url, cfg_name, weights_url, weights_name):
- cfg_path = download_testdata(cfg_url, cfg_name, module='darknet')
- weights_path = download_testdata(weights_url, weights_name, module='darknet')
- net = LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0)
- return net
-
-def verify_darknet_frontend(net, build_dtype='float32'):
- '''Test network with given input image on both darknet and tvm'''
- def get_darknet_output(net, img):
- LIB.network_predict_image(net, img)
- out = []
- for i in range(net.n):
- layer = net.layers[i]
- if layer.type == LAYERTYPE.REGION:
- attributes = np.array([layer.n, layer.out_c, layer.out_h,
- layer.out_w, layer.classes,
- layer.coords, layer.background],
- dtype=np.int32)
- out.insert(0, attributes)
- out.insert(0, _read_memory_buffer((layer.n*2, ), layer.biases))
- layer_outshape = (layer.batch, layer.out_c,
- layer.out_h, layer.out_w)
- out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
- elif layer.type == LAYERTYPE.YOLO:
- attributes = np.array([layer.n, layer.out_c, layer.out_h,
- layer.out_w, layer.classes,
- layer.total],
- dtype=np.int32)
- out.insert(0, attributes)
- out.insert(0, _read_memory_buffer((layer.total*2, ), layer.biases))
- out.insert(0, _read_memory_buffer((layer.n, ), layer.mask, dtype='int32'))
- layer_outshape = (layer.batch, layer.out_c,
- layer.out_h, layer.out_w)
- out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
- elif i == net.n-1:
- if layer.type == LAYERTYPE.CONNECTED:
- darknet_outshape = (layer.batch, layer.out_c)
- elif layer.type in [LAYERTYPE.SOFTMAX]:
- darknet_outshape = (layer.batch, layer.outputs)
- else:
- darknet_outshape = (layer.batch, layer.out_c,
- layer.out_h, layer.out_w)
- out.insert(0, _read_memory_buffer(darknet_outshape, layer.output))
- return out
-
- dtype = 'float32'
-
- img = LIB.letterbox_image(LIB.load_image_color(DARKNET_TEST_IMAGE_PATH.encode('utf-8'), 0, 0), net.w, net.h)
- darknet_output = get_darknet_output(net, img)
- batch_size = 1
- data = np.empty([batch_size, img.c, img.h, img.w], dtype)
- i = 0
- for c in range(img.c):
- for h in range(img.h):
- for k in range(img.w):
- data[0][c][h][k] = img.data[i]
- i = i + 1
-
- tvm_out = _get_tvm_output(net, data, build_dtype)
- for tvm_outs, darknet_out in zip(tvm_out, darknet_output):
- tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)
-
-def verify_rnn_forward(net):
- '''Test network with given input data on both darknet and tvm'''
- def get_darknet_network_predict(net, data):
- return LIB.network_predict(net, data)
- from cffi import FFI
- ffi = FFI()
- np_arr = np.zeros([1, net.inputs], dtype='float32')
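-    # Feed a one-hot input vector; index 84 is an arbitrary nonzero position.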
- np_arr[0, 84] = 1
- cffi_arr = ffi.cast('float*', np_arr.ctypes.data)
- tvm_out = _get_tvm_output(net, np_arr)[0]
- darknet_output = get_darknet_network_predict(net, cffi_arr)
- darknet_out = np.zeros(net.outputs, dtype='float32')
- for i in range(net.outputs):
- darknet_out[i] = darknet_output[i]
- last_layer = net.layers[net.n-1]
- darknet_outshape = (last_layer.batch, last_layer.outputs)
- darknet_out = darknet_out.reshape(darknet_outshape)
- tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)
-
-def test_forward_extraction():
- '''test extraction model'''
- model_name = 'extraction'
- cfg_name = model_name + '.cfg'
- weights_name = model_name + '.weights'
- cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
- weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
- net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_alexnet():
- '''test alexnet model'''
- model_name = 'alexnet'
- cfg_name = model_name + '.cfg'
- weights_name = model_name + '.weights'
- cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
- weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
- net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_resnet50():
- '''test resnet50 model'''
- model_name = 'resnet50'
- cfg_name = model_name + '.cfg'
- weights_name = model_name + '.weights'
- cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
- weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
- net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_yolov2():
- '''test yolov2 model'''
- model_name = 'yolov2'
- cfg_name = model_name + '.cfg'
- weights_name = model_name + '.weights'
- cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
- weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
- net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
- build_dtype = {}
- verify_darknet_frontend(net, build_dtype)
- LIB.free_network(net)
-
-def test_forward_yolov3():
- '''test yolov3 model'''
- model_name = 'yolov3'
- cfg_name = model_name + '.cfg'
- weights_name = model_name + '.weights'
- cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
- weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
- net = _load_net(cfg_url, cfg_name, weights_url, weights_name)
- build_dtype = {}
- verify_darknet_frontend(net, build_dtype)
- LIB.free_network(net)
-
-def test_forward_convolutional():
- '''test convolutional layer'''
- net = LIB.make_network(1)
- layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
- net.layers[0] = layer
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_dense():
- '''test fully connected layer'''
- net = LIB.make_network(1)
- layer = LIB.make_connected_layer(1, 75, 20, 1, 0, 0)
- net.layers[0] = layer
- net.w = net.h = 5
- LIB.resize_network(net, 5, 5)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_dense_batchnorm():
- '''test fully connected layer with batchnorm'''
- net = LIB.make_network(1)
- layer = LIB.make_connected_layer(1, 12, 2, 1, 1, 0)
- for i in range(5):
- layer.rolling_mean[i] = np.random.rand(1)
- layer.rolling_variance[i] = np.random.rand(1)
- layer.scales[i] = np.random.rand(1)
- net.layers[0] = layer
- net.w = net.h = 2
- LIB.resize_network(net, 2, 2)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_maxpooling():
- '''test maxpooling layer'''
- net = LIB.make_network(1)
- layer = LIB.make_maxpool_layer(1, 224, 224, 3, 2, 2, 0)
- net.layers[0] = layer
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_avgpooling():
-    '''test average pooling layer'''
- net = LIB.make_network(1)
- layer = LIB.make_avgpool_layer(1, 224, 224, 3)
- net.layers[0] = layer
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_batch_norm():
- '''test batch normalization layer'''
- net = LIB.make_network(1)
- layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0)
- for i in range(32):
- layer.rolling_mean[i] = np.random.rand(1)
- layer.rolling_variance[i] = np.random.rand(1)
- net.layers[0] = layer
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_shortcut():
- '''test shortcut layer'''
- net = LIB.make_network(3)
- layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
- layer_2 = LIB.make_convolutional_layer(1, 111, 111, 32, 32, 1, 1, 1, 0, 1, 0, 0, 0, 0)
- layer_3 = LIB.make_shortcut_layer(1, 0, 111, 111, 32, 111, 111, 32)
- layer_3.activation = 1
- layer_3.alpha = 1
- layer_3.beta = 1
- net.layers[0] = layer_1
- net.layers[1] = layer_2
- net.layers[2] = layer_3
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_reorg():
- '''test reorg layer'''
- net = LIB.make_network(2)
- layer_1 = LIB.make_convolutional_layer(1, 222, 222, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
- layer_2 = LIB.make_reorg_layer(1, 110, 110, 32, 2, 0, 0, 0)
- net.layers[0] = layer_1
- net.layers[1] = layer_2
- net.w = net.h = 222
- LIB.resize_network(net, 222, 222)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_region():
- '''test region layer'''
- net = LIB.make_network(2)
- layer_1 = LIB.make_convolutional_layer(1, 19, 19, 3, 425, 1, 1, 1, 0, 1, 0, 0, 0, 0)
- layer_2 = LIB.make_region_layer(1, 19, 19, 5, 80, 4)
- layer_2.softmax = 1
- net.layers[0] = layer_1
- net.layers[1] = layer_2
- net.w = net.h = 19
- LIB.resize_network(net, 19, 19)
- build_dtype = {}
- verify_darknet_frontend(net, build_dtype)
- LIB.free_network(net)
-
-def test_forward_yolo_op():
- '''test yolo layer'''
- net = LIB.make_network(2)
- layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0)
- layer_2 = LIB.make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2)
- net.layers[0] = layer_1
- net.layers[1] = layer_2
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- build_dtype = {}
- verify_darknet_frontend(net, build_dtype)
- LIB.free_network(net)
-
-def test_forward_upsample():
- '''test upsample layer'''
- net = LIB.make_network(1)
- layer = LIB.make_upsample_layer(1, 19, 19, 3, 3)
- layer.scale = 1
- net.layers[0] = layer
- net.w = net.h = 19
- LIB.resize_network(net, 19, 19)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_l2normalize():
- '''test l2 normalization layer'''
- net = LIB.make_network(1)
- layer = LIB.make_l2norm_layer(1, 224*224*3)
- layer.c = layer.out_c = 3
- layer.h = layer.out_h = 224
- layer.w = layer.out_w = 224
- net.layers[0] = layer
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_elu():
- '''test elu activation layer'''
- net = LIB.make_network(1)
- layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0)
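-    # Activation codes index darknet's ACTIVATION enum; assuming the upstream
-    # ordering (LOGISTIC=0, RELU=1, ..., ELU=8), 8 selects ELU here.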
- layer_1.activation = 8
- net.layers[0] = layer_1
- net.w = net.h = 224
- LIB.resize_network(net, 224, 224)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_softmax():
- '''test softmax layer'''
- net = LIB.make_network(1)
- layer_1 = LIB.make_softmax_layer(1, 75, 1)
- layer_1.temperature = 1
- net.layers[0] = layer_1
- net.w = net.h = 5
- LIB.resize_network(net, net.w, net.h)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_softmax_temperature():
- '''test softmax layer'''
- net = LIB.make_network(1)
- layer_1 = LIB.make_softmax_layer(1, 75, 1)
- layer_1.temperature = 0.8
- net.layers[0] = layer_1
- net.w = net.h = 5
- LIB.resize_network(net, net.w, net.h)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_rnn():
- '''test RNN layer'''
- net = LIB.make_network(1)
- batch = 1
- inputs = 256
- outputs = 256
- steps = 1
- activation = 1
- batch_normalize = 0
- adam = 0
- layer_1 = LIB.make_rnn_layer(batch, inputs, outputs, steps, activation, batch_normalize, adam)
- net.layers[0] = layer_1
- net.inputs = inputs
- net.outputs = outputs
- net.w = net.h = 0
- LIB.resize_network(net, net.w, net.h)
- verify_rnn_forward(net)
- LIB.free_network(net)
-
-def _test_forward_crnn():
- '''test CRNN layer'''
- net = LIB.make_network(1)
- batch = 1
- c = 3
- h = 224
- w = 224
- hidden_filters = c
- output_filters = c
- steps = 1
- activation = 0
- batch_normalize = 0
- inputs = 256
- outputs = 256
- layer_1 = LIB.make_crnn_layer(batch, h, w, c, hidden_filters, output_filters,
- steps, activation, batch_normalize)
- net.layers[0] = layer_1
- net.inputs = inputs
- net.outputs = output_filters * h * w
- net.w = w
- net.h = h
- LIB.resize_network(net, net.w, net.h)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-def test_forward_lstm():
- '''test LSTM layer'''
- net = LIB.make_network(1)
- batch = 1
- inputs = 256
- outputs = 256
- steps = 1
- batch_normalize = 0
- adam = 0
- layer_1 = LIB.make_lstm_layer(batch, inputs, outputs, steps, batch_normalize, adam)
- net.layers[0] = layer_1
- net.inputs = inputs
- net.outputs = outputs
- net.w = net.h = 0
- LIB.resize_network(net, net.w, net.h)
- verify_rnn_forward(net)
- LIB.free_network(net)
-
-def test_forward_gru():
- '''test GRU layer'''
- net = LIB.make_network(1)
- batch = 1
- inputs = 256
- outputs = 256
- steps = 1
- batch_normalize = 0
- adam = 0
- layer_1 = LIB.make_gru_layer(batch, inputs, outputs, steps, batch_normalize, adam)
- net.layers[0] = layer_1
- net.inputs = inputs
- net.outputs = outputs
- net.w = net.h = 0
- LIB.resize_network(net, net.w, net.h)
- verify_rnn_forward(net)
- LIB.free_network(net)
-
-def test_forward_activation_logistic():
- '''test logistic activation layer'''
- net = LIB.make_network(1)
- batch = 1
- h = 224
- w = 224
- c = 3
- n = 32
- groups = 1
- size = 3
- stride = 2
- padding = 0
- activation = 0
- batch_normalize = 0
- binary = 0
- xnor = 0
- adam = 0
- layer_1 = LIB.make_convolutional_layer(batch, h, w, c, n, groups, size, stride, padding,
- activation, batch_normalize, binary, xnor, adam)
- net.layers[0] = layer_1
- net.w = w
- net.h = h
- LIB.resize_network(net, net.w, net.h)
- verify_darknet_frontend(net)
- LIB.free_network(net)
-
-if __name__ == '__main__':
- test_forward_resnet50()
- test_forward_alexnet()
- test_forward_extraction()
- test_forward_yolov2()
- test_forward_yolov3()
- test_forward_convolutional()
- test_forward_maxpooling()
- test_forward_avgpooling()
- test_forward_batch_norm()
- test_forward_shortcut()
- test_forward_dense()
- test_forward_dense_batchnorm()
- test_forward_softmax()
- test_forward_softmax_temperature()
- test_forward_rnn()
- test_forward_reorg()
- test_forward_region()
- test_forward_yolo_op()
- test_forward_upsample()
- test_forward_l2normalize()
- test_forward_elu()
-# FIXME: Skip CRNN test since it causes segfault in libdarknet2.0.so
-# _test_forward_crnn()
- test_forward_lstm()
- test_forward_gru()
- test_forward_activation_logistic()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import nnvm
-import tvm
-from tvm.contrib import graph_runtime
-from nnvm.testing.config import ctx_list
-import keras
-
-# prevent keras from using up all gpu memory
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-config = tf.ConfigProto()
-config.gpu_options.per_process_gpu_memory_fraction = 0.5
-set_session(tf.Session(config=config))
-
-
-def verify_keras_frontend(keras_model, need_transpose=True):
- # Keras frontend currently supports tensorflow backend only.
-    assert keras.backend.backend() == 'tensorflow'
-
- in_shapes = []
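-    # Keras leaves the batch dimension as None; substitute 1 so concrete
-    # random inputs can be generated below.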
- for layer in keras_model._input_layers:
- in_shapes.append(tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape))
-
- def get_keras_output(xs, dtype='float32'):
- return keras_model.predict(xs)
-
- def get_tvm_output(xs, target, ctx, dtype='float32'):
- sym, params = nnvm.frontend.from_keras(keras_model)
- shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
- with nnvm.compiler.build_config(opt_level=2):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- for name, x in zip(keras_model.input_names, xs):
- m.set_input(name, tvm.nd.array(x.astype(dtype)))
- m.set_input(**params)
- m.run()
-
- return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())]
-
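-    # Keras tensors are channels-last (NHWC) while the compiled graph here is
-    # channels-first (NCHW); e.g. (1, 32, 32, 3) <-> (1, 3, 32, 32).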
- def to_channels_first(arr):
- return arr.transpose([0, -1] + list(range(1, arr.ndim - 1)))
-
- def to_channels_last(arr):
- return arr.transpose([0] + list(range(2, arr.ndim)) + [1])
-
- xs = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes]
- keras_out = get_keras_output(xs)
-
- keras_out = keras_out if isinstance(keras_out, list) else [keras_out]
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output([to_channels_first(x) for x in xs] if need_transpose else xs, target, ctx)
- for kout, tout in zip(keras_out, tvm_out):
- if need_transpose:
- tout = to_channels_last(tout)
- tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
-
-def test_forward_elemwise_add():
- r = []
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(data)
- r.append(x)
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(x)
- r.append(x)
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(x)
- # add two symbols
- y = keras.layers.add([keras.layers.add([x, r[0]]), r[1]])
- y = keras.layers.GlobalAveragePooling2D()(y)
- keras_model = keras.models.Model(data, y)
- verify_keras_frontend(keras_model)
- # add three symbols
- y = keras.layers.add([x, r[0], r[1]])
- y = keras.layers.GlobalAveragePooling2D()(y)
- keras_model = keras.models.Model(data, y)
- verify_keras_frontend(keras_model)
-
-
-def _test_forward_dense():
- data = keras.layers.Input(shape=(32,32,1))
- x = keras.layers.Flatten()(data)
- x = keras.layers.Dropout(0.5)(x)
- x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(x)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-def _test_forward_dense_with_3d_inp():
- data = keras.layers.Input(shape=(1, 20))
- x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def test_forward_dense():
- _test_forward_dense()
- _test_forward_dense_with_3d_inp()
-
-def test_forward_pool():
- data = keras.layers.Input(shape=(32,32,1))
- # maxpool
- x = keras.layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
- # avgpool
- y = keras.layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same')(data)
- keras_model = keras.models.Model(data, y)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_conv():
- data = keras.layers.Input(shape=(32,32,3))
- conv_funcs = [keras.layers.Conv2D(filters=10, kernel_size=(3,3),
- strides=(2,2), padding='same'),
- keras.layers.Conv2D(filters=10, kernel_size=(3,3),
- dilation_rate=(2,2), padding='same'),
- keras.layers.DepthwiseConv2D(kernel_size=(3,3), padding='same'),
- keras.layers.Conv2DTranspose(filters=10, kernel_size=(3,3), padding='valid'),
- keras.layers.SeparableConv2D(filters=10, kernel_size=(3,3), padding='same')]
- for conv_func in conv_funcs:
- x = conv_func(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_upsample():
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.UpSampling2D(size=(3,3))(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_reshape():
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Reshape(target_shape=(32,32,3))(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_crop():
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Cropping2D(cropping=((1, 1), (1, 1)))(data)
- x = keras.layers.Cropping2D(cropping=(1, 1))(x)
- x = keras.layers.Cropping2D(cropping=1)(x)
- x = keras.layers.Cropping2D(cropping=((0, 1), (1, 0)))(x)
- x = keras.layers.Cropping2D(cropping=(1, 0))(x)
- x = keras.layers.Cropping2D(cropping=0)(x)
- x = keras.layers.Add()([x, x])
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_vgg16():
- keras_model = keras.applications.vgg16.VGG16(include_top=True, weights='imagenet',
- input_shape=(224,224,3), classes=1000)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_xception():
- keras_model = keras.applications.xception.Xception(include_top=True, weights='imagenet',
- input_shape=(299,299,3), classes=1000)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_resnet50():
- keras_model = keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet',
- input_shape=(224,224,3), classes=1000)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_mobilenet():
- keras_model = keras.applications.mobilenet.MobileNet(include_top=True, weights='imagenet',
- input_shape=(224,224,3), classes=1000)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_activations():
- data = keras.layers.Input(shape=(32,32,3))
- weights = np.random.rand(1, 32, 32, 3)
- act_funcs = [keras.layers.Activation('softmax'),
- keras.layers.Activation('softplus'),
- keras.layers.ReLU(),
- keras.layers.ReLU(max_value=6.),
- keras.layers.LeakyReLU(alpha=0.3),
- keras.layers.PReLU(weights=weights, alpha_initializer="zero"),
- keras.layers.ELU(alpha=0.5),
- keras.layers.Activation('selu'),
- keras.layers.ThresholdedReLU(theta=0.5),
- keras.layers.Activation('softsign'),
- keras.layers.Activation('hard_sigmoid'),
- keras.layers.Activation('sigmoid'),
- keras.layers.Activation('tanh'),
- keras.layers.Activation('linear')]
- for act_func in act_funcs:
- x = act_func(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_multi_inputs():
- data1 = keras.layers.Input(shape=(32,32,3))
- data2 = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(data1)
- y = keras.layers.Conv2D(8, (3, 3), padding="same")(data2)
- z = keras.layers.add([x, y])
- z = keras.layers.GlobalAveragePooling2D()(z)
- keras_model = keras.models.Model([data1, data2], z)
- verify_keras_frontend(keras_model)
-
-
-def test_forward_multi_outputs():
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(data)
- x = keras.layers.GlobalAveragePooling2D()(x)
- y = keras.layers.Conv2D(8, (3, 3), padding="same")(data)
- y = keras.layers.GlobalAveragePooling2D()(y)
- keras_model = keras.models.Model(data, [x, y])
- verify_keras_frontend(keras_model)
-
-
-def test_forward_reuse_layers():
- # reuse conv2d
- data = keras.layers.Input(shape=(32,32,3))
- conv2d = keras.layers.Conv2D(8, (3, 3), padding="same")
- x = conv2d(data)
- y = conv2d(data)
- z = keras.layers.add([x, y])
- z = keras.layers.GlobalAveragePooling2D()(z)
- keras_model = keras.models.Model(data, z)
- verify_keras_frontend(keras_model)
-
- # reuse add
- data = keras.layers.Input(shape=(32,32,3))
- x = keras.layers.Conv2D(8, (3, 3), padding="same")(data)
- add = keras.layers.Add()
- x = add([x, x])
- x = add([x, x])
- z = keras.layers.GlobalAveragePooling2D()(x)
- keras_model = keras.models.Model(data, z)
- verify_keras_frontend(keras_model)
-
-def _test_LSTM(time_steps, inputs, hidden, return_state=True):
- data = keras.layers.Input(shape=(time_steps, inputs))
- lstm_out = keras.layers.LSTM(hidden,
- return_state=return_state,
- recurrent_activation='sigmoid',
- activation='tanh')
- x = lstm_out(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def _test_LSTM_MultiLayer(inputs, hidden):
- inputs = keras.layers.Input(shape=(1, inputs))
- layer = keras.layers.LSTM(hidden, return_state=True, return_sequences=True,
- recurrent_activation='sigmoid',
- activation='tanh')
- outputs = layer(inputs)
- output, state = outputs[0], outputs[1:]
- output = keras.layers.LSTM(hidden, recurrent_activation='sigmoid',
- activation='tanh')(output, initial_state=state)
- keras_model = keras.models.Model(inputs, output)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-
-def test_forward_LSTM():
- _test_LSTM(1, 8, 8, return_state=True)
- _test_LSTM(1, 4, 4, return_state=False)
- _test_LSTM(20, 16, 256, return_state=False)
- _test_LSTM_MultiLayer(4, 4)
-
-def _test_RNN(inputs, units):
- data = keras.layers.Input(shape=(1, inputs))
- rnn_out = keras.layers.SimpleRNN(units, return_state=True,
- activation='tanh')
- x = rnn_out(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def _test_RNN_MultiLayer(inputs, units):
- inputs = keras.layers.Input(shape=(1, inputs))
- layer = keras.layers.SimpleRNN(units, return_state=True, return_sequences=True,
- activation='tanh')
- outputs = layer(inputs)
- output, state = outputs[0], outputs[1:]
- output = keras.layers.SimpleRNN(units, activation='tanh')(output, initial_state=state)
- keras_model = keras.models.Model(inputs, output)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def test_forward_RNN():
- _test_RNN(2, 4)
- _test_RNN(4, 3)
- _test_RNN_MultiLayer(4, 12)
-
-def _test_GRU(inputs, units):
- data = keras.layers.Input(shape=(1, inputs))
- gru_out = keras.layers.GRU(units,
- return_state=True,
- recurrent_activation='sigmoid',
- activation='tanh')
- x = gru_out(data)
- keras_model = keras.models.Model(data, x)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def _test_GRU_MultiLayer(inputs, units):
- inputs = keras.layers.Input(shape=(1, inputs))
- layer = keras.layers.GRU(units,
- return_state=True,
- return_sequences=True,
- recurrent_activation='sigmoid',
- activation='tanh')
- outputs = layer(inputs)
- output, state = outputs[0], outputs[1:]
- output = keras.layers.GRU(units, recurrent_activation='sigmoid',
- activation='tanh')(output, initial_state=state)
- keras_model = keras.models.Model(inputs, output)
- verify_keras_frontend(keras_model, need_transpose=False)
-
-def test_forward_GRU():
- _test_GRU(2, 4)
- _test_GRU(4, 3)
- _test_GRU_MultiLayer(4, 4)
-
-if __name__ == '__main__':
- test_forward_elemwise_add()
- test_forward_activations()
- test_forward_dense()
- test_forward_pool()
- test_forward_conv()
- test_forward_upsample()
- test_forward_reshape()
- test_forward_crop()
- test_forward_vgg16()
- test_forward_xception()
- test_forward_resnet50()
- test_forward_mobilenet()
-
- test_forward_multi_inputs()
- test_forward_multi_outputs()
- test_forward_reuse_layers()
- test_forward_LSTM()
- test_forward_RNN()
- test_forward_GRU()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""MXNet and NNVM model zoo."""
-from __future__ import absolute_import
-from . import mlp, resnet, vgg, dqn, dcgan, squeezenet, inception_v3
-import nnvm.testing
-
-_num_class = 1000
-
-# mlp
-mx_mlp = mlp.get_symbol(_num_class)
-nnvm_mlp = nnvm.testing.mlp.get_workload(1, _num_class)[0]
-
-# resnet
-mx_resnet = {}
-nnvm_resnet = {}
-for num_layer in [18, 34, 50, 101, 152, 200, 269]:
- mx_resnet[num_layer] = resnet.get_symbol(_num_class, num_layer, '3,224,224')
- nnvm_resnet[num_layer] = nnvm.testing.resnet.get_workload(
- 1, _num_class, num_layers=num_layer)[0]
-
-# vgg
-mx_vgg = {}
-nnvm_vgg = {}
-for num_layer in [11, 13, 16, 19]:
- mx_vgg[num_layer] = vgg.get_symbol(_num_class, num_layer)
- nnvm_vgg[num_layer] = nnvm.testing.vgg.get_workload(
- 1, _num_class, num_layers=num_layer)[0]
-
-# squeezenet
-mx_squeezenet = {}
-nnvm_squeezenet = {}
-for version in ['1.0', '1.1']:
- mx_squeezenet[version] = squeezenet.get_symbol(version=version)
- nnvm_squeezenet[version] = nnvm.testing.squeezenet.get_workload(1, version=version)[0]
-
-# inception
-mx_inception_v3 = inception_v3.get_symbol()
-nnvm_inception_v3 = nnvm.testing.inception_v3.get_workload(1)[0]
-
-# dqn
-mx_dqn = dqn.get_symbol()
-nnvm_dqn = nnvm.testing.dqn.get_workload(1)[0]
-
-# dcgan generator
-mx_dcgan = dcgan.get_symbol()
-nnvm_dcgan = nnvm.testing.dcgan.get_workload(1)[0]
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=unused-argument
-"""
-The MXNet symbol of the DCGAN generator
-
-Adapted from:
-https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py
-
-Reference:
-Radford, Alec, Luke Metz, and Soumith Chintala.
-"Unsupervised representation learning with deep convolutional generative adversarial networks."
-arXiv preprint arXiv:1511.06434 (2015).
-"""
-
-import mxnet as mx
-
-def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)):
- """a deconv layer that enlarges the feature map"""
- target_shape = (oshape[-2], oshape[-1])
- pad_y = (kshape[0] - 1) // 2
- pad_x = (kshape[1] - 1) // 2
- adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0]
- adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1]
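-    # pad/adj are chosen so the deconvolution lands on the target spatial
-    # size: out = (in - 1)*stride - 2*pad + kernel + adj. For example, going
-    # from 4x4 to 8x8 with kernel 4 and stride 2: pad = 1,
-    # adj = (8 + 2 - 4) % 2 = 0, and (4 - 1)*2 - 2 + 4 + 0 = 8 as required.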
-
- net = mx.sym.Deconvolution(data,
- kernel=kshape,
- stride=stride,
- pad=(pad_y, pad_x),
- adj=(adj_y, adj_x),
- num_filter=oshape[0],
- no_bias=True,
- name=name)
- return net
-
-def deconv2d_bn_relu(data, prefix, **kwargs):
- """a block of deconv + batch norm + relu"""
- eps = 1e-5 + 1e-12
-
- net = deconv2d(data, name="%s_deconv" % prefix, **kwargs)
- net = mx.sym.BatchNorm(net, eps=eps, name="%s_bn" % prefix)
- net = mx.sym.Activation(net, name="%s_act" % prefix, act_type='relu')
- return net
-
-def get_symbol(oshape=(3, 64, 64), ngf=128, code=None):
- """get symbol of dcgan generator"""
- assert oshape[-1] == 64, "Only support 64x64 image"
- assert oshape[-2] == 64, "Only support 64x64 image"
-
- code = mx.sym.Variable("data") if code is None else code
- net = mx.sym.FullyConnected(code, name="g1", num_hidden=ngf*8*4*4, no_bias=True, flatten=False)
- net = mx.sym.Activation(net, act_type='relu')
- # 4 x 4
- net = mx.sym.reshape(net, shape=(-1, ngf * 8, 4, 4))
- # 8 x 8
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2")
- # 16x16
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3")
- # 32x32
- net = deconv2d_bn_relu(
- net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4")
- # 64x64
- net = deconv2d(
- net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv")
- net = mx.sym.Activation(net, act_type='tanh')
- return net
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-The MXNet symbol of the Nature DQN
-
-Reference:
-Mnih, Volodymyr, et al.
-"Human-level control through deep reinforcement learning."
-Nature 518.7540 (2015): 529.
-"""
-
-import mxnet as mx
-
-def get_symbol(num_action=18):
- data = mx.sym.Variable(name='data')
- net = mx.sym.Convolution(data, kernel=(8, 8), stride=(4, 4),
- num_filter=32, name='conv1')
- net = mx.sym.Activation(net, act_type='relu', name='relu1')
- net = mx.sym.Convolution(net, kernel=(4, 4), stride=(2, 2),
- num_filter=64, name='conv2')
- net = mx.sym.Activation(net, act_type='relu', name='relu2')
- net = mx.sym.Convolution(net, kernel=(3, 3), stride=(1, 1),
- num_filter=64, name='conv3')
- net = mx.sym.Activation(net, act_type='relu', name='relu3')
- net = mx.sym.FullyConnected(net, num_hidden=512, name='fc4')
- net = mx.sym.Activation(net, act_type='relu', name='relu4')
- net = mx.sym.FullyConnected(net, num_hidden=num_action, name='fc5', flatten=False)
-
- return net
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Inception V3, suitable for input images of around 299 x 299
-
-Reference:
-Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
-
-Adapted from https://github.com/apache/incubator-mxnet/blob/
- master/example/image-classification/symbols/inception-v3.py
-"""
-import mxnet as mx
-import numpy as np
-
-def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''):
- conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix))
- bn = mx.sym.BatchNorm(data=conv, eps=2e-5, name='%s%s_batchnorm' % (name, suffix))
- act = mx.sym.Activation(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix))
- return act
-
-
-def Inception7A(data,
- num_1x1,
- num_3x3_red, num_3x3_1, num_3x3_2,
- num_5x5_red, num_5x5,
- pool, proj,
- name):
- tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name))
- tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv')
- tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
- tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
- tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2')
- pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
- cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv')
- concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name)
- return concat
-
-# First Downsample
-def Inception7B(data,
- num_3x3,
- num_d3x3_red, num_d3x3_1, num_d3x3_2,
- pool,
- name):
- tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name))
- tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv')
- tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1')
- tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2')
- pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name))
- concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7C(data,
- num_1x1,
- num_d7_red, num_d7_1, num_d7_2,
- num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4,
- pool, proj,
- name):
- tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
- tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv')
- tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1')
- tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2')
- tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3')
- tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_4')
- pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
- cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv')
- # concat
- concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7D(data,
- num_3x3_red, num_3x3,
- num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3,
- pool,
- name):
- tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv')
- tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
- tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2')
- tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3')
- pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
- # concat
- concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name)
- return concat
-
-def Inception7E(data,
- num_1x1,
- num_d3_red, num_d3_1, num_d3_2,
- num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2,
- pool, proj,
- name):
- tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
- tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv')
- tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv')
- tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1')
- tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv')
- tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
- tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv')
- tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1')
- pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
- cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv')
- # concat
- concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name)
- return concat
-
-def get_symbol(num_classes=1000, **kwargs):
- data = mx.sym.Variable(name="data")
- # stage 1
- conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv")
- conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1")
- conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2")
- pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
- # stage 2
- conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3")
- conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4")
- pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
-
- # # stage 3
- in3a = Inception7A(pool1, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 32, "mixed")
- in3b = Inception7A(in3a, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 64, "mixed_1")
- in3c = Inception7A(in3b, 64,
- 64, 96, 96,
- 48, 64,
- "avg", 64, "mixed_2")
- in3d = Inception7B(in3c, 384,
- 64, 96, 96,
- "max", "mixed_3")
- # stage 4
- in4a = Inception7C(in3d, 192,
- 128, 128, 192,
- 128, 128, 128, 128, 192,
- "avg", 192, "mixed_4")
- in4b = Inception7C(in4a, 192,
- 160, 160, 192,
- 160, 160, 160, 160, 192,
- "avg", 192, "mixed_5")
- in4c = Inception7C(in4b, 192,
- 160, 160, 192,
- 160, 160, 160, 160, 192,
- "avg", 192, "mixed_6")
- in4d = Inception7C(in4c, 192,
- 192, 192, 192,
- 192, 192, 192, 192, 192,
- "avg", 192, "mixed_7")
- in4e = Inception7D(in4d, 192, 320,
- 192, 192, 192, 192,
- "max", "mixed_8")
- # stage 5
- in5a = Inception7E(in4e, 320,
- 384, 384, 384,
- 448, 384, 384, 384,
- "avg", 192, "mixed_9")
- in5b = Inception7E(in5a, 320,
- 384, 384, 384,
- 448, 384, 384, 384,
- "max", 192, "mixed_10")
- # pool
- pool = mx.sym.Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool")
- flatten = mx.sym.Flatten(data=pool, name="flatten")
- fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1', flatten=False)
- softmax = mx.sym.SoftmaxOutput(data=fc1, name='softmax')
- return softmax
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-A simple multilayer perceptron
-"""
-import mxnet as mx
-
-def get_symbol(num_classes=10, **kwargs):
- data = mx.symbol.Variable('data')
- data = mx.sym.Flatten(data=data)
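-    # flatten=False is only accepted by newer MXNet releases; fall back to the
-    # older FullyConnected signature when it is rejected.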
- try:
- fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128, flatten=False)
- act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
- fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64, flatten=False)
- act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
- fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes, flatten=False)
- mlp = mx.symbol.softmax(data = fc3, name = 'softmax')
- except:
- fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
- act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
- fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
- act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
- fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes)
- mlp = mx.symbol.softmax(data = fc3, name = 'softmax')
- return mlp
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implements the following paper:
-
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
-'''
-import mxnet as mx
-import numpy as np
-
-def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
- """Return ResNet Unit symbol for building ResNet
- Parameters
- ----------
- data : str
- Input data
- num_filter : int
- Number of output channels
-    stride : tuple
-        Stride used in convolution
-    dim_match : bool
-        True if the input and output channel counts match; otherwise a
-        projection shortcut is used
- name : str
- Base name of the operators
- workspace : int
- Workspace used in convolution operator
- """
- if bottle_neck:
- bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
- act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
- conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=stride, pad=(0,0),
- no_bias=True, workspace=workspace, name=name + '_conv1')
- bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
- act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
- conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(1,1),
- no_bias=True, workspace=workspace, name=name + '_conv2')
- bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
- act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
- conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
- workspace=workspace, name=name + '_conv3')
- if dim_match:
- shortcut = data
- else:
- shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
- workspace=workspace, name=name+'_sc')
- if memonger:
- shortcut._set_attr(mirror_stage='True')
- return conv3 + shortcut
- else:
- bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
- act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
- conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
- no_bias=True, workspace=workspace, name=name + '_conv1')
- bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
- act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
- conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
- no_bias=True, workspace=workspace, name=name + '_conv2')
- if dim_match:
- shortcut = data
- else:
- shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
- workspace=workspace, name=name+'_sc')
- if memonger:
- shortcut._set_attr(mirror_stage='True')
- return conv2 + shortcut
-
-def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, dtype='float32', memonger=False):
- """Return ResNet symbol of
- Parameters
- ----------
- units : list
- Number of units in each stage
- num_stages : int
-        Number of stages
- filter_list : list
- Channel size of each stage
- num_classes : int
-        Output size of the symbol
-    image_shape : tuple
-        Input image shape as (channels, height, width)
- workspace : int
- Workspace used in convolution operator
- dtype : str
- Precision (float32 or float16)
- """
- num_unit = len(units)
- assert(num_unit == num_stages)
- data = mx.sym.Variable(name='data')
- if dtype == 'float32':
- # data = mx.sym.identity(data=data, name='id')
- data = data
-    elif dtype == 'float16':
-        data = mx.sym.Cast(data=data, dtype=np.float16)
- data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
- (nchannel, height, width) = image_shape
- if height <= 32: # such as cifar10
- body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
- no_bias=True, name="conv0", workspace=workspace)
- else: # often expected to be 224 such as imagenet
- body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
- no_bias=True, name="conv0", workspace=workspace)
- body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
- body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
- body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
-
- for i in range(num_stages):
- body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
- name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
- memonger=memonger)
- for j in range(units[i]-1):
- body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
- bottle_neck=bottle_neck, workspace=workspace, memonger=memonger)
- bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
- relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
-    # kernel is ignored when global_pool=True, but the argument is still required
- pool1 = mx.sym.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
- flat = mx.sym.Flatten(data=pool1)
- try:
- fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1', flatten=False)
- except:
- fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
- if dtype == 'float16':
- fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
- return mx.sym.softmax(data=fc1, name='softmax')
-
-def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype='float32', **kwargs):
- """
- Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
- Original author Wei Wu
- """
- image_shape = [int(l) for l in image_shape.split(',')]
- (nchannel, height, width) = image_shape
- if height <= 28:
- num_stages = 3
- if (num_layers-2) % 9 == 0 and num_layers >= 164:
- per_unit = [(num_layers-2)//9]
- filter_list = [16, 64, 128, 256]
- bottle_neck = True
- elif (num_layers-2) % 6 == 0 and num_layers < 164:
- per_unit = [(num_layers-2)//6]
- filter_list = [16, 16, 32, 64]
- bottle_neck = False
- else:
- raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
- units = per_unit * num_stages
- else:
- if num_layers >= 50:
- filter_list = [64, 256, 512, 1024, 2048]
- bottle_neck = True
- else:
- filter_list = [64, 64, 128, 256, 512]
- bottle_neck = False
- num_stages = 4
- if num_layers == 18:
- units = [2, 2, 2, 2]
- elif num_layers == 34:
- units = [3, 4, 6, 3]
- elif num_layers == 50:
- units = [3, 4, 6, 3]
- elif num_layers == 101:
- units = [3, 4, 23, 3]
- elif num_layers == 152:
- units = [3, 8, 36, 3]
- elif num_layers == 200:
- units = [3, 24, 36, 3]
- elif num_layers == 269:
- units = [3, 30, 48, 8]
- else:
- raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
-
- return resnet(units = units,
- num_stages = num_stages,
- filter_list = filter_list,
- num_classes = num_classes,
- image_shape = image_shape,
- bottle_neck = bottle_neck,
- workspace = conv_workspace,
- dtype = dtype)
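
As a rough usage sketch (argument values are illustrative, not part of the original file), get_symbol maps num_layers to the unit/filter configuration and delegates to resnet:

# ResNet-18 for a 224x224 ImageNet-style input: units=[2, 2, 2, 2], no bottleneck.
sym = get_symbol(num_classes=1000, num_layers=18, image_shape='3,224,224')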
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-import mxnet as mx
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
- net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
- left = _make_fire_conv(net, expand1x1_channels, 1, 0)
- right = _make_fire_conv(net, expand3x3_channels, 3, 1)
- # NOTE : Assume NCHW layout here
- net = mx.sym.concat(left, right, dim=1)
-
- return net
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
- net = mx.sym.Convolution(net, num_filter=channels, kernel=(kernel_size, kernel_size),
- pad=(padding, padding))
- net = mx.sym.Activation(net, act_type='relu')
- return net
-
-# Net
-def get_symbol(num_classes=1000, version='1.0', **kwargs):
- """Get symbol of SqueezeNet
-
- Parameters
- ----------
- num_classes: int
- The number of classification results
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
- """
-    assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}: "
-                                       "1.0 or 1.1 expected".format(version=version))
- net = mx.sym.Variable("data")
- if version == '1.0':
- net = mx.sym.Convolution(net, num_filter=96, kernel=(7, 7), stride=(2, 2), pad=(3, 3))
- net = mx.sym.Activation(net, act_type='relu')
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 32, 128, 128)
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 64, 256, 256)
- else:
- net = mx.sym.Convolution(net, num_filter=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1))
- net = mx.sym.Activation(net, act_type='relu')
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 32, 128, 128)
- net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2))
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = _make_fire(net, 64, 256, 256)
- net = mx.sym.Dropout(net, p=0.5)
- net = mx.sym.Convolution(net, num_filter=num_classes, kernel=(1, 1))
- net = mx.sym.Activation(net, act_type='relu')
- net = mx.sym.Pooling(data=net, global_pool=True, kernel=(13, 13), pool_type='avg')
- net = mx.sym.flatten(net)
- return mx.sym.softmax(net)
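
A brief note on the fire-module arithmetic used above (illustrative, not from the original file): _make_fire squeezes to squeeze_channels and then concatenates the two expand branches, so its output has expand1x1_channels + expand3x3_channels channels.

# e.g. _make_fire(net, 16, 64, 64) squeezes to 16 channels and expands to 64 + 64 = 128 channels.
fire = _make_fire(mx.sym.Variable('data'), 16, 64, 64)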
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""References:
-
-Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for
-large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
-"""
-
-import mxnet as mx
-import numpy as np
-
-def get_feature(internal_layer, layers, filters, batch_norm=False, **kwargs):
- for i, num in enumerate(layers):
- for j in range(num):
-            internal_layer = mx.sym.Convolution(data=internal_layer, kernel=(3, 3), pad=(1, 1), num_filter=filters[i], name="conv%s_%s" % (i + 1, j + 1))
-            if batch_norm:
-                internal_layer = mx.symbol.BatchNorm(data=internal_layer, name="bn%s_%s" % (i + 1, j + 1))
-            internal_layer = mx.sym.Activation(data=internal_layer, act_type="relu", name="relu%s_%s" % (i + 1, j + 1))
-        internal_layer = mx.sym.Pooling(data=internal_layer, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool%s" % (i + 1))
-    return internal_layer
-
-def get_classifier(input_data, num_classes, **kwargs):
- flatten = mx.sym.Flatten(data=input_data, name="flatten")
- try:
- fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", flatten=False)
- relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
- drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
- fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", flatten=False)
- relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
- drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
- fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8", flatten=False)
- except:
- fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
- relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6")
- drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6")
- fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
- relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7")
- drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7")
- fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8")
- return fc8
-
-def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype='float32', **kwargs):
- """
- Parameters
- ----------
-    num_classes : int
-        Number of classification classes.
-    num_layers : int
-        Number of layers for the variant of VGG. Options are 11, 13, 16, 19.
- batch_norm : bool, default False
- Use batch normalization.
- dtype: str, float32 or float16
- Data precision.
- """
- vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
- 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
- 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
- 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])}
- if num_layers not in vgg_spec:
- raise ValueError("Invalide num_layers {}. Possible choices are 11,13,16,19.".format(num_layers))
- layers, filters = vgg_spec[num_layers]
- data = mx.sym.Variable(name="data")
- if dtype == 'float16':
- data = mx.sym.Cast(data=data, dtype=np.float16)
- feature = get_feature(data, layers, filters, batch_norm)
- classifier = get_classifier(feature, num_classes)
- if dtype == 'float16':
- classifier = mx.sym.Cast(data=classifier, dtype=np.float32)
- symbol = mx.sym.softmax(data=classifier, name='softmax')
- return symbol
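
For illustration only (values assumed, not part of the original file), a VGG-16 symbol with batch normalization would be requested as:

# vgg_spec[16] gives layers=[2, 2, 3, 3, 3] and filters=[64, 128, 256, 512, 512]
# (13 convolutional layers plus fc6/fc7/fc8 from get_classifier).
sym = get_symbol(num_classes=1000, num_layers=16, batch_norm=True)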
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-import topi
-import tvm
-from tvm.contrib import graph_runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing.config import ctx_list
-from nnvm import frontend
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon.model_zoo import vision
-import model_zoo
-
-
-def verify_mxnet_frontend_impl(mx_symbol, data_shape=(1, 3, 224, 224), out_shape=(1, 1000),
- gluon_impl=False, name=None, dtype='float32'):
- """Use name different from test to avoid pytest picking it up"""
- if gluon_impl:
- def get_gluon_output(name, x):
- net = vision.get_model(name)
- net.collect_params().initialize(mx.init.Xavier())
- net_sym = gluon.nn.SymbolBlock(outputs=net(mx.sym.var('data')),
- inputs=mx.sym.var('data'),
- params=net.collect_params())
- out = net_sym(mx.nd.array(x.astype(dtype))).asnumpy()
- return out, net_sym
- else:
- def get_mxnet_output(symbol, x, dtype='float32'):
- from collections import namedtuple
- Batch = namedtuple('Batch', ['data'])
- mod = mx.mod.Module(symbol, label_names=None)
- mod.bind(data_shapes=[('data', x.shape)], for_training=False)
- mod.init_params()
- mod.forward(Batch([mx.nd.array(x.astype(dtype))]))
- out = mod.get_outputs()[0].asnumpy()
- args, auxs = mod.get_params()
- return out, args, auxs
-
- def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'):
- if gluon_impl:
- new_sym, params = frontend.from_mxnet(symbol)
- else:
- new_sym, params = frontend.from_mxnet(symbol, args, auxs)
-
- dshape = x.shape
- shape_dict = {'data': dshape}
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input("data", tvm.nd.array(x.astype(dtype)))
- m.set_input(**params)
- m.run()
- # get outputs
- out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
- return out.asnumpy()
-
- # random input
- x = np.random.uniform(size=data_shape)
- if gluon_impl:
- gluon_out, gluon_sym = get_gluon_output(name, x)
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype)
- tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
- else:
- mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
- assert "data" not in args
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype)
- tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_mlp():
- mlp = model_zoo.mx_mlp
- verify_mxnet_frontend_impl(mlp)
-
-def test_forward_vgg():
- for n in [11]:
- mx_sym = model_zoo.mx_vgg[n]
- verify_mxnet_frontend_impl(mx_sym)
-
-def test_forward_resnet():
- for n in [18]:
- mx_sym = model_zoo.mx_resnet[n]
- verify_mxnet_frontend_impl(mx_sym)
-
-def test_forward_elu():
- data = mx.sym.var('data')
- data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
- mx_sym = mx.sym.LeakyReLU(data, act_type='elu')
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-def test_forward_rrelu():
- data = mx.sym.var('data')
- data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
- mx_sym = mx.sym.LeakyReLU(data, act_type='rrelu', lower_bound=0.3, upper_bound=0.7)
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-def test_forward_prelu():
- data = mx.sym.var('data')
- data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
- mx_sym = mx.sym.LeakyReLU(data, act_type='prelu')
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-def test_forward_softrelu():
- data = mx.sym.var('data')
- data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
- mx_sym = mx.sym.Activation(data, act_type='softrelu')
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-def test_forward_fc_flatten():
- # test flatten=True option in mxnet 0.11.1
- data = mx.sym.var('data')
- try:
- mx_sym = mx.sym.FullyConnected(data, num_hidden=100, flatten=True)
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
- mx_sym = mx.sym.FullyConnected(mx.sym.Flatten(data), num_hidden=100, flatten=False)
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100))
- except:
- pass
-
-def test_forward_clip():
- data = mx.sym.var('data')
- data = mx.sym.concat(data, -data, dim=1) # negative part explicitly
- mx_sym = mx.sym.clip(data, a_min=0, a_max=1)
- verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100))
-
-def test_forward_split():
- data = mx.sym.var('data')
- mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False)
- verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1))
-
-def test_forward_split_squeeze():
- data = mx.sym.var('data')
- mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True)
- verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1))
-
-def test_forward_expand_dims():
- data = mx.sym.var('data')
- mx_sym = mx.sym.expand_dims(data, axis=1)
- verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4))
-
-def test_forward_pooling():
- data = mx.sym.var('data')
- mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='avg')
- verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
- mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='max')
- verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8))
-
-def test_forward_lrn():
- data = mx.sym.var('data')
- mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5)
- verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24))
-
-def test_forward_ones():
- data = mx.sym.var('data')
- ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32')
- mx_sym = mx.sym.elemwise_add(data, ones)
- verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-def test_forward_zeros():
- data = mx.sym.var('data')
- zeros = mx.sym.zeros(shape=(2, 3, 4), dtype='float32')
- mx_sym = mx.sym.elemwise_add(data, zeros)
- verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-def test_forward_ones_like():
- data = mx.sym.var('data')
- mx_sym = mx.sym.ones_like(data, dtype='float32')
- verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-def test_forward_zeros_like():
- data = mx.sym.var('data')
- mx_sym = mx.sym.zeros_like(data, dtype='float32')
- verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4))
-
-def test_forward_argmax():
- data = mx.sym.var('data')
- mx_sym = mx.sym.argmax(data, axis=1)
- verify_mxnet_frontend_impl(mx_sym, (5, 3), (5,))
-
-def test_forward_argmin():
- data = mx.sym.var('data')
- mx_sym = mx.sym.argmin(data, axis=0)
- verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,))
-
-def test_forward_where():
- cond = mx.sym.var('cond')
- x = mx.sym.var('x')
- y = mx.sym.var('y')
- dshape = (2, 2)
- dtype = 'float32'
- mx_sym = mx.sym.where(cond, x, y)
- np_cond = np.array([[0, 1], [-1, 0]]).astype(dtype)
- np_x = np.random.uniform(size=dshape).astype(dtype)
- np_y = np.random.uniform(size=dshape).astype(dtype)
- mx_cond = mx.nd.array(np_cond)
- mx_x = mx.nd.array(np_x)
- mx_y = mx.nd.array(np_y)
- mod = mx.mod.Module(mx_sym, label_names=None, data_names=['cond', 'x', 'y'])
- mod.bind(data_shapes=[('cond', dshape), ('x', dshape), ('y', dshape)], for_training=False)
- mod.init_params()
- args, auxs = mod.get_params()
- mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy()
- out_shape = dshape
- new_sym, params = frontend.from_mxnet(mx_sym, args, auxs)
- shape_dict = {'cond': dshape, 'x': dshape, 'y': dshape}
- for target, ctx in ctx_list():
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input("cond", tvm.nd.array(np_cond))
- m.set_input("x", tvm.nd.array(np_x))
- m.set_input("y", tvm.nd.array(np_y))
- m.set_input(**params)
- m.run()
- # get outputs
- tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
- tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_slice():
- data = mx.sym.var('data')
- mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4))
- verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 3))
- mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2))
- verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2))
-
-def test_forward_maximum():
- a = mx.sym.var('a')
- b = mx.sym.var('b')
- dshape = (10, 20)
- dtype = 'float32'
- mx_sym = mx.sym._internal._maximum(a, b)
- np_a = np.random.uniform(size=dshape).astype(dtype)
- np_b = np.random.uniform(size=dshape).astype(dtype)
- mx_a = mx.nd.array(np_a)
- mx_b = mx.nd.array(np_b)
- mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b'])
- mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False)
- mod.init_params()
- args, auxs = mod.get_params()
- mx_out = mx.nd._internal._maximum(mx_a, mx_b).asnumpy()
- out_shape = dshape
- new_sym, params = frontend.from_mxnet(mx_sym, args, auxs)
- shape_dict = {'a': dshape, 'b': dshape}
- for target, ctx in ctx_list():
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input("a", tvm.nd.array(np_a))
- m.set_input("b", tvm.nd.array(np_b))
- m.set_input(**params)
- m.run()
- # get outputs
- tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
- tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_minimum():
- a = mx.sym.var('a')
- b = mx.sym.var('b')
- dshape = (10, 20)
- dtype = 'float32'
- mx_sym = mx.sym._internal._minimum(a, b)
- np_a = np.random.uniform(size=dshape).astype(dtype)
- np_b = np.random.uniform(size=dshape).astype(dtype)
- mx_a = mx.nd.array(np_a)
- mx_b = mx.nd.array(np_b)
- mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b'])
- mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False)
- mod.init_params()
- args, auxs = mod.get_params()
- mx_out = mx.nd._internal._minimum(mx_a, mx_b).asnumpy()
- out_shape = dshape
- new_sym, params = frontend.from_mxnet(mx_sym, args, auxs)
- shape_dict = {'a': dshape, 'b': dshape}
- for target, ctx in ctx_list():
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input("a", tvm.nd.array(np_a))
- m.set_input("b", tvm.nd.array(np_b))
- m.set_input(**params)
- m.run()
- # get outputs
- tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
- tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-
-if __name__ == '__main__':
- test_forward_mlp()
- test_forward_vgg()
- test_forward_resnet()
- test_forward_elu()
- test_forward_rrelu()
- test_forward_prelu()
- test_forward_softrelu()
- test_forward_fc_flatten()
- test_forward_clip()
- test_forward_split()
- test_forward_split_squeeze()
- test_forward_expand_dims()
- test_forward_pooling()
- test_forward_lrn()
- test_forward_ones()
- test_forward_zeros()
- test_forward_ones_like()
- test_forward_zeros_like()
- test_forward_argmax()
- test_forward_argmin()
- test_forward_where()
- test_forward_slice()
- test_forward_maximum()
- test_forward_minimum()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import mxnet as mx
-import nnvm
-from nnvm.compiler import graph_util, graph_attr
-import model_zoo
-
-def compare_graph(sym1, sym2, ishape=(2, 3, 224, 224)):
- g1 = nnvm.graph.create(sym1)
- g2 = nnvm.graph.create(sym2)
- graph_attr.set_shape_inputs(g1, {'data':ishape})
- graph_attr.set_shape_inputs(g2, {'data':ishape})
- g1 = g1.apply("InferShape").apply("SimplifyInference")
- g2 = g2.apply("InferShape").apply("SimplifyInference")
- graph_util.check_graph_equal(g1, g2)
-
-def test_mlp():
- mx_sym = model_zoo.mx_mlp
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_mlp
- compare_graph(from_mx_sym, nnvm_sym)
-
-def test_vgg():
- for n in [11, 13, 16, 19]:
- mx_sym = model_zoo.mx_vgg[n]
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_vgg[n]
- compare_graph(from_mx_sym, nnvm_sym)
-
-def test_resnet():
- for n in [18, 34, 50, 101]:
- mx_sym = model_zoo.mx_resnet[n]
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_resnet[n]
- compare_graph(from_mx_sym, nnvm_sym)
-
-def test_squeezenet():
- for version in ['1.0', '1.1']:
- mx_sym = model_zoo.mx_squeezenet[version]
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_squeezenet[version]
- compare_graph(from_mx_sym, nnvm_sym)
-
-def test_inception_v3():
- mx_sym = model_zoo.mx_inception_v3
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_inception_v3
- compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 3, 299, 299))
-
-def test_dqn():
- mx_sym = model_zoo.mx_dqn
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_dqn
- compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 4, 84, 84))
-
-def test_dcgan():
- mx_sym = model_zoo.mx_dcgan
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = model_zoo.nnvm_dcgan
- compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 100))
-
-def test_multi_outputs():
- def compose(F, **kwargs):
- x = F.sym.Variable('x')
- y = F.sym.Variable('y')
- z = F.sym.split(x, **kwargs)
- return F.sym.broadcast_sub(F.sym.broadcast_add(z[0], z[2]), y)
- mx_sym = compose(mx, num_outputs=3, axis=1)
- from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym)
- nnvm_sym = compose(nnvm, indices_or_sections=3, axis=1)
- compare_graph(from_mx_sym, nnvm_sym)
-
-if __name__ == '__main__':
- test_mlp()
- test_vgg()
- test_resnet()
- test_multi_outputs()
- test_dqn()
- test_dcgan()
- test_squeezenet()
- test_inception_v3()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Store for onnx examples and common models."""
-from __future__ import absolute_import as _abs
-import os
-import logging
-from .super_resolution import get_super_resolution
-from tvm.contrib.download import download_testdata
-
-
-URLS = {
- 'super_resolution.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/super_resolution_0.2.onnx',
- 'squeezenet1_1.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/squeezenet1_1_0.2.onnx',
- 'lenet.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/lenet_0.2.onnx',
- 'resnet18_1_0.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/b385b1b242dc89a35dd808235b885ed8a19aedc1/resnet18_1.0.onnx'}
-
-# download and add paths
-for k, v in URLS.items():
- name = k.split('.')[0]
- relpath = os.path.join('onnx', k)
- abspath = download_testdata(v, relpath, module='onnx')
- locals()[name] = abspath
-
-# symbol for graph comparison
-super_resolution_sym = get_super_resolution()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=unused-argument
-
-"""
-Symbol of SqueezeNet
-
-Reference:
-Iandola, Forrest N., et al.
-"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016).
-"""
-
-from nnvm import symbol as sym
-from nnvm.testing.utils import create_workload
-
-# Helpers
-def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
- net = _make_fire_conv(net, squeeze_channels, 1, 0)
-
- left = _make_fire_conv(net, expand1x1_channels, 1, 0)
- right = _make_fire_conv(net, expand3x3_channels, 3, 1)
- # NOTE : Assume NCHW layout here
- net = sym.concatenate(left, right, axis=1)
-
- return net
-
-def _make_fire_conv(net, channels, kernel_size, padding=0):
- net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size),
- padding=(padding, padding))
- net = sym.relu(net)
- return net
-
-# Net
-def get_symbol(num_classes, version, **kwargs):
- """Get symbol of SqueezeNet
-
- Parameters
- ----------
- num_classes: int
- The number of classification results
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
- """
-    assert version == '1.1', ("Unsupported SqueezeNet version {version}: "
-                              "1.1 expected".format(version=version))
- net = sym.Variable("data")
-
- net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2))
- net = sym.relu(net)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 16, 64, 64)
- net = _make_fire(net, 16, 64, 64)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 32, 128, 128)
- net = _make_fire(net, 32, 128, 128)
- net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 48, 192, 192)
- net = _make_fire(net, 64, 256, 256)
- net = _make_fire(net, 64, 256, 256)
-
- net = sym.dropout(net, rate=0.5)
- net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
- net = sym.relu(net)
- net = sym.global_avg_pool2d(net)
- return sym.softmax(net, axis=1)
-
-def get_workload(batch_size=1, num_classes=1000, version='1.0',
- image_shape=(3, 224, 224), dtype="float32", **kwargs):
- """Get benchmark workload for SqueezeNet
-
- Parameters
- ----------
- batch_size : int
- The batch size used in the model
-
- num_classes : int, optional
- Number of classes
-
- version : str, optional
- "1.0" or "1.1" of SqueezeNet
-
- image_shape : tuple, optional
- The input image shape
-
- dtype : str, optional
- The data type
-
- kwargs : dict
- Extra arguments
-
- Returns
- -------
- net : nnvm.Symbol
- The computational graph
-
- params : dict of str to NDArray
- The parameters.
- """
- net = get_symbol(num_classes=num_classes, version=version, **kwargs)
- return create_workload(net, batch_size, image_shape, dtype)
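
A minimal sketch of consuming this module in a benchmark or test (assumed usage, not from the original file); note that get_symbol above only accepts version '1.1':

# net is an nnvm.Symbol; params maps parameter names to initialized NDArrays.
net, params = get_workload(batch_size=1, num_classes=1000, version='1.1')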
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""NNVM symbol corresponding to super_resolution.onnx example."""
-from nnvm import sym
-
-def get_super_resolution():
- factor = 3
- size = 224
- data = sym.Variable(name='9')
- conv1 = sym.conv2d(data, channels=64, kernel_size=(5, 5), padding=(2, 2), use_bias=False)
- relu1 = sym.relu(conv1 + sym.expand_dims(sym.Variable(name='2', shape=(64)), axis=1, num_newaxis=2))
- conv2 = sym.conv2d(relu1, channels=64, kernel_size=(3, 3), padding=(1, 1), use_bias=False)
- relu2 = sym.relu(conv2 + sym.expand_dims(sym.Variable(name='4', shape=(64)), axis=1, num_newaxis=2))
- conv3 = sym.conv2d(relu2, channels=32, kernel_size=(3, 3), padding=(1, 1), use_bias=False)
- relu3 = sym.relu(conv3 + sym.expand_dims(sym.Variable(name='6', shape=(32)), axis=1, num_newaxis=2))
- conv4 = sym.conv2d(relu3, channels=factor**2, kernel_size=(3, 3), padding=(1, 1), use_bias=False)
- conv4 = conv4 + sym.expand_dims(sym.Variable(name='8', shape=(factor**2)), axis=1, num_newaxis=2)
- # TODO(zhreshold): allow shape inference for batch size > 1
- r1 = sym.reshape(conv4, shape=(1, 1, factor, factor, size, size))
- t1 = sym.transpose(r1, axes=(0, 1, 4, 2, 5, 3))
- r2 = sym.reshape(t1, shape=(1, 1, size * factor, size * factor))
- return r2
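
The reshape/transpose/reshape tail above implements a pixel-shuffle (depth-to-space) upscale by `factor`; a small NumPy sketch of the same index rearrangement, assuming batch size 1 (illustrative only):

import numpy as np

def pixel_shuffle_nchw(x, factor):
    # x has shape (1, factor*factor, size, size); returns (1, 1, size*factor, size*factor),
    # mirroring the reshape -> transpose(0, 1, 4, 2, 5, 3) -> reshape chain above.
    _, _, size, _ = x.shape
    r1 = x.reshape(1, 1, factor, factor, size, size)
    t1 = r1.transpose(0, 1, 4, 2, 5, 3)
    return t1.reshape(1, 1, size * factor, size * factor)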
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-import math
-import nnvm
-import topi
-import topi.testing
-import tvm
-from tvm.contrib import graph_runtime
-from nnvm.testing.config import ctx_list
-import onnx
-from model_zoo import super_resolution, squeezenet1_1, lenet, resnet18_1_0
-from onnx import helper, TensorProto
-
-def get_tvm_output(graph_def, input_data, target, ctx, output_shape=None, output_dtype='float32'):
- """ Generic function to execute and get tvm output"""
-
- sym, params = nnvm.frontend.from_onnx(graph_def)
- target = 'llvm'
- if isinstance(input_data, list):
- input_names = {}
- shape_dict = {}
- dtype_dict = {}
- for i, _ in enumerate(input_data):
- input_names[i] = graph_def.graph.input[i].name
- shape_dict[input_names[i]] = input_data[i].shape
- dtype_dict[input_names[i]] = input_data[i].dtype
- else:
- input_names = graph_def.graph.input[0].name
- shape_dict = {input_names: input_data.shape}
- dtype_dict = {input_names: input_data.dtype}
-
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict,
- dtype=dtype_dict, params=params)
-
- ctx = tvm.cpu(0)
- from tvm.contrib import graph_runtime
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- if isinstance(input_data, list):
- for i, e in enumerate(input_names):
- m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
- else:
- m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
-
- m.set_input(**params)
- # execute
- m.run()
- # get outputs
- if isinstance(output_shape, list) and isinstance(output_dtype, list):
- tvm_output_list = []
- for i, _ in enumerate(output_shape):
- tvm_output = m.get_output(i)
- tvm_output_list.append(tvm_output.asnumpy())
- return tvm_output_list
- else:
- tvm_output = m.get_output(0)
- return tvm_output.asnumpy()
-
-def get_caffe2_output(model, x, dtype='float32'):
- import caffe2.python.onnx.backend
- prepared_backend = caffe2.python.onnx.backend.prepare(model)
- W = {model.graph.input[0].name: x.astype(dtype)}
- c2_out = prepared_backend.run(W)[0]
- return c2_out
-
-
-def verify_onnx_forward_impl(graph_file, data_shape, out_shape):
- dtype = 'float32'
- x = np.random.uniform(size=data_shape)
- model = onnx.load_model(graph_file)
- c2_out = get_caffe2_output(model, x, dtype)
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
- tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
-
-def verify_super_resolution_example():
- verify_onnx_forward_impl(super_resolution, (1, 1, 224, 224), (1, 1, 672, 672))
-
-def verify_squeezenet1_1():
- verify_onnx_forward_impl(squeezenet1_1, (1, 3, 224, 224), (1, 1000))
-
-def verify_lenet():
- verify_onnx_forward_impl(lenet, (1, 1, 28, 28), (1, 10))
-
-def verify_resnet18():
- verify_onnx_forward_impl(resnet18_1_0, (1, 3, 224, 224), (1, 1000))
-
-
-def test_reshape():
- in_shape = (4, 3, 3, 4)
- ref_shape = (3, 4, 4, 3)
-
- ref_array = np.array(ref_shape)
- ref_node = onnx.helper.make_node('Constant',
- inputs=[],
- outputs=['ref_in'],
- value=onnx.helper.make_tensor(name = 'const_tensor',
- data_type = onnx.TensorProto.INT32,
- dims = ref_array.shape,
- vals = ref_array.flatten().astype(int)))
- reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"])
-
- graph = helper.make_graph([ref_node, reshape_node],
- "reshape_test",
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(ref_shape))])
-
- model = helper.make_model(graph, producer_name='reshape_test')
-
- for target, ctx in ctx_list():
- x = np.random.uniform(size=in_shape).astype('int32')
- tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
-
- tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-def test_reshape_like():
- in_shape = (4, 3, 3, 4)
- ref_shape = (3, 4, 4, 3)
-
- ref_array = np.random.uniform(size=ref_shape).astype('float32')
- ref_node = onnx.helper.make_node('Constant',
- inputs=[],
- outputs=['ref_in'],
- value=onnx.helper.make_tensor(name = 'const_tensor',
- data_type = onnx.TensorProto.FLOAT,
- dims = ref_array.shape,
- vals = ref_array.flatten().astype(float)))
- copy_node = helper.make_node("Identity", ["ref_in"], ["copy_in"])
- reshape_node = helper.make_node("Reshape", ["in", "copy_in"], ["out"])
-
- graph = helper.make_graph([ref_node, copy_node, reshape_node],
- "reshape_like_test",
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(ref_shape))])
-
- model = helper.make_model(graph, producer_name='reshape_like_test')
-
- for target, ctx in ctx_list():
- x = np.random.uniform(size=in_shape).astype('float32')
- tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
-
- tvm.testing.assert_allclose(ref_shape, tvm_out.shape)
-
-def _test_power_iteration(x_shape, y_shape):
- if isinstance(y_shape, int):
- y_shape = [y_shape]
-
- x = np.random.uniform(size=x_shape).astype(np.float32)
- y = np.random.uniform(size=y_shape).astype(np.float32)
-
- np_res = np.power(x, y).astype(np.float32)
-
- res = helper.make_node("Pow", ['x', 'y'], ['out'])
-
- graph = helper.make_graph([res],
- 'power_test',
- inputs = [helper.make_tensor_value_info("x",
- TensorProto.FLOAT, list(x_shape)),
- helper.make_tensor_value_info("y",
- TensorProto.FLOAT, list(y_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(np_res.shape))])
-
- model = helper.make_model(graph, producer_name='power_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape)
- tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_power():
- _test_power_iteration((1, 3), (1))
- _test_power_iteration((2, 3), (2, 3))
- _test_power_iteration((2, 3), (1, 3))
-
-def test_squeeze():
- in_shape = (1, 3, 1, 3, 1, 1)
- out_shape = (3, 3)
- y = helper.make_node("Squeeze", ['in'], ['out'], axes=[0, 2, 4, 5])
-
- graph = helper.make_graph([y],
- 'squeeze_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_shape))])
-
- model = helper.make_model(graph, producer_name='squeeze_test')
-
- for target, ctx in ctx_list():
- x = np.random.uniform(size=in_shape).astype('float32')
- tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
-
- tvm.testing.assert_allclose(out_shape, tvm_out.shape)
-
-def test_unsqueeze():
- in_shape = (3, 3)
- axis = (0, 3, 4)
- out_shape = (1, 3, 3, 1, 1)
- y = helper.make_node("Unsqueeze", ['in'], ['out'], axes=list(axis))
-
- graph = helper.make_graph([y],
- 'squeeze_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_shape))])
-
- model = helper.make_model(graph, producer_name='squeeze_test')
-
- for target, ctx in ctx_list():
- x = np.random.uniform(size=in_shape).astype('float32')
- tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
-
- tvm.testing.assert_allclose(out_shape, tvm_out.shape)
-
-def verify_gather(in_shape, indices, axis, dtype):
- x = np.random.uniform(size=in_shape).astype(dtype)
- indices = np.array(indices, dtype="int32")
- out_np = np.take(x, indices, axis=axis)
-
- y = helper.make_node("Gather", ['in', 'indices'], ['out'], axis=axis)
-
- graph = helper.make_graph([y],
- 'gather_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(in_shape)),
- helper.make_tensor_value_info("indices",
- TensorProto.INT32, list(indices.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_np.shape))])
- model = helper.make_model(graph, producer_name='gather_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [x, indices], target, ctx, out_np.shape)
- tvm.testing.assert_allclose(out_np, tvm_out)
-
-def test_gather():
- verify_gather((4,), [1], 0, 'int32')
- verify_gather((1,4), [0], 0, 'int32')
- verify_gather((4,), [[[1,0],[0,1]]], 0, 'float32')
- verify_gather((2,2), [[[1,0],[0,1]]], 1, 'int32')
- verify_gather((3,3,3), [[[1,0]]], -1, 'int32')
- verify_gather((4,3,5,6), [[2,1,0,0]], 0, 'float32')
-
-def _test_slice_iteration(indata, outdata, starts, ends, axes=None):
- if axes:
- y = helper.make_node("Slice", ['in'], ['out'], axes=axes, starts=starts, ends=ends)
- else:
- y = helper.make_node("Slice", ['in'], ['out'], starts=starts, ends=ends)
-
- graph = helper.make_graph([y],
- 'slice_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(outdata.shape))])
-
- model = helper.make_model(graph, producer_name='slice_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32')
-
- tvm.testing.assert_allclose(outdata, tvm_out)
-
-def test_slice():
- x = np.random.randn(20, 10, 5).astype(np.float32)
- _test_slice_iteration(x, x[0:3, 0:10], (0, 0), (3, 10), (0, 1))
- _test_slice_iteration(x, x[:, :, 3:4], (0, 0, 3), (20, 10, 4))
- _test_slice_iteration(x, x[:, 1:1000], (1), (1000), (1))
- _test_slice_iteration(x, x[:, 0:-1], (0), (-1), (1))
-
-def _test_onnx_op_elementwise(inshape, outfunc, npargs, dtype, opname, kwargs, rtol=1e-7, atol=1e-7):
- indata = np.random.uniform(-1, 1, size=inshape).astype(dtype)
- outdata = outfunc(indata, **npargs)
-
- y = helper.make_node(opname, ['in'], ['out'], **kwargs)
-
- graph = helper.make_graph([y],
- opname+'_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(outdata.shape))])
-
- model = helper.make_model(graph, producer_name=opname+'_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, dtype)
-
- tvm.testing.assert_allclose(outdata, tvm_out, rtol=rtol, atol=atol)
-
-def test_floor():
- _test_onnx_op_elementwise((2, 4, 5, 6), np.floor, {}, 'float32', 'Floor', {})
-
-def test_ceil():
- _test_onnx_op_elementwise((2, 4, 5, 6), np.ceil, {}, 'float32', 'Ceil', {})
-
-def test_clip():
- _test_onnx_op_elementwise((2, 4, 5, 6),
- np.clip,
- {'a_min': -1.0, 'a_max': 1.0},
- 'float32',
- 'Clip',
- {'min': -1.0, 'max': 1.0})
-
-def test_matmul():
- a_shape = (4, 3)
- b_shape = (3, 4)
-
- a_array = np.random.uniform(size=a_shape).astype('float32')
- b_array = np.random.uniform(size=b_shape).astype('float32')
- out_np = np.matmul(a_array, b_array)
-
- mul_node = helper.make_node("MatMul", ["a", "b"], ["out"])
-
- graph = helper.make_graph([mul_node],
- "matmul_test",
- inputs = [helper.make_tensor_value_info("a",
- TensorProto.FLOAT, list(a_shape)),
- helper.make_tensor_value_info("b",
- TensorProto.FLOAT, list(b_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_np.shape))])
-
- model = helper.make_model(graph, producer_name='matmul_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_array, b_array], target, ctx, out_np.shape)
- tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
- in_array = np.random.uniform(size=shape).astype(dtype)
-
-    if alpha is None and beta is None and bias is None:
- alpha = 0.0001
- beta = 0.75
- bias = 1.0
- node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], size=nsize)
- else:
- node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], alpha=alpha,
- beta=beta, bias=bias, size=nsize)
-
- graph = helper.make_graph([node],
- "lrn_test",
- inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(shape))],
- outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(shape))])
- model = helper.make_model(graph, producer_name='lrn_test')
-
- def _get_python_lrn():
- square_sum = np.zeros(shape).astype(dtype)
- for n, c, h, w in np.ndindex(in_array.shape):
- square_sum[n, c, h, w] = sum(in_array[n,
- max(0, c - int(math.floor((nsize - 1) / 2))): \
- min(5, c + int(math.ceil((nsize - 1) / 2)) + 1),
- h,
- w] ** 2)
- py_out = in_array / ((bias + (alpha / nsize) * square_sum) ** beta)
- return py_out
-
- for target, ctx in ctx_list():
- new_sym, params = nnvm.frontend.from_onnx(model)
-
- input_name = model.graph.input[0].name
- shape_dict = {input_name: in_array.shape}
- dtype_dict = {input_name: dtype}
- graph, lib, params = nnvm.compiler.build(new_sym, target,
- shape_dict, dtype_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- m.set_input(input_name, tvm.nd.array(in_array.astype(dtype)))
- m.set_input(**params)
- m.run()
- # get outputs
- tvm_out = m.get_output(0, tvm.nd.empty(shape, dtype))
- py_out = _get_python_lrn()
- tvm.testing.assert_allclose(py_out, tvm_out.asnumpy(), rtol=1e-5, atol=1e-5)
-
-def test_lrn():
- verify_lrn((5, 5, 5, 5), 3, 'float32')
- verify_lrn((5, 5, 5, 5), 3, 'float32', alpha=0.0002, beta=0.5, bias=2.0)
-
-def _test_upsample_nearest():
- scale = 2
- in_shape = (1, 1, 3, 3)
- out_shape = (1, 1, 3*scale, 3*scale)
- y = helper.make_node("Upsample", ['in'], ['out'], mode='nearest', scales=[1.0, 1.0, 2.0, 2.0])
-
- in_array = np.random.uniform(size=in_shape).astype(np.float32)
- out_array = topi.testing.upsampling_python(in_array, (scale, scale), "NCHW")
-
- graph = helper.make_graph([y],
- 'upsample_nearest_test',
- inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))])
-
- model = helper.make_model(graph, producer_name='upsample_nearest_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32')
- tvm.testing.assert_allclose(out_array, tvm_out)
-
-def _test_upsample_bilinear():
- scale = 2
- in_shape = (1, 1, 3, 3)
- out_shape = (1, 1, 3*scale, 3*scale)
- y = helper.make_node("Upsample", ['in'], ['out'], mode='linear', scales=[1.0, 1.0, 2.0, 2.0])
-
- in_array = np.random.uniform(size=in_shape).astype(np.float32)
- out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False)
-
- graph = helper.make_graph([y],
- 'upsample_bilinear_test',
- inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))])
-
- model = helper.make_model(graph, producer_name='upsample_bilinear_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32')
- tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
-
-def _test_upsample_bilinear_opset9():
- scale = 2
- in_shape = (1, 1, 3, 3)
- out_shape = (1, 1, 3*scale, 3*scale)
- y = helper.make_node("Upsample", ['in','scales'], ['out'], mode='linear')
- scales=[1.0, 1.0, 2.0, 2.0]
- in_array = np.random.uniform(size=in_shape).astype(np.float32)
- out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False)
-
- ref_array = np.array(scales)
- ref_node = helper.make_node('Constant',
- inputs=[],
- outputs=['scales'],
- value=onnx.helper.make_tensor(name = 'const_tensor',
- data_type = TensorProto.FLOAT,
- dims = ref_array.shape,
- vals = ref_array.flatten().astype(float)))
-
- graph = helper.make_graph([ref_node, y],
- 'upsample_bilinear_opset9_test',
- inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))])
-
- model = helper.make_model(graph, producer_name='upsample_bilinear_opset9_test')
- inputs = []
- inputs.append(in_array)
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, inputs, target, ctx, out_shape, 'float32')
- tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_upsample():
- _test_upsample_nearest()
- _test_upsample_bilinear()
- _test_upsample_bilinear_opset9()
-
-def _test_softmax(inshape, axis):
- opname = 'Softmax'
- indata = np.random.uniform(size=inshape).astype(np.float32)
- outshape = inshape
- outdata = topi.testing.softmax_python(indata)
- if isinstance(axis, int):
- y = helper.make_node(opname, ['in'], ['out'], axis = axis)
- elif axis is None:
- y = helper.make_node(opname, ['in'], ['out'])
-
- graph = helper.make_graph([y],
- opname+'_test',
- inputs = [helper.make_tensor_value_info("in",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(outdata.shape))])
-
- model = helper.make_model(graph, producer_name=opname+'_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, indata, target, ctx, outshape, 'float32')
- tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_softmax():
- _test_softmax((1, 10), None)
- _test_softmax((1, 10), 1)
-
-def verify_min(input_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
- a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.min((a_np1, a_np2, a_np3), axis=0)
-
- min_node = helper.make_node("Min", ["a_np1", "a_np2", "a_np3"], ["out"])
-
- graph = helper.make_graph([min_node],
- "Min_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np2",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np3",
- TensorProto.FLOAT, list(input_dim))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='Min_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_min():
- verify_min((1, 3, 20, 20))
- verify_min((20, 20))
-
-def verify_max(input_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
- a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.max((a_np1, a_np2, a_np3), axis=0)
-
- max_node = helper.make_node("Max", ["a_np1", "a_np2", "a_np3"], ["out"])
-
- graph = helper.make_graph([max_node],
- "Max_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np2",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np3",
- TensorProto.FLOAT, list(input_dim))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='Max_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_max():
- verify_max((1, 3, 20, 20))
- verify_max((20, 20))
-
-def verify_mean(input_dim):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
- a_np2 = np.random.uniform(size=input_dim).astype(dtype)
- a_np3 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.mean((a_np1, a_np2, a_np3), axis=0)
-
- mean_node = helper.make_node("Mean", ["a_np1", "a_np2", "a_np3"], ["out"])
-
- graph = helper.make_graph([mean_node],
- "Mean_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np2",
- TensorProto.FLOAT, list(input_dim)),
- helper.make_tensor_value_info("a_np3",
- TensorProto.FLOAT, list(input_dim))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='Mean_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_mean():
- verify_mean((1, 3, 20, 20))
- verify_mean((20, 20))
-
-def verify_hardsigmoid(input_dim, alpha, beta):
- dtype = 'float32'
-
- a_np1 = np.random.uniform(size=input_dim).astype(dtype)
-
- b_np = np.clip(a_np1 * alpha + beta, 0, 1)
-
- hardsigmoid_node = helper.make_node("HardSigmoid", ["a_np1"], ["out"], alpha=alpha, beta=beta)
-
- graph = helper.make_graph([hardsigmoid_node],
- "HardSigmoid_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.FLOAT, list(input_dim))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='HardSigmoid_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_hardsigmoid():
- verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
- verify_hardsigmoid((20, 20), 0.3, 0.4)
-
-def verify_argmin(input_dim, axis=None, keepdims=None):
- def _argmin_numpy(data, axis=0, keepdims=True):
- result = np.argmin(data, axis=axis)
- if (keepdims == 1):
- result = np.expand_dims(result, axis)
- return result.astype(data.dtype)
-
- a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32)
- if keepdims is None and axis is None:
- b_np = _argmin_numpy(a_np1)
- node = onnx.helper.make_node('ArgMin',
- inputs=['a_np1'],
- outputs=['out'])
- elif axis is None:
- b_np = _argmin_numpy(a_np1, keepdims=keepdims)
- node = onnx.helper.make_node('ArgMin',
- inputs=['a_np1'],
- outputs=['out'],
- keepdims=keepdims)
- elif keepdims is None:
- b_np = _argmin_numpy(a_np1, axis=axis)
- node = onnx.helper.make_node('ArgMin',
- inputs=['a_np1'],
- outputs=['out'],
- axis=axis)
- else:
- b_np = _argmin_numpy(a_np1, axis=axis, keepdims=keepdims)
- node = onnx.helper.make_node('ArgMin',
- inputs=['a_np1'],
- outputs=['out'],
- axis=axis,
- keepdims=keepdims)
- graph = helper.make_graph([node],
- "argmin_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.INT32, list(a_np1.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.INT32, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='argmin_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def verify_argmax(input_dim, axis=None, keepdims=None):
- def _argmax_numpy(data, axis=0, keepdims=True):
- result = np.argmax(data, axis=axis)
- if (keepdims == 1):
- result = np.expand_dims(result, axis)
- return result.astype(data.dtype)
-
- a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32)
-
- if keepdims is None and axis is None:
- b_np = _argmax_numpy(a_np1)
- node = onnx.helper.make_node('ArgMax',
- inputs=['a_np1'],
- outputs=['out'])
- elif axis is None:
- b_np = _argmax_numpy(a_np1, keepdims=keepdims)
- node = onnx.helper.make_node('ArgMax',
- inputs=['a_np1'],
- outputs=['out'],
- keepdims=keepdims)
- elif keepdims is None:
- b_np = _argmax_numpy(a_np1, axis=axis)
- node = onnx.helper.make_node('ArgMax',
- inputs=['a_np1'],
- outputs=['out'],
- axis=axis)
- else:
- b_np = _argmax_numpy(a_np1, axis=axis, keepdims=keepdims)
- node = onnx.helper.make_node('ArgMax',
- inputs=['a_np1'],
- outputs=['out'],
- axis=axis,
- keepdims=keepdims)
-
- graph = helper.make_graph([node],
- "argmax_test",
- inputs = [helper.make_tensor_value_info("a_np1",
- TensorProto.INT32, list(a_np1.shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.INT32, list(b_np.shape))])
-
- model = helper.make_model(graph, producer_name='argmax_test')
-
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
- tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_forward_arg_min_max():
- '''Verify argmin and argmax'''
- verify_argmin([3,4,4])
- verify_argmax([3,4,4])
- verify_argmin([3,4,4], axis=1)
- verify_argmax([3,4,4], axis=0)
- verify_argmin([3,4,4], keepdims=0)
- verify_argmax([3,4,4], keepdims=1)
- for axis in [0,1,2]:
- for keepdims in [True,False]:
- verify_argmin([3,4,4], axis, keepdims)
- verify_argmax([3,4,4], axis, keepdims)
-
-def verify_constantfill(is_shape, input_dim, out_dim, value, dtype, **kwargs):
- input_a = np.random.uniform(size=input_dim).astype(dtype)
- out = np.empty(shape=out_dim, dtype=dtype)
- out.fill(value)
-
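- # ConstantFill has two modes: when is_shape is set the output shape comes from the
- # 'shape' attribute, otherwise it follows the input tensor; 'extra_shape' appends
- # extra trailing dimensions.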
- if is_shape == True:
- fill_node = helper.make_node("ConstantFill", [], ["out"], shape=input_dim, value=value, **kwargs)
- else:
- fill_node = helper.make_node("ConstantFill", ["input_a"], ["out"], value=value, dtype=dtype, **kwargs)
-
- graph = helper.make_graph([fill_node],
- "fill_test",
- inputs = [helper.make_tensor_value_info("input_a",
- TensorProto.FLOAT, list(input_dim))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out.shape))])
-
- model = helper.make_model(graph, producer_name='fill_test')
-
- for target, ctx in ctx_list():
- if is_shape == True:
- tvm_out = get_tvm_output(model, [], target, ctx, out.shape)
- else:
- tvm_out = get_tvm_output(model, [input_a], target, ctx, out.shape)
-
- tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_constantfill():
- verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32')
- verify_constantfill(False, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32')
- verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5, 4, 5, 6), 10, 'float32', extra_shape=(4, 5, 6))
-
-
-def verify_pad(indata, pads, value=0.0):
- indata = np.array(indata).astype(np.float32)
- # expected result computed with numpy
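- # ONNX 'pads' is laid out as [x1_begin, x2_begin, ..., x1_end, x2_end, ...];
- # regroup it into numpy's per-axis (begin, end) pairs.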
- len_dim = len(pads) // 2
- np_pads = [(pads[i], pads[i+len_dim]) for i in range(len_dim)]
- outdata = np.pad(indata, pad_width=np_pads, mode='constant', constant_values=value)
- # onnx graph
- node = helper.make_node(
- 'Pad',
- inputs=['input'],
- outputs=['output'],
- mode='constant',
- pads=pads,
- value=value
- )
- graph = helper.make_graph([node],
- 'pad_test',
- inputs = [helper.make_tensor_value_info("input",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("output",
- TensorProto.FLOAT, list(outdata.shape))])
- model = helper.make_model(graph, producer_name='pad_test')
- # tvm result
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32')
- tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_pad():
- verify_pad(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], 0.0)
- verify_pad(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], 0.0)
- verify_pad(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], 5.0)
-
-def verify_reduce_x(name, indata, axis, keepdims):
- indata = np.array(indata).astype(np.float32)
- # expected result computed with numpy
- if name == 'ReduceMax':
- outdata = np.maximum.reduce(indata, axis=axis, keepdims=keepdims == 1)
- elif name == 'ReduceMin':
- outdata = np.minimum.reduce(indata, axis=axis, keepdims=keepdims == 1)
- elif name == 'ReduceSum':
- outdata = np.sum(indata, axis=axis, keepdims=keepdims == 1)
- elif name == 'ReduceMean':
- outdata = np.mean(indata, axis=axis, keepdims=keepdims == 1)
- else:
- raise Exception('unsupported op: {}'.format(name))
- if len(np.asarray(outdata).shape) == 0:
- outdata = np.asarray([outdata])
- # onnx graph
- if axis is None:
- node = helper.make_node(name, inputs=['input'], outputs=['output'],
- keepdims=keepdims)
- else:
- node = helper.make_node(name, inputs=['input'], outputs=['output'],
- axis=axis, keepdims=keepdims)
- graph = helper.make_graph([node],
- '{}_test'.format(name),
- inputs = [helper.make_tensor_value_info("input",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("output",
- TensorProto.FLOAT, list(outdata.shape))])
- model = helper.make_model(graph, producer_name='{}_test'.format(name))
- # tvm result
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32')
- tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
-
-def test_reduce_max():
- verify_reduce_x("ReduceMax",
- np.random.randn(3, 2, 2).astype(np.float32),
- axis=None, keepdims=1)
- verify_reduce_x("ReduceMax",
- np.random.randn(3, 2, 3).astype(np.float32),
- axis=None, keepdims=0)
- verify_reduce_x("ReduceMax",
- np.random.randn(3, 3, 3).astype(np.float32),
- axis=(1,), keepdims=1)
-
-def test_reduce_min():
- verify_reduce_x("ReduceMin",
- np.random.randn(3, 2, 2).astype(np.float32),
- axis=None, keepdims=1)
- verify_reduce_x("ReduceMin",
- np.random.randn(3, 2, 3).astype(np.float32),
- axis=None, keepdims=0)
- verify_reduce_x("ReduceMin",
- np.random.randn(3, 3, 3).astype(np.float32),
- axis=(1,), keepdims=1)
-
-def test_reduce_sum():
- verify_reduce_x("ReduceSum",
- np.random.randn(3, 2, 2).astype(np.float32),
- axis=None, keepdims=1)
- verify_reduce_x("ReduceSum",
- np.random.randn(3, 2, 3).astype(np.float32),
- axis=None, keepdims=0)
- verify_reduce_x("ReduceSum",
- np.random.randn(3, 3, 3).astype(np.float32),
- axis=(1,), keepdims=1)
-
-def test_reduce_mean():
- verify_reduce_x("ReduceMean",
- np.random.randn(3, 2, 2).astype(np.float32),
- axis=None, keepdims=1)
- verify_reduce_x("ReduceMean",
- np.random.randn(3, 2, 3).astype(np.float32),
- axis=None, keepdims=0)
- verify_reduce_x("ReduceMean",
- np.random.randn(3, 3, 3).astype(np.float32),
- axis=(1,), keepdims=1)
-
-def verify_split(indata, outdatas, split, axis=0):
- indata = np.array(indata).astype(np.float32)
- outdatas = [np.array(o).astype(np.float32) for o in outdatas]
- node = helper.make_node(
- 'Split',
- inputs=['input'],
- outputs=['output_{}'.format(i) for i in range(len(split))],
- axis=axis,
- split=split
- )
- graph = helper.make_graph([node],
- 'split_test',
- inputs = [helper.make_tensor_value_info("input",
- TensorProto.FLOAT, list(indata.shape))],
- outputs = [helper.make_tensor_value_info("output_{}".format(i),
- TensorProto.FLOAT, list(outdatas[i].shape))
- for i in range(len(split))
- ])
- model = helper.make_model(graph, producer_name='split_test')
-
- for target, ctx in ctx_list():
- output_shape = [o.shape for o in outdatas]
- output_type = ['float32', 'float32', 'float32']
- tvm_out = get_tvm_output(model, indata, target, ctx, output_shape, output_type)
- for o, t in zip(outdatas, tvm_out):
- tvm.testing.assert_allclose(o, t)
-
-def test_split():
- # 1D
- verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3., 4.], [5., 6.]], [2, 2, 2], 0)
- verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3.], [4., 5., 6.]], [2, 1, 3], 0)
- # 2D
- verify_split([[1., 2., 3., 4.], [7., 8., 9., 10.]],
- [[[1., 2.], [7., 8.]], [[3., 4.], [9., 10.]]], [2, 2], 1)
-
-def test_binary_ops():
- in_shape = (1, 2, 3, 3)
- dtype = "float32"
- out_shape = in_shape
-
- def verify_binary_ops(op, x, y, out_np, broadcast=None, rtol=1e-7, atol=1e-7):
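- # 'broadcast' is the legacy (pre-opset-7) ONNX attribute on binary ops; when set,
- # the second input may have a smaller, broadcastable shape.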
- if broadcast is None:
- z = helper.make_node(op, ['in1', 'in2'], ['out'])
- else:
- z = helper.make_node(op, ['in1', 'in2'], ['out'], broadcast=1)
- graph = helper.make_graph([z],
- '_test',
- inputs = [helper.make_tensor_value_info("in1",
- TensorProto.FLOAT, list(in_shape)),
- helper.make_tensor_value_info("in2",
- TensorProto.FLOAT, list(in_shape))],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_shape))])
- model = helper.make_model(graph, producer_name='_test')
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [x, y], target, ctx)
- tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol)
-
- x = np.random.uniform(size=in_shape).astype(dtype)
- y = np.random.uniform(size=in_shape).astype(dtype)
- z = np.random.uniform(size=(3,)).astype(dtype)
- verify_binary_ops("Add",x, y, x + y, broadcast=None)
- verify_binary_ops("Add", x, z, x + z, broadcast=True)
- verify_binary_ops("Sub", x, y, x - y, broadcast=None)
- verify_binary_ops("Sub", x, z, x - z, broadcast=True)
- verify_binary_ops("Mul",x, y, x * y, broadcast=None)
- verify_binary_ops("Mul", x, z, x * z, broadcast=True)
- verify_binary_ops("Div", x, y, x / y, broadcast=None, rtol=1e-5, atol=1e-5)
- verify_binary_ops("Div", x, z, x / z, broadcast=True, rtol=1e-5, atol=1e-5)
- verify_binary_ops("Sum", x, y, x + y, broadcast=None)
-
-def test_single_ops():
- in_shape = (1, 2, 3, 3)
- dtype = "float32"
- out_shape = in_shape
-
- def verify_single_ops(op, x, out_np, rtol=1e-7, atol=1e-7):
- z = helper.make_node(op, ['in1'], ['out'])
- graph = helper.make_graph([z],
- '_test',
- inputs = [helper.make_tensor_value_info("in1",
- TensorProto.FLOAT, list(in_shape)),],
- outputs = [helper.make_tensor_value_info("out",
- TensorProto.FLOAT, list(out_shape))])
- model = helper.make_model(graph, producer_name='_test')
- for target, ctx in ctx_list():
- tvm_out = get_tvm_output(model, [x], target, ctx)
- tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol)
-
- x = np.random.uniform(size=in_shape).astype(dtype)
- verify_single_ops("Neg",x, -x)
- verify_single_ops("Abs",x, np.abs(x))
- verify_single_ops("Reciprocal",x, 1/x, rtol=1e-5, atol=1e-5)
- verify_single_ops("Sqrt",x, np.sqrt(x), rtol=1e-5, atol=1e-5)
- verify_single_ops("Relu",x, np.maximum(x, 0))
- verify_single_ops("Exp",x, np.exp(x), rtol=1e-5, atol=1e-5)
- verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5)
- verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5)
- verify_single_ops("Tanh",x, np.tanh(x), rtol=1e-5, atol=1e-5)
- verify_single_ops("Sigmoid",x, 1 / (1 + np.exp(-x)), rtol=1e-5, atol=1e-5)
- verify_single_ops("Softsign",x, x / (1 + np.abs(x)), rtol=1e-5, atol=1e-5)
- verify_single_ops("SoftPlus",x, np.log(1 + np.exp(x)), rtol=1e-5, atol=1e-5)
-
-def test_leaky_relu():
- def leaky_relu_x(x, alpha):
- return np.where(x >= 0, x, x * alpha)
- _test_onnx_op_elementwise((2, 4, 5, 6),
- leaky_relu_x,
- {'alpha': 0.25},
- 'float32',
- 'LeakyRelu',
- {'alpha': 0.25})
-
-def test_elu():
- def elu_x(x, alpha):
- return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
- _test_onnx_op_elementwise((2, 4, 5, 6),
- elu_x,
- {'alpha': 0.25},
- 'float32',
- 'Elu',
- {'alpha': 0.25})
-
-def test_selu():
- def selu_x(x, alpha, gamma):
- return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))
- _test_onnx_op_elementwise((2, 4, 5, 6),
- selu_x,
- {'alpha': 0.25, 'gamma': 0.3},
- 'float32',
- 'Selu',
- {'alpha': 0.25, 'gamma': 0.3})
-
-def test_ThresholdedRelu():
- def ThresholdedRelu_x(x, alpha):
- out_np = np.clip(x, alpha, np.inf)
- out_np[out_np == alpha] = 0
- return out_np
- _test_onnx_op_elementwise((2, 4, 5, 6),
- ThresholdedRelu_x,
- {'alpha': 0.25},
- 'float32',
- 'ThresholdedRelu',
- {'alpha': 0.25})
-
-def test_ScaledTanh():
- def ScaledTanh_x(x, alpha, beta):
- return alpha * np.tanh(beta * x)
- _test_onnx_op_elementwise((2, 4, 5, 6),
- ScaledTanh_x,
- {'alpha': 0.25, 'beta': 0.3},
- 'float32',
- 'ScaledTanh',
- {'alpha': 0.25, 'beta': 0.3})
-
-def test_ParametricSoftplus():
- def ParametricSoftplus_x(x, alpha, beta):
- return alpha * np.log(np.exp(beta * x) + 1)
- _test_onnx_op_elementwise((2, 4, 5, 6),
- ParametricSoftplus_x,
- {'alpha': 0.25, 'beta': 0.3},
- 'float32',
- 'ParametricSoftplus',
- {'alpha': 0.25, 'beta': 0.3})
-
-def test_Scale():
- def Scale_x(x, scale):
- return scale * x
- _test_onnx_op_elementwise((2, 4, 5, 6),
- Scale_x,
- {'scale': 0.25},
- 'float32',
- 'Scale',
- {'scale': 0.25})
-
-def test_LogSoftmax():
- _test_onnx_op_elementwise((1, 4),
- topi.testing.log_softmax_python,
- {},
- 'float32',
- 'LogSoftmax',
- {'axis': 1},
- rtol=1e-5,
- atol=1e-5)
-
-if __name__ == '__main__':
- # verify_super_resolution_example()
- # verify_squeezenet1_1()
- # verify_lenet()
- verify_resnet18()
- test_reshape()
- test_reshape_like()
- test_power()
- test_squeeze()
- test_unsqueeze()
- test_slice()
- test_floor()
- test_ceil()
- test_clip()
- test_matmul()
- test_gather()
- test_lrn()
- test_upsample()
- test_forward_min()
- test_forward_max()
- test_forward_mean()
- test_forward_hardsigmoid()
- test_forward_arg_min_max()
- test_softmax()
- test_constantfill()
- test_pad()
- test_reduce_max()
- test_reduce_min()
- test_reduce_sum()
- test_reduce_mean()
- test_split()
- test_binary_ops()
- test_single_ops()
- test_leaky_relu()
- test_elu()
- test_selu()
- test_ThresholdedRelu()
- test_ScaledTanh()
- test_ParametricSoftplus()
- test_Scale()
- test_LogSoftmax()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=import-self, invalid-name, unused-argument
-"""
-Tensorflow testcases
-====================
-This script tests TensorFlow operators with the NNVM frontend.
-"""
-from __future__ import print_function
-import numpy as np
-import nnvm.compiler
-import tvm
-import tensorflow as tf
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import graph_util
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops import init_ops
-from tensorflow.core.framework import graph_pb2
-
-import tvm.relay.testing.tf as tf_testing
-
-#######################################################################
-# Generic run functions for TVM & tensorflow
-# ------------------------------------------
-def convert_to_list(x):
- if not isinstance(x, list):
- x = [x]
- return x
-
-def run_tvm_graph(graph_def, input_data, input_node, num_output=1, target='llvm', out_names=None):
- """ Generic function to compile on nnvm and execute on tvm """
- input_data = convert_to_list(input_data)
- input_node = convert_to_list(input_node)
-
- layout = None
- if target == "cuda":
- layout = "NCHW"
- target_host = 'llvm'
-
- if isinstance(input_data, list):
- shape_dict = {}
- dtype_dict = {}
- for i, e in enumerate(input_node):
- shape_dict[e] = input_data[i].shape
- dtype_dict[e] = input_data[i].dtype
- else:
- shape_dict = {input_node: input_data.shape}
- dtype_dict = {input_node: input_data.dtype}
-
- sym, params = nnvm.frontend.from_tensorflow(graph_def, layout=layout, shape=shape_dict, outputs=out_names)
- graph, lib, params = nnvm.compiler.build(sym, target=target, target_host=target_host, shape=shape_dict,
- dtype=dtype_dict, params=params)
-
- ctx = tvm.context(target, 0)
- from tvm.contrib import graph_runtime
- m = graph_runtime.create(graph, lib, ctx)
- # set inputs
- for i, e in enumerate(input_node):
- m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
-
- m.set_input(**params)
- # execute
- m.run()
- # get outputs
- assert out_names is None or num_output == len(out_names),"out_names: {} num_output: {}".format(
- out_names, num_output)
- tvm_output_list = []
- for i in range(0, num_output):
- tvm_output = m.get_output(i)
- tvm_output_list.append(tvm_output.asnumpy())
- return tvm_output_list
-
-def run_tf_graph(sess, input_data, input_node, output_node):
- """ Generic function to execute tensorflow """
- input_data = convert_to_list(input_data)
- input_node = convert_to_list(input_node)
- output_node = convert_to_list(output_node)
-
- tensor = [0] * len(output_node)
- for i in range(len(output_node)):
- tensor[i] = sess.graph.get_tensor_by_name(output_node[i])
-
- input_dict = {}
- for i, e in enumerate(input_node):
- input_dict[e] = input_data[i]
-
- output_data = sess.run(tensor, input_dict)
- return output_data
-
-
-def compare_tf_with_tvm(in_data, in_name, out_name, init_global_variables=False, no_gpu=False):
- """Generic function to generate and compare tensorflow and TVM output"""
-
- out_name = convert_to_list(out_name)
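- # Tensor names look like 'node:0'; drop the output-index suffix to get node names.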
- out_node = [0]*len(out_name)
- for i in range(len(out_name)):
- out_node[i] = out_name[i].split(':')[0] if ":" in out_name[i] else out_name[i]
-
- in_data = convert_to_list(in_data)
- in_name = convert_to_list(in_name)
- in_node = [0]*len(in_name)
- for i in range(len(in_name)):
- in_node[i] = in_name[i].split(':')[0] if ":" in in_name[i] else in_name[i]
-
- with tf.Session() as sess:
- if init_global_variables:
- sess.run(variables.global_variables_initializer())
- final_graph_def = tf.graph_util.convert_variables_to_constants(
- sess,
- sess.graph.as_graph_def(add_shapes=True),
- out_node,
- )
- tf_output = run_tf_graph(sess, in_data, in_name, out_name)
-
- for device in ["llvm", "cuda"]:
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- continue
- if no_gpu and device == 'cuda':
- continue
-
- tvm_output = run_tvm_graph(final_graph_def, in_data, in_node,
- num_output=len(out_node), target=device, out_names=out_name)
- # Since the output names from the tensorflow and nnvm runs do not match exactly,
- # only the first len(tf_output) outputs are compared.
- for i in range(len(tf_output)):
- tvm.testing.assert_allclose(tf_output[i], tvm_output[i], atol=1e-5, rtol=1e-5)
-
- sess.close()
-
-def is_gpu_available():
- from tensorflow.python.client import device_lib
- local_device_protos = device_lib.list_local_devices()
- gpu_list = [x.name for x in local_device_protos if x.device_type == 'GPU']
- if len(gpu_list) > 0:
- print("Tensorflow GPU:", gpu_list)
- return True
- else:
- return False
-
-#######################################################################
-# Pooling
-# -------
-def _test_pooling_iteration(input_shape, **kwargs):
- """ One iteration of pool operation with given shapes and attributes """
-
- x = -np.arange(
- np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=input_shape, dtype='float32')
- nn_ops.pool(in_data, **kwargs)
-
- if kwargs['pooling_type'] == 'MAX':
- out_name = 'max_pool:0'
- else:
- out_name = 'avg_pool:0'
-
- compare_tf_with_tvm(x, 'Placeholder:0', out_name)
-
-def _test_pooling(input_shape, **kwargs):
- _test_pooling_iteration(input_shape, **kwargs)
-
- if is_gpu_available():
- input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)]
- kwargs['data_format'] = 'NCHW'
- _test_pooling_iteration(input_shape, **kwargs)
-
-def test_forward_pooling():
- """ Pooling """
-
- for pool_type in ['AVG', 'MAX']:
- _test_pooling(input_shape=[2, 9, 10, 2],
- window_shape=[1, 1],
- padding='SAME',
- pooling_type=pool_type,
- dilation_rate=[1, 1],
- strides=[1, 1])
-
- _test_pooling(input_shape=[2, 10, 9, 2],
- window_shape=[1, 1],
- padding='SAME',
- pooling_type=pool_type,
- dilation_rate=[1, 1],
- strides=[1, 1])
-
- _test_pooling(input_shape=[2, 9, 10, 2],
- window_shape=[2, 1],
- padding='SAME',
- pooling_type=pool_type,
- dilation_rate=[1, 1],
- strides=[1, 1])
-
- _test_pooling(input_shape=[2, 10, 9, 2],
- window_shape=[2, 3],
- padding='SAME',
- pooling_type=pool_type,
- dilation_rate=[1, 1],
- strides=[2, 1])
-
-#######################################################################
-# Convolution
-# -----------
-
-def _test_convolution(tensor_in_sizes, filter_in_sizes,
- dilations, strides, padding, data_format):
- """ One iteration of convolution with given shapes and attributes """
-
- total_size_1 = 1
- total_size_2 = 1
- for s in tensor_in_sizes:
- total_size_1 *= s
- for s in filter_in_sizes:
- total_size_2 *= s
- # Initialize the input tensor with an array of incrementing
- # numbers starting from 1.
- data_array = [f * 1.0 for f in range(1, total_size_1 + 1)]
- filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)]
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32')
- in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype='float32')
- if data_format == 'NHWC':
- strides = [1] + strides + [1]
- dilations = [1] + dilations + [1]
- else:
- strides = [1, 1] + strides
- dilations = [1, 1] + dilations
-
- nn_ops.conv2d(in_data,
- in_filter,
- strides=strides,
- padding=padding,
- data_format=data_format)
-
- compare_tf_with_tvm(np.reshape(data_array, tensor_in_sizes).astype('float32'),
- 'Placeholder:0', 'Conv2D:0')
-
-def test_forward_convolution():
- if is_gpu_available():
- _test_convolution([4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NCHW')
- _test_convolution([4, 19, 17, 17], [3, 3, 19, 19], [1, 1], [2, 2], 'VALID', 'NCHW')
- _test_convolution([4, 124, 17, 17], [1, 1, 124, 19], [1, 1], [1, 1], 'SAME', 'NCHW')
- _test_convolution([4, 12, 17, 17], [3, 3, 12, 32], [1, 1], [2, 2], 'VALID', 'NCHW')
-
- _test_convolution([4, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NHWC')
- _test_convolution([4, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], 'VALID', 'NHWC')
- _test_convolution([4, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], 'SAME', 'NHWC')
- _test_convolution([4, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], 'VALID', 'NHWC')
-
-#######################################################################
-# Reshape
-# -------
-
-def _test_reshape(data, out_shape):
- """ One iteration of reshape operation with given data and out shape """
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
- array_ops.reshape(in_data, out_shape)
-
- compare_tf_with_tvm(data, 'Placeholder:0', 'Reshape:0')
-
-def test_forward_reshape():
- _test_reshape(np.arange(6.0), [2, 3])
- _test_reshape(np.arange(6), [-1, 2])
- _test_reshape(np.arange(6), [3, -1])
- _test_reshape(np.arange(6), [-1])
-
-#######################################################################
-# Squeeze
-# -------
-
-def _test_squeeze(data, squeeze_dims=None):
- """ One iteration of squeeze """
-
- if squeeze_dims is None:
- squeeze_dims = []
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
-
- if squeeze_dims:
- array_ops.squeeze(in_data, squeeze_dims)
- else:
- array_ops.squeeze(in_data)
-
- compare_tf_with_tvm(data, 'Placeholder:0', 'Squeeze:0')
-
-def test_forward_squeeze():
- """ Squeeze """
-
- # Nothing to squeeze.
- _test_squeeze(np.arange(2).reshape((2)))
- _test_squeeze(np.arange(6).reshape((2, 3)))
-
- # Squeeze the middle element away.
- _test_squeeze(np.arange(4).reshape((2, 1, 2)))
-
- # Squeeze on both ends.
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)))
-
- # Positive squeeze dim index.
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0])
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [2, 4])
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0, 4, 2])
-
- # Negative squeeze dim index.
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-1])
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5])
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1])
-
-#######################################################################
-# ConcatV2
-# --------
-
-def _test_concat_v2(data, dim):
- """ One iteration of ConcatV2 """
-
- with tf.Graph().as_default():
- gen_array_ops._concat_v2(data, dim)
-
- compare_tf_with_tvm(data, ['ConcatV2/values_0:0', 'ConcatV2/values_1:0'],
- 'ConcatV2:0')
-
-def _test_forward_concat_v2():
- t1 = np.array([])
- t2 = np.array([])
- _test_concat_v2([t1, t2], 0)
-
- t1 = np.array([[1, 2, 3], [4, 5, 6]])
- t2 = np.array([[7, 8, 9], [10, 11, 12]])
-
- _test_concat_v2([t1, t2], 1)
-
-#######################################################################
-# Sigmoid
-# -------
-
-def _test_sigmoid(data):
- """ One iteration of sigmoid """
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
- sigmoid_out = math_ops.sigmoid(in_data)
-
- compare_tf_with_tvm(data, 'Placeholder:0', 'Sigmoid:0')
-
-def test_forward_sigmoid():
- """ Sigmoid """
-
- _test_sigmoid(np.random.uniform(size=(3, 4, 4, 3)).astype('float32'))
-
-#######################################################################
-# Argmin/Argmax
-# -------------
-
-def _test_argx(func, data, **kwargs):
-
- with tf.Graph().as_default():
- inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0")
- func(inp, name="argx0", **kwargs, output_type=tf.int32)
-
- compare_tf_with_tvm(data, 'c0:0', 'argx0:0')
-
-def test_forward_argminmax():
- for axis in [None,0,1,2]:
- data = np.random.uniform(size=(8,4,9)).astype('float32')
- _test_argx(tf.argmax, data=data, axis=axis)
- _test_argx(tf.argmin, data=data, axis=axis)
-
-#######################################################################
-# Reduce
-# ------
-
-def _test_reduce(func, data, **kwargs):
- """ One iteration of a reduce operation"""
-
- with tf.Graph().as_default():
- inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0")
- func(inp, name="reducex0", **kwargs)
-
- compare_tf_with_tvm(data, 'c0:0', 'reducex0:0')
-
-def test_forward_reduce():
- data = np.random.uniform(size=(8,4,9)).astype('float32')
- _test_reduce(tf.reduce_sum, data=data)
- _test_reduce(tf.reduce_sum, data=data, axis=0)
- _test_reduce(tf.reduce_sum, data=data, axis=(0,1))
-
-
-#######################################################################
-# Variable
-# --------
-
-def _test_variable(data):
- """ One iteration of a variable """
-
- tf.reset_default_graph()
- input_op = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
- input_tensor = array_ops.reshape(input_op, data.shape)
-
- size = input_tensor.shape.dims[1]
- with variable_scope.variable_scope("linear", reuse=None):
- w = variable_scope.get_variable(
- "w", shape=[size, size], dtype=input_tensor.dtype)
- math_ops.matmul(input_tensor, w)
-
- compare_tf_with_tvm(data, 'Placeholder:0', 'MatMul:0', init_global_variables=True)
-
-def test_forward_variable():
- """Variable type op test"""
- _test_variable(np.random.uniform(size=(32, 100)).astype('float32'))
-
-
-#######################################################################
-# StridedSlice
-# ------------
-
-def _test_stridedslice(ip_shape, begin, end, stride, dtype,
- begin_mask=0, end_mask=0, new_axis_mask=0,
- shrink_axis_mask=0, ellipsis_mask=0):
- """ One iteration of a Stridedslice """
-
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, ip_shape, name="in_data")
- tf.strided_slice(in_data, begin, end, stride, begin_mask=begin_mask,
- end_mask=end_mask, new_axis_mask=new_axis_mask,
- shrink_axis_mask=shrink_axis_mask,
- ellipsis_mask=ellipsis_mask, name="strided_slice")
- np_data = np.random.uniform(size=ip_shape).astype(dtype)
-
- compare_tf_with_tvm(np_data, 'in_data:0', 'strided_slice:0')
-
-def test_forward_stridedslice():
- '''test StridedSlice'''
-
- _test_stridedslice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], 'float32')
- _test_stridedslice((3, 4, 3), [1, 0], [4, 3], [2, 1], 'float32', ellipsis_mask=8)
- _test_stridedslice((3, 4, 3), [1, 0], [4, 2], [2, 1], 'float32', ellipsis_mask=2)
- _test_stridedslice((3, 4, 5, 3), [1, 0], [4, 2], [2, 1], 'float32', ellipsis_mask=2)
- _test_stridedslice((3, 4, 5, 3), [1, 0, 1], [4, 2, 2], [2, 1, 1], 'float32', ellipsis_mask=2)
- _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 2], [2, 1, 1], 'float32', new_axis_mask=5)
- _test_stridedslice((3, 4, 3), [1, 1, 1], [4, 4, 1], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=4)
- _test_stridedslice((6, 4, 5), [1, 1, 1], [6, 3, 4], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=5)
- _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=4, new_axis_mask=2)
- _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=3)
- _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 1], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=3)
- _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=2)
- _test_stridedslice((3,4), [1, 0], [4, 4], [1, 1], 'float32', shrink_axis_mask=2)
- _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=2, new_axis_mask=2)
- _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=1, new_axis_mask=2)
- _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=2, new_axis_mask=1)
- _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0], [2, 3], [1, 1], 'float32', shrink_axis_mask=5, new_axis_mask=1)
- _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1],
- 'float32', shrink_axis_mask=5, new_axis_mask=1, ellipsis_mask=2, begin_mask=8, end_mask=8)
- _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1],
- 'float32', shrink_axis_mask=8, new_axis_mask=1, ellipsis_mask=2, begin_mask=5, end_mask=5)
- _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1],
- 'float32', shrink_axis_mask=16, new_axis_mask=1, ellipsis_mask=2, begin_mask=5, end_mask=5)
- _test_stridedslice((3, 4, 5, 4, 5, 6), [1, 2, 0, -3], [4, 5, 3, 3], [2, 2, 1, 1],
- 'float32', shrink_axis_mask=8, new_axis_mask=1, ellipsis_mask=2, begin_mask=5,
- end_mask=8)
- _test_stridedslice((1), [0], [1], [1], 'float32', shrink_axis_mask=1)
-
-
-#######################################################################
-# Gather
-# ------
-
-def _test_gather(ip_shape, indice_shape, indice_value, axis, dtype):
- """ One iteration of a Gather """
-
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, ip_shape, name="in_data")
- indices = tf.placeholder("int32", indice_shape, name="indices")
- tf.gather(in_data, indices, axis=axis)
- np_data = np.random.uniform(size=ip_shape).astype(dtype)
-
- def _fill_indices(indice_value):
- if isinstance(indice_value, int):
- indices = np.array([indice_value], dtype='int32')
- else:
- indices = np.asarray(indice_value, dtype='int32')
- return indices
- np_indices = _fill_indices(indice_value)
-
- compare_tf_with_tvm([np_data, np_indices], ['in_data:0', 'indices:0'], 'GatherV2:0')
-
-def test_forward_gather():
- '''test gather layer'''
- _test_gather((4,), (1,), 1, 0, 'int32')
- _test_gather((4,), (1,), 1, 0, 'float32')
- _test_gather((1,4), (1,), [0], 0, 'int32')
- _test_gather((4,), (1,2,2), [[[1,0],[0,1]]], 0, 'float32')
- _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 0, 'int32')
- _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 1, 'int32')
- _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 0, 'float32')
- _test_gather((3,3,3), (1,1,2), [[[1,0]]], 0, 'int32')
- _test_gather((3,3,3), (1,1,2), [[[1,0]]], 2, 'int32')
- _test_gather((4,3,5,6), (1,4), [[2,1,0,0]], 0, 'float32')
-
-
-#######################################################################
-# Split
-# -----
-
-def _test_split(in_shape, axis, num_or_size_splits, dtype):
- """ One iteration of a Split """
- np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype)
-
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, in_shape, name="in_data")
- num_split = len(num_or_size_splits) if isinstance(num_or_size_splits, list) else num_or_size_splits
- tf.split(in_data, num_or_size_splits, axis=axis)
-
- compare_tf_with_tvm([np_data], ['in_data:0'], ['split:{0}'.format(n) for n in range(num_split)])
-
- # and now test together with concat
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, in_shape, name="in_data")
- splitted = tf.split(in_data, num_or_size_splits, axis=axis)
- tf.concat(splitted, axis)
-
- compare_tf_with_tvm([np_data], 'in_data:0', 'concat:0')
-
-def test_forward_split():
- '''test split layer'''
- # rank 1
- _test_split((3,), 0, 1, 'float32')
- _test_split((3,), 0, 3, 'float32')
- _test_split((6,), 0, 3, 'float32')
- # rank 2
- _test_split((6, 2), 0, 3, 'float32')
- _test_split((2, 6), 1, 6, 'float32')
- # rank 3
- _test_split((6, 2, 4), 0, 2, 'int32')
- _test_split((2, 6, 4), 1, 3, 'float32')
- _test_split((2, 4, 6), 2, 1, 'float32')
- # rank 4
- _test_split((6, 1, 3, 5), 0, 3, 'float32')
- _test_split((1, 6, 3, 5), 1, 3, 'float32')
- _test_split((1, 3, 6, 5), 2, 3, 'float32')
- _test_split((1, 3, 5, 6), 3, 3, 'float32')
- # split along negative axis
- _test_split((6, 1, 3, 5), -4, 3, 'float32')
- _test_split((1, 6, 3, 5), -3, 3, 'float32')
- _test_split((1, 3, 6, 5), -2, 3, 'float32')
- _test_split((1, 3, 5, 6), -1, 3, 'float32')
- # size_splits list
- _test_split((6,), 0, [1, 2, 3], 'int32')
- _test_split((3, 6, 4), -2, [1, 4, 1], 'float32')
-
-
-#######################################################################
-# Unstack
-# -------
-
-def _test_unstack(ip_shape, axis, dtype):
- np_data = np.random.uniform(-5, 5, size=ip_shape).astype(dtype)
-
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, ip_shape, name="in_data")
- tf.unstack(in_data, axis=axis)
-
- compare_tf_with_tvm([np_data], ['in_data:0'], ['unstack:{0}'.format(n) for n in range(ip_shape[axis])])
-
- tf.reset_default_graph()
- in_data = tf.placeholder(dtype, ip_shape, name="in_data")
- tf.stack(tf.unstack(in_data, axis=axis), axis=axis)
-
- compare_tf_with_tvm([np_data], ['in_data:0'], 'stack:0')
-
-def test_forward_unstack():
- '''test unstack layer'''
- _test_unstack((6,), 0, 'int32')
- _test_unstack((2,6), 1, 'float64')
- # negative axis
- _test_unstack((1,4), -1, 'int32')
- _test_unstack((3,6,4), -2, 'float32')
-
-
-#######################################################################
-# Multi Input to graph
-# --------------------
-
-def test_forward_multi_input():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.int32, shape=[3, 3], name='in1')
- in2 = tf.placeholder(tf.int32, shape=[3, 3], name='in2')
- in3 = tf.placeholder(tf.int32, shape=[3, 3], name='in3')
- in4 = tf.placeholder(tf.int32, shape=[3, 3], name='in4')
-
- out1 = tf.add(in1, in2, name='out1')
- out2 = tf.subtract(in3, in4, name='out2')
- out = tf.multiply(out1, out2, name='out')
- in_data = np.arange(9, dtype='int32').reshape([3, 3])
-
- compare_tf_with_tvm([in_data, in_data, in_data, in_data],
- ['in1:0', 'in2:0', 'in3:0', 'in4:0'], 'out:0')
-
-#######################################################################
-# Multi Output to Graph
-# ---------------------
-
-def test_forward_multi_output():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.int32, shape=[3, 3], name='in1')
- in2 = tf.placeholder(tf.int32, shape=[3, 3], name='in2')
- in3 = tf.placeholder(tf.int32, shape=[3, 3], name='in3')
- in4 = tf.placeholder(tf.int32, shape=[3, 3], name='in4')
-
- out1 = tf.add(in1, in2, name='out1')
- out2 = tf.subtract(in3, in4, name='out2')
- in_data = np.arange(9, dtype='int32').reshape([3, 3])
- in_data = [in_data] * 4
- in_name = ['in1:0', 'in2:0', 'in3:0', 'in4:0']
- out_name = ['out1:0', 'out2:0']
- out_node = [out.split(':')[0] for out in out_name] # drop the ':<index>' tensor suffix
- in_node = [inp.split(':')[0] for inp in in_name]
-
- with tf.Session() as sess:
- final_graph_def = tf.graph_util.convert_variables_to_constants(
- sess, sess.graph.as_graph_def(add_shapes=True), out_node,)
- tf_output = run_tf_graph(sess, in_data, in_name, out_name)
- tvm_output = run_tvm_graph(final_graph_def, in_data, in_node, target='llvm',
- out_names=out_node, num_output=2)
- for i in range(len(tf_output)):
- tvm.testing.assert_allclose(tf_output[i], tvm_output[i], atol=1e-5, rtol=1e-5)
-
-#######################################################################
-# Resize Bilinear
-# ---------------
-
-def _test_resize_bilinear(in_shape, to_shape, align_corners):
- """ One iteration of resize bilinear """
-
- data = np.random.uniform(size=in_shape).astype('float32')
- shape_data = np.array(to_shape).astype('int32')
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
- shape_data = constant_op.constant(shape_data, shape=shape_data.shape, dtype=shape_data.dtype)
- tf.image.resize_bilinear(in_data, shape_data, align_corners=align_corners)
-
- compare_tf_with_tvm(data, 'Placeholder:0', 'ResizeBilinear:0')
-
-def test_forward_resize_bilinear():
- """ Resize Bilinear """
-
- _test_resize_bilinear((4, 16, 32, 32), [50, 50], False)
- _test_resize_bilinear((6, 32, 64, 64), [20, 20], True)
-
-
-#######################################################################
-# Crop to bounding box
-# --------------------
-
-def _test_crop(in_shape, off_h, off_w, tar_h, tar_w):
- """ Crop to bounding box """
- data = np.random.uniform(size=in_shape).astype('float32')
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
- tf.image.crop_to_bounding_box(in_data, off_h, off_w, tar_h, tar_w)
- compare_tf_with_tvm(data, 'Placeholder:0', 'crop_to_bounding_box/Slice:0')
-
-def test_forward_crop():
- """ Crop to bounding box """
- _test_crop((1, 224, 224, 3), 20, 20, 120, 120)
-
-
-#######################################################################
-# LSTM
-# ----
-
-def _test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype):
- """ One iteration of a LSTM cell """
-
- tf.reset_default_graph()
- input_size = num_hidden
- input_data = np.full((batch_size, input_size), 1., dtype=dtype)
- in_state_c = np.full((num_layers, batch_size, num_hidden), 0.1, dtype=dtype)
- in_state_h = np.full((num_layers, batch_size, num_hidden), 0.1, dtype=dtype)
-
- def _get_tensorflow_output():
- with tf.Session() as sess:
- with variable_scope.variable_scope(
- "root", initializer=init_ops.constant_initializer(0.5)):
- m0 = array_ops.zeros([batch_size, num_hidden])
- m1 = array_ops.zeros([batch_size, num_hidden])
- x=tf.placeholder(shape=(batch_size, input_size), dtype=dtype)
- g, ((out_m0, out_m1)) = \
- tf.contrib.rnn.LSTMBlockCell(num_hidden,
- forget_bias=forget_bias)(x, ((m0, m1)))
- sess.run([variables.global_variables_initializer()])
- res = sess.run([g, out_m0, out_m1], {
- x.name: np.array([[1., 1.]]),
- m0.name: 0.1 * np.ones([batch_size, num_hidden]),
- m1.name: 0.1 * np.ones([batch_size, num_hidden]),
- })
- graph_def = sess.graph.as_graph_def(add_shapes=True)
- final_graph_def = graph_util.convert_variables_to_constants(
- sess,
- graph_def,
- ['root/lstm_cell/LSTMBlockCell'])
- return final_graph_def, res
-
- graph_def, tf_out = _get_tensorflow_output()
- tvm_output = run_tvm_graph(graph_def, [input_data, in_state_c, in_state_h],
- ['root/Placeholder', 'root/lstm_cell/LSTMBlockCell_c',
- 'root/lstm_cell/LSTMBlockCell_h'], num_output=2)
- assert isinstance(tvm_output, list)
-
- out = tvm_output[0]
- out_state = tvm_output[1]
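- # The returned state packs (c, h) along axis 1; split and reshape it back into
- # separate cell-state and hidden-state tensors.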
- out_state_tup = np.split(out_state, indices_or_sections=2, axis=1)
- out_state_c = np.reshape(out_state_tup[0], (batch_size, num_hidden))
- out_state_h = np.reshape(out_state_tup[1], (batch_size, num_hidden))
- tvm_out = [out, out_state_c, out_state_h]
- tvm.testing.assert_allclose(tf_out[0], tvm_out[0], rtol=1e-3, atol=1e-3)
-
-def test_forward_lstm():
- '''test LSTM block cell'''
- _test_lstm_cell(1, 2, 1, 0.0, 'float32')
-
-
-
-#######################################################################
-# Pack
-# ---
-def _test_pack(axis, shape, **kwargs):
-
- a = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
- b = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
-
- with tf.Graph().as_default():
- tf_a = array_ops.placeholder(shape=shape, dtype='float32', name='pl_a')
- tf_b = array_ops.placeholder(shape=shape, dtype='float32', name='pl_b')
- tf_c = tf.stack([tf_a,tf_b], axis=axis, **kwargs)
- assert tf_c.op.op_def.name == 'Pack', "tf.stack() is expected to produce 'Pack' operation"
-
- compare_tf_with_tvm([a,b], ['pl_a:0','pl_b:0'], 'stack:0')
-
-def test_forward_pack():
- for axis in range(-3,3):
- _test_pack(axis, [3,2,1])
- for axis in range(-1,1):
- _test_pack(axis, [3])
- _test_pack(0, [])
-
-#######################################################################
-# Pad
-# ---
-def _test_pad(input_shape, paddings, mode, **kwargs):
- """ One iteration of pad operation with given shape"""
-
- x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape)
-
- with tf.Graph().as_default():
- in_data = array_ops.placeholder(shape=input_shape, dtype='float32')
- pad_values = constant_op.constant(paddings)
- pad = tf.pad(in_data, paddings=pad_values, mode=mode, **kwargs)
-
- if mode == 'CONSTANT':
- if 'constant_values' in kwargs:
- out_name = 'PadV2:0'
- else:
- out_name = 'Pad:0'
-
- compare_tf_with_tvm(x, 'Placeholder:0', out_name)
-
-def test_forward_pad():
- """ Pad """
- _test_pad((2, 3), [[1,1], [2,2]], mode="CONSTANT")
- _test_pad((2, 3), [[1,1], [2,2]], mode="CONSTANT", constant_values=1.0)
-
-#######################################################################
-# Logical operators
-# --------------------
-def test_logical_and():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1')
- in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2')
- out = tf.logical_and(in1, in2, name='out')
- in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0')
-
-def test_logical_or():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1')
- in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2')
- out = tf.logical_or(in1, in2, name='out')
- in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0')
-
-def test_logical_xor():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1')
- in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2')
- out = tf.logical_xor(in1, in2, name='out')
- in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0')
-
-def test_logical_not():
- with tf.Graph().as_default():
- in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1')
- out = tf.logical_not(in1, name='out')
- in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool')
- compare_tf_with_tvm(in_data1, 'in1:0', 'out:0')
-
-def test_forward_logical():
- test_logical_and()
- test_logical_or()
- test_logical_xor()
- test_logical_not()
-
-#######################################################################
-# Inception V3
-# ------------
-def test_forward_inception_v3():
- '''test inception V3 model'''
- with tf.Graph().as_default():
- graph_def = tf_testing.get_workload('InceptionV3/inception_v3_2016_08_28_frozen-with_shapes.pb')
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
- data = np.random.uniform(size=(1, 299, 299, 3)).astype('float32')
-
- with tf.Session() as sess:
- tf_output = run_tf_graph(sess, data, 'input:0', 'InceptionV3/Predictions/Reshape_1:0')
- tvm_output = run_tvm_graph(graph_def, data, 'input')
- tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-#######################################################################
-# Inception V1
-# ------------
-def test_forward_inception_v1():
- '''test inception V1 model'''
- with tf.Graph().as_default():
- graph_def = tf_testing.get_workload("InceptionV1/classify_image_graph_def-with_shapes.pb")
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
- # Build an image from random data.
- from PIL import Image
- from tvm.contrib import util
-
- img_array = np.random.uniform(size=(1, 600, 600, 3)).astype("uint8")
- img = Image.frombuffer('RGB', (600, 600), img_array.tostring(), 'raw', 'RGB', 0, 1)
- temp = util.tempdir()
- img_path = temp.relpath("tf-test.jpg")
- img.save(img_path)
-
- import os.path
- if not tf.gfile.Exists(os.path.join(img_path)):
- tf.logging.fatal('File does not exist %s', img_path)
- data = tf.gfile.FastGFile(os.path.join(img_path), 'rb').read()
-
- temp.remove()
-
- # Extract tensorflow decoded image frame for tvm input
- with tf.Session() as sess:
- tvm_data = run_tf_graph(sess, data, 'DecodeJpeg/contents:0', 'DecodeJpeg:0')
-
- with tf.Session() as sess:
- tf_output = run_tf_graph(sess, data, 'DecodeJpeg/contents:0', 'softmax:0')
- tvm_output = run_tvm_graph(graph_def, tvm_data, 'DecodeJpeg/contents')
- tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5)
-
-#######################################################################
-# Mobilenet
-# ---------
-def test_forward_mobilenet():
- '''test mobilenet model'''
- # MobilenetV2
- with tf.Graph().as_default():
- graph_def = tf_testing.get_workload(
- "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz",
- "mobilenet_v2_1.4_224_frozen.pb")
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
- data = np.random.uniform(size=(1, 224, 224, 3)).astype('float32')
- out_node = 'MobilenetV2/Predictions/Reshape_1'
-
- with tf.Session() as sess:
- # Add shapes to the graph.
- graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
- tf_output = run_tf_graph(sess, data, 'input:0', out_node + ':0')
- tvm_output = run_tvm_graph(graph_def, data, 'input')
- tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5)
-
-#######################################################################
-# ResnetV2
-# --------
-def test_forward_resnetv2():
- '''test resnet model'''
- if is_gpu_available():
- with tf.Graph().as_default():
- graph_def = tf_testing.get_workload("ResnetV2/resnet-20180601_resnet_v2_imagenet-shapes.pb")
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
- data = np.random.uniform(size=(128, 224, 224, 3)).astype('float32')
- out_node = 'ArgMax'
-
- with tf.Session() as sess:
- tf_output = run_tf_graph(sess, data, 'input_tensor:0', out_node + ':0')
- for device in ["llvm", "cuda"]:
- ctx = tvm.context(device, 0)
- if not ctx.exist:
- print("Skip because %s is not enabled" % device)
- continue
- tvm_output = run_tvm_graph(graph_def, data, 'input_tensor', len(tf_output), target=device)
- tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5)
-
-#######################################################################
-# Placeholder
-# -----------
-def test_forward_placeholder():
- '''test a simple pb with Placeholder node in the end of GraphDef'''
- with tf.Graph().as_default():
- graph_def = tf_testing.get_workload("Custom/placeholder.pb")
-
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-
- data = np.random.uniform(size=(1, 224, 224, 3)).astype('float32')
- out_node = 'mul'
-
- with tf.Session() as sess:
- # Add shapes to the graph.
- graph_def = tf_testing.AddShapesToGraphDef(sess, out_node)
- tf_output = run_tf_graph(sess, data, 'Placeholder:0', out_node + ':0')
- tvm_output = run_tvm_graph(graph_def, data, 'Placeholder')
- print("tf_output is {}\ntvm_output is {}".format(tf_output, tvm_output))
- tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5)
-
-#######################################################################
-# PTB
-# ---
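-# tf.contrib is loaded lazily; referencing it here forces registration of the
-# contrib ops (e.g. LSTMBlockCell) that the PTB graph relies on.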
-dir(tf.contrib)
-def test_forward_ptb():
- '''test ptb model'''
- config = tf_testing.get_config()
- num_steps = config.num_steps
- num_hidden = config.hidden_size
- num_layers = config.num_layers
- batch_size = config.batch_size
- vocab_size = config.vocab_size
- out_sample_shape = (batch_size, vocab_size)
- out_state_shape = (num_layers, 2, batch_size, num_hidden)
- #Sample input
- inpt = "we have no useful information on"
- cnt_sample = 20
-
- def _pretty_print(items, is_char_model, id2word):
- if not is_char_model:
- return ' '.join([id2word[x] for x in items])
- else:
- return ''.join([id2word[x] for x in items]).replace('_', ' ')
-
- def _get_tvm_graph_module(graph_def):
- sym, params = nnvm.frontend.from_tensorflow(graph_def)
-
- # Cell inputs 'c' and 'h' hold the values for all layers
- shape_dict = {'Model/Placeholder': (batch_size, num_steps),
- 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c':(num_layers, batch_size, num_hidden),
- 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h':(num_layers, batch_size, num_hidden)}
- dtype_dict = {'Model/Placeholder': 'int32',
- 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c':'float32',
- 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h':'float32'}
- target = 'llvm'
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict,
- dtype=dtype_dict, params=params)
- from tvm.contrib import graph_runtime
- ctx = tvm.cpu(0)
- return params, graph_runtime.create(graph, lib, ctx)
-
- def _do_tvm_sample(model, data, in_states, params, num_samples):
- """Sampled from the model"""
- samples = []
- state = in_states
- sample = None
- def _get_sample(data, state):
- input_data = np.full((batch_size, num_steps), data, dtype="int32")
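- # The incoming state packs (c, h) along axis 1; split it back into the per-layer
- # cell and hidden state tensors expected by the graph inputs.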
- in_state_tup = np.split(state, indices_or_sections=2, axis=1)
- in_state_c = np.reshape(in_state_tup[0], (num_layers, batch_size, num_hidden))
- in_state_h = np.reshape(in_state_tup[1], (num_layers, batch_size, num_hidden))
-
- model.set_input('Model/Placeholder', tvm.nd.array(input_data.astype("int32")))
- model.set_input('Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c',
- tvm.nd.array(in_state_c.astype("float32")))
- model.set_input('Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h',
- tvm.nd.array(in_state_h.astype("float32")))
- model.set_input(**params)
- model.run()
- tvm_output = model.get_output(0, tvm.nd.empty(out_sample_shape,
- "float32")).asnumpy()
- state_output = model.get_output(1, tvm.nd.empty(out_state_shape,
- "float32")).asnumpy()
- sample = tf_testing.pick_from_weight(tvm_output[0])
-
- return sample, state_output
-
- for x in data:
- sample, state = _get_sample(x, state)
-
- if sample is not None:
- samples.append(sample)
- else:
- samples.append(0)
-
- k = 1
- while k < num_samples:
- sample, state = _get_sample(samples[-1], state)
- samples.append(sample)
- k += 1
- return samples, state
-
- with tf.Graph().as_default():
- word_to_id, id_to_word, graph_def = tf_testing.get_workload_ptb()
- vocab_size = len(word_to_id)
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
- sess = tf.Session()
-
- #TVM graph module creation
- params, m = _get_tvm_graph_module(graph_def)
-
- # Create 10 predicted statements of 20 words each
- cnt_stm = 0
- while cnt_stm < 10:
- cnt_stm += 1
- in_state = np.full((num_layers, 2, batch_size, num_hidden), 0, dtype="float32")
- seed_for_sample = inpt.split()
- tvm_samples, tvm_state = _do_tvm_sample(m, [word_to_id[word] \
- for word in seed_for_sample],
- in_state, params, cnt_sample)
- tvm_sample_str = _pretty_print(tvm_samples, False, id_to_word)
- tf_samples, tf_state = tf_testing.do_tf_sample(sess,
- [word_to_id[word] for word in seed_for_sample],
- in_state, cnt_sample)
- tf_sample_str = _pretty_print(tf_samples, False, id_to_word)
- inpt = tvm_sample_str
- tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5)
- assert(tvm_sample_str == tf_sample_str)
-
-#######################################################################
-# LRN (Local Response Normalization)
-# ----------------------------------
-
-def _test_lrn(ishape, size, axis, bias, alpha, beta):
- """ testing local response normalization """
- lrn_depth_radius = size // 2 # depth_radius must be an integer
-
- inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype, name="lrn0_data")
- nn_ops.local_response_normalization(in1,
- name="lrn",
- depth_radius=lrn_depth_radius,
- bias=bias,
- alpha=alpha,
- beta=beta)
-
- compare_tf_with_tvm(inp_array, 'lrn0_data:0', 'lrn:0')
-
-def test_forward_lrn():
- _test_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5)
-
-#######################################################################
-# l2_normalize
-# ------------
-
-def _test_l2_normalize(ishape, eps, axis):
- """ testing l2 normalize (uses max, sum, square, sqrt frontend operators)"""
-
- inp_array = np.random.uniform(size=ishape).astype(np.float32)
-
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- nn.l2_normalize(in1,
- axis=axis,
- epsilon=eps,
- name=None,
- dim=None)
-
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'l2_normalize:0')
-
-def test_forward_l2_normalize():
- _test_l2_normalize((1, 3, 20, 20), 0.001, (0,))
-
-#######################################################################
-# transpose
-# ---------
-def _test_forward_transpose(ishape, axes=None):
- input = np.random.uniform(size=ishape).astype(np.float32)
-
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=input.shape, dtype=input.dtype, name="transpose_data")
-
- if axes is None:
- tf.transpose(in1)
- else:
- tf.transpose(in1, perm=axes)
-
- compare_tf_with_tvm(input, 'transpose_data:0', 'transpose:0')
-
-def test_forward_transpose():
- _test_forward_transpose((2, 3, 4))
- _test_forward_transpose((7, 8, 8, 10))
- _test_forward_transpose((2, 3, 4), (1, 2, 0))
- _test_forward_transpose((2, 3, 4), (0, 1, 2))
- _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2))
-
-
-def test_forward_ceil():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.ceil(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Ceil:0')
-
-def test_forward_floor():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.floor(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Floor:0')
-
-def test_forward_relu():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.nn.relu(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Relu:0')
-
-def test_forward_leaky_relu():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.nn.leaky_relu(in1, alpha=0.4)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'LeakyRelu:0')
-
-def test_forward_elu():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.nn.elu(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Elu:0')
-
-def test_forward_selu():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.nn.selu(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Selu:0')
-
-def test_forward_tanh():
- ishape = (1, 3, 10, 10)
- inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.nn.tanh(in1)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Tanh:0')
-
-#######################################################################
-# Mean
-# ----
-def test_forward_mean():
- def check_mean(ishape, **kwargs):
- inp_array = np.random.uniform(size=ishape).astype(np.float32)
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype)
- tf.keras.backend.mean(in1, **kwargs)
- compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Mean:0', no_gpu=True)
-
- check_mean((10, 8, 16, 32))
- check_mean((10, 8, 16, 32), axis=(2,3))
- check_mean((10, 8, 16, 32), axis=(1,2), keepdims=True)
-
-#######################################################################
-# Relational operators
-# --------------------
-def _test_forward_rel_op(data, func):
- with tf.Graph().as_default():
- in1 = tf.placeholder(shape=data[0].shape, dtype=data[0].dtype, name='in1')
- in2 = tf.placeholder(shape=data[1].shape, dtype=data[1].dtype, name='in2')
- op = func(in1, in2, name='op')
- out = tf.cast(op, tf.int32, name='out1')
- compare_tf_with_tvm([data[0], data[1]], ['in1:0', 'in2:0'], 'out1:0')
-
-def test_forward_rel_ops():
- t1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
- t2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
- _test_forward_rel_op([t1, t2], math_ops.less)
- _test_forward_rel_op([t1, t2], math_ops.greater)
- _test_forward_rel_op([t1, t2], math_ops.less_equal)
- _test_forward_rel_op([t1, t2], math_ops.greater_equal)
- _test_forward_rel_op([t1, t2], math_ops.equal)
- _test_forward_rel_op([t1, t2], math_ops.not_equal)
-
-
-#######################################################################
-# Main
-# ----
-if __name__ == '__main__':
- # Transforms
- test_forward_transpose()
- test_forward_reshape()
- test_forward_squeeze()
- test_forward_pack()
- test_forward_resize_bilinear()
- test_forward_crop()
- test_forward_pad()
- test_forward_gather()
- test_forward_stridedslice()
- test_forward_split()
- test_forward_unstack()
-
- # Activations
- test_forward_sigmoid()
- test_forward_relu()
- test_forward_leaky_relu()
- test_forward_elu()
- test_forward_selu()
- test_forward_tanh()
-
- # Reductions
- test_forward_argminmax()
- test_forward_reduce()
- test_forward_mean()
-
- # NN
- test_forward_convolution()
- test_forward_pooling()
- if tf.__version__ == '1.4.1':
- _test_forward_concat_v2()
- test_forward_lrn()
- test_forward_l2_normalize()
-
- # General
- test_forward_multi_input()
- test_forward_multi_output()
- test_forward_variable()
-
- # End to End
- test_forward_inception_v3()
- test_forward_inception_v1()
- test_forward_mobilenet()
- test_forward_resnetv2()
- test_forward_placeholder()
- test_forward_ptb()
-
- # RNN
- test_forward_lstm()
-
- # Elementwise
- test_forward_ceil()
- test_forward_floor()
-
- # Relational ops
- test_forward_rel_ops()
- test_forward_logical()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm
-import nnvm.symbol as sym
-import nnvm.graph as graph
-from nnvm.compiler import graph_attr
-
-def correct_layout(g, layout=None):
- if isinstance(g, nnvm.symbol.Symbol):
- g = graph.create(g)
- if layout:
- graph_attr.set_layout_inputs(g, layout)
- g = g.apply("CorrectLayout")
- ldict = {}
- vlayout = g.json_attr("layout")
- entry_ptr = g.index.entry_ptr
- for i, n in enumerate(g.index.nodes):
- begin, end = entry_ptr[i], entry_ptr[i + 1]
- ldict[n["name"]] = vlayout[begin:end]
- return g, ldict
-
-
-# Level 1
-def test_dense():
- x = sym.Variable("data", shape=(10, 20))
- y = sym.dense(x, units=30, name="fc")
- g, ldict = correct_layout(y, "HW")
- assert(ldict["data"][0] == "HW")
- assert(ldict["fc"][0] == "HW")
- assert(ldict["fc_bias"][0] == "__undef__")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, "HW16w")
- assert(ldict["data"][0] == "HW16w")
- assert(ldict["data_HW"][0] == "HW")
- assert(ldict["fc"][0] == "HW")
- assert(ldict["fc_bias"][0] == "__undef__")
-
-
-def test_matmul():
- a = sym.Variable("a", shape=(10, 20))
- b = sym.Variable("b", shape=(20, 30))
- c = sym.matmul(a, b, name="matmul")
- g, ldict = correct_layout(c, {"a" : "HW", "b" : "WC"})
- assert(ldict["a"][0] == "HW")
- assert(ldict["b"][0] == "WC")
- assert(ldict["matmul"][0] == "HC")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, {"a" : "HW16w", "b" : "WC16c"})
- assert(ldict["a"][0] == "HW16w")
- assert(ldict["a_HW"][0] == "HW")
- assert(ldict["b"][0] == "WC16c")
- assert(ldict["b_WC"][0] == "WC")
- assert(ldict["matmul"][0] == "HC")
- a = sym.Variable("a", shape=(20, 10))
- c = sym.matmul(a, b, name="matmul", transpose_a=True)
- g, ldict = correct_layout(c, {"a" : "HW", "b" : "HC"})
- assert(ldict["a"][0] == "HW")
- assert(ldict["b"][0] == "HC")
- assert(ldict["matmul"][0] == "WC")
- b = sym.Variable("b", shape=(30, 20))
- c = sym.matmul(a, b, name="matmul", transpose_b=True)
- g, ldict = correct_layout(c, {"a" : "HW", "b" : "CW"})
- assert(ldict["a"][0] == "HW")
- assert(ldict["b"][0] == "CW")
- assert(ldict["matmul"][0] == "HC")
- a = sym.Variable("a", shape=(20, 10))
- b = sym.Variable("b", shape=(30, 20))
- c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True)
- g, ldict = correct_layout(c, {"a" : "HW", "b" : "CH"})
- assert(ldict["a"][0] == "HW")
- assert(ldict["b"][0] == "CH")
- assert(ldict["matmul"][0] == "WC")
-
-
-def test_concatenate():
- x1 = sym.Variable("x", shape=(10, 20))
- x2 = sym.Variable("y", shape=(10, 30))
- z = sym.concatenate(x1, x2, name="concat")
- g, ldict = correct_layout(z, {"x": "HW", "y": "HW"})
- assert(ldict["x"][0] == "HW")
- assert(ldict["y"][0] == "HW")
- assert(ldict["concat"][0] == "HW")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, {"x": "HW16w", "y": "HW16w"})
- assert(ldict["x"][0] == "HW16w")
- assert(ldict["y"][0] == "HW16w")
- assert(ldict["concat"][0] == "HW16w")
-
- x1 = sym.Variable("x", shape=(10, 20, 60))
- x2 = sym.Variable("y", shape=(10, 20, 40))
- z = sym.concatenate(x1, x2, axis=2, name="concat")
- g, ldict = correct_layout(z, {"x": "H20wW", "y": "H20wW"})
- assert(ldict["x"][0] == "H20wW")
- assert(ldict["y"][0] == "H20wW")
- assert(ldict["concat"][0] == "H20wW")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, {"x": "HW", "y": "HW"})
- assert(ldict["x_H20wW"][0] == "H20wW")
- assert(ldict["y_H20wW"][0] == "H20wW")
- assert(ldict["concat"][0] == "H20wW")
-
-
-def test_expand_dims():
- x = sym.Variable("x", shape=(10, 20))
- y = sym.expand_dims(x, axis=1, name="y")
- g, ldict = correct_layout(y, "HW")
- assert(ldict["x"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, "HW16w")
- assert(ldict["x"][0] == "HW16w")
- assert(ldict["x_HW"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
-
-
-def test_split():
- x = sym.Variable("x", shape=(10, 20))
- y = sym.split(x, indices_or_sections=[11], name="y")
- g, ldict = correct_layout(y, "HW")
- assert(ldict["x"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, "HW16w")
- assert(ldict["x"][0] == "HW16w")
- assert(ldict["x_HW"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
-
-
-def test_batchnorm():
- x = sym.Variable("data", shape=(10, 20, 30, 40))
- y = sym.batch_norm(x, axis=1, epsilon=2e-5, name="bn")
- g, ldict = correct_layout(y, "NCHW")
- assert(ldict["data"][0] == "NCHW")
- assert(ldict["bn"][0] == "NCHW")
- assert(ldict["bn"][1] == "C")
- assert(ldict["bn"][2] == "C")
- assert(ldict["bn_beta"][0] == "C")
- assert(ldict["bn_gamma"][0] == "C")
- assert(ldict["bn_moving_mean"][0] == "C")
- assert(ldict["bn_moving_var"][0] == "C")
- # batch_norm can handle a sub-dimension of C as the last dimension.
- g, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["data"][0] == "NCHW16c")
- assert(ldict["bn"][0] == "NCHW16c")
- assert(ldict["bn"][1] == "C16c")
- assert(ldict["bn"][2] == "C16c")
- assert(ldict["bn_beta"][0] == "C")
- assert(ldict["bn_beta_C16c"][0] == "C16c")
- assert(ldict["bn_gamma"][0] == "C")
- assert(ldict["bn_gamma_C16c"][0] == "C16c")
- assert(ldict["bn_moving_mean"][0] == "C")
- assert(ldict["bn_moving_mean_C16c"][0] == "C16c")
- assert(ldict["bn_moving_var"][0] == "C")
- assert(ldict["bn_moving_var_C16c"][0] == "C16c")
- # but for other layouts it inserts a layout transform for the data
- g, ldict = correct_layout(g, "NCH16cW")
- assert(ldict["data"][0] == "NCH16cW")
- assert(ldict["data_NCHW16c"][0] == "NCHW16c")
- assert(ldict["bn"][0] == "NCHW16c")
- assert(ldict["bn"][1] == "C16c")
- assert(ldict["bn"][2] == "C16c")
- assert(ldict["bn_beta"][0] == "C")
- assert(ldict["bn_beta_C16c"][0] == "C16c")
- assert(ldict["bn_gamma"][0] == "C")
- assert(ldict["bn_gamma_C16c"][0] == "C16c")
- assert(ldict["bn_moving_mean"][0] == "C")
- assert(ldict["bn_moving_mean_C16c"][0] == "C16c")
- assert(ldict["bn_moving_var"][0] == "C")
- assert(ldict["bn_moving_var_C16c"][0] == "C16c")
-
-
-def test_flatten():
- x = sym.Variable("x", shape=(10, 20, 10, 10))
- y = sym.flatten(x, name="y")
- g, ldict = correct_layout(y, "NCHW")
- assert(ldict["x"][0] == "NCHW")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["x"][0] == "NCHW16c")
- assert(ldict["x_NCHW"][0] == "NCHW")
- assert(ldict["y"][0] == "__undef__")
-
-
-def test_softmax():
- x = sym.Variable("x", shape=(10, 20, 10, 10))
- y = sym.softmax(x, name="y")
- g, ldict = correct_layout(y, "NCHW")
- assert(ldict["x"][0] == "NCHW")
- assert(ldict["y"][0] == "NCHW")
- # second pass will insert layout transform
- _, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["x"][0] == "NCHW16c")
- assert(ldict["x_NCHW"][0] == "NCHW")
- assert(ldict["y"][0] == "NCHW")
-
-
-# Level 2
-def test_conv2d():
- x = sym.Variable("data", shape=(1, 32, 512, 512))
- y = sym.conv2d(x, name="conv", channels=12,
- kernel_size=(3,3), padding=(1,1), layout="NCHW")
- _, ldict = correct_layout(y)
- assert(ldict["data"][0] == "NCHW")
- assert(ldict["conv_weight"][0] == "OIHW")
- assert(ldict["conv_bias"][0] == "C")
- assert(ldict["conv"][0] == "NCHW")
- y = sym.conv2d(x, name="conv", channels=12,
- kernel_size=(3,3), padding=(1,1), layout="NCHW16c",
- kernel_layout="OIHW16i16o", out_layout="NCHW8c")
- _, ldict = correct_layout(y)
- assert(ldict["data"][0] == "NCHW16c")
- assert(ldict["conv_weight"][0] == "OIHW16i16o")
- assert(ldict["conv_bias"][0] == "C8c")
- assert(ldict["conv"][0] == "NCHW8c")
- y = sym.conv2d(x, name="conv", channels=12,
- kernel_size=(3,3), padding=(1,1), layout="N16cHWC")
- _, ldict = correct_layout(y)
- assert(ldict["data"][0] == "N16cHWC")
- assert(ldict["conv_weight"][0] == "OIHW")
- assert(ldict["conv_bias"][0] == "16cC")
- assert(ldict["conv"][0] == "N16cHWC")
-
-
-def test_conv2d_transpose():
- x = sym.Variable("data", shape=(1, 32, 512, 512))
- y = sym.conv2d_transpose(x, name="conv", channels=12,
- kernel_size=(3,3), padding=(1,1), layout="NCHW")
- _, ldict = correct_layout(y)
- assert(ldict["data"][0] == "NCHW")
- assert(ldict["conv_weight"][0] == "OIHW")
- assert(ldict["conv_bias"][0] == "C")
- assert(ldict["conv"][0] == "NCHW")
-
-
-def test_max_pool2d():
- x = sym.Variable("data", shape=(1, 32, 512, 512))
- y = sym.max_pool2d(x, name="pool", pool_size=(3,3),
- padding=(1,1), layout="NCHW")
- g, ldict = correct_layout(y)
- assert(ldict["data"][0] == "NCHW")
- assert(ldict["pool"][0] == "NCHW")
- # if the indices of H and W remain the same,
- # pool2d does not convert the layout.
- g, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["data"][0] == "NCHW16c")
- assert(ldict["pool"][0] == "NCHW16c")
- # for other layouts it requires a layout transform.
- g, ldict = correct_layout(g, "NHWC")
- assert(ldict["data"][0] == "NHWC")
- assert(ldict["data_NCHW"][0] == "NCHW")
- assert(ldict["pool"][0] == "NCHW")
-
-
-def test_global_pool2d():
- x = sym.Variable("data", shape=(1, 32, 512, 512))
- y = sym.global_max_pool2d(x, name="pool", layout="NCHW")
- g, ldict = correct_layout(y)
- assert(ldict["data"][0] == "NCHW")
- assert(ldict["pool"][0] == "NCHW")
- # if the indices of H and W remain the same,
- # pool2d does not convert the layout.
- g, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["data"][0] == "NCHW16c")
- assert(ldict["pool"][0] == "NCHW16c")
- # for other layouts it requires a layout transform.
- g, ldict = correct_layout(g, "NHWC")
- assert(ldict["data"][0] == "NHWC")
- assert(ldict["data_NCHW"][0] == "NCHW")
- assert(ldict["pool"][0] == "NCHW")
-
-
-# Level 3
-def test_reshape():
- x = sym.Variable("x", shape=(4,))
- y = sym.reshape(x, shape=(2,2), name="y")
- g, ldict = correct_layout(y, "C")
- assert(ldict["x"][0] == "C")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- g, ldict = correct_layout(g, "C16c")
- assert(ldict["x"][0] == "C16c")
- assert(ldict["x_C"][0] == "C")
- assert(ldict["y"][0] == "__undef__")
-
-
-def test_transpose():
- x = sym.Variable("x", shape=(1, 32, 512, 512))
- y = sym.transpose(x, name="y", axes=(0, 2, 3, 1))
- g, ldict = correct_layout(y, "NCHW")
- assert(ldict["x"][0] == "NCHW")
- assert(ldict["y"][0] == "NHWC")
- # second pass will insert layout transform
- g, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["x"][0] == "NCHW16c")
- assert(ldict["x_NCHW"][0] == "NCHW")
- assert(ldict["y"][0] == "NHWC")
-
-
-def test_broadcast_to():
- x = sym.Variable("x", shape=(4, 1))
- y = sym.broadcast_to(x, shape=(0, 4), name="y")
- g, ldict = correct_layout(y, "HW")
- assert(ldict["x"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- g, ldict = correct_layout(g, "HW16h")
- assert(ldict["x"][0] == "HW16h")
- assert(ldict["x_HW"][0] == "HW")
- assert(ldict["y"][0] == "__undef__")
-
-
-def test_broadcast_binary():
- x = sym.Variable("x", shape=(1, 16, 512, 512))
- y = sym.Variable("y", shape=(16, 512, 512))
- z = sym.broadcast_add(x, y, name="z")
- g, ldict = correct_layout(z, {"x": "NCHW", "y": "CHW"})
- assert(ldict["x"][0] == "NCHW")
- assert(ldict["y"][0] == "CHW")
- assert(ldict["z"][0] == "NCHW")
- # prefer to keep the left-hand layout if the two do not match.
- g, ldict = correct_layout(g, {"x": "NCHW16c", "y": "CHW"})
- assert(ldict["x"][0] == "NCHW16c")
- assert(ldict["y"][0] == "CHW")
- assert(ldict["y_CHW16c"][0] == "CHW16c")
- assert(ldict["z"][0] == "NCHW16c")
- # broadcast_add(HCW16c, N16nCH16cW)
- g, ldict = correct_layout(z, {"x": "HCW16c", "y": "N16nCH16cW"})
- assert(ldict["x"][0] == "HCW16c")
- assert(ldict["y"][0] == "N16nCH16cW")
- assert(ldict["x_CH16cW"][0] == "CH16cW")
- assert(ldict["z"][0] == "N16nCH16cW")
-
-
-def test_reduce():
- x = sym.Variable("x", shape=(1, 16, 512, 512))
- y = sym.sum(x, name="y", axis=1)
- g, ldict = correct_layout(y, "NCHW")
- assert(ldict["x"][0] == "NCHW")
- assert(ldict["y"][0] == "__undef__")
- # second pass will insert layout transform
- g, ldict = correct_layout(g, "NCHW16c")
- assert(ldict["x"][0] == "NCHW16c")
- assert(ldict["x_NCHW"][0] == "NCHW")
- assert(ldict["y"][0] == "__undef__")
-
-
-if __name__ == "__main__":
- test_dense()
- test_matmul()
- test_concatenate()
- test_expand_dims()
- test_split()
- test_batchnorm()
- test_flatten()
- test_softmax()
- test_conv2d()
- test_conv2d_transpose()
- test_max_pool2d()
- test_global_pool2d()
- test_reshape()
- test_transpose()
- test_broadcast_to()
- test_broadcast_binary()
- test_reduce()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import json
-import nnvm.symbol as sym
-import nnvm.graph as graph
-import nnvm.compiler.graph_util as graph_util
-
-def test_json_pass():
- x = sym.Variable('x')
- y = sym.dense(data=x, name='conv', units=30)
- g = graph.create(y)
- ret = g.apply('SaveJSON')
- ret._set_json_attr('json', ret.json_attr('json'))
- g2 = ret.apply('LoadJSON')
- assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json')
- json = g.json()
- g2 = graph.load_json(json)
- assert json == g2.json()
-
-
-def test_json_pass_with_attr():
- x = sym.Variable('x')
- y = sym.dense(data=x, name='fc', units=30)
- g = graph.create(y)
- g._set_json_attr('version', '0.1.0')
- ret = g.apply('SaveJSON')
- json_str = ret.json_attr('json')
- ret._set_json_attr('json', json_str)
- g2 = ret.apply('LoadJSON')
- assert g2.json_attr('version') == '0.1.0'
-
-
-def test_graph_json_attr():
- x = sym.Variable('x')
- y = sym.dense(data=x, name='fc', units=30)
- g = graph.create(y)
- g._set_json_attr('ilist', [1,2,3], 'list_int')
- assert g.json_attr('ilist') == [1,2,3]
-
-def test_list_args():
- x = sym.Variable('x')
- z = sym.Variable('z')
- y = sym.dense(data=x, name='fc', units=30)
- y = sym.elemwise_add(y, z, name='add1')
-
-def test_infer_shape():
- x = sym.Variable('x', shape=(2, 4, 2))
- y = sym.elemwise_add(x, x, name='add1')
- y = sym.flatten(y, name="flatten")
- g = graph.create(y)
- g._set_json_attr("shape_attr_key", "shape")
- g = g.apply('InferShape')
- jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
- jnodes = jgraph['nodes']
- jnode_row_ptr = jgraph['node_row_ptr']
- nindex = {n['name']: i for i, n in enumerate(jnodes)}
- assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten"]]] == [2, 8]
- assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2]
-
-def test_infer_shape_known_partial():
- x = sym.Variable('x')
- y = sym.elemwise_add(x, x, name='add1')
- y = sym.flatten(y, name="flatten1")
- g = graph.create(y)
- jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
- shape = [[2, 4, 2], [], []]
- g._set_json_attr("shape", shape, 'list_shape')
- g = g.apply("InferShape")
- jnodes = jgraph['nodes']
- jnode_row_ptr = jgraph['node_row_ptr']
- nindex = {n['name']: i for i, n in enumerate(jnodes)}
- assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten1"]]] == [2, 8]
- assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2]
-
-def test_infer_type():
- x = sym.Variable('x', dtype=0)
- y = sym.elemwise_add(x, x, name='add1')
- y = sym.cast(y, dtype="float64", name="cast1")
- g = graph.create(y)
- g._set_json_attr("dtype_attr_key", "dtype")
- g = g.apply('InferType')
- jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
- jnodes = jgraph['nodes']
- jnode_row_ptr = jgraph['node_row_ptr']
- nindex = {n['name']: i for i, n in enumerate(jnodes)}
- assert g.json_attr('dtype')[jnode_row_ptr[nindex["cast1"]]] == 1
- assert g.json_attr('dtype')[jnode_row_ptr[nindex["add1"]]] == 0
-
-def test_plan_memory():
- x = sym.Variable('x', shape=(4, 2))
- x2 = sym.elemwise_add(x, x, name='addk')
- y = sym.flatten(x2, name="reshapek")
- y = sym.elemwise_add(y, x2, name="add2")
- y = sym.elemwise_add(y, y)
- g = graph.create(y)
- g._set_json_attr("shape_attr_key", "shape")
- g = g.apply(["InferShape", "InferType", "PlanMemory"])
- jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
- jnodes = jgraph['nodes']
- jnode_row_ptr = jgraph['node_row_ptr']
- storage_id = g.json_attr('storage_id')
- nindex = {n['name']: i for i, n in enumerate(jnodes)}
- assert (storage_id[jnode_row_ptr[nindex["addk"]]] !=
- storage_id[jnode_row_ptr[nindex["reshapek"]]])
- assert (storage_id[jnode_row_ptr[nindex["add2"]]] ==
- storage_id[jnode_row_ptr[nindex["reshapek"]]])
-
-def test_print_graph_ir():
- x = sym.Variable("x", shape=(1, 1, 10, 20))
- y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3,3))
- g = graph.create(y)
- g = g.apply("InferShape")
- ir1 = g.ir()
- ir2 = g.ir(join_entry_attrs=["shape"])
- assert("y_bias" in ir1)
- assert("shape=" in ir2)
-
-def test_gradient():
- x = sym.Variable("x")
- y = sym.Variable("y")
- z1 = sym.elemwise_add(x, sym.sqrt(y))
- z2 = sym.log(x)
- gradient = graph_util.gradients([z1, z2], [x, y])
- assert len(gradient) == 2
-
- g1 = sym.Variable("g1")
- g2 = sym.Variable("g2")
- grad_ys = [g1, g2]
- gradient = graph_util.gradients(sym.Group([z1, z2]),
- sym.Group([x, y]), grad_ys=grad_ys)
- g_graph = graph.create(sym.Group(gradient)).ir()
- assert len(gradient) == 2
- assert "g1" in g_graph
- assert "g2" in g_graph
-
-if __name__ == "__main__":
- test_print_graph_ir()
- test_json_pass_with_attr()
- test_graph_json_attr()
- test_json_pass()
- test_infer_shape()
- test_infer_shape_known_partial()
- test_infer_type()
- test_plan_memory()
- test_list_args()
- test_gradient()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-from nnvm.compiler import graph_util
-
-def test_cnn_gradients():
- # input data
- h = 128
- w = 128
- data_shape = (1000, 3, h, w)
- data = sym.Variable('data', shape=data_shape, dtype=0)
-
- # conv2d
- num_channels = 64
- kernel_size = 32
- conv_w_shape = (num_channels, 3, kernel_size, kernel_size)
- conv_b_shape = (num_channels,)
- conv_w = sym.Variable('conv_w', shape=conv_w_shape)
- conv_b = sym.Variable('conv_b', shape=conv_b_shape)
- conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b,
- channels=num_channels, kernel_size=(kernel_size, kernel_size),
- name='conv1')
- # relu1
- relu1 = sym.relu(data=conv1, name='relu1')
- # max pooling
- max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1')
- # flatten
- flatten1 = sym.flatten(data=max_pooling1)
- # shape after flatten
- flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels
- # dense1
- dense1_hidden_units = 100
- dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units)
- # relu2
- relu2 = sym.relu(data=dense1, name='relu2')
- # dense2
- dense2_hidden_units = 10
- dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units)
- # softmax
- mlp = sym.softmax(data=dense2, name='softmax')
- # fake non-sparse label
- label = sym.full_like(mlp, fill_value=1)
- # cross entropy loss
- ce_loss = sym.sum(
- sym.elemwise_mul(sym.log_softmax(dense2), label),
- axis=1,
- keepdims=True,
- name="ce_loss")
-
- # input variables:
- # print grad_g.symbol.list_input_names()
- # >> ['data', 'conv_w', 'conv_b',
- # 'dense1_weight', 'dense1_bias',
- # 'dense2_weight', 'dense2_bias']
-
- # output gradient variables:
- # print grad_g.symbol.list_output_names()
- # >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias',
- # 'dense1_grad_weight', 'dense1_grad_bias',
- # 'dense2_grad_weight', 'dense2_grad_bias']
- grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables())
-
- # infer shape
- in_shapes, out_shapes = graph_util.infer_shape(grad_g)
-
- # forward graph shape
- assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape),
- [dense1_hidden_units, flatten_out_shape], [dense1_hidden_units],
- [dense2_hidden_units, dense1_hidden_units], [dense2_hidden_units]]
- # input grads shape should be equal with input shape
- assert in_shapes == out_shapes
-
- # output grads w.r.t input variables
- grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables())
-
- # gradients number should be equal with grad_input number
- assert len(grads) == len(ce_loss.list_input_variables())
-
- # infer type
- in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
- assert out_dtypes == ['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32']
-
-def test_multi_loss_graph_gradients():
- # input data
- shape1 = (1000, 100)
- data1 = sym.Variable('data1', shape=shape1, dtype=0)
-
- # fake non-sparse label
- label = sym.full(fill_value=3)
-
- # square loss
- sub1 = sym.elemwise_sub(data1, label, name="sub1")
- square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss")
-
- # fake loss1
- shape2 = (1000, )
- data2 = sym.Variable('data2', shape=shape2, dtype=0)
- loss1 = sym.sqrt(data2, name="loss1")
-
- # fake loss2
- loss2 = sym.relu(data1, name='loss2')
-
- # block loss1
- total_loss = sym.elemwise_sum(
- sym.block_grad(loss1),
- square_loss,
- num_args=2,
- name="total_loss")
-
- # grad_g.symbol.list_output_names()
- # >> ['loss1_grad_0_output', 'grad_sum_output']
- grad_g = graph_util.get_gradient_graph([total_loss, loss2], total_loss.list_input_variables())
- # infer shape
- in_shapes, out_shapes = graph_util.infer_shape(grad_g)
- assert out_shapes == [list(shape2), list(shape1)]
-
- # grad_data1 is elemwise_sum of grad_loss2, grad_square_loss
- grad_data1 = grad_g.symbol[1]
- assert grad_data1.list_attr()['num_args'] == '2'
-
- # block grad should return zero grad
- grad_data2 = grad_g.symbol[0]
- assert 'zeros_like' in grad_g.ir()
-
- # test reverse infer shape for label
- assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0
-
- # infer type
- in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g)
- assert out_dtypes == ['float32', 'float32']
-
- # test reverse infer type for label
- assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0
-
-
-if __name__ == "__main__":
- test_cnn_gradients()
- test_multi_loss_graph_gradients()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import json
-import nnvm.symbol as sym
-import nnvm.graph as graph
-
-def infer_shape(sym):
- g = graph.create(sym)
- g._set_json_attr("shape_attr_key", "shape")
- g = g.apply("InferShape")
- sdict = {}
- vshape = g.json_attr("shape")
- entry_ptr = g.index.entry_ptr
- for i, n in enumerate(g.index.nodes):
- begin, end = entry_ptr[i], entry_ptr[i + 1]
- sdict[n["name"]] = vshape[begin:end]
- return sdict
-
-# Level 1
-def test_dense():
- x = sym.Variable("x", shape=(10, 20))
- y = sym.dense(x, units=30, name="fc")
- sdict = infer_shape(y)
- assert(sdict["fc"][0] == [10, 30])
- assert(sdict["fc_bias"][0] == [30])
-
-
-def test_matmul():
- a = sym.Variable('a', shape=(10, 20))
- b = sym.Variable('b', shape=(20, 30))
- c = sym.matmul(a, b, name="matmul")
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 30])
- a = sym.Variable('a', shape=(20, 10))
- c = sym.matmul(a, b, name="matmul", transpose_a=True)
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 30])
- b = sym.Variable('b', shape=(30, 20))
- c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True)
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 30])
- a = sym.Variable('a', shape=(10, 20))
- c = sym.matmul(a, b, name="matmul", transpose_b=True)
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 30])
- a = sym.Variable('a', shape=(10, 20, 30))
- b = sym.Variable('b', shape=(30, 40, 50))
- c = sym.matmul(a, b, name="matmul")
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 20, 40, 50])
- a = sym.Variable('a', shape=(30, 20, 10))
- b = sym.Variable('b', shape=(50, 40, 30))
- c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True)
- sdict = infer_shape(c)
- assert(sdict["matmul"][0] == [10, 20, 40, 50])
-
-
-def test_concatenate():
- x1 = sym.Variable("x", shape=(10, 20))
- x2 = sym.Variable("y", shape=(10, 30))
- z = sym.concatenate(x1, x2, name="concat")
- sdict = infer_shape(z)
- assert(sdict["concat"][0] == [10, 50])
- z = sym.concatenate(x1, x1, axis=0, name="concat")
- sdict = infer_shape(z)
- assert(sdict["concat"][0] == [20, 20])
-
-
-def test_expand_dims():
- x = sym.Variable("x", shape=(10, 20))
- y = sym.expand_dims(x, axis=1, name="y")
- sdict = infer_shape(y)
- assert(sdict["y"][0] == [10, 1, 20])
- y = sym.expand_dims(x, axis=-1, name="y", num_newaxis=2)
- sdict = infer_shape(y)
- assert(sdict["y"][0] == [10, 20, 1, 1])
-
-
-def test_split():
- x1 = sym.Variable("x", shape=(10, 20))
- z = sym.split(x1, indices_or_sections=[11], name="y")
- sdict = infer_shape(z)
- assert(sdict["y"][0] == [10, 11])
- assert(sdict["y"][1] == [10, 9])
- z = sym.split(x1, indices_or_sections=2, name="y")
- sdict = infer_shape(z)
- assert(sdict["y"][0] == [10, 10])
- assert(sdict["y"][1] == [10, 10])
- z = sym.split(x1, indices_or_sections=[6], axis=-1, name="y")
- sdict = infer_shape(z)
- assert(sdict["y"][0] == [10, 6])
- assert(sdict["y"][1] == [10, 14])
-
-
-def test_batchnorm():
- x = sym.Variable("x", shape=(10, 20))
- y = sym.batch_norm(1 / x, name="bn")
- sdict = infer_shape(y)
- assert(sdict["bn_gamma"][0] == [20])
-
- x = sym.Variable("x", shape=(10, 20, 30, 40))
- y = sym.batch_norm(data=x, axis=0, epsilon=2e-5, name='bn')
- sdict = infer_shape(y)
- assert(sdict['bn_moving_var'][0] == [10])
-
- y = sym.batch_norm(data=x, axis=1, epsilon=2e-5, name='bn')
- sdict = infer_shape(y)
- assert(sdict['bn_gamma'][0] == [20])
-
- y = sym.batch_norm(data=x, axis=2, epsilon=2e-5, name='bn')
- sdict = infer_shape(y)
- assert(sdict['bn_beta'][0] == [30])
-
- y = sym.batch_norm(data=x, axis=3, epsilon=2e-5, name='bn')
- sdict = infer_shape(y)
- assert(sdict['bn_moving_mean'][0] == [40])
-
-def test_flatten():
- x = sym.Variable("x", shape=(10, 20, 10))
- y = sym.flatten(x) * 2
- y = sym.exp(y, name="y")
- sdict = infer_shape(y)
- assert(sdict["y"][0] == [10, 200])
-
-def test_squeeze():
- x = sym.Variable("x", shape=(1, 1, 1, 10))
- y = sym.squeeze(x, axis=(1,2), name='squeeze')
- sdict = infer_shape(y)
- assert(sdict['squeeze'][0] == [1, 10])
-
- x = sym.Variable("x", shape=(1, 3, 1))
- y = sym.squeeze(x, name='squeeze')
- sdict = infer_shape(y)
- assert(sdict['squeeze'][0] == [3])
-
- y = sym.squeeze(x, axis=(0), name='squeeze')
- sdict = infer_shape(y)
- assert(sdict['squeeze'][0] == [3, 1])
-
- y = sym.squeeze(x, axis=(0,2), name='squeeze')
- sdict = infer_shape(y)
- assert(sdict['squeeze'][0] == [3])
-
-# Level 2
-def test_conv2d():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.conv2d(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 10, 10, 12),
- (4, 12, 10, 12),
- channels=12,
- kernel_size=(3,3),
- padding=(1,1))
- check((4, 10, 12, 4),
- (4, 8, 8, 5),
- channels=5,
- kernel_size=(3, 5),
- layout="NHWC")
- check((4, 10, 12, 4),
- (4, 6, 8, 5),
- channels=5,
- dilation=(2, 2),
- kernel_size=(3, 3),
- layout="NHWC")
- check((4, 10, 12, 4),
- (4, 5, 6, 5),
- channels=5,
- strides=(2, 2),
- kernel_size=(3, 3),
- padding=(1, 1),
- layout="NHWC")
-
-
-def test_conv2d_packed():
- def check(in_shape,
- out_shape,
- kernel_shape,
- **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.conv2d(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
- assert(tuple(sdict["y_weight"][0]) == tuple(kernel_shape))
-
- check((4, 10, 10, 12, 1, 8),
- (4, 10, 10, 2, 1, 8),
- (2, 12, 3, 3, 8, 8),
- channels=8 * 2,
- kernel_size=(3,3),
- padding=(1,1),
- layout="NHWC1n8c",
- kernel_layout="OIHW8o8i")
-
-
-def test_conv2d_transpose():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.conv2d_transpose(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 10, 10, 12),
- (4, 15, 10, 12),
- channels=15,
- kernel_size=(3,3),
- padding=(1,1))
- check((4, 10, 10, 12),
- (4, 15, 10, 14),
- channels=15,
- kernel_size=(3, 5),
- padding=(1, 1))
- check((4, 10, 10, 12),
- (4, 15, 11, 15),
- channels=15,
- kernel_size=(3, 5),
- padding=(1, 1),
- output_padding=(1, 1))
- check((4, 10, 10, 12),
- (4, 15, 15, 11),
- channels=11,
- kernel_size=(5, 5),
- output_padding=(1, 1),
- layout="NHWC")
-
-
-def test_max_pool2d():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.max_pool2d(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 10, 12, 12),
- (4, 10, 12, 12),
- pool_size=(3,3),
- padding=(1,1))
- check((4, 10, 12, 12),
- (4, 10, 6, 6),
- pool_size=(3, 3),
- padding=(1, 1),
- strides=(2, 2))
- check((4, 10, 12, 12),
- (4, 10, 7, 7),
- pool_size=(3, 3),
- padding=(1, 1),
- strides=(2, 2),
- ceil_mode=True)
- check((4, 12, 14, 10),
- (4, 6, 7, 10),
- pool_size=(3, 3),
- padding=(1, 1),
- strides=(2, 2),
- layout="NHWC")
-
-
-def test_global_pool2d():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.global_max_pool2d(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 10, 12, 12),
- (4, 10, 1, 1))
- check((4, 10, 12, 12),
- (4, 1, 1, 12),
- layout="NHWC")
-
-
-# Level 3
-def test_reshape():
- def check(in_shape, tshape, out_shape):
- x = sym.Variable("x", shape=in_shape)
- y = sym.reshape(x, shape=tshape, name="y")
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4,), (2, 2), (2, 2))
- check((2, 3, 4), (4, 0, 2), (4, 3, 2))
- check((2, 3, 4), (2, 0, 0), (2, 3, 4))
- check((2, 3, 4), (6, 1, -1), (6, 1, 4))
- check((2, 3, 4), (3, -1, 8), (3, 1, 8))
- check((2, 3, 4), (-1,), (24,))
- check((2, 3, 4), (-2,), (2, 3, 4))
- check((2, 3, 4), (2, -2), (2, 3, 4))
- check((2, 3, 4), (-2, 1, 1), (2, 3, 4, 1, 1))
- check((2, 3, 4), (-3, 4), (6, 4))
- check((2, 3, 4, 5), (-3, -3), (6, 20))
- check((2, 3, 4), (0, -3), (2, 12))
- check((2, 3, 4), (-3, -2), (6, 4))
- check((2, 3, 4), (-4, 1, 2, -2), (1, 2, 3, 4))
- check((2, 3, 4), (2, -4, -1, 3, -2), (2, 1, 3, 4))
-
-
-def test_prelu():
- def check(in_shape, axis, out_shape):
- x = sym.Variable("x", shape=in_shape)
- w = sym.Variable("w")
- y = sym.prelu(x, w, axis=axis, name="y")
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
- check((1, 3, 2, 2), 1, (1, 3, 2, 2))
- check((1, 2, 2, 3), 3, (1, 2, 2, 3))
-
-
-# Level 4
-def test_transpose():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.transpose(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 1), (1, 4))
- check((0, 1, 2, 3), (1, 2, 3, 0), axes=(1, 2, 3, 0))
-
-
-def test_broadcast_to():
- def check(in_shape, tshape, out_shape):
- x = sym.Variable("x", shape=in_shape)
- y = sym.broadcast_to(x, shape=tshape, name="y")
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 1), (0, 4), (4, 4))
- check((4, 1, 5), (0, 4, 5), (4, 4, 5))
-
-
-def test_broadcast_binary():
- def check(lhs_shape, rhs_shape, out_shape):
- x = sym.Variable("x", shape=lhs_shape)
- y = sym.Variable("y", shape=rhs_shape)
- z = sym.broadcast_add(x, y, name="y")
- sdict = infer_shape(z)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 1), (4), (4, 4))
- check((5, 1, 1), (1, 4, 4), (5, 4, 4))
- check((6, 1, 4), (5, 4), (6, 5, 4))
-
-
-def test_reduce():
- def check(in_shape, out_shape, **kwargs):
- x = sym.Variable("x", shape=in_shape)
- y = sym.sum(x, name="y", **kwargs)
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4, 5), (4,), axis=1)
- check((4, 5), (4, 1), axis=1, keepdims=True)
- check((4, 5), (1, 5), axis=0, keepdims=True)
- check((4, 5), (1, 1), axis=(), keepdims=True)
- check((4, 5), (1,), axis=())
- check((4, 5, 10), (5,), axis=(0, 2))
- check((4, 5, 10), (1, 5, 1), axis=(0, 2), keepdims=True)
-
-
-def test_gather_nd():
- def check(data_shape, indices_shape, out_shape):
- x = sym.Variable("x", shape=data_shape)
- indices = sym.Variable("indices", shape=indices_shape)
- y = sym.gather_nd(x, indices, name="y")
- sdict = infer_shape(y)
- assert(tuple(sdict["y"][0]) == tuple(out_shape))
-
- check((4,), (1, 1), (1,))
- check((4,), (1, 3), (3,))
- check((2, 3), (1, 1), (1, 3))
- check((2, 3), (2, 1), (1,))
- check((2, 3), (2, 5, 6), (5, 6))
- check((2, 3, 4), (1, 1), (1, 3, 4))
- check((2, 3, 4), (2, 1), (1, 4))
- check((2, 3, 4), (2, 5), (5, 4))
- check((2, 3, 4), (2, 5, 6), (5, 6, 4))
- check((2, 3, 4, 5), (2, 6, 7), (6, 7, 4, 5))
-
-
-if __name__ == "__main__":
- test_conv2d_packed()
- test_expand_dims()
- test_dense()
- test_matmul()
- test_concatenate()
- test_split()
- test_batchnorm()
- test_flatten()
- test_conv2d()
- test_conv2d_transpose()
- test_max_pool2d()
- test_global_pool2d()
- test_reshape()
- test_broadcast_to()
- test_broadcast_binary()
- test_reduce()
- test_transpose()
- test_prelu()
- test_squeeze()
- test_gather_nd()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm
-from tvm.contrib import util
-
-
-def test_variable_node_parsed():
- sym = nnvm.sym.Variable('data')
- tempdir = util.tempdir()
- json_filename = 'test_nnvm_symbol.json'
- with open(tempdir.relpath(json_filename), 'w') as fo:
- fo.write(nnvm.graph.create(sym).json())
- sym_str = open(tempdir.relpath(json_filename), 'r').read()
- sym = nnvm.graph.load_json(sym_str).symbol()
- sym = nnvm.sym.relu(sym)
-
-
-if __name__ == '__main__':
- test_variable_node_parsed()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-from nnvm import NNVMError
-
-def test_dense():
- x = sym.Variable('x')
- y = sym.dense(x, units=30, name="fc")
- assert y.list_input_names() == ["x", "fc_weight", "fc_bias"]
-
-def test_batch_norm():
- x = sym.Variable('x')
- y = sym.dense(x, units=30, name="fc")
- z = sym.batch_norm(x, name='bn')
- assert z.list_input_names('aux_state') == ['bn_moving_mean', 'bn_moving_var']
- assert z.list_input_names('read_only') == ['x', 'bn_gamma', 'bn_beta']
-
-def test_compose():
- x = sym.Variable('x')
- z = sym.Variable('z')
- y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2),
- name='exp', gpu=1, attr={"kk": "1"})
-
- assert y.list_input_names() == ['x']
- assert y.list_output_names() == ["exp_output"]
- assert y.list_attr()['gpu'] == '1'
- z = y.get_internals()
- assert z['add_output'].list_output_names() == ['add_output']
- assert y.list_attr(recursive=True)['add$gpu'] == '2'
-
-def test_default_input():
- x = sym.Variable('x')
- y = sym.dense(data=x, units=30, name='fc', use_bias=False)
- assert y.list_input_names() == ['x', 'fc_weight']
- tname = [z.list_output_names()[0] for z in y.list_input_variables()]
- assert tname == y.list_input_names()
- try:
- z = sym.elemwise_add(x)
- assert False
- except NNVMError:
- pass
-
-def test_copy():
- x = sym.Variable('x')
- z = sym.Variable('z')
- y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2),
- name='exp', gpu=1, attr={"kk": "1"})
- assert y.__copy__().debug_str() == y.debug_str()
-
-
-def test_op_name():
- x = sym.Variable('x')
- y = sym.exp(x)
- op_name = y.attr("op_name")
- op_func = sym.__dict__[op_name]
- z = op_func(x)
-
-if __name__ == "__main__":
- test_op_name()
- test_copy()
- test_default_input()
- test_compose()
- test_batch_norm()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-import nnvm.graph as graph
-
-def test_dense():
- x = sym.Variable('x')
- x1 = sym.dense(x, units=3, name="dense")
- x2 = sym.flatten(x1)
- x3 = sym.softmax(x2)
- assert x3.list_input_names() == ['x', 'dense_weight', 'dense_bias']
-
-
-def test_concatenate_split():
- x = sym.Variable('x')
- y = sym.Variable('y')
- y = sym.concatenate(x, y)
- assert y.list_input_names() == ['x', 'y']
- z = sym.split(y, indices_or_sections=10)
- assert len(z.list_output_names()) == 10
- z = sym.split(y, indices_or_sections=[10, 20])
- assert len(z.list_output_names()) == 3
-
-def test_expand_dims():
- x = sym.Variable('x')
- y = sym.expand_dims(x, axis=1, num_newaxis=2)
- assert y.list_input_names() == ['x']
-
-
-def test_unary():
- x = sym.Variable('x')
- x = sym.exp(x)
- x = sym.log(x)
- x = sym.sigmoid(x)
- x = sym.tanh(x)
- x = sym.relu(x)
- assert x.list_input_names() == ['x']
-
-
-def test_batchnorm():
- x = sym.Variable('x')
- x = sym.batch_norm(x, name="bn")
- assert x.list_input_names() == [
- "x", "bn_gamma", "bn_beta", "bn_moving_mean", "bn_moving_var"]
-
-
-if __name__ == "__main__":
- test_concatenate_split()
- test_expand_dims()
- test_dense()
- test_unary()
- test_batchnorm()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-
-def test_conv2d():
- x = sym.Variable('x')
- y = sym.conv2d(x, channels=3, kernel_size=(3, 3),
- name="y", use_bias=False)
- assert y.list_input_names() == ["x", "y_weight"]
-
-
-def test_max_pool2d():
- x = sym.Variable('x')
- y = sym.max_pool2d(x, pool_size=(3, 3), name="y")
- y = sym.global_max_pool2d(y)
- assert y.list_input_names() == ["x"]
-
-
-if __name__ == "__main__":
- test_conv2d()
- test_max_pool2d()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-
-def test_reshape():
- x = sym.Variable("x")
- y = sym.reshape(x, shape=(10, 20), name="y")
- assert(y.list_input_names() == ["x"])
-
-
-def test_scalar_op():
- x = sym.Variable("x")
- y = (1 / (x * 2) - 1) ** 2
- assert(y.list_input_names() == ["x"])
-
-def test_leaky_relu():
- x = sym.Variable("x")
- y = sym.leaky_relu(x, alpha=0.1)
- assert(y.list_input_names() == ["x"])
-
-def test_prelu():
- x = sym.Variable("x")
- w = sym.Variable("w")
- y = sym.prelu(x, w)
- assert(y.list_input_names()[0] == 'x')
- assert(y.list_input_names()[1] == 'w')
-
-if __name__ == "__main__":
- test_scalar_op()
- test_reshape()
- test_leaky_relu()
- test_prelu()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import nnvm.symbol as sym
-
-def test_binary_broadcast():
- x = sym.Variable('x')
- y = sym.Variable('y')
- z = x + y
- z = x * y
- z = x - y
- z = x / y
-
-
-def test_broadcast_to():
- x = sym.Variable('x')
- y = sym.broadcast_to(x, shape=(3, 3))
- assert y.list_input_names() == ["x"]
-
-
-if __name__ == "__main__":
- test_binary_broadcast()
- test_broadcast_to()
+++ /dev/null
-*.pb
-*.mlmodel
-*.ttf
-*.txt
-*synset*txt
-*.cfg
-ssd_model
-*.names
-*.jpg
-*.pbtxt
-*.weights
+++ /dev/null
-.. _tutorial-nnvm:
-
-NNVM Compiler Tutorials
------------------------
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-.. _tutorial-deploy-model-on-mali-gpu:
-
-Deploy the Pretrained Model on ARM Mali GPU
-===========================================
-**Author**: `Lianmin Zheng <https://lmzheng.net/>`_, `Ziheng Jiang <https://ziheng.org/>`_
-
-This is an example of using NNVM to compile a ResNet model and
-deploy it on a Firefly-RK3399 with an ARM Mali GPU. We will use the
-Mali-T860 MP4 GPU on this board to accelerate the inference.
-"""
-
-import tvm
-import nnvm.compiler
-import nnvm.testing
-from tvm import rpc
-from tvm.contrib import util, graph_runtime as runtime
-from tvm.contrib.download import download_testdata
-
-######################################################################
-# Build TVM Runtime on Device
-# ---------------------------
-#
-# The first step is to build the TVM runtime on the remote device.
-#
-# .. note::
-#
-# All instructions in both this section and the next should be
-# executed on the target device, e.g. an RK3399, which we assume
-# is running Linux.
-#
-# Since we do the compilation on the local machine, the remote device is only used
-# for running the generated code. We only need to build the TVM runtime on
-# the remote device. Make sure you have the OpenCL driver on your board.
-# You can refer to this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_
-# to set up the OS and OpenCL driver for the RK3399.
-#
-# .. code-block:: bash
-#
-# git clone --recursive https://github.com/apache/incubator-tvm tvm
-# cd tvm
-# cp cmake/config.cmake .
-# sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
-# make runtime -j4
-#
-# After building the runtime successfully, we need to set environment variables
-# in the :code:`~/.bashrc` file. Edit :code:`~/.bashrc` (for example with
-# :code:`vi ~/.bashrc`) and add the line below (assuming your TVM directory
-# is :code:`~/tvm`):
-#
-# .. code-block:: bash
-#
-# export PYTHONPATH=$PYTHONPATH:~/tvm/python
-#
-# To update the environment variables, execute :code:`source ~/.bashrc`.
-
-######################################################################
-# Set Up RPC Server on Device
-# ---------------------------
-# To start an RPC server, run the following command on your remote device
-# (which is the RK3399 in our example).
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
-#
-# If you see the line below, it means the RPC server started
-# successfully on your device.
-#
-# .. code-block:: bash
-#
-# INFO:root:RPCServer: bind to 0.0.0.0:9090
-#
-
-######################################################################
-# Prepare the Pre-trained Model
-# -----------------------------
-# Now back on the host machine, which should have a full TVM installation (with LLVM).
-#
-# We will use a pre-trained model from the
-# `MXNet Gluon model zoo <https://mxnet.incubator.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can find more details about this part in the tutorial :ref:`tutorial-from-mxnet`.
-
-from mxnet.gluon.model_zoo.vision import get_model
-from PIL import Image
-import numpy as np
-
-# only one line to get the model
-block = get_model('resnet18_v1', pretrained=True)
-
-######################################################################
-# In order to test our model, we download an image of a cat and
-# transform it into the expected input format.
-img_name = 'cat.png'
-img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true',
- img_name, module='data')
-image = Image.open(img_path).resize((224, 224))
-
-def transform_image(image):
- image = np.array(image) - np.array([123., 117., 104.])
- image /= np.array([58.395, 57.12, 57.375])
- image = image.transpose((2, 0, 1))
- image = image[np.newaxis, :]
- return image
-
-x = transform_image(image)
-
-######################################################################
-# The synset is used to transform the label from an ImageNet class index to
-# a word humans can understand.
-synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
- '4d0b62f3d01426887599d4f7ede23ee5/raw/',
- '596b27d23537e5a1b5751d2b0481ef172f58b539/',
- 'imagenet1000_clsid_to_human.txt'])
-
-synset_name = 'imagenet1000_clsid_to_human.txt'
-synset_path = download_testdata(synset_url, synset_name, module='data')
-with open(synset_path) as f:
- synset = eval(f.read())
-
-######################################################################
-# Now we would like to port the Gluon model to a portable computational graph.
-# It only takes a few lines.
-
-# We support the MXNet static graph (symbol) and HybridBlock in mxnet.gluon
-net, params = nnvm.frontend.from_mxnet(block)
-# we want probabilities, so add a softmax operator
-net = nnvm.sym.softmax(net)
-
-######################################################################
-# Here are some basic data workload configurations.
-batch_size = 1
-num_classes = 1000
-image_shape = (3, 224, 224)
-data_shape = (batch_size,) + image_shape
-
-######################################################################
-# Compile The Graph
-# -----------------
-# To compile the graph, we call the :any:`nnvm.compiler.build` function
-# with the graph configuration and parameters. As we use OpenCL for
-# GPU computing, TVM will generate both OpenCL kernel code and ARM
-# CPU host code. The CPU host code is used for calling OpenCL kernels.
-# In order to generate correct CPU code, we need to specify the target
-# triplet for the host ARM device by setting the parameter :code:`target_host`.
-
-######################################################################
-# If we run the example on our x86 server for demonstration, we can simply
-# set it as :code:`llvm`. If running it on the RK3399, we need to
-# specify its instruction set. Set :code:`local_demo` to False if you
-# want to run this tutorial with a real device.
-
-local_demo = True
-
-if local_demo:
- target_host = "llvm"
- target = "llvm"
-else:
-    # Here is the setting for my rk3399 board.
-    # If you don't use an rk3399, you can query your target triple by
-    # executing `gcc -v` on your board.
- target_host = "llvm -target=aarch64-linux-gnu"
-
- # set target as `tvm.target.mali` instead of 'opencl' to enable
- # optimization for mali
- target = tvm.target.mali()
-
-with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(net, target=target,
- shape={"data": data_shape}, params=params, target_host=target_host)
-
-# After `nnvm.compiler.build`, you will get three return values: the graph,
-# the library, and the new parameters. Some optimizations change the
-# parameters, but the results of the model stay the same.
-
-# Save the library at local temporary directory.
-tmp = util.tempdir()
-lib_fname = tmp.relpath('net.tar')
-lib.export_library(lib_fname)
-
-######################################################################
-# Deploy the Model Remotely by RPC
-# --------------------------------
-# With RPC, you can deploy the model remotely from your host machine
-# to the remote device.
-
-# obtain an RPC session from remote device.
-if local_demo:
- remote = rpc.LocalSession()
-else:
- # The following is my environment, change this to the IP address of your target device
- host = '10.77.1.145'
- port = 9090
- remote = rpc.connect(host, port)
-
-# upload the library to remote device and load it
-remote.upload(lib_fname)
-rlib = remote.load_module('net.tar')
-
-# create the remote runtime module
-ctx = remote.cl(0) if not local_demo else remote.cpu(0)
-module = runtime.create(graph, rlib, ctx)
-# set parameter (upload params to the remote device. This may take a while)
-module.set_input(**params)
-# set input data
-module.set_input('data', tvm.nd.array(x.astype('float32')))
-# run
-module.run()
-# get output
-out = module.get_output(0)
-# get top1 result
-top1 = np.argmax(out.asnumpy())
-print('TVM prediction top-1: {}'.format(synset[top1]))
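-
-# Optionally, we can inspect the top-5 categories as well (a small sketch
-# reusing the same output tensor):
-top5 = out.asnumpy().flatten().argsort()[-5:][::-1]
-print('TVM prediction top-5: {}'.format([synset[i] for i in top5]))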
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-.. _tutorial-deploy-model-on-rasp:
-
-Deploy the Pretrained Model on Raspberry Pi
-===========================================
-**Author**: `Ziheng Jiang <https://ziheng.org/>`_
-
-This is an example of using NNVM to compile a ResNet model and deploy
-it on Raspberry Pi.
-"""
-
-import tvm
-import nnvm.compiler
-import nnvm.testing
-from tvm import rpc
-from tvm.contrib import util, graph_runtime as runtime
-from tvm.contrib.download import download_testdata
-
-######################################################################
-# .. _build-tvm-runtime-on-device:
-#
-# Build TVM Runtime on Device
-# ---------------------------
-#
-# The first step is to build tvm runtime on the remote device.
-#
-# .. note::
-#
-#   All instructions in both this section and the next section should be
-#   executed on the target device, e.g. a Raspberry Pi, which we assume
-#   is running Linux.
-#
-# Since we do the compilation on the local machine, the remote device is only
-# used for running the generated code. We only need to build the tvm runtime
-# on the remote device.
-#
-# .. code-block:: bash
-#
-# git clone --recursive https://github.com/apache/incubator-tvm tvm
-# cd tvm
-# make runtime -j4
-#
-# After building the runtime successfully, we need to set environment variables
-# in the :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
-# using :code:`vi ~/.bashrc` and add the line below (assuming your TVM
-# directory is in :code:`~/tvm`):
-#
-# .. code-block:: bash
-#
-# export PYTHONPATH=$PYTHONPATH:~/tvm/python
-#
-# To update the environment variables, execute :code:`source ~/.bashrc`.
-
-######################################################################
-# Set Up RPC Server on Device
-# ---------------------------
-# To start an RPC server, run the following command on your remote device
-# (which is the Raspberry Pi in our example).
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
-#
-# If you see the line below, it means the RPC server started
-# successfully on your device.
-#
-# .. code-block:: bash
-#
-# INFO:root:RPCServer: bind to 0.0.0.0:9090
-#
-
-######################################################################
-# Prepare the Pre-trained Model
-# -----------------------------
-# Back on the host machine, which should have a full TVM installed (with LLVM).
-#
-# We will use a pre-trained model from the
-# `MXNet Gluon model zoo <https://mxnet.incubator.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can find more details about this part in the tutorial :ref:`tutorial-from-mxnet`.
-
-from mxnet.gluon.model_zoo.vision import get_model
-from PIL import Image
-import numpy as np
-
-# one line to get the model
-block = get_model('resnet18_v1', pretrained=True)
-
-######################################################################
-# In order to test our model, here we download an image of a cat and
-# transform its format.
-img_name = 'cat.png'
-img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true',
- img_name, module='data')
-image = Image.open(img_path).resize((224, 224))
-
-def transform_image(image):
- image = np.array(image) - np.array([123., 117., 104.])
- image /= np.array([58.395, 57.12, 57.375])
- image = image.transpose((2, 0, 1))
- image = image[np.newaxis, :]
- return image
-
-x = transform_image(image)
-
-######################################################################
-# The synset is used to transform the label from an ImageNet class index
-# to a human-readable word.
-synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
- '4d0b62f3d01426887599d4f7ede23ee5/raw/',
- '596b27d23537e5a1b5751d2b0481ef172f58b539/',
- 'imagenet1000_clsid_to_human.txt'])
-synset_name = 'imagenet1000_clsid_to_human.txt'
-synset_path = download_testdata(synset_url, synset_name, module='data')
-with open(synset_path) as f:
- synset = eval(f.read())
-
-######################################################################
-# Now we would like to port the Gluon model to a portable computational graph.
-# It's as easy as several lines.
-
-# We support MXNet static graph (symbol) and HybridBlock in mxnet.gluon
-net, params = nnvm.frontend.from_mxnet(block)
-# we want a probability so add a softmax operator
-net = nnvm.sym.softmax(net)
-
-######################################################################
-# Here are some basic data workload configurations.
-batch_size = 1
-num_classes = 1000
-image_shape = (3, 224, 224)
-data_shape = (batch_size,) + image_shape
-
-######################################################################
-# Compile The Graph
-# -----------------
-# To compile the graph, we call the :any:`nnvm.compiler.build` function
-# with the graph configuration and parameters. However, you cannot
-# deploy an x86 program on a device with an ARM instruction set. This
-# means NNVM also needs to know the compilation options for the target
-# device, apart from the :code:`net` and :code:`params` arguments that
-# specify the deep learning workload. The options matter: different
-# options will lead to very different performance.
-
-######################################################################
-# If we run the example on our x86 server for demonstration, we can simply
-# set it as :code:`llvm`. If running it on the Raspberry Pi, we need to
-# specify its instruction set. Set :code:`local_demo` to False if you want
-# to run this tutorial with a real device.
-
-local_demo = True
-
-if local_demo:
- target = tvm.target.create('llvm')
-else:
- target = tvm.target.arm_cpu('rasp3b')
- # The above line is a simple form of
- # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon')
-
-with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(
- net, target, shape={"data": data_shape}, params=params)
-
-# After `nnvm.compiler.build`, you will get three return values: the graph,
-# the library, and the new parameters. Some optimizations change the
-# parameters, but the results of the model stay the same.
-
-# Save the library at local temporary directory.
-tmp = util.tempdir()
-lib_fname = tmp.relpath('net.tar')
-lib.export_library(lib_fname)
-
-######################################################################
-# Deploy the Model Remotely by RPC
-# --------------------------------
-# With RPC, you can deploy the model remotely from your host machine
-# to the remote device.
-
-# obtain an RPC session from remote device.
-if local_demo:
- remote = rpc.LocalSession()
-else:
- # The following is my environment, change this to the IP address of your target device
- host = '10.77.1.162'
- port = 9090
- remote = rpc.connect(host, port)
-
-# upload the library to remote device and load it
-remote.upload(lib_fname)
-rlib = remote.load_module('net.tar')
-
-# create the remote runtime module
-ctx = remote.cpu(0)
-module = runtime.create(graph, rlib, ctx)
-# set parameter (upload params to the remote device. This may take a while)
-module.set_input(**params)
-# set input data
-module.set_input('data', tvm.nd.array(x.astype('float32')))
-# run
-module.run()
-# get output
-out = module.get_output(0)
-# get top1 result
-top1 = np.argmax(out.asnumpy())
-print('TVM prediction top-1: {}'.format(synset[top1]))
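-
-# As a rough performance check, we can time end-to-end execution with the
-# module's time evaluator (a minimal sketch; the repeat count of 10 is an
-# arbitrary choice):
-ftimer = module.module.time_evaluator('run', ctx, number=10)
-print('mean inference time: %.2f ms' % (ftimer().mean * 1000))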
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy Single Shot Multibox Detector (SSD) model
-================================================
-**Author**: `Yao Wang <https://github.com/kevinthesun>`_, \
-`Leyuan Wang <https://github.com/Laurawly>`_
-
-This article is an introductory tutorial to deploy SSD models with TVM.
-We will use a pretrained MXNet SSD model with ResNet50 as the body network
-and convert it to an NNVM graph.
-"""
-import os
-import zipfile
-import tvm
-import mxnet as mx
-import cv2
-import numpy as np
-
-from nnvm import compiler
-from nnvm.frontend import from_mxnet
-from tvm import relay
-from tvm.contrib.download import download_testdata
-from tvm.contrib import graph_runtime
-from mxnet.model import load_checkpoint
-
-
-######################################################################
-# Preliminary and Set parameters
-# ------------------------------
-# We need to build TVM with sort support. In the TVM root directory:
-#
-# .. code-block:: bash
-#
-#   echo "set(USE_SORT ON)" >> config.cmake
-# make -j8
-#
-
-model_name = "ssd_resnet50_512"
-model_file = "%s.zip" % model_name
-test_image = "dog.jpg"
-dshape = (1, 3, 512, 512)
-dtype = "float32"
-
-# Target settings
-# Use these commented settings to build for cuda.
-#target = 'cuda'
-#ctx = tvm.gpu(0)
-# Use these commented settings to build for opencl.
-#target = 'opencl'
-#ctx = tvm.opencl(0)
-target = "llvm"
-ctx = tvm.cpu()
-
-######################################################################
-# Download MXNet SSD pre-trained model and demo image
-# ---------------------------------------------------
-# The pre-trained model is available at
-# https://github.com/apache/incubator-mxnet/tree/master/example/ssd
-
-model_url = "https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/" \
- "resnet50_ssd_512_voc0712_trainval.zip"
-image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \
- "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg"
-inference_symbol_folder = \
- "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26"
-inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \
- "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip"
-
-model_file_path = download_testdata(model_url, model_file, module=["mxnet", "ssd_model"])
-inference_symbol_path = download_testdata(inference_symbol_url, "inference_model.zip",
- module=["mxnet", "ssd_model"])
-test_image_path = download_testdata(image_url, test_image, module="data")
-model_dir = os.path.dirname(model_file_path)
-
-with zipfile.ZipFile(model_file_path, 'r') as zip_ref:
-    zip_ref.extractall(model_dir)
-with zipfile.ZipFile(inference_symbol_path) as zip_ref:
-    zip_ref.extractall(model_dir)
-
-######################################################################
-# Convert and compile the model with NNVM or Relay for CPU.
-
-sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (model_dir, inference_symbol_folder))
-_, arg_params, aux_params = load_checkpoint("%s/%s" % (model_dir, model_name), 0)
-
-import argparse
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "-f", "--frontend",
- help="Frontend for compilation, nnvm or relay",
- type=str,
- default="nnvm")
-args = parser.parse_args()
-if args.frontend == "relay":
- net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, \
- aux_params=aux_params)
- with relay.build_config(opt_level=3):
- graph, lib, params = relay.build(net, target, params=params)
-elif args.frontend == "nnvm":
- net, params = from_mxnet(sym, arg_params, aux_params)
- with compiler.build_config(opt_level=3):
- graph, lib, params = compiler.build(
- net, target, {"data": dshape}, params=params)
-else:
- parser.print_help()
- parser.exit()
-
-######################################################################
-# Create TVM runtime and do inference
-
-# Preprocess image
-image = cv2.imread(test_image_path)
-img_data = cv2.resize(image, (dshape[2], dshape[3]))
-img_data = img_data[:, :, (2, 1, 0)].astype(np.float32)
-img_data -= np.array([123, 117, 104])
-img_data = np.transpose(np.array(img_data), (2, 0, 1))
-img_data = np.expand_dims(img_data, axis=0)
-# Build TVM runtime
-m = graph_runtime.create(graph, lib, ctx)
-m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
-m.set_input(**params)
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0)
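-# each row of the output tensor is [class_id, score, xmin, ymin, xmax, ymax],
-# with the box coordinates normalized to [0, 1] (see the display function below)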
-
-
-######################################################################
-# Display result
-
-class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair",
- "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant",
- "sheep", "sofa", "train", "tvmonitor"]
-def display(img, out, thresh=0.5):
- import random
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- mpl.rcParams['figure.figsize'] = (10, 10)
- pens = dict()
- plt.clf()
- plt.imshow(img)
- for det in out:
- cid = int(det[0])
- if cid < 0:
- continue
- score = det[1]
- if score < thresh:
- continue
- if cid not in pens:
- pens[cid] = (random.random(), random.random(), random.random())
- scales = [img.shape[1], img.shape[0]] * 2
- xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
- rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False,
- edgecolor=pens[cid], linewidth=3)
- plt.gca().add_patch(rect)
- text = class_names[cid]
- plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score),
- bbox=dict(facecolor=pens[cid], alpha=0.5),
- fontsize=12, color='white')
- plt.show()
-
-image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-display(image, tvm_output.asnumpy()[0], thresh=0.45)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile CoreML Models
-=====================
-**Author**: `Joshua Z. Zhang <https://zhreshold.github.io/>`_
-
-This article is an introductory tutorial to deploy CoreML models with NNVM.
-
-For us to begin with, the coremltools module is required to be installed.
-
-A quick solution is to install it via pip
-
-.. code-block:: bash
-
-    pip install -U coremltools --user
-
-or please refer to the official site
-https://github.com/apple/coremltools
-"""
-import nnvm
-import tvm
-import coremltools as cm
-import numpy as np
-from PIL import Image
-from tvm.contrib.download import download_testdata
-
-######################################################################
-# Load pretrained CoreML model
-# ----------------------------
-# We will download and load a pretrained mobilenet classification network
-# provided by Apple in this example
-model_url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
-model_file = 'mobilenet.mlmodel'
-model_path = download_testdata(model_url, model_file, module='coreml')
-# now you have mobilenet.mlmodel on disk
-mlmodel = cm.models.MLModel(model_path)
-# we can load the graph as an NNVM compatible model
-sym, params = nnvm.frontend.from_coreml(mlmodel)
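-# the converted symbol lists its inputs; the first entry is the data input
-# we must feed at runtime (for this model it is named 'image'):
-print(sym.list_input_names()[0])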
-
-######################################################################
-# Load a test image
-# ------------------
-# A single cat dominates the examples!
-from PIL import Image
-img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
-img_path = download_testdata(img_url, 'cat.png', module='data')
-img = Image.open(img_path).resize((224, 224))
-image = np.asarray(img)
-image = image.transpose((2, 0, 1))
-x = image[np.newaxis, :]
-######################################################################
-# Compile the model on NNVM
-# ---------------------------
-# We should be familiar with the process by now.
-import nnvm.compiler
-target = 'cuda'
-shape_dict = {'image': x.shape}
-with nnvm.compiler.build_config(opt_level=2, add_pass=['AlterOpLayout']):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
-
-######################################################################
-# Execute on TVM
-# -------------------
-# The process is no different from the other examples
-from tvm.contrib import graph_runtime
-ctx = tvm.gpu(0)
-dtype = 'float32'
-m = graph_runtime.create(graph, lib, ctx)
-# set inputs
-m.set_input('image', tvm.nd.array(x.astype(dtype)))
-m.set_input(**params)
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0)
-top1 = np.argmax(tvm_output.asnumpy()[0])
-
-#####################################################################
-# Look up synset name
-# -------------------
-# Look up prediction top 1 index in 1000 class synset.
-synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
- '4d0b62f3d01426887599d4f7ede23ee5/raw/',
- '596b27d23537e5a1b5751d2b0481ef172f58b539/',
- 'imagenet1000_clsid_to_human.txt'])
-synset_name = 'imagenet1000_clsid_to_human.txt'
-synset_path = download_testdata(synset_url, synset_name, module='data')
-with open(synset_path) as f:
- synset = eval(f.read())
-print('Top-1 id', top1, 'class name', synset[top1])
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile YOLO-V2 and YOLO-V3 in DarkNet Models
-=============================================
-**Author**: `Siju Samuel <https://siju-samuel.github.io/>`_
-
-This article is an introductory tutorial to deploy darknet models with NNVM.
-All the required models and libraries will be downloaded from the internet by the script.
-This script runs the YOLO-V2 and YOLO-V3 models and draws the bounding boxes.
-Darknet parsing depends on the CFFI and CV2 libraries,
-so please install CFFI and CV2 before executing this script.
-
-.. code-block:: bash
-
- pip install cffi
- pip install opencv-python
-"""
-
-import nnvm
-import nnvm.frontend.darknet
-import tvm.relay.testing.yolo_detection
-import tvm.relay.testing.darknet
-import matplotlib.pyplot as plt
-import numpy as np
-import tvm
-import sys
-
-from ctypes import *
-from tvm.contrib.download import download_testdata
-from tvm.relay.testing.darknet import __darknetffi__
-
-# Model name
-MODEL_NAME = 'yolov3'
-
-######################################################################
-# Download required files
-# -----------------------
-# Download the cfg and weights files on the first run.
-CFG_NAME = MODEL_NAME + '.cfg'
-WEIGHTS_NAME = MODEL_NAME + '.weights'
-REPO_URL = 'https://github.com/siju-samuel/darknet/blob/master/'
-CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true'
-WEIGHTS_URL = 'https://pjreddie.com/media/files/' + WEIGHTS_NAME
-
-cfg_path = download_testdata(CFG_URL, CFG_NAME, module="darknet")
-weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module="darknet")
-
-# Download and Load darknet library
-if sys.platform in ['linux', 'linux2']:
- DARKNET_LIB = 'libdarknet2.0.so'
- DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true'
-elif sys.platform == 'darwin':
- DARKNET_LIB = 'libdarknet_mac2.0.so'
- DARKNET_URL = REPO_URL + 'lib_osx/' + DARKNET_LIB + '?raw=true'
-else:
- err = "Darknet lib is not supported on {} platform".format(sys.platform)
- raise NotImplementedError(err)
-
-lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module="darknet")
-
-DARKNET_LIB = __darknetffi__.dlopen(lib_path)
-net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0)
-dtype = 'float32'
-batch_size = 1
-
-print("Converting darknet to nnvm symbols...")
-sym, params = nnvm.frontend.darknet.from_darknet(net, dtype)
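-# the loaded network object carries its input geometry (net.c, net.h, net.w),
-# which we use below to construct the input shape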
-
-######################################################################
-# Compile the model on NNVM
-# -------------------------
-# compile the model
-target = 'llvm'
-ctx = tvm.cpu(0)
-data = np.empty([batch_size, net.c, net.h, net.w], dtype)
-shape = {'data': data.shape}
-print("Compiling the model...")
-dtype_dict = {}
-with nnvm.compiler.build_config(opt_level=2):
- graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params)
-
-[neth, netw] = shape['data'][2:] # Current image shape is 608x608
-######################################################################
-# Load a test image
-# --------------------------------------------------------------------
-test_image = 'dog.jpg'
-print("Loading the test image...")
-img_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + \
- test_image + '?raw=true'
-img_path = download_testdata(img_url, test_image, "data")
-
-data = tvm.relay.testing.darknet.load_image(img_path, netw, neth)
-######################################################################
-# Execute on TVM Runtime
-# ----------------------
-# The process is no different from other examples.
-from tvm.contrib import graph_runtime
-
-m = graph_runtime.create(graph, lib, ctx)
-
-# set inputs
-m.set_input('data', tvm.nd.array(data.astype(dtype)))
-m.set_input(**params)
-# execute
-print("Running the test image...")
-
-m.run()
-# get outputs
-tvm_out = []
-if MODEL_NAME == 'yolov2':
- layer_out = {}
- layer_out['type'] = 'Region'
- # Get the region layer attributes (n, out_c, out_h, out_w, classes, coords, background)
- layer_attr = m.get_output(2).asnumpy()
- layer_out['biases'] = m.get_output(1).asnumpy()
- out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
- layer_attr[2], layer_attr[3])
- layer_out['output'] = m.get_output(0).asnumpy().reshape(out_shape)
- layer_out['classes'] = layer_attr[4]
- layer_out['coords'] = layer_attr[5]
- layer_out['background'] = layer_attr[6]
- tvm_out.append(layer_out)
-
-elif MODEL_NAME == 'yolov3':
- for i in range(3):
- layer_out = {}
- layer_out['type'] = 'Yolo'
- # Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total)
- layer_attr = m.get_output(i*4+3).asnumpy()
- layer_out['biases'] = m.get_output(i*4+2).asnumpy()
- layer_out['mask'] = m.get_output(i*4+1).asnumpy()
- out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
- layer_attr[2], layer_attr[3])
- layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape)
- layer_out['classes'] = layer_attr[4]
- tvm_out.append(layer_out)
-
-# do the detection and bring up the bounding boxes
-thresh = 0.5
-nms_thresh = 0.45
-img = tvm.relay.testing.darknet.load_image_color(img_path)
-_, im_h, im_w = img.shape
-dets = tvm.relay.testing.yolo_detection.fill_network_boxes((netw, neth), (im_w, im_h), thresh,
- 1, tvm_out)
-last_layer = net.layers[net.n - 1]
-tvm.relay.testing.yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh)
-
-coco_name = 'coco.names'
-coco_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + coco_name + '?raw=true'
-font_name = 'arial.ttf'
-font_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + font_name + '?raw=true'
-coco_path = download_testdata(coco_url, coco_name, module='data')
-font_path = download_testdata(font_url, font_name, module='data')
-
-with open(coco_path) as f:
- content = f.readlines()
-
-names = [x.strip() for x in content]
-
-tvm.relay.testing.yolo_detection.draw_detections(font_path, img, dets, thresh, names, last_layer.classes)
-plt.imshow(img.transpose(1, 2, 0))
-plt.show()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-.. _tutorial-from-mxnet:
-
-Compile MXNet Models
-====================
-**Author**: `Joshua Z. Zhang <https://zhreshold.github.io/>`_
-
-This article is an introductory tutorial to deploy mxnet models with NNVM.
-
-For us to begin with, the mxnet module is required to be installed.
-
-A quick solution is
-
-.. code-block:: bash
-
- pip install mxnet --user
-
-or please refer to the official installation guide.
-https://mxnet.incubator.apache.org/versions/master/install/index.html
-"""
-# some standard imports
-import mxnet as mx
-import numpy as np
-import nnvm
-import tvm
-from tvm.contrib.download import download_testdata
-
-######################################################################
-# Download Resnet18 model from Gluon Model Zoo
-# ---------------------------------------------
-# In this section, we download a pretrained imagenet model and classify an image.
-from mxnet.gluon.model_zoo.vision import get_model
-from PIL import Image
-from matplotlib import pyplot as plt
-block = get_model('resnet18_v1', pretrained=True)
-img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
-img_name = 'cat.png'
-synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
- '4d0b62f3d01426887599d4f7ede23ee5/raw/',
- '596b27d23537e5a1b5751d2b0481ef172f58b539/',
- 'imagenet1000_clsid_to_human.txt'])
-synset_name = 'imagenet1000_clsid_to_human.txt'
-img_path = download_testdata(img_url, img_name, module='data')
-synset_path = download_testdata(synset_url, synset_name, module='data')
-with open(synset_path) as f:
- synset = eval(f.read())
-image = Image.open(img_path).resize((224, 224))
-plt.imshow(image)
-plt.show()
-
-def transform_image(image):
- image = np.array(image) - np.array([123., 117., 104.])
- image /= np.array([58.395, 57.12, 57.375])
- image = image.transpose((2, 0, 1))
- image = image[np.newaxis, :]
- return image
-
-x = transform_image(image)
-print('x', x.shape)
-
-######################################################################
-# Compile the Graph
-# -----------------
-# Now we would like to port the Gluon model to a portable computational graph.
-# It's as easy as several lines.
-# We support MXNet static graph (symbol) and HybridBlock in mxnet.gluon
-sym, params = nnvm.frontend.from_mxnet(block)
-# we want a probability so add a softmax operator
-sym = nnvm.sym.softmax(sym)
-
-######################################################################
-# now compile the graph
-import nnvm.compiler
-target = 'cuda'
-shape_dict = {'data': x.shape}
-with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
-
-######################################################################
-# Execute the portable graph on TVM
-# ---------------------------------
-# Now, we would like to reproduce the same forward computation using TVM.
-from tvm.contrib import graph_runtime
-ctx = tvm.gpu(0)
-dtype = 'float32'
-m = graph_runtime.create(graph, lib, ctx)
-# set inputs
-m.set_input('data', tvm.nd.array(x.astype(dtype)))
-m.set_input(**params)
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0)
-top1 = np.argmax(tvm_output.asnumpy()[0])
-print('TVM prediction top-1:', top1, synset[top1])
-
-######################################################################
-# Use MXNet symbol with pretrained weights
-# ----------------------------------------
-# MXNet often uses `arg_params` and `aux_params` to store network parameters
-# separately; here we show how to use these weights with the existing API
-def block2symbol(block):
- data = mx.sym.Variable('data')
- sym = block(data)
- args = {}
- auxs = {}
- for k, v in block.collect_params().items():
- args[k] = mx.nd.array(v.data().asnumpy())
- return sym, args, auxs
-mx_sym, args, auxs = block2symbol(block)
-# usually we would save/load it as checkpoint
-mx.model.save_checkpoint('resnet18_v1', 0, mx_sym, args, auxs)
-# there are 'resnet18_v1-0000.params' and 'resnet18_v1-symbol.json' on disk
-
-######################################################################
-# for a normal mxnet model, we start from here
-mx_sym, args, auxs = mx.model.load_checkpoint('resnet18_v1', 0)
-# now we use the same API to get NNVM compatible symbol
-nnvm_sym, nnvm_params = nnvm.frontend.from_mxnet(mx_sym, args, auxs)
-# repeat the same steps to run this model using TVM
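-# for example (a minimal sketch, mirroring the compilation step above):
-with nnvm.compiler.build_config(opt_level=3):
-    graph, lib, params = nnvm.compiler.build(
-        nnvm_sym, target, {'data': x.shape}, params=nnvm_params)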
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy Deep Learning Models to OpenGL and WebGL
-===============================================
-**Author**: `Zhixun Tan <https://github.com/phisiart>`_
-
-This example shows how to build a neural network with NNVM python frontend and
-generate runtime library for WebGL running in a browser with TVM.
-To run this notebook, you need to install tvm and nnvm.
-Note that you need to build tvm with OpenGL enabled.
-"""
-
-######################################################################
-# Overview
-# --------
-# In this tutorial, we will download a pre-trained resnet18 model from Gluon
-# Model Zoo, and run image classification in 3 different ways:
-#
-# - Run locally:
-# We will compile the model into a TVM library with OpenGL device code and
-# directly run it locally.
-#
-# - Run in a browser through RPC:
-# We will compile the model into a JavaScript TVM library with WebGL device
-# code, and upload it to an RPC server that is hosting JavaScript TVM runtime
-# to run it.
-#
-# - Export a JavaScript library and run in a browser:
-# We will compile the model into a JavaScript TVM library with WebGL device
-# code, combine it with JavaScript TVM runtime, and pack everything together.
-# Then we will run it directly in a browser.
-#
-from __future__ import print_function
-
-import numpy as np
-import tvm
-from tvm.contrib.download import download_testdata
-import nnvm.compiler
-import nnvm.testing
-
-# This tutorial must be run with the OpenGL backend enabled in TVM.
-# The NNVM CI does not enable OpenGL yet, but you can still run this script.
-opengl_enabled = tvm.module.enabled("opengl")
-
-# To run the local demo, set this flag to True.
-run_deploy_local = False
-
-# To run the RPC demo, set this flag to True.
-run_deploy_rpc = False
-
-# To run the WebGL deploy demo, set this flag to True.
-run_deploy_web = False
-
-######################################################################
-# Download a Pre-trained Resnet18 Model
-# -------------------------------------
-# Here we define 2 functions:
-#
-# - A function that downloads a pre-trained resnet18 model from Gluon Model Zoo.
-# The model that we download is in MXNet format, we then transform it into an
-# NNVM computation graph.
-#
-# - A function that downloads a file that contains the name of all the image
-# classes in this model.
-#
-def load_mxnet_resnet():
- """Load a pretrained resnet model from MXNet and transform that into NNVM
- format.
-
- Returns
- -------
- net : nnvm.Symbol
- The loaded resnet computation graph.
-
- params : dict[str -> NDArray]
- The pretrained model parameters.
-
- data_shape: tuple
- The shape of the input tensor (an image).
-
- out_shape: tuple
- The shape of the output tensor (probability of all classes).
- """
-
- print("Loading pretrained resnet model from MXNet...")
-
- # Download a pre-trained mxnet resnet18_v1 model.
- from mxnet.gluon.model_zoo.vision import get_model
- block = get_model('resnet18_v1', pretrained=True)
-
- # Transform the mxnet model into NNVM.
- # We want a probability so add a softmax operator.
- sym, params = nnvm.frontend.from_mxnet(block)
- sym = nnvm.sym.softmax(sym)
-
- print("- Model loaded!")
- return sym, params, (1, 3, 224, 224), (1, 1000)
-
-def download_synset():
- """Download a dictionary from class index to name.
- This lets us know what our prediction actually is.
-
- Returns
- -------
- synset : dict[int -> str]
- The loaded synset.
- """
-
- print("Downloading synset...")
-
- url = "https://gist.githubusercontent.com/zhreshold/" + \
- "4d0b62f3d01426887599d4f7ede23ee5/raw/" + \
- "596b27d23537e5a1b5751d2b0481ef172f58b539/" + \
- "imagenet1000_clsid_to_human.txt"
- file_name = "imagenet1000_clsid_to_human.txt"
-
- file_path = download_testdata(url, file_name, module='data')
- with open(file_path) as f:
- synset = eval(f.read())
-
- print("- Synset downloaded!")
- return synset
-
-######################################################################
-# Download Input Image
-# --------------------
-# Here we define 2 functions that prepare an image that we want to perform
-# classification on.
-#
-# - A function that downloads a cat image.
-#
-# - A function that performs preprocessing to an image so that it fits the
-# format required by the resnet18 model.
-#
-def download_image():
- """Download a cat image and resize it to 224x224 which fits resnet.
-
- Returns
- -------
- image : PIL.Image.Image
- The loaded and resized image.
- """
-
- print("Downloading cat image...")
-
- from matplotlib import pyplot as plt
- from PIL import Image
-
- url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
- img_name = "cat.png"
-
- img_path = download_testdata(url, img_name, module='data')
- image = Image.open(img_path).resize((224, 224))
-
- print("- Cat image downloaded!")
-
- plt.imshow(image)
- plt.show()
-
- return image
-
-def transform_image(image):
- """Perform necessary preprocessing to input image.
-
- Parameters
- ----------
- image : numpy.ndarray
- The raw image.
-
- Returns
- -------
- image : numpy.ndarray
- The preprocessed image.
- """
-
- image = np.array(image) - np.array([123., 117., 104.])
- image /= np.array([58.395, 57.12, 57.375])
- image = image.transpose((2, 0, 1))
- image = image[np.newaxis, :]
- return image
-
-######################################################################
-# Compile the Model
-# -----------------
-# Here we define a function that invokes the NNVM compiler.
-#
-def compile_net(net, target_host, target, data_shape, params):
- """Compiles an NNVM computation graph.
-
- Parameters
- ----------
- net : nnvm.Graph
- The NNVM computation graph.
-
- target_host : str
- The target to compile the host portion of the library.
-
- target : str
- The target to compile the device portion of the library.
-
- data_shape : tuple
- The shape of the input data (image).
-
- params : dict[str -> NDArray]
- Model parameters.
-
- Returns
- -------
- graph : Graph
- The final execution graph.
-
- libmod : tvm.Module
- The module that comes with the execution graph
-
- params : dict[str -> NDArray]
- The updated parameters of graph if params is passed.
- This can be different from the params passed in.
- """
-
- print("Compiling the neural network...")
-
- with nnvm.compiler.build_config(opt_level=0):
- deploy_graph, lib, deploy_params = nnvm.compiler.build(
- net,
- target_host=target_host,
- target=target,
- shape={"data": data_shape},
- params=params)
-
- print("- Complilation completed!")
- return deploy_graph, lib, deploy_params
-
-######################################################################
-# Demo 1: Deploy Locally
-# ----------------------
-# In this demo, we will compile the model targeting the local machine.
-#
-# Then we will demonstrate how to save the compiled model as a shared library
-# and load it back.
-#
-# Finally, we will run the model.
-#
-def deploy_local():
- """Runs the demo that deploys a model locally.
- """
-
- # Load resnet model.
- net, params, data_shape, out_shape = load_mxnet_resnet()
-
- # Compile the model.
-    # Note that we specify the host target as "llvm".
- deploy_graph, lib, deploy_params = compile_net(
- net,
- target_host="llvm",
- target="opengl",
- data_shape=data_shape,
- params=params)
-
- # Save the compiled module.
- # Note we need to save all three files returned from the NNVM compiler.
- print("Saving the compiled module...")
- from tvm.contrib import util
- temp = util.tempdir()
-
- path_lib = temp.relpath("deploy_lib.so")
- path_graph_json = temp.relpath("deploy_graph.json")
- path_params = temp.relpath("deploy_param.params")
-
- lib.export_library(path_lib)
- with open(path_graph_json, "w") as fo:
- fo.write(deploy_graph.json())
- with open(path_params, "wb") as fo:
- fo.write(nnvm.compiler.save_param_dict(deploy_params))
-
- print("- Saved files:", temp.listdir())
-
- # Load the module back.
- print("Loading the module back...")
- loaded_lib = tvm.module.load(path_lib)
- with open(path_graph_json) as fi:
- loaded_graph_json = fi.read()
- with open(path_params, "rb") as fi:
- loaded_params = bytearray(fi.read())
- print("- Module loaded!")
-
- # Run the model! We will perform prediction on an image.
- print("Running the graph...")
- from tvm.contrib import graph_runtime
-
- module = graph_runtime.create(loaded_graph_json, loaded_lib, tvm.opengl(0))
- module.load_params(loaded_params)
-
- image = transform_image(download_image())
- input_data = tvm.nd.array(image.astype("float32"), ctx=tvm.opengl(0))
-
- module.set_input("data", input_data)
- module.run()
-
- # Retrieve the output.
- out = module.get_output(0, tvm.nd.empty(out_shape, ctx=tvm.opengl(0)))
- top1 = np.argmax(out.asnumpy())
- synset = download_synset()
- print('TVM prediction top-1:', top1, synset[top1])
-
-if run_deploy_local and opengl_enabled:
- deploy_local()
-
-######################################################################
-# Demo 2: Deploy the Model to WebGL Remotely with RPC
-# -------------------------------------------------------
-# Following the steps above, we can also compile the model for WebGL.
-# TVM provides the rpc module to help with remote deployment.
-#
-# When we deploy a model locally to OpenGL, the model consists of two parts:
-# the host LLVM part and the device GLSL part. Now that we want to deploy to
-# WebGL, we need to leverage Emscripten to transform LLVM into JavaScript. In
-# order to do that, we will need to specify the host target as
-# `llvm -target=asmjs-unknown-emscripten -system-lib`. Then call Emscripten to
-# compile the LLVM binary output into a JavaScript file.
-#
-# First, we need to manually start an RPC server. Please follow the instructions
-# in `tvm/web/README.md`. After following the steps, you should have a web page
-# opened in a browser, and a Python script running a proxy.
-#
-def deploy_rpc():
- """Runs the demo that deploys a model remotely through RPC.
- """
- from tvm import rpc
- from tvm.contrib import util, emscripten
-
- # As usual, load the resnet18 model.
- net, params, data_shape, out_shape = load_mxnet_resnet()
-
- # Compile the model.
- # Note that this time we are changing the target.
- # This is because we want to translate the host library into JavaScript
- # through Emscripten.
- graph, lib, params = compile_net(
- net,
- target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
- target="opengl",
- data_shape=data_shape,
- params=params)
-
- # Now we want to deploy our model through RPC.
-    # First we need to prepare the module files locally.
- print("Saving the compiled module...")
-
- temp = util.tempdir()
- path_obj = temp.relpath("deploy.bc") # host LLVM part
- path_dso = temp.relpath("deploy.js") # host JavaScript part
- path_gl = temp.relpath("deploy.gl") # device GLSL part
- path_json = temp.relpath("deploy.tvm_meta.json")
-
- lib.save(path_obj)
- emscripten.create_js(path_dso, path_obj, side_module=True)
- lib.imported_modules[0].save(path_gl)
-
- print("- Saved files:", temp.listdir())
-
- # Connect to the RPC server.
- print("Connecting to RPC server...")
- proxy_host = 'localhost'
- proxy_port = 9090
- remote = rpc.connect(proxy_host, proxy_port, key="js")
- print("- Connected to RPC server!")
-
- # Upload module to RPC server.
- print("Uploading module to RPC server...")
- remote.upload(path_dso, "deploy.dso")
- remote.upload(path_gl)
- remote.upload(path_json)
- print("- Upload completed!")
-
- # Load remote library.
- print("Loading remote library...")
- fdev = remote.load_module("deploy.gl")
- fhost = remote.load_module("deploy.dso")
- fhost.import_module(fdev)
- rlib = fhost
- print("- Remote library loaded!")
-
- ctx = remote.opengl(0)
-
- # Upload the parameters.
- print("Uploading parameters...")
- rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
- print("- Parameters uploaded!")
-
- # Create the remote runtime module.
- print("Running remote module...")
- from tvm.contrib import graph_runtime
- module = graph_runtime.create(graph, rlib, ctx)
-
- # Set parameter.
- module.set_input(**rparams)
-
- # Set input data.
- input_data = np.random.uniform(size=data_shape)
- module.set_input('data', tvm.nd.array(input_data.astype('float32')))
-
- # Run.
- module.run()
- print("- Remote module execution completed!")
-
- out = module.get_output(0, out=tvm.nd.empty(out_shape, ctx=ctx))
- # Print first 10 elements of output.
- print(out.asnumpy()[0][0:10])
-
-if run_deploy_rpc and opengl_enabled:
- deploy_rpc()
-
-######################################################################
-# Demo 3: Deploy the Model to WebGL SystemLib
-# -----------------------------------------------
-# This time we are not using RPC. Instead, we will compile the model and link it
-# with the entire tvm runtime into a single giant JavaScript file. Then we will
-# run the model using JavaScript.
-#
-def deploy_web():
- """Runs the demo that deploys to web.
- """
-
- import base64
- import json
- import os
- import shutil
- import SimpleHTTPServer, SocketServer
-
- from tvm.contrib import emscripten
-
- curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(os.getcwd())))
- working_dir = os.getcwd()
- output_dir = os.path.join(working_dir, "resnet")
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
-
- # As usual, load the resnet18 model.
- net, params, data_shape, out_shape = load_mxnet_resnet()
-
- # As usual, compile the model.
- graph, lib, params = compile_net(
- net,
- target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
- target="opengl",
- data_shape=data_shape,
- params=params)
-
- # Now we save the model and link it with the TVM web runtime.
- path_lib = os.path.join(output_dir, "resnet.js")
- path_graph = os.path.join(output_dir, "resnet.json")
- path_params = os.path.join(output_dir, "resnet.params")
- path_data_shape = os.path.join(output_dir, "data_shape.json")
- path_out_shape = os.path.join(output_dir, "out_shape.json")
-
- lib.export_library(path_lib, emscripten.create_js, options=[
- "-s", "USE_GLFW=3",
- "-s", "USE_WEBGL2=1",
- "-lglfw",
- "-s", "TOTAL_MEMORY=1073741824",
- ])
- with open(path_graph, "w") as fo:
- fo.write(graph.json())
- with open(path_params, "w") as fo:
- fo.write(base64.b64encode(nnvm.compiler.save_param_dict(params)))
-
- shutil.copyfile(os.path.join(curr_path, "../tvm/web/tvm_runtime.js"),
- os.path.join(output_dir, "tvm_runtime.js"))
- shutil.copyfile(os.path.join(curr_path, "web/resnet.html"),
- os.path.join(output_dir, "resnet.html"))
-
- # Now we want to save some extra files so that we can execute the model from
- # JavaScript.
- # - data shape
- with open(path_data_shape, "w") as fo:
- json.dump(list(data_shape), fo)
- # - out shape
- with open(path_out_shape, "w") as fo:
- json.dump(list(out_shape), fo)
- # - input image
- image = download_image()
- image.save(os.path.join(output_dir, "data.png"))
- # - synset
- synset = download_synset()
- with open(os.path.join(output_dir, "synset.json"), "w") as fo:
- json.dump(synset, fo)
-
- print("Output files are in", output_dir)
-
- # Finally, we fire up a simple web server to serve all the exported files.
- print("Now running a simple server to serve the files...")
- os.chdir(output_dir)
- port = 8080
- handler = SimpleHTTPServer.SimpleHTTPRequestHandler
- httpd = SocketServer.TCPServer(("", port), handler)
- print("Please open http://localhost:" + str(port) + "/resnet.html")
- httpd.serve_forever()
-
-if run_deploy_web and opengl_enabled:
- deploy_web()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile ONNX Models
-===================
-**Author**: `Joshua Z. Zhang <https://zhreshold.github.io/>`_
-
-This article is an introductory tutorial to deploy ONNX models with NNVM.
-
-For us to begin with, the onnx module is required to be installed.
-
-A quick solution is to install protobuf compiler, and
-
-.. code-block:: bash
-
- pip install onnx --user
-
-or please refer to the official site.
-https://github.com/onnx/onnx
-"""
-import nnvm
-import tvm
-from tvm.contrib.download import download_testdata
-import onnx
-import numpy as np
-
-######################################################################
-# Load pretrained ONNX model
-# ---------------------------------------------
-# The example super-resolution model used here is exactly the same model as in the onnx tutorial
-# http://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html
-# We skip the pytorch model construction part, and download the saved onnx model
-model_url = ''.join(['https://gist.github.com/zhreshold/',
- 'bcda4716699ac97ea44f791c24310193/raw/',
- '93672b029103648953c4e5ad3ac3aadf346a4cdc/',
- 'super_resolution_0.2.onnx'])
-model_path = download_testdata(model_url, 'super_resolution.onnx', module='onnx')
-# now you have super_resolution.onnx on disk
-onnx_model = onnx.load_model(model_path)
-# we can load the graph as NNVM compatible model
-sym, params = nnvm.frontend.from_onnx(onnx_model)
-
-######################################################################
-# Load a test image
-# ---------------------------------------------
-# A single cat dominates the examples!
-from PIL import Image
-img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
-img_path = download_testdata(img_url, 'cat.png', module='data')
-img = Image.open(img_path).resize((224, 224))
-img_ycbcr = img.convert("YCbCr") # convert to YCbCr
-img_y, img_cb, img_cr = img_ycbcr.split()
-x = np.array(img_y)[np.newaxis, np.newaxis, :, :]
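-# the model operates on the Y (luminance) channel only, so x has
-# shape (1, 1, 224, 224)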
-
-######################################################################
-# Compile the model on NNVM
-# ---------------------------------------------
-# We should be familiar with the process by now.
-import nnvm.compiler
-target = 'cuda'
-# assume first input name is data
-input_name = sym.list_input_names()[0]
-shape_dict = {input_name: x.shape}
-with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
-
-######################################################################
-# Execute on TVM
-# ---------------------------------------------
-# The process is no different from the other examples
-from tvm.contrib import graph_runtime
-ctx = tvm.gpu(0)
-dtype = 'float32'
-m = graph_runtime.create(graph, lib, ctx)
-# set inputs
-m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
-m.set_input(**params)
-# execute
-m.run()
-# get outputs
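-# the super-resolution model upscales the 224x224 input by a factor of 3,
-# giving a 672x672 output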
-output_shape = (1, 1, 672, 672)
-tvm_output = m.get_output(0, tvm.nd.empty(output_shape, dtype)).asnumpy()
-
-######################################################################
-# Display results
-# ---------------------------------------------
-# We put the input and output images side by side
-from matplotlib import pyplot as plt
-out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L')
-out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
-out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
-result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB')
-canvas = np.full((672, 672*2, 3), 255)
-canvas[0:224, 0:224, :] = np.asarray(img)
-canvas[:, 672:, :] = np.asarray(result)
-plt.imshow(canvas.astype(np.uint8))
-plt.show()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile Tensorflow Models
-=========================
-This article is an introductory tutorial to deploy tensorflow models with TVM.
-
-For us to begin with, the tensorflow python module is required to be installed.
-
-Please refer to https://www.tensorflow.org/install
-"""
-
-# tvm and nnvm
-import nnvm
-import tvm
-
-# os and numpy
-import numpy as np
-import os.path
-
-# Tensorflow imports
-import tensorflow as tf
-from tensorflow.core.framework import graph_pb2
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import tensor_util
-
-# Tensorflow utility functions
-import tvm.relay.testing.tf as tf_testing
-
-# Base location for model related files.
-repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
-
-# Test image
-img_name = 'elephant-299.jpg'
-image_url = os.path.join(repo_base, img_name)
-
-######################################################################
-# Tutorials
-# ---------
-# .. note::
-#
-#   protobuf should be exported with the :any:`add_shapes=True` option.
-#   You could use https://github.com/dmlc/web-data/tree/master/tensorflow/scripts/tf-to-nnvm.py
-#   to add shapes to existing models.
-#
-# Please refer to docs/frontend/tensorflow.md for more details on various
-# models from tensorflow.
-
-model_name = 'classify_image_graph_def-with_shapes.pb'
-model_url = os.path.join(repo_base, model_name)
-
-# Image label map
-map_proto = 'imagenet_2012_challenge_label_map_proto.pbtxt'
-map_proto_url = os.path.join(repo_base, map_proto)
-
-# Human readable text for labels
-label_map = 'imagenet_synset_to_human_label_map.txt'
-label_map_url = os.path.join(repo_base, label_map)
-
-# Target settings
-# Use these commented settings to build for cuda.
-#target = 'cuda'
-#target_host = 'llvm'
-#layout = "NCHW"
-#ctx = tvm.gpu(0)
-target = 'llvm'
-target_host = 'llvm'
-layout = None
-ctx = tvm.cpu(0)
-
-######################################################################
-# Download required files
-# -----------------------
-# Download files listed above.
-from tvm.contrib.download import download_testdata
-
-img_path = download_testdata(image_url, img_name, module='data')
-model_path = download_testdata(model_url, model_name, module=['tf', 'InceptionV1'])
-map_proto_path = download_testdata(map_proto_url, map_proto, module='data')
-label_path = download_testdata(label_map_url, label_map, module='data')
-
-######################################################################
-# Import model
-# ------------
-# Creates tensorflow graph definition from protobuf file.
-
-with tf.gfile.FastGFile(model_path, 'rb') as f:
- graph_def = tf.GraphDef()
- graph_def.ParseFromString(f.read())
- graph = tf.import_graph_def(graph_def, name='')
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
- # Add shapes to the graph.
- with tf.Session() as sess:
- graph_def = tf_testing.AddShapesToGraphDef(sess, 'softmax')
-
-######################################################################
-# Decode image
-# ------------
-# .. note::
-#
-#   The TensorFlow frontend import doesn't support preprocessing ops like JpegDecode.
-#   JpegDecode is bypassed (it just returns the source node),
-#   so we supply the decoded frame to TVM instead.
-#
-
-from PIL import Image
-image = Image.open(img_path).resize((299, 299))
-
-x = np.array(image)
-
-######################################################################
-# Import the graph to NNVM
-# ------------------------
-# Import tensorflow graph definition to nnvm.
-#
-# Results:
-#   sym: nnvm graph for the given tensorflow protobuf.
-#   params: params converted from tensorflow params (tensor protobuf).
-sym, params = nnvm.frontend.from_tensorflow(graph_def, layout=layout)
-
-print("Tensorflow protobuf imported as nnvm graph")
-######################################################################
-# NNVM Compilation
-# ----------------
-# Compile the graph to llvm target with given input specification.
-#
-# Results:
-#   graph: final graph after compilation.
-#   params: final params after compilation.
-#   lib: target library which can be deployed on the target with the TVM runtime.
-
-import nnvm.compiler
-shape_dict = {'DecodeJpeg/contents': x.shape}
-dtype_dict = {'DecodeJpeg/contents': 'uint8'}
-graph, lib, params = nnvm.compiler.build(sym, shape=shape_dict, target=target, target_host=target_host, dtype=dtype_dict, params=params)
-
-######################################################################
-# Execute the portable graph on TVM
-# ---------------------------------
-# Now we can try deploying the NNVM compiled model on the target.
-
-from tvm.contrib import graph_runtime
-dtype = 'uint8'
-m = graph_runtime.create(graph, lib, ctx)
-# set inputs
-m.set_input('DecodeJpeg/contents', tvm.nd.array(x.astype(dtype)))
-m.set_input(**params)
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0, tvm.nd.empty((1, 1008), 'float32'))
-
-######################################################################
-# Process the output
-# ------------------
-# Process the model output to human readable text for InceptionV1.
-predictions = tvm_output.asnumpy()
-predictions = np.squeeze(predictions)
-
-# Creates node ID --> English string lookup.
-node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path,
- uid_lookup_path=label_path)
-
-# Print top 5 predictions from TVM output.
-top_k = predictions.argsort()[-5:][::-1]
-for node_id in top_k:
- human_string = node_lookup.id_to_string(node_id)
- score = predictions[node_id]
- print('%s (score = %.5f)' % (human_string, score))
-
-######################################################################
-# Inference on TensorFlow
-# -----------------------
-# Run the corresponding model on TensorFlow.
-
-def create_graph():
-    """Creates a graph from the saved graph_def.pb file."""
- # Creates graph from saved graph_def.pb.
- with tf.gfile.FastGFile(model_path, 'rb') as f:
- graph_def = tf.GraphDef()
- graph_def.ParseFromString(f.read())
- graph = tf.import_graph_def(graph_def, name='')
- # Call the utility to import the graph definition into default graph.
- graph_def = tf_testing.ProcessGraphDefParam(graph_def)
-
-def run_inference_on_image(image):
- """Runs inference on an image.
-
- Parameters
- ----------
- image: String
- Image file name.
-
- Returns
- -------
- Nothing
- """
- if not tf.gfile.Exists(image):
- tf.logging.fatal('File does not exist %s', image)
- image_data = tf.gfile.FastGFile(image, 'rb').read()
-
- # Creates graph from saved GraphDef.
- create_graph()
-
- with tf.Session() as sess:
- softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
- predictions = sess.run(softmax_tensor,
- {'DecodeJpeg/contents:0': image_data})
-
- predictions = np.squeeze(predictions)
-
- # Creates node ID --> English string lookup.
- node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path,
- uid_lookup_path=label_path)
-
- # Print top 5 predictions from tensorflow.
- top_k = predictions.argsort()[-5:][::-1]
-    print("===== TENSORFLOW RESULTS =======")
- for node_id in top_k:
- human_string = node_lookup.id_to_string(node_id)
- score = predictions[node_id]
- print('%s (score = %.5f)' % (human_string, score))
-
-run_inference_on_image(img_path)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Get Started with NNVM
-=====================
-**Author**: `Tianqi Chen <https://tqchen.github.io/>`_
-
-This article is an introductory tutorial to the workflow in NNVM.
-"""
-import nnvm.compiler
-import nnvm.symbol as sym
-
-######################################################################
-# Declare Computation
-# -------------------
-# We start by describing our computation with a computational graph.
-# Most deep learning frameworks use computational graphs to describe
-# their computation. In this example, we directly use
-# NNVM's API to construct the computational graph.
-#
-# .. note::
-#
-# In a typical deep learning compilation workflow,
-#   we can get the models from :any:`nnvm.frontend`.
-#
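-#   For instance, a model can be imported via a framework frontend
-#   (a sketch; ``block`` stands for a hypothetical pre-trained MXNet Gluon model):
-#
-#   .. code-block:: python
-#
-#      sym, params = nnvm.frontend.from_mxnet(block)
-#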
-# The following code snippet describes :math:`z = x + \sqrt{y}`
-# and creates an NNVM graph from the description.
-# We can print out the graph IR to check the graph content.
-
-x = sym.Variable("x")
-y = sym.Variable("y")
-z = sym.elemwise_add(x, sym.sqrt(y))
-compute_graph = nnvm.graph.create(z)
-print("-------compute graph-------")
-print(compute_graph.ir())
-
-######################################################################
-# Compile
-# -------
-# We can call :any:`nnvm.compiler.build` to compile the graph.
-# The build function takes a shape parameter which specifies the
-# input shape requirement. Here we only need to pass in the shape of ``x``;
-# the other one will be inferred automatically by NNVM.
-#
-# The function returns three values. ``deploy_graph`` contains
-# the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module`
-# that contains compiled CUDA functions. We do not need the ``params``
-# in this case.
-shape = (4,)
-deploy_graph, lib, params = nnvm.compiler.build(
- compute_graph, target="cuda", shape={"x": shape}, dtype="float32")
-
-######################################################################
-# We can print out the IR of ``deploy_graph`` to understand what just
-# happened under the hood. We can find that ``deploy_graph`` only
-# contains a single operator ``tvm_op``. This is because NNVM
-# automatically fused the operators together into one operator.
-#
-print("-------deploy graph-------")
-print(deploy_graph.ir())
-
-######################################################################
-# Let us also peek into content of ``lib``.
-# Typically a compiled TVM CUDA module contains a host module (``lib``)
-# and a device module (``lib.imported_modules[0]``) that contains the CUDA code.
-# We print out the generated device code here.
-# This is exactly the fused CUDA version of the kernel that the graph points to.
-#
-print("-------deploy library-------")
-print(lib.imported_modules[0].get_source())
-
-######################################################################
-# Deploy and Run
-# --------------
-# Now that we have a compiled module, let us run it.
-# We can use :any:`graph_runtime <tvm.contrib.graph_runtime.create>`
-# in tvm to create a deployable :any:`GraphModule <tvm.contrib.graph_runtime.GraphModule>`.
-# We can use the :any:`set_input <tvm.contrib.graph_runtime.GraphModule.set_input>`,
-# :any:`run <tvm.contrib.graph_runtime.GraphModule.run>` and
-# :any:`get_output <tvm.contrib.graph_runtime.GraphModule.get_output>` function
-# to set the input, execute the graph and get the output we need.
-#
-import tvm
-import numpy as np
-from tvm.contrib import graph_runtime, util
-
-module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
-x_np = np.array([1, 2, 3, 4]).astype("float32")
-y_np = np.array([4, 4, 4, 4]).astype("float32")
-# set input to the graph module
-module.set_input(x=x_np, y=y_np)
-# run forward computation
-module.run()
-# get the first output
-out = module.get_output(0, out=tvm.nd.empty(shape))
-print(out.asnumpy())
-
-######################################################################
-# Provide Model Parameters
-# ------------------------
-# Most deep learning models contain two types of inputs: parameters
-# that remain fixed during inference and data inputs that change
-# for each inference task. It is helpful to provide this
-# information to NNVM. Let us assume that ``y`` is the parameter
-# in our example. We can provide the model parameter information
-# via the params argument to :any:`nnvm.compiler.build`.
-#
-deploy_graph, lib, params = nnvm.compiler.build(
- compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np})
-
-######################################################################
-# This time we will need the params value returned by :any:`nnvm.compiler.build`.
-# NNVM applies optimizations to pre-compute the intermediate values in
-# the graph that can be determined by parameters. In this case
-# :math:`\sqrt{y}` can be pre-computed. The pre-computed values
-# are returned as new params. We can print out the new compiled library
-# to confirm that the fused kernel now only contains the add.
-#
-print("-----optimized params-----")
-print(params)
-print("-------deploy library-------")
-print(lib.imported_modules[0].get_source())
-
-######################################################################
-# Save the Deployed Module
-# ------------------------
-# We can save the ``deploy_graph``, ``lib`` and ``params`` separately
-# and load them back later. We can use :any:`tvm.module.Module` to export
-# the compiled library. ``deploy_graph`` is saved in json format and ``params``
-# is serialized into a bytearray.
-#
-temp = util.tempdir()
-path_lib = temp.relpath("deploy.so")
-lib.export_library(path_lib)
-with open(temp.relpath("deploy.json"), "w") as fo:
- fo.write(deploy_graph.json())
-with open(temp.relpath("deploy.params"), "wb") as fo:
- fo.write(nnvm.compiler.save_param_dict(params))
-print(temp.listdir())
-
-######################################################################
-# We can load the module back.
-loaded_lib = tvm.module.load(path_lib)
-loaded_json = open(temp.relpath("deploy.json")).read()
-loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read())
-module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
-params = nnvm.compiler.load_param_dict(loaded_params)
-# directly load from byte array
-module.load_params(loaded_params)
-module.run(x=x_np)
-# get the first output
-out = module.get_output(0, out=tvm.nd.empty(shape))
-print(out.asnumpy())
-
-######################################################################
-# Deploy using Another Language
-# -----------------------------
-# We use python in this example for demonstration.
-# We can also deploy the compiled modules with other languages
-# supported by TVM such as C++, Java, and JavaScript.
-# The graph module itself is fully embedded in TVM runtime.
-#
-# The following block demonstrates how we can directly use TVM's
-# runtime API to execute the compiled module.
-# You can find similar runtime APIs in the TVM runtimes for other languages.
-#
-fcreate = tvm.get_global_func("tvm.graph_runtime.create")
-ctx = tvm.gpu(0)
-gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id)
-set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]
-set_input("x", tvm.nd.array(x_np))
-gmodule["load_params"](loaded_params)
-run()
-out = tvm.nd.empty(shape)
-get_output(0, out)
-print(out.asnumpy())
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Compile Darknet Models for RNN
-==============================
-**Author**: `Siju Samuel <https://siju-samuel.github.io/>`_
-
-This article is an introductory tutorial to deploy darknet rnn models with NNVM.
-
-This script runs a character prediction model.
-Each module consists of 3 fully-connected layers. The input layer propagates information from the
-input to the current state. The recurrent layer propagates information through time from the
-previous state to the current one.
-
-The input to the network is a 1-hot encoding of ASCII characters. We train the network to predict
-the next character in a stream of characters. The output is constrained to be a probability
-distribution using a softmax layer.
-
-Since each recurrent layer contains information about the current character and the past
-characters, it can use this context to predict the future characters in a word or phrase.
-
-All the required models and libraries will be downloaded from the internet
-by the script.
-"""
-import random
-import numpy as np
-import tvm
-from tvm.contrib import graph_runtime
-from tvm.contrib.download import download_testdata
-from nnvm.testing.darknet import __darknetffi__
-import nnvm
-import nnvm.frontend.darknet
-
-# Set the parameters
-# -----------------------
-# Set the seed value and the number of characters to predict
-
-#Model name
-MODEL_NAME = 'rnn'
-#Seed value
-seed = 'Thus'
-#Number of characters to predict
-num = 1000
-
-# Download required files
-# -----------------------
-# Download the cfg and weights files on first use.
-CFG_NAME = MODEL_NAME + '.cfg'
-WEIGHTS_NAME = MODEL_NAME + '.weights'
-REPO_URL = 'https://github.com/dmlc/web-data/blob/master/darknet/'
-CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true'
-WEIGHTS_URL = REPO_URL + 'weights/' + WEIGHTS_NAME + '?raw=true'
-
-cfg_path = download_testdata(CFG_URL, CFG_NAME, module='darknet')
-weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module='darknet')
-
-# Download and Load darknet library
-DARKNET_LIB = 'libdarknet.so'
-DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true'
-lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module='darknet')
-DARKNET_LIB = __darknetffi__.dlopen(lib_path)
-net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0)
-dtype = 'float32'
-batch_size = 1
-
-# Import the graph to NNVM
-# ------------------------
-# Import darknet graph definition to nnvm.
-#
-# Results:
-# sym: nnvm graph for rnn model
-# params: params converted from darknet weights
-print("Converting darknet rnn model to nnvm symbols...")
-sym, params = nnvm.frontend.darknet.from_darknet(net, dtype)
-
-# Compile the model on NNVM
-data = np.empty([1, net.inputs], dtype)
-
-target = 'llvm'
-print("Compiling the model...")
-
-shape_dict = {'data': data.shape}
-dtype_dict = {'data': data.dtype}
-
-with nnvm.compiler.build_config(opt_level=2):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, dtype_dict, params)
-
-# Execute the portable graph on TVM
-# ---------------------------------
-# Now we can try deploying the NNVM compiled model on the CPU target.
-
-# Set the cpu context
-ctx = tvm.cpu(0)
-# Create graph runtime
-m = graph_runtime.create(graph, lib, ctx)
-# Set the params to runtime
-m.set_input(**params)
-
-def _init_state_memory(rnn_cells_count, dtype):
- '''Initialize memory for states'''
- states = {}
- state_shape = (1024,)
- for i in range(rnn_cells_count):
- k = 'rnn' + str(i) + '_state'
- states[k] = tvm.nd.array(np.zeros(state_shape, dtype).astype(dtype))
- return states
-
-def _set_state_input(runtime, states):
- '''Set the state inputs'''
- for state in states:
- runtime.set_input(state, states[state])
-
-def _get_state_output(runtime, states):
- '''Get the state outputs and save'''
- i = 1
- for state in states:
- data = states[state]
-        states[state] = runtime.get_output(i, tvm.nd.empty(data.shape, data.dtype))
- i += 1
-
-def _proc_rnn_output(out_data):
- '''Generate the characters from the output array'''
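-    # The output vector is treated as a probability distribution:
-    # values below 1e-4 are zeroed out, the remainder is renormalized,
-    # and a character index is drawn by inverse-transform sampling
-    # against the uniform random value r.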
- sum_array = 0
- n = out_data.size
- r = random.uniform(0, 1)
- for j in range(n):
- if out_data[j] < 0.0001:
- out_data[j] = 0
- sum_array += out_data[j]
-
- for j in range(n):
- out_data[j] *= float(1.0) / sum_array
- r = r - out_data[j]
- if r <= 0:
- return j
- return n-1
-
-print("RNN generaring text...")
-
-out_shape = (net.outputs,)
-rnn_cells_count = 3
-
-# Initialize state memory
-# -----------------------
-states = _init_state_memory(rnn_cells_count, dtype)
-
-len_seed = len(seed)
-count = len_seed + num
-out_txt = ""
-
-#Initialize random seed
-random.seed(0)
-c = ord(seed[0])
-inp_data = np.zeros([net.inputs], dtype)
-
-# Run the model
-# -------------
-
-# Predict character by character until `count` characters are generated
-for i in range(count):
- inp_data[c] = 1
-
- # Set the input data
- m.set_input('data', tvm.nd.array(inp_data.astype(dtype)))
- inp_data[c] = 0
-
- # Set the state inputs
- _set_state_input(m, states)
-
- # Run the model
- m.run()
-
- # Get the output
- tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
-
- # Get the state outputs
- _get_state_output(m, states)
-
- # Get the predicted character and keep buffering it
- c = ord(seed[i]) if i < len_seed else _proc_rnn_output(tvm_out)
- out_txt += chr(c)
-
-print("Predicted Text =", out_txt)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Keras LSTM Sequence to Sequence Model for Translation
-=====================================================
-**Author**: `Siju Samuel <https://siju-samuel.github.io/>`_
-
-This script demonstrates how to implement a basic character-level sequence-to-sequence model.
-We apply it to translating short English sentences into short French sentences,
-character-by-character.
-
-# Summary of the algorithm
-
-- We start with input sequences from a domain (e.g. English sentences)
- and corresponding target sequences from another domain
- (e.g. French sentences).
-- An encoder LSTM turns input sequences to 2 state vectors
- (we keep the last LSTM state and discard the outputs).
-- A decoder LSTM is trained to turn the target sequences into
- the same sequence but offset by one timestep in the future,
- a training process called "teacher forcing" in this context.
-  It uses the state vectors from the encoder as its initial state.
- Effectively, the decoder learns to generate `targets[t+1...]`
- given `targets[...t]`, conditioned on the input sequence.
-
-This script loads the pre-trained s2s_translate.h5 model produced by the training script
-https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/lstm_seq2seq.py
-and generates sequences from it. It assumes that no changes have been made (for example:
-latent_dim is unchanged, and the input data and model architecture are unchanged).
-
-# References
-
-- Sequence to Sequence Learning with Neural Networks
- https://arxiv.org/abs/1409.3215
-- Learning Phrase Representations using
- RNN Encoder-Decoder for Statistical Machine Translation
- https://arxiv.org/abs/1406.1078
-
-See lstm_seq2seq.py for more details on the model architecture and how it is trained.
-"""
-
-from keras.models import Model, load_model
-from keras.layers import Input
-import random
-import os
-import numpy as np
-import keras
-import tvm
-import nnvm
-
-######################################################################
-# Download required files
-# -----------------------
-# Download files listed below from the dmlc web-data repo.
-model_file = "s2s_translate.h5"
-data_file = "fra-eng.txt"
-
-# Base location for model related files.
-repo_base = 'https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/'
-model_url = os.path.join(repo_base, model_file)
-data_url = os.path.join(repo_base, data_file)
-
-# Download files listed below.
-from tvm.contrib.download import download_testdata
-model_path = download_testdata(model_url, model_file, module='keras')
-data_path = download_testdata(data_url, data_file, module='data')
-
-latent_dim = 256 # Latent dimensionality of the encoding space.
-test_samples = 10000 # Number of samples used for testing.
-
-######################################################################
-# Process the data file
-# ---------------------
-# Vectorize the data. We use the same approach as the training script.
-# NOTE: the data must be identical, in order for the character -> integer
-# mappings to be consistent.
-input_texts = []
-target_texts = []
-input_characters = set()
-target_characters = set()
-with open(data_path, 'r', encoding='utf-8') as f:
- lines = f.read().split('\n')
-test_samples = min(test_samples, len(lines))
-max_encoder_seq_length = 0
-max_decoder_seq_length = 0
-for line in lines[:test_samples]:
- input_text, target_text = line.split('\t')
- # We use "tab" as the "start sequence" character
- # for the targets, and "\n" as "end sequence" character.
- target_text = '\t' + target_text + '\n'
- max_encoder_seq_length = max(max_encoder_seq_length, len(input_text))
- max_decoder_seq_length = max(max_decoder_seq_length, len(target_text))
- for char in input_text:
- if char not in input_characters:
- input_characters.add(char)
- for char in target_text:
- if char not in target_characters:
- target_characters.add(char)
-
-input_characters = sorted(list(input_characters))
-target_characters = sorted(list(target_characters))
-num_encoder_tokens = len(input_characters)
-num_decoder_tokens = len(target_characters)
-input_token_index = dict(
- [(char, i) for i, char in enumerate(input_characters)])
-target_token_index = dict(
- [(char, i) for i, char in enumerate(target_characters)])
-
-# Reverse-lookup token index to decode sequences back to something readable.
-reverse_target_char_index = dict(
- (i, char) for char, i in target_token_index.items())
-
-######################################################################
-# Load Keras Model
-# ----------------
-# Restore the model and construct the encoder and decoder.
-model = load_model(model_path)
-encoder_inputs = model.input[0] # input_1
-
-encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1
-encoder_states = [state_h_enc, state_c_enc]
-encoder_model = Model(encoder_inputs, encoder_states)
-
-decoder_inputs = model.input[1] # input_2
-decoder_state_input_h = Input(shape=(latent_dim,), name='input_3')
-decoder_state_input_c = Input(shape=(latent_dim,), name='input_4')
-decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
-decoder_lstm = model.layers[3]
-decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
- decoder_inputs, initial_state=decoder_states_inputs)
-decoder_states = [state_h_dec, state_c_dec]
-decoder_dense = model.layers[4]
-decoder_outputs = decoder_dense(decoder_outputs)
-decoder_model = Model(
- [decoder_inputs] + decoder_states_inputs,
- [decoder_outputs] + decoder_states)
-
-######################################################################
-# Compile both encoder and decoder model on NNVM
-# ----------------------------------------------
-# Create the NNVM graph definition from the Keras model.
-from tvm.contrib import graph_runtime
-target = 'llvm'
-ctx = tvm.cpu(0)
-
-# Parse Encoder model
-sym, params = nnvm.frontend.from_keras(encoder_model)
-inp_enc_shape = (1, max_encoder_seq_length, num_encoder_tokens)
-shape_dict = {'input_1': inp_enc_shape}
-
-# Build Encoder model
-with nnvm.compiler.build_config(opt_level=2):
- enc_graph, enc_lib, enc_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
-print("Encoder build ok.")
-
-# Create graph runtime for encoder model
-tvm_enc = graph_runtime.create(enc_graph, enc_lib, ctx)
-tvm_enc.set_input(**enc_params)
-
-# Parse Decoder model
-inp_dec_shape = (1, 1, num_decoder_tokens)
-shape_dict = {'input_2': inp_dec_shape,
- 'input_3': (1, latent_dim),
- 'input_4': (1, latent_dim)}
-
-# Build Decoder model
-sym, params = nnvm.frontend.from_keras(decoder_model)
-with nnvm.compiler.build_config(opt_level=2):
- dec_graph, dec_lib, dec_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
-print("Decoder build ok.")
-
-# Create graph runtime for decoder model
-tvm_dec = graph_runtime.create(dec_graph, dec_lib, ctx)
-tvm_dec.set_input(**dec_params)
-
-# Decodes an input sequence.
-def decode_sequence(input_seq):
- # Set the input for encoder model.
- tvm_enc.set_input('input_1', input_seq)
-
- # Run encoder model
- tvm_enc.run()
-
- # Get states from encoder network
- h = tvm_enc.get_output(0).asnumpy()
- c = tvm_enc.get_output(1).asnumpy()
-
- # Populate the first character of target sequence with the start character.
- sampled_token_index = target_token_index['\t']
-
- # Sampling loop for a batch of sequences
- decoded_sentence = ''
- while True:
- # Generate empty target sequence of length 1.
- target_seq = np.zeros((1, 1, num_decoder_tokens), dtype='float32')
- # Update the target sequence (of length 1).
- target_seq[0, 0, sampled_token_index] = 1.
-
- # Set the input and states for decoder model.
- tvm_dec.set_input('input_2', target_seq)
- tvm_dec.set_input('input_3', h)
- tvm_dec.set_input('input_4', c)
- # Run decoder model
- tvm_dec.run()
-
- output_tokens = tvm_dec.get_output(0).asnumpy()
- h = tvm_dec.get_output(1).asnumpy()
- c = tvm_dec.get_output(2).asnumpy()
-
- # Sample a token
- sampled_token_index = np.argmax(output_tokens[0, -1, :])
- sampled_char = reverse_target_char_index[sampled_token_index]
-
- # Exit condition: either hit max length or find stop character.
- if sampled_char == '\n':
- break
-
- # Update the sentence
- decoded_sentence += sampled_char
- if len(decoded_sentence) > max_decoder_seq_length:
- break
- return decoded_sentence
-
-def generate_input_seq(input_text):
- input_seq = np.zeros((1, max_encoder_seq_length, num_encoder_tokens), dtype='float32')
- for t, char in enumerate(input_text):
- input_seq[0, t, input_token_index[char]] = 1.
- return input_seq
-
-######################################################################
-# Run the model
-# -------------
-# Randomly take some text from the test samples and translate it
-for seq_index in range(100):
- # Take one sentence randomly and try to decode.
-    index = random.randint(0, test_samples - 1)
- input_text, _ = lines[index].split('\t')
- input_seq = generate_input_seq(input_text)
- decoded_sentence = decode_sequence(input_seq)
- print((seq_index + 1), ": ", input_text, "==>", decoded_sentence)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network for ARM CPU (NNVM)
-======================================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Zhao Wu <https://github.com/FrozenGene>`_
-
-Auto-tuning for a specific ARM device is critical for getting the best
-performance. This is a tutorial about how to tune a whole convolutional
-network.
-
-The operator implementation for ARM CPU in TVM is written in template form.
-The template has many tunable knobs (tile factor, vectorization, unrolling, etc).
-We will tune all convolution and depthwise convolution operators
-in the neural network. After tuning, we produce a log file which stores
-the best knob values for all required operators. When the tvm compiler compiles
-these operators, it will query this log file to get the best knob values.
-
-We also released pre-tuned parameters for some arm devices. You can go to
-`ARM CPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#arm-cpu>`_
-to see the results.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user psutil xgboost tornado
-#
-# To make tvm run faster during tuning, it is recommended to use cython
-# as the FFI of tvm. In the root directory of tvm, execute
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user cython
-# sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import os
-
-import numpy as np
-
-import nnvm.testing
-import nnvm.compiler
-import tvm
-from tvm import autotvm
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.util import tempdir
-import tvm.contrib.graph_runtime as runtime
-
-#################################################################
-# Define network
-# --------------
-# First we need to define the network in the nnvm symbol API.
-# We can load some pre-defined network from :code:`nnvm.testing`.
-# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
-# tutorials :ref:`tutorial-nnvm` for more details).
-
-def get_network(name, batch_size):
- """Get the symbol definition and random weight of a network"""
- input_shape = (batch_size, 3, 224, 224)
- output_shape = (batch_size, 1000)
-
- if "resnet" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif "vgg" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif name == 'mobilenet':
- net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
- elif name == 'squeezenet_v1.1':
- net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
- elif name == 'inception_v3':
- input_shape = (1, 3, 299, 299)
- net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
- elif name == 'custom':
- # an example for custom network
- from nnvm.testing import utils
- net = nnvm.sym.Variable('data')
- net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
- net = nnvm.sym.flatten(net)
- net = nnvm.sym.dense(net, units=1000)
- net, params = utils.create_workload(net, batch_size, (3, 224, 224))
- elif name == 'mxnet':
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
- block = get_model('resnet18_v1', pretrained=True)
- net, params = nnvm.frontend.from_mxnet(block)
- net = nnvm.sym.softmax(net)
- else:
- raise ValueError("Unsupported network: " + name)
-
- return net, params, input_shape, output_shape
-
-
-#################################################################
-# Start RPC Tracker
-# -----------------
-# TVM uses RPC session to communicate with ARM boards.
-# During tuning, the tuner will send the generated code to the board and
-# measure the speed of code on the board.
-#
-# To scale up the tuning, TVM uses the RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized master node. We can register all devices to
-# the tracker. For example, if we have 10 phones, we can register all of them
-# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
-#
-# To start an RPC tracker, run this command on the host machine. The tracker is
-# required during the whole tuning process, so we need to open a new terminal for
-# this command:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
-#
-# The expected output is
-#
-# .. code-block:: bash
-#
-# INFO:RPCTracker:bind to 0.0.0.0:9190
-
-#################################################################
-# Register devices to RPC Tracker
-# -------------------------------
-# Now we can register our devices to the tracker. The first step is to
-# build tvm runtime for the ARM devices.
-#
-# * For Linux:
-# Follow this section :ref:`build-tvm-runtime-on-device` to build
-# tvm runtime on the device. Then register the device to tracker by
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399
-#
-# (replace :code:`[HOST_IP]` with the IP address of your host machine)
-#
-# * For Android:
-# Follow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to
-# install tvm rpc apk on the android device. Make sure you can pass the android rpc test.
-#   Then your device is registered. During tuning, go to the developer options,
-#   enable "Keep screen awake during changing", and keep your phone charged to make it stable.
-#
-# After registering devices, we can confirm it by querying rpc_tracker
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
-#
-# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399,
-# the output can be
-#
-# .. code-block:: bash
-#
-# Queue Status
-# ----------------------------------
-# key total free pending
-# ----------------------------------
-# mate10pro 2 2 0
-# rk3399 2 2 0
-# rpi3b 11 11 0
-# ----------------------------------
-#
-# You can register multiple devices to the tracker to accelerate the measurement in tuning.
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations. Here we use an RK3399 board
-# as an example. In your setting, you should modify the target and device_key accordingly.
-# Set :code:`use_android` to True if you use an Android phone.
-
-#### DEVICE CONFIG ####
-
-# Replace "aarch64-linux-gnu" with the correct target of your board.
-# This target is used for cross compilation. You can query it by :code:`gcc -v` on your device.
-target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu')
-
-# Also replace this with the device key in your tracker
-device_key = 'rk3399'
-
-# Set this to True if you use android phone
-use_android = False
-
-#### TUNING OPTION ####
-network = 'resnet-18'
-log_file = "%s.%s.log" % (device_key, network)
-dtype = 'float32'
-
-tuning_option = {
- 'log_filename': log_file,
-
- 'tuner': 'xgb',
- 'n_trial': 2000,
- 'early_stopping': 800,
-
- 'measure_option': autotvm.measure_option(
- builder=autotvm.LocalBuilder(
- build_func='ndk' if use_android else 'default'),
- runner=autotvm.RPCRunner(
- device_key, host='localhost', port=9190,
- number=5,
- timeout=4,
- ),
- ),
-}
-
-####################################################################
-#
-# .. note:: How to set tuning options
-#
-#   In general, the default values provided here work well.
-#   If you have enough time budget, you can set :code:`n_trial` and :code:`early_stopping` larger,
-#   which makes the tuning run longer.
-#   If your device runs very slowly or your conv2d operators have many GFLOPs, consider
-#   setting a larger timeout.
-#
-#   If your model has depthwise convolution, you could consider setting
-#   :code:`try_spatial_pack_depthwise` to :code:`True`, which generally performs better than
-#   the default optimization. For example, on an ARM A53 CPU at 2.0 GHz, we found that it
-#   boosts depthwise convolution performance by 1.6x on the MobileNet V1 model.
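-#
-#   For example, one might enable it by adding the flag to the tuning options
-#   defined above (a sketch; ``tune_tasks`` below accepts this keyword):
-#
-#   .. code-block:: python
-#
-#      tuning_option['try_spatial_pack_depthwise'] = True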
-
-###################################################################
-# Begin Tuning
-# ------------
-# Now we can extract tuning tasks from the network and begin tuning.
-# Here, we provide a simple utility function to tune a list of tasks.
-# This function is just an initial implementation which tunes them in sequential order.
-# We will introduce a more sophisticated tuning scheduler in the future.
-
-# You can skip the implementation of this function for this tutorial.
-def tune_tasks(tasks,
- measure_option,
- tuner='xgb',
- n_trial=1000,
- early_stopping=None,
- log_filename='tuning.log',
- use_transfer_learning=True,
- try_winograd=True,
- try_spatial_pack_depthwise=False):
- if try_winograd:
- for i in range(len(tasks)):
- try: # try winograd template
- tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
- tasks[i].target, tasks[i].target_host, 'winograd')
- input_channel = tsk.workload[1][1]
- if input_channel >= 64:
- tasks[i] = tsk
- except Exception:
- pass
-
- # if we want to use spatial pack for depthwise convolution
- if try_spatial_pack_depthwise:
- tuner = 'xgb_knob'
- for i in range(len(tasks)):
- if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
- tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
- tasks[i].target, tasks[i].target_host,
- 'contrib_spatial_pack')
- tasks[i] = tsk
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
-
- # create tuner
- if tuner == 'xgb' or tuner == 'xgb-rank':
- tuner_obj = XGBTuner(tsk, loss_type='rank')
- elif tuner == 'xgb_knob':
- tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
- elif tuner == 'ga':
- tuner_obj = GATuner(tsk, pop_size=50)
- elif tuner == 'random':
- tuner_obj = RandomTuner(tsk)
- elif tuner == 'gridsearch':
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
- tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- n_trial = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(n_trial=n_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(n_trial, prefix=prefix),
- autotvm.callback.log_to_file(tmp_log_file)])
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-def tune_and_evaluate(tuning_opt):
- # extract workloads from nnvm graph
- print("Extract tasks...")
- net, params, input_shape, out_shape = get_network(network, batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d,))
-
- # run tuning tasks
- print("Tuning...")
- tune_tasks(tasks, **tuning_opt)
-
- # compile kernels with history best records
- with autotvm.apply_history_best(log_file):
- print("Compile...")
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(
- net, target=target, shape={'data': input_shape}, params=params, dtype=dtype)
-
- # export library
- tmp = tempdir()
- if use_android:
- from tvm.contrib import ndk
- filename = "net.so"
- lib.export_library(tmp.relpath(filename), ndk.create_shared)
- else:
- filename = "net.tar"
- lib.export_library(tmp.relpath(filename))
-
- # upload module to device
- print("Upload...")
- remote = autotvm.measure.request_remote(device_key, 'localhost', 9190,
- timeout=10000)
- remote.upload(tmp.relpath(filename))
- rlib = remote.load_module(filename)
-
- # upload parameters to device
- ctx = remote.context(str(target), 0)
- module = runtime.create(graph, rlib, ctx)
- data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
- module.set_input('data', data_tvm)
- module.set_input(**params)
-
- # evaluate
- print("Evaluate inference time cost...")
- ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
- prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
- print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
- (np.mean(prof_res), np.std(prof_res)))
-
-# We do not run the tuning in our webpage server since it takes too long.
-# Uncomment the following line to run it by yourself.
-
-# tune_and_evaluate(tuning_option)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract features from them.
-# So a high-performance CPU is recommended.
-# One sample output is listed below.
-# It takes about 2 hours on a 32T AMD Ryzen Threadripper.
-#
-# .. code-block:: bash
-#
-# Extract tasks...
-# Tuning...
-# [Task 1/12] Current/Best: 22.37/ 52.19 GFLOPS | Progress: (544/1000) | 406.59 s Done.
-# [Task 2/12] Current/Best: 6.51/ 18.77 GFLOPS | Progress: (608/1000) | 325.05 s Done.
-# [Task 3/12] Current/Best: 4.67/ 24.87 GFLOPS | Progress: (480/1000) | 372.31 s Done.
-# [Task 4/12] Current/Best: 11.35/ 46.83 GFLOPS | Progress: (736/1000) | 602.39 s Done.
-# [Task 5/12] Current/Best: 1.01/ 19.80 GFLOPS | Progress: (448/1000) | 262.16 s Done.
-# [Task 6/12] Current/Best: 2.47/ 23.76 GFLOPS | Progress: (672/1000) | 563.85 s Done.
-# [Task 7/12] Current/Best: 14.57/ 33.97 GFLOPS | Progress: (544/1000) | 465.15 s Done.
-# [Task 8/12] Current/Best: 1.13/ 17.65 GFLOPS | Progress: (576/1000) | 365.08 s Done.
-# [Task 9/12] Current/Best: 14.45/ 22.66 GFLOPS | Progress: (928/1000) | 724.25 s Done.
-# [Task 10/12] Current/Best: 3.22/ 15.36 GFLOPS | Progress: (864/1000) | 564.27 s Done.
-# [Task 11/12] Current/Best: 11.03/ 32.23 GFLOPS | Progress: (736/1000) | 635.15 s Done.
-# [Task 12/12] Current/Best: 8.00/ 21.65 GFLOPS | Progress: (1000/1000) | 1111.81 s Done.
-# Compile...
-# Upload...
-# Evaluate inference time cost...
-# Mean inference time (std dev): 162.59 ms (0.06 ms)
-
-######################################################################
-#
-# .. note:: **Experiencing Difficulties?**
-#
-# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
-# then there must be something wrong.
-#
-# First, make sure you set the correct configuration of your device.
-# Then, you can print debug information by adding these lines in the beginning
-# of the script. It will print every measurement result, where you can find useful
-# error messages.
-#
-# .. code-block:: python
-#
-# import logging
-# logging.getLogger('autotvm').setLevel(logging.DEBUG)
-#
-# Finally, always feel free to ask our community for help on https://discuss.tvm.ai
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network for NVIDIA GPU (NNVM)
-=========================================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_
-
-Auto-tuning for specific devices and workloads is critical for getting the
-best performance. This is a tutorial on how to tune a whole convolutional
-network for NVIDIA GPU.
-
-The operator implementation for NVIDIA GPU in TVM is written in template form.
-The template has many tunable knobs (tile factor, unrolling, etc).
-We will tune all convolution and depthwise convolution operators
-in the neural network. After tuning, we produce a log file which stores
-the best knob values for all required operators. When the tvm compiler compiles
-these operators, it will query this log file to get the best knob values.
-
-We also released pre-tuned parameters for some NVIDIA GPUs. You can go to
-`NVIDIA GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#nvidia-gpu>`_
-to see the results.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user psutil xgboost tornado
-#
-# To make tvm run faster during tuning, it is recommended to use cython
-# as the FFI of tvm. In the root directory of tvm, execute:
-#
-# .. code-block:: bash
-#
-# pip3 install --user cython
-# sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import os
-
-import numpy as np
-
-import nnvm.testing
-import nnvm.compiler
-import tvm
-from tvm import autotvm
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.util import tempdir
-import tvm.contrib.graph_runtime as runtime
-
-#################################################################
-# Define Network
-# --------------
-# First we need to define the network in the nnvm symbol API.
-# We can load some pre-defined network from :code:`nnvm.testing`.
-# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
-# tutorials :ref:`tutorial-nnvm` for more details).
-
-def get_network(name, batch_size):
- """Get the symbol definition and random weight of a network"""
- input_shape = (batch_size, 3, 224, 224)
- output_shape = (batch_size, 1000)
-
- if "resnet" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif "vgg" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif name == 'mobilenet':
- net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
- elif name == 'squeezenet_v1.1':
- net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
- elif name == 'inception_v3':
- input_shape = (1, 3, 299, 299)
- net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
- elif name == 'custom':
- # an example for custom network
- from nnvm.testing import utils
- net = nnvm.sym.Variable('data')
- net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
- net = nnvm.sym.flatten(net)
- net = nnvm.sym.dense(net, units=1000)
- net, params = utils.create_workload(net, batch_size, (3, 224, 224))
- elif name == 'mxnet':
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
- block = get_model('resnet18_v1', pretrained=True)
- net, params = nnvm.frontend.from_mxnet(block)
- net = nnvm.sym.softmax(net)
- else:
- raise ValueError("Unsupported network: " + name)
-
- return net, params, input_shape, output_shape
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we apply some configurations.
-
-#### DEVICE CONFIG ####
-target = tvm.target.cuda()
-
-#### TUNING OPTION ####
-network = 'resnet-18'
-log_file = "%s.log" % network
-dtype = 'float32'
-
-tuning_option = {
- 'log_filename': log_file,
-
- 'tuner': 'xgb',
- 'n_trial': 2000,
- 'early_stopping': 600,
-
- 'measure_option': autotvm.measure_option(
- builder=autotvm.LocalBuilder(timeout=10),
- runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4, min_repeat_ms=150),
- ),
-}
-
-####################################################################
-#
-# .. note:: How to set tuning options
-#
-#   In general, the default values provided here work well.
-#
-#   If you have a large time budget, you can set :code:`n_trial` and :code:`early_stopping` larger,
-#   which makes the tuning run longer.
-#
-#   If you have multiple devices, you can use all of them for measurement to
-#   accelerate the tuning process (see the `Scale up measurement` section below).
-#
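-#   For example, with a larger time budget one might override the defaults
-#   above before tuning (a sketch; the values are illustrative):
-#
-#   .. code-block:: python
-#
-#      tuning_option['n_trial'] = 4000
-#      tuning_option['early_stopping'] = 1200
-#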
-
-###################################################################
-# Begin Tuning
-# ------------
-# Now we can extract tuning tasks from the network and begin tuning.
-# Here, we provide a simple utility function to tune a list of tasks.
-# This function is just an initial implementation which tunes them in sequential order.
-# We will introduce a more sophisticated tuning scheduler in the future.
-
-# You can skip the implementation of this function for this tutorial.
-def tune_tasks(tasks,
- measure_option,
- tuner='xgb',
- n_trial=1000,
- early_stopping=None,
- log_filename='tuning.log',
- use_transfer_learning=True,
- try_winograd=True):
- if try_winograd:
- for i in range(len(tasks)):
- try: # try winograd template
- tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
- tasks[i].target, tasks[i].target_host, 'winograd')
- input_channel = tsk.workload[1][1]
- if input_channel >= 64:
- tasks[i] = tsk
- except Exception:
- pass
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " %(i+1, len(tasks))
-
- # create tuner
- if tuner == 'xgb' or tuner == 'xgb-rank':
- tuner_obj = XGBTuner(tsk, loss_type='rank')
- elif tuner == 'ga':
- tuner_obj = GATuner(tsk, pop_size=100)
- elif tuner == 'random':
- tuner_obj = RandomTuner(tsk)
- elif tuner == 'gridsearch':
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
- tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- n_trial = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(n_trial=n_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(n_trial, prefix=prefix),
- autotvm.callback.log_to_file(tmp_log_file)])
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-def tune_and_evaluate(tuning_opt):
- # extract workloads from nnvm graph
- print("Extract tasks...")
- net, params, input_shape, out_shape = get_network(network, batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d,))
-
- # run tuning tasks
- print("Tuning...")
- tune_tasks(tasks, **tuning_opt)
-
- # compile kernels with history best records
- with autotvm.apply_history_best(log_file):
- print("Compile...")
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(
- net, target=target, shape={'data': input_shape}, params=params, dtype=dtype)
-
- # export library
- tmp = tempdir()
- filename = "net.tar"
- lib.export_library(tmp.relpath(filename))
-
- # load parameters
- ctx = tvm.context(str(target), 0)
- module = runtime.create(graph, lib, ctx)
- data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
- module.set_input('data', data_tvm)
- module.set_input(**params)
-
- # evaluate
- print("Evaluate inference time cost...")
- ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
- prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
- print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
- (np.mean(prof_res), np.std(prof_res)))
-
-# We do not run the tuning in our webpage server since it takes too long.
-# Uncomment the following line to run it by yourself.
-
-# tune_and_evaluate(tuning_option)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract features from them.
-# So a high-performance CPU is recommended. One sample output is listed below.
-# It takes about 4 hours to get the following output on a 32T AMD Ryzen Threadripper.
-# The tuning target is NVIDIA 1080 Ti.
-# (You can see some errors during compilation. If the tuning is not stuck, it is okay.)
-#
-# .. code-block:: bash
-#
-# Extract tasks...
-# Tuning...
-# [Task 1/12] Current/Best: 541.83/3570.66 GFLOPS | Progress: (960/2000) | 1001.31 s Done.
-# [Task 2/12] Current/Best: 0.56/ 803.33 GFLOPS | Progress: (704/2000) | 608.08 s Done.
-# [Task 3/12] Current/Best: 103.69/1141.25 GFLOPS | Progress: (768/2000) | 702.13 s Done.
-# [Task 4/12] Current/Best: 2905.03/3925.15 GFLOPS | Progress: (864/2000) | 745.94 sterminate called without an active exception
-# [Task 4/12] Current/Best: 2789.36/3925.15 GFLOPS | Progress: (1056/2000) | 929.40 s Done.
-# [Task 5/12] Current/Best: 89.06/1076.24 GFLOPS | Progress: (704/2000) | 601.73 s Done.
-# [Task 6/12] Current/Best: 40.39/2129.02 GFLOPS | Progress: (1088/2000) | 1125.76 s Done.
-# [Task 7/12] Current/Best: 4090.53/5007.02 GFLOPS | Progress: (800/2000) | 903.90 s Done.
-# [Task 8/12] Current/Best: 4.78/1272.28 GFLOPS | Progress: (768/2000) | 749.14 s Done.
-# [Task 9/12] Current/Best: 1391.45/2325.08 GFLOPS | Progress: (992/2000) | 1084.87 s Done.
-# [Task 10/12] Current/Best: 1995.44/2383.59 GFLOPS | Progress: (864/2000) | 862.60 s Done.
-# [Task 11/12] Current/Best: 4093.94/4899.80 GFLOPS | Progress: (224/2000) | 240.92 sterminate called without an active exception
-# [Task 11/12] Current/Best: 3487.98/4909.91 GFLOPS | Progress: (480/2000) | 534.96 sterminate called without an active exception
-# [Task 11/12] Current/Best: 4636.84/4912.17 GFLOPS | Progress: (1184/2000) | 1381.16 sterminate called without an active exception
-# [Task 11/12] Current/Best: 50.12/4912.17 GFLOPS | Progress: (1344/2000) | 1602.81 s Done.
-# [Task 12/12] Current/Best: 3581.31/4286.30 GFLOPS | Progress: (736/2000) | 943.52 s Done.
-# Compile...
-# Evaluate inference time cost...
-# Mean inference time (std dev): 1.07 ms (0.05 ms)
-#
-# As a reference baseline, the time cost of MXNet + TensorRT on resnet-18 is 1.30ms. So we are a little faster.
-
-######################################################################
-#
-# .. note:: **Experiencing Difficulties?**
-#
-# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
-# then there must be something wrong.
-#
-# First, make sure you set the correct configuration of your device.
-# Then, you can print debug information by adding these lines at the beginning
-# of the script. They will print every measurement result, where you can find useful
-# error messages.
-#
-# .. code-block:: python
-#
-# import logging
-# logging.getLogger('autotvm').setLevel(logging.DEBUG)
-#
-# Finally, always feel free to ask our community for help on https://discuss.tvm.ai
-
-
-#################################################################
-# Scale up measurement by using multiple devices
-# ----------------------------------------------
-#
-# If you have multiple devices, you can use all of them for measurement.
-# TVM uses the RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized master node. We can register all devices to
-# the tracker. For example, if we have 10 GPU cards, we can register all of them
-# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
-#
-# To start an RPC tracker, run this command on the host machine. The tracker is
-# required during the whole tuning process, so we need to open a new terminal for
-# this command:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
-#
-# The expected output is
-#
-# .. code-block:: bash
-#
-# INFO:RPCTracker:bind to 0.0.0.0:9190
-#
-# Then open another new terminal for the RPC server. We need to start one server
-# for each dedicated device. We use a string key to distinguish the types of devices.
-# You can pick a name you like.
-# (Note: For the rocm backend, the compiler has some internal errors,
-# so we need to add `--no-fork` to the argument list.)
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --tracker=localhost:9190 --key=1080ti
-#
-# After registering devices, we can confirm it by querying rpc_tracker
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.query_rpc_tracker --host=localhost --port=9190
-#
-# For example, if we have four 1080ti, two titanx and one gfx900, the output can be
-#
-# .. code-block:: bash
-#
-# Queue Status
-# ----------------------------------
-# key total free pending
-# ----------------------------------
-# 1080ti 4 4 0
-# titanx 2 2 0
-# gfx900 1 1 0
-# ----------------------------------
-#
-# Finally, we need to change the tuning option to use RPCRunner. Use the code below
-# to replace the corresponding part above.
-
-tuning_option = {
- 'log_filename': log_file,
-
- 'tuner': 'xgb',
- 'n_trial': 2000,
- 'early_stopping': 600,
-
- 'measure_option': autotvm.measure_option(
- builder=autotvm.LocalBuilder(timeout=10),
- runner=autotvm.RPCRunner(
- '1080ti', # change the device key to your key
- 'localhost', 9190,
- number=20, repeat=3, timeout=4, min_repeat_ms=150),
- ),
-}
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network for Mobile GPU (NNVM)
-=========================================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_
-
-Auto-tuning for a specific device is critical for getting the best
-performance. This is a tutorial about how to tune a whole convolutional
-network.
-
-The operator implementation for Mobile GPU in TVM is written in template form.
-The template has many tunable knobs (tile factor, vectorization, unrolling, etc).
-We will tune all convolution, depthwise convolution and dense operators
-in the neural network. After tuning, we produce a log file which stores
-the best knob values for all required operators. When the tvm compiler compiles
-these operators, it will query this log file to get the best knob values.
-
-We also released pre-tuned parameters for some arm devices. You can go to
-`Mobile GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#mobile-gpu>`_
-to see the results.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user psutil xgboost tornado
-#
-# To make tvm run faster during tuning, it is recommended to use cython
-# as FFI of tvm. In the root directory of tvm, execute
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user cython
-# sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import os
-
-import numpy as np
-
-import nnvm.testing
-import nnvm.compiler
-import tvm
-from tvm import autotvm
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.util import tempdir
-import tvm.contrib.graph_runtime as runtime
-
-#################################################################
-# Define network
-# --------------
-# First we need to define the network in nnvm symbol API.
-# We can load some pre-defined network from :code:`nnvm.testing`.
-# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
-# tutorials :ref:`tutorial-nnvm` for more details).
-
-def get_network(name, batch_size):
- """Get the symbol definition and random weight of a network"""
- input_shape = (batch_size, 3, 224, 224)
- output_shape = (batch_size, 1000)
-
- if "resnet" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif "vgg" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif name == 'mobilenet':
- net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
- elif name == 'squeezenet_v1.1':
- net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
- elif name == 'inception_v3':
-        input_shape = (batch_size, 3, 299, 299)
- net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
- elif name == 'custom':
- # an example for custom network
- from nnvm.testing import utils
- net = nnvm.sym.Variable('data')
- net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
- net = nnvm.sym.flatten(net)
- net = nnvm.sym.dense(net, units=1000)
- net, params = utils.create_workload(net, batch_size, (3, 224, 224))
- elif name == 'mxnet':
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
- block = get_model('resnet18_v1', pretrained=True)
- net, params = nnvm.frontend.from_mxnet(block)
- net = nnvm.sym.softmax(net)
- else:
- raise ValueError("Unsupported network: " + name)
-
- return net, params, input_shape, output_shape
-
-
-#################################################################
-# Start RPC Tracker
-# -----------------
-# TVM uses RPC session to communicate with ARM boards.
-# During tuning, the tuner will send the generated code to the board and
-# measure the speed of code on the board.
-#
-# To scale up the tuning, TVM uses RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized master node. We can register all devices to
-# the tracker. For example, if we have 10 phones, we can register all of them
-# to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
-#
-# To start an RPC tracker, run this command on the host machine. The tracker is
-# required during the whole tuning process, so we need to open a new terminal for
-# this command:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
-#
-# The expected output is
-#
-# .. code-block:: bash
-#
-# INFO:RPCTracker:bind to 0.0.0.0:9190
-
-#################################################################
-# Register devices to RPC Tracker
-# -----------------------------------
-# Now we can register our devices to the tracker. The first step is to
-# build tvm runtime for the ARM devices.
-#
-# * For Linux:
-# Follow this section :ref:`build-tvm-runtime-on-device` to build
-# tvm runtime on the device. Then register the device to tracker by
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399
-#
-# (replace :code:`[HOST_IP]` with the IP address of your host machine)
-#
-# * For Android:
-# Follow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to
-#   install the TVM RPC APK on the Android device. Make sure you can pass the Android RPC test.
-#   Then your device is already registered. During tuning, you have to go to the developer options
-#   and enable "Keep screen awake during charging" and charge your phone to keep it stable.
-#
-# After registering devices, we can confirm it by querying rpc_tracker
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
-#
-# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399,
-# the output can be
-#
-# .. code-block:: bash
-#
-# Queue Status
-# ----------------------------------
-# key total free pending
-# ----------------------------------
-# mate10pro 2 2 0
-# rk3399 2 2 0
-# rpi3b 11 11 0
-# ----------------------------------
-#
-# You can register multiple devices to the tracker to accelerate the measurement in tuning.
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations. Here we use an RK3399 board
-# as an example. In your setting, you should modify the target and device_key accordingly.
-# Set :code:`use_android` to True if you use an Android phone.
-
-#### DEVICE CONFIG ####
-
-target = tvm.target.create('opencl -device=mali')
-
-# Replace "aarch64-linux-gnu" with the correct target of your board.
-# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.
-target_host = 'llvm -target=aarch64-linux-gnu'
-
-# Also replace this with the device key in your tracker
-device_key = 'rk3399'
-
-# Set this to True if you use android phone
-use_android = False
-
-#### TUNING OPTION ####
-network = 'resnet-18'
-log_file = "%s.%s.log" % (device_key, network)
-dtype = 'float32'
-
-tuning_option = {
- 'log_filename': log_file,
-
- 'tuner': 'xgb',
- 'n_trial': 1000,
- 'early_stopping': 450,
-
- 'measure_option': autotvm.measure_option(
- builder=autotvm.LocalBuilder(
- build_func='ndk' if use_android else 'default'),
- runner=autotvm.RPCRunner(
- device_key, host='localhost', port=9190,
- number=10,
- timeout=5,
- ),
- ),
-}
-
-####################################################################
-#
-# .. note:: How to set tuning options
-#
-# In general, the default values provided here work well.
-#   If you have a large enough time budget, you can set :code:`n_trial` and :code:`early_stopping` larger,
-#   which makes the tuning run longer.
-#   If your device runs very slowly or your conv2d operators have many GFLOPs, consider setting
-#   the timeout larger, as in the sketch below.
-#
-
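-####################################################################
-#
-# As a minimal sketch (reusing the :code:`device_key`, :code:`use_android` and
-# :code:`log_file` defined above; the exact numbers are only illustrative),
-# a longer-budget configuration with a larger timeout could look like this:
-#
-# .. code-block:: python
-#
-#     tuning_option = {
-#         'log_filename': log_file,
-#         'tuner': 'xgb',
-#         'n_trial': 2000,        # more trials per task
-#         'early_stopping': 800,  # stop a task early when it stops improving
-#
-#         'measure_option': autotvm.measure_option(
-#             builder=autotvm.LocalBuilder(
-#                 build_func='ndk' if use_android else 'default'),
-#             runner=autotvm.RPCRunner(
-#                 device_key, host='localhost', port=9190,
-#                 number=10,
-#                 timeout=20,     # larger timeout for slow devices or large conv2d workloads
-#             ),
-#         ),
-#     }
-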
-###################################################################
-# Begin Tuning
-# ------------
-# Now we can extract tuning tasks from the network and begin tuning.
-# Here, we provide a simple utility function to tune a list of tasks.
-# This function is just an initial implementation which tunes them in sequential order.
-# We will introduce a more sophisticated tuning scheduler in the future.
-
-# You can skip the implementation of this function for this tutorial.
-def tune_tasks(tasks,
- measure_option,
- tuner='xgb',
- n_trial=1000,
- early_stopping=None,
- log_filename='tuning.log',
- use_transfer_learning=True,
- try_winograd=True):
- if try_winograd:
- for i in range(len(tasks)):
- try: # try winograd template
- tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
- tasks[i].target, tasks[i].target_host, 'winograd')
- tasks.append(tsk)
- except Exception:
- pass
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
-
- # create tuner
- if tuner == 'xgb' or tuner == 'xgb-rank':
- tuner_obj = XGBTuner(tsk, loss_type='rank')
- elif tuner == 'ga':
- tuner_obj = GATuner(tsk, pop_size=50)
- elif tuner == 'random':
- tuner_obj = RandomTuner(tsk)
- elif tuner == 'gridsearch':
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
- tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- n_trial = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(n_trial=n_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(n_trial, prefix=prefix),
- autotvm.callback.log_to_file(tmp_log_file)])
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-def tune_and_evaluate(tuning_opt):
- # extract workloads from nnvm graph
- print("Extract tasks...")
- net, params, input_shape, out_shape = get_network(network, batch_size=1)
- tasks = autotvm.task.extract_from_graph(net, target=target, target_host=target_host,
- shape={'data': input_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d, nnvm.sym.dense))
-
- # run tuning tasks
- print("Tuning...")
- tune_tasks(tasks, **tuning_opt)
-
- # compile kernels with history best records
- with autotvm.apply_history_best(log_file):
- print("Compile...")
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(
- net, target=target, target_host=target_host,
- shape={'data': input_shape}, params=params, dtype=dtype)
-
- # export library
- tmp = tempdir()
- if use_android:
- from tvm.contrib import ndk
- filename = "net.so"
- lib.export_library(tmp.relpath(filename), ndk.create_shared)
- else:
- filename = "net.tar"
- lib.export_library(tmp.relpath(filename))
-
- # upload module to device
- print("Upload...")
- remote = autotvm.measure.request_remote(device_key, 'localhost', 9190,
- timeout=10000)
- remote.upload(tmp.relpath(filename))
- rlib = remote.load_module(filename)
-
- # upload parameters to device
- ctx = remote.context(str(target), 0)
- module = runtime.create(graph, rlib, ctx)
- data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
- module.set_input('data', data_tvm)
- module.set_input(**params)
-
- # evaluate
- print("Evaluate inference time cost...")
- ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
- prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
- print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
- (np.mean(prof_res), np.std(prof_res)))
-
-# We do not run the tuning in our webpage server since it takes too long.
-# Uncomment the following line to run it by yourself.
-
-# tune_and_evaluate(tuning_option)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract features from them.
-# So a high-performance CPU is recommended.
-# One sample output is listed below. It takes about 3 hours on a 32T AMD Ryzen Threadripper.
-#
-# .. code-block:: bash
-#
-# Extract tasks...
-# Tuning...
-# [Task 1/17] Current/Best: 25.30/ 39.12 GFLOPS | Progress: (992/1000) | 751.22 s Done.
-# [Task 2/17] Current/Best: 40.70/ 45.50 GFLOPS | Progress: (736/1000) | 545.46 s Done.
-# [Task 3/17] Current/Best: 38.83/ 42.35 GFLOPS | Progress: (992/1000) | 1549.85 s Done.
-# [Task 4/17] Current/Best: 23.31/ 31.02 GFLOPS | Progress: (640/1000) | 1059.31 s Done.
-# [Task 5/17] Current/Best: 0.06/ 2.34 GFLOPS | Progress: (544/1000) | 305.45 s Done.
-# [Task 6/17] Current/Best: 10.97/ 17.20 GFLOPS | Progress: (992/1000) | 1050.00 s Done.
-# [Task 7/17] Current/Best: 8.98/ 10.94 GFLOPS | Progress: (928/1000) | 421.36 s Done.
-# [Task 8/17] Current/Best: 4.48/ 14.86 GFLOPS | Progress: (704/1000) | 582.60 s Done.
-# [Task 9/17] Current/Best: 10.30/ 25.99 GFLOPS | Progress: (864/1000) | 899.85 s Done.
-# [Task 10/17] Current/Best: 11.73/ 12.52 GFLOPS | Progress: (608/1000) | 304.85 s Done.
-# [Task 11/17] Current/Best: 15.26/ 18.68 GFLOPS | Progress: (800/1000) | 747.52 s Done.
-# [Task 12/17] Current/Best: 17.48/ 26.71 GFLOPS | Progress: (1000/1000) | 1166.40 s Done.
-# [Task 13/17] Current/Best: 0.96/ 11.43 GFLOPS | Progress: (960/1000) | 611.65 s Done.
-# [Task 14/17] Current/Best: 17.88/ 20.22 GFLOPS | Progress: (672/1000) | 670.29 s Done.
-# [Task 15/17] Current/Best: 11.62/ 13.98 GFLOPS | Progress: (736/1000) | 449.25 s Done.
-# [Task 16/17] Current/Best: 19.90/ 23.83 GFLOPS | Progress: (608/1000) | 708.64 s Done.
-# [Task 17/17] Current/Best: 17.98/ 22.75 GFLOPS | Progress: (736/1000) | 1122.60 s Done.
-# Compile...
-# Upload...
-# Evaluate inference time cost...
-# Mean inference time (std dev): 128.05 ms (7.74 ms)
-#
-
-######################################################################
-#
-# .. note:: **Experiencing Difficulties?**
-#
-# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS",
-# then there must be something wrong.
-#
-# First, make sure you set the correct configuration of your device.
-# Then, you can print debug information by adding these lines at the beginning
-# of the script. They will print every measurement result, where you can find useful
-# error messages.
-#
-# .. code-block:: python
-#
-# import logging
-# logging.getLogger('autotvm').setLevel(logging.DEBUG)
-#
-# Finally, always feel free to ask our community for help on https://discuss.tvm.ai
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network for x86 CPU (NNVM)
-======================================================
-**Author**: `Yao Wang <https://github.com/kevinthesun>`_
-
-This is a tutorial about how to tune a convolutional neural network
-for x86 CPU.
-"""
-import os
-import numpy as np
-
-import nnvm.testing
-import nnvm.compiler
-import tvm
-from tvm import autotvm
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-import tvm.contrib.graph_runtime as runtime
-
-#################################################################
-# Define network
-# --------------
-# First we need to define the network in nnvm symbol API.
-# We can load some pre-defined network from :code:`nnvm.testing`.
-# We can also load models from MXNet, ONNX and TensorFlow (see NNVM
-# tutorials :ref:`tutorial-nnvm` for more details).
-#
-# In this tutorial, we choose resnet-18 as the tuning example.
-
-def get_network(name, batch_size):
- """Get the symbol definition and random weight of a network"""
- input_shape = (batch_size, 3, 224, 224)
- output_shape = (batch_size, 1000)
-
- if "resnet" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif "vgg" in name:
- n_layer = int(name.split('-')[1])
- net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
- elif name == 'mobilenet':
- net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
- elif name == 'squeezenet_v1.1':
- net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1')
- elif name == 'inception_v3':
-        input_shape = (batch_size, 3, 299, 299)
- net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
- elif name == 'custom':
- # an example for custom network
- from nnvm.testing import utils
- net = nnvm.sym.Variable('data')
- net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
- net = nnvm.sym.flatten(net)
- net = nnvm.sym.dense(net, units=1000)
- net, params = utils.create_workload(net, batch_size, (3, 224, 224))
- elif name == 'mxnet':
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
- block = get_model('resnet18_v1', pretrained=True)
- net, params = nnvm.frontend.from_mxnet(block)
- net = nnvm.sym.softmax(net)
- else:
- raise ValueError("Unsupported network: " + name)
-
- return net, params, input_shape, output_shape
-
-# Replace "llvm" with the correct target of your cpu.
-# For example, for AWS EC2 c5 instance with Intel Xeon
-# Platinum 8000 series, the target should be "llvm -mcpu=skylake-avx512".
-# For AWS EC2 c4 instance with Intel Xeon E5-2666 v3, it should be
-# "llvm -mcpu=core-avx2".
-target = "llvm"
-
-batch_size = 1
-dtype = "float32"
-model_name = "resnet-18"
-log_file = "%s.log" % model_name
-
-# Set number of threads used for tuning based on the number of
-# physical cpu cores on your machine.
-num_threads = 1
-os.environ["TVM_NUM_THREADS"] = str(num_threads)
-
-
-#################################################################
-# Configure tensor tuning settings and create tasks
-# -------------------------------------------------
-# To get better kernel execution performance on x86 CPU,
-# we need to change the data layout of the convolution kernel from
-# "NCHW" to "NCHWc". To deal with this situation, we define a
-# conv2d_NCHWc operator in topi. We will tune this operator
-# instead of the plain conv2d.
-#
-# We will use local mode for the tuning configuration. RPC tracker
-# mode can be set up similarly to the approach in the
-# :ref:`tune_nnvm_arm` tutorial.
-
-tuning_option = {
- 'log_filename': log_file,
- 'tuner': 'random',
- 'early_stopping': None,
-
- 'measure_option': autotvm.measure_option(
- builder=autotvm.LocalBuilder(),
- runner=autotvm.LocalRunner(number=10, repeat=1,
- min_repeat_ms=1000),
- ),
-}
-
-# You can skip the implementation of this function for this tutorial.
-def tune_kernels(tasks,
- measure_option,
- tuner='gridsearch',
- early_stopping=None,
- log_filename='tuning.log'):
-
- for i, tsk in enumerate(tasks):
- prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
-
- # converting conv2d tasks to conv2d_NCHWc tasks
- op_name = tsk.workload[0]
- if op_name == 'conv2d':
- func_create = 'topi_x86_conv2d_NCHWc'
- elif op_name == 'depthwise_conv2d_nchw':
- func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
- else:
- raise ValueError("Tuning {} is not supported on x86".format(op_name))
-
- task = autotvm.task.create(func_create, args=tsk.args,
- target=target, template_key='direct')
- task.workload = tsk.workload
-
- # create tuner
- if tuner == 'xgb' or tuner == 'xgb-rank':
- tuner_obj = XGBTuner(task, loss_type='rank')
- elif tuner == 'ga':
- tuner_obj = GATuner(task, pop_size=50)
- elif tuner == 'random':
- tuner_obj = RandomTuner(task)
- elif tuner == 'gridsearch':
- tuner_obj = GridSearchTuner(task)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- # do tuning
- n_trial=len(task.config_space)
- tuner_obj.tune(n_trial=n_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(n_trial, prefix=prefix),
- autotvm.callback.log_to_file(log_filename)])
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-def tune_and_evaluate(tuning_opt):
- # extract workloads from nnvm graph
- print("Extract tasks...")
- net, params, data_shape, out_shape = get_network(model_name, batch_size)
- tasks = autotvm.task.extract_from_graph(net, target=target,
- shape={'data': data_shape}, dtype=dtype,
- symbols=(nnvm.sym.conv2d,))
-
- # run tuning tasks
- print("Tuning...")
- tune_kernels(tasks, **tuning_opt)
-
- # compile kernels with history best records
- with autotvm.apply_history_best(log_file):
- print("Compile...")
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(
- net, target=target, shape={'data': data_shape}, params=params, dtype=dtype)
-
- # upload parameters to device
- ctx = tvm.cpu()
- data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
- module = runtime.create(graph, lib, ctx)
- module.set_input('data', data_tvm)
- module.set_input(**params)
-
- # evaluate
- print("Evaluate inference time cost...")
- ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
- prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
- print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
- (np.mean(prof_res), np.std(prof_res)))
-
-# We do not run the tuning in our webpage server since it takes too long.
-# Uncomment the following line to run it by yourself.
-
-# tune_and_evaluate(tuning_option)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract features from them.
-# So a high-performance CPU is recommended.
-# One sample output is listed below.
-#
-# .. code-block:: bash
-#
-# Extract tasks...
-# Tuning...
-# [Task 1/12] Current/Best: 598.05/2497.63 GFLOPS | Progress: (252/252) | 1357.95 s Done.
-# [Task 2/12] Current/Best: 522.63/2279.24 GFLOPS | Progress: (784/784) | 3989.60 s Done.
-# [Task 3/12] Current/Best: 447.33/1927.69 GFLOPS | Progress: (784/784) | 3869.14 s Done.
-# [Task 4/12] Current/Best: 481.11/1912.34 GFLOPS | Progress: (672/672) | 3274.25 s Done.
-# [Task 5/12] Current/Best: 414.09/1598.45 GFLOPS | Progress: (672/672) | 2720.78 s Done.
-# [Task 6/12] Current/Best: 508.96/2273.20 GFLOPS | Progress: (768/768) | 3718.75 s Done.
-# [Task 7/12] Current/Best: 469.14/1955.79 GFLOPS | Progress: (576/576) | 2665.67 s Done.
-# [Task 8/12] Current/Best: 230.91/1658.97 GFLOPS | Progress: (576/576) | 2435.01 s Done.
-# [Task 9/12] Current/Best: 487.75/2295.19 GFLOPS | Progress: (648/648) | 3009.95 s Done.
-# [Task 10/12] Current/Best: 182.33/1734.45 GFLOPS | Progress: (360/360) | 1755.06 s Done.
-# [Task 11/12] Current/Best: 372.18/1745.15 GFLOPS | Progress: (360/360) | 1684.50 s Done.
-# [Task 12/12] Current/Best: 215.34/2271.11 GFLOPS | Progress: (400/400) | 2128.74 s Done.
-# Compile...
-# Evaluate inference time cost...
-# Mean inference time (std dev): 3.16 ms (0.03 ms)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Using External Libraries in NNVM
-================================
-**Author**: `Masahiro Masuda <https://github.com/masahi>`_
-
-This is a short tutorial on how to use external libraries such as cuDNN or cuBLAS with NNVM.
-
-NNVM uses TVM internally to generate target-specific code. For example, with the CUDA backend TVM generates CUDA kernels for all layers in the user-provided network.
-But sometimes it is also helpful to incorporate external libraries developed by various vendors into NNVM.
-Luckily, TVM has a mechanism to transparently call into these libraries.
-For NNVM users, all we need to do is set the target string appropriately.
-
-Before we can use external libraries from NNVM, your TVM needs to be built with the libraries you want to use.
-For example, to use cuDNN, the USE_CUDNN option in tvm/make/config.mk needs to be enabled, and the cuDNN include and library directories need to be specified.
-
-To begin with, we import NNVM and TVM.
-"""
-import tvm
-import numpy as np
-from tvm.contrib import graph_runtime as runtime
-import nnvm.symbol as sym
-import nnvm.compiler
-from nnvm.testing import utils
-
-######################################################################
-# Create a simple network
-# -----------------------
-# Let's create a very simple network for demonstration.
-# It consists of convolution, batch normalization, and ReLU activation.
-
-out_channels = 16
-data = sym.Variable(name="data")
-simple_net = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channels, padding = (1, 1), use_bias=True)
-simple_net = sym.batch_norm(data=simple_net)
-simple_net = sym.relu(data=simple_net)
-
-batch_size = 1
-data_shape = (batch_size, 3, 224, 224)
-net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])
-
-######################################################################
-# Build and run with cuda backend
-# -------------------------------
-# We build and run this network with cuda backend, as usual.
-# By setting the logging level to DEBUG, the result of NNVM graph compilation will be dumped as pseudo code.
-import logging
-logging.basicConfig(level=logging.DEBUG) # to dump TVM IR after fusion
-
-target = "cuda"
-graph, lib, params = nnvm.compiler.build(
- net, target, shape={"data": data_shape}, params=params)
-
-ctx = tvm.context(target, 0)
-data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
-module = runtime.create(graph, lib, ctx)
-module.set_input(**params)
-module.set_input("data", data)
-module.run()
-out_shape = (batch_size, out_channels, 224, 224)
-out = module.get_output(0, tvm.nd.empty(out_shape))
-out_cuda = out.asnumpy()
-
-######################################################################
-# The generated pseudo code should look something like below.
-# Note how bias add, batch normalization, and ReLU activation are fused into the convolution kernel.
-# TVM generates a single, fused kernel from this representation.
-#
-# .. code-block:: text
-#
-# produce compute {
-# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 112
-# // attr [input1.shared] storage_scope = "shared"
-# allocate input1.shared[float32 * 16 * 3 * 3 * 3]
-# // attr [compute] storage_scope = "local"
-# allocate compute[float32 * 16 * 1 * 1 * 1 * 1]
-# // attr [pad_temp.global.global.shared] storage_scope = "shared"
-# allocate pad_temp.global.global.shared[float32 * 1 * 1 * 4 * 57 * 4]
-# // attr [iter_var(threadIdx.x, Range(min=0, extent=448), threadIdx.x)] thread_extent = 448
-# produce compute {
-# produce input1.shared {
-# for (ax0, 0, 16) {
-# if (likely((threadIdx.x < 27))) {
-# input1.shared[(threadIdx.x + (ax0*27))] = input1[((((((blockIdx.x/112)*48) + (threadIdx.x/9))*9) + (threadIdx.x % 9)) + (ax0*27))]
-# }
-# }
-# }
-# compute[0] = 0.000000f
-# compute[1] = 0.000000f
-# compute[2] = 0.000000f
-# compute[3] = 0.000000f
-# compute[4] = 0.000000f
-# compute[5] = 0.000000f
-# compute[6] = 0.000000f
-# compute[7] = 0.000000f
-# compute[8] = 0.000000f
-# compute[9] = 0.000000f
-# compute[10] = 0.000000f
-# compute[11] = 0.000000f
-# compute[12] = 0.000000f
-# compute[13] = 0.000000f
-# compute[14] = 0.000000f
-# compute[15] = 0.000000f
-# for (rc, 0, 3) {
-# produce pad_temp.global.global.shared {
-# if (likely((threadIdx.x < 228))) {
-# if (likely(((blockIdx.x*2) < (226 - (threadIdx.x/57))))) {
-# pad_temp.global.global.shared[ramp((threadIdx.x*4), 1, 4)] = pad_temp[ramp(((((((blockIdx.x*2) + (threadIdx.x/57))*57) + (threadIdx.x % 57)) + (rc*12882))*4), 1, 4)]
-# }
-# }
-# }
-# for (ry, 0, 3) {
-# for (rx, 0, 3) {
-# compute[0] = (compute[0] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[((((rc*3) + ry)*3) + rx)]))
-# compute[1] = (compute[1] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 27)]))
-# compute[2] = (compute[2] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 54)]))
-# compute[3] = (compute[3] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 81)]))
-# compute[4] = (compute[4] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 108)]))
-# compute[5] = (compute[5] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 135)]))
-# compute[6] = (compute[6] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 162)]))
-# compute[7] = (compute[7] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 189)]))
-# compute[8] = (compute[8] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 216)]))
-# compute[9] = (compute[9] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 243)]))
-# compute[10] = (compute[10] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 270)]))
-# compute[11] = (compute[11] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 297)]))
-# compute[12] = (compute[12] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 324)]))
-# compute[13] = (compute[13] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 351)]))
-# compute[14] = (compute[14] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 378)]))
-# compute[15] = (compute[15] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 405)]))
-# }
-# }
-# }
-# }
-# compute[(((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224))] = max((((compute[0] + input2[((blockIdx.x/112)*16)])*input3[((blockIdx.x/112)*16)]) + input4[((blockIdx.x/112)*16)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 50176)] = max((((compute[1] + input2[(((blockIdx.x/112)*16) + 1)])*input3[(((blockIdx.x/112)*16) + 1)]) + input4[(((blockIdx.x/112)*16) + 1)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 100352)] = max((((compute[2] + input2[(((blockIdx.x/112)*16) + 2)])*input3[(((blockIdx.x/112)*16) + 2)]) + input4[(((blockIdx.x/112)*16) + 2)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 150528)] = max((((compute[3] + input2[(((blockIdx.x/112)*16) + 3)])*input3[(((blockIdx.x/112)*16) + 3)]) + input4[(((blockIdx.x/112)*16) + 3)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 200704)] = max((((compute[4] + input2[(((blockIdx.x/112)*16) + 4)])*input3[(((blockIdx.x/112)*16) + 4)]) + input4[(((blockIdx.x/112)*16) + 4)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 250880)] = max((((compute[5] + input2[(((blockIdx.x/112)*16) + 5)])*input3[(((blockIdx.x/112)*16) + 5)]) + input4[(((blockIdx.x/112)*16) + 5)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 301056)] = max((((compute[6] + input2[(((blockIdx.x/112)*16) + 6)])*input3[(((blockIdx.x/112)*16) + 6)]) + input4[(((blockIdx.x/112)*16) + 6)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 351232)] = max((((compute[7] + input2[(((blockIdx.x/112)*16) + 7)])*input3[(((blockIdx.x/112)*16) + 7)]) + input4[(((blockIdx.x/112)*16) + 7)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 401408)] = max((((compute[8] + input2[(((blockIdx.x/112)*16) + 8)])*input3[(((blockIdx.x/112)*16) + 8)]) + input4[(((blockIdx.x/112)*16) + 8)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 451584)] = max((((compute[9] + input2[(((blockIdx.x/112)*16) + 9)])*input3[(((blockIdx.x/112)*16) + 9)]) + input4[(((blockIdx.x/112)*16) + 9)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 501760)] = max((((compute[10] + input2[(((blockIdx.x/112)*16) + 10)])*input3[(((blockIdx.x/112)*16) + 10)]) + input4[(((blockIdx.x/112)*16) + 10)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 551936)] = max((((compute[11] + input2[(((blockIdx.x/112)*16) + 11)])*input3[(((blockIdx.x/112)*16) + 11)]) + input4[(((blockIdx.x/112)*16) + 11)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 602112)] = max((((compute[12] + input2[(((blockIdx.x/112)*16) + 12)])*input3[(((blockIdx.x/112)*16) + 12)]) + input4[(((blockIdx.x/112)*16) + 12)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 652288)] = max((((compute[13] + input2[(((blockIdx.x/112)*16) + 13)])*input3[(((blockIdx.x/112)*16) + 13)]) + input4[(((blockIdx.x/112)*16) + 13)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 702464)] = max((((compute[14] + input2[(((blockIdx.x/112)*16) + 14)])*input3[(((blockIdx.x/112)*16) + 14)]) + input4[(((blockIdx.x/112)*16) + 14)]), 0.000000f)
-# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 752640)] = max((((compute[15] + input2[(((blockIdx.x/112)*16) + 15)])*input3[(((blockIdx.x/112)*16) + 15)]) + input4[(((blockIdx.x/112)*16) + 15)]), 0.000000f)
-# }
-#
-
-######################################################################
-# Use cuDNN for a convolutional layer
-# -----------------------------------
-# We can use cuDNN to replace convolution kernels with cuDNN ones.
-# To do that, all we need to do is to append the option " -libs=cudnn" to the target string.
-net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])
-target = "cuda -libs=cudnn" # use cudnn for convolution
-graph, lib, params = nnvm.compiler.build(
- net, target, shape={"data": data_shape}, params=params)
-
-ctx = tvm.context(target, 0)
-data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
-module = runtime.create(graph, lib, ctx)
-module.set_input(**params)
-module.set_input("data", data)
-module.run()
-out_shape = (batch_size, out_channels, 224, 224)
-out = module.get_output(0, tvm.nd.empty(out_shape))
-out_cudnn = out.asnumpy()
-
-######################################################################
-# Note that if you use cuDNN, NNVM cannot fuse convolution with the layers following it.
-# This is because layer fusion happens at the level of the TVM internal representation (IR).
-# NNVM treats external libraries as a black box, so there is no way to fuse them with TVM IR.
-#
-# The pseudo code below shows that cuDNN convolution + bias add + batch norm + ReLU is turned into two stages of computation, one for the cuDNN call and the other for the rest of the operations.
-#
-# .. code-block:: text
-#
-# allocate y[float32 * 1 * 16 * 224 * 224]
-# produce y {
-# // attr [0] extern_scope = 0
-# tvm_call_packed("tvm.contrib.cudnn.conv2d.forward", 1, 0, 1, 1, 1, 1, 1, 1, 1, tvm_stack_make_array(input0, tvm_stack_make_shape(1, 3, 224, 224), 0, 4, 0.000000f, 0), tvm_stack_make_array(input1, tvm_stack_make_shape(16, 3, 3, 3), 0, 4, 0.000000f, 0), tvm_stack_make_array(y, tvm_stack_make_shape(1, 16, 224, 224), 0, 4, 0.000000f, 0))
-# }
-# produce compute {
-# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 1568
-# // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
-# compute[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] = max((((y[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] + input2[(((blockIdx.x*512) + threadIdx.x)/50176)])*input3[(((blockIdx.x*512) + threadIdx.x)/50176)]) + input4[(((blockIdx.x*512) + threadIdx.x)/50176)]), 0.000000f)
-# }
-#
-
-######################################################################
-# Verify the result
-# -----------------
-# We can check that the results of two runs match.
-
-tvm.testing.assert_allclose(out_cuda, out_cudnn, rtol=1e-5)
-
-#####################################################################
-# Conclusion
-# ----------
-# This tutorial covered the usage of cuDNN with NNVM.
-# We also have support for cuBLAS. If cuBLAS is enabled, it will be used inside a fully connected layer (nnvm.symbol.dense).
-# To use cuBLAS, set a target string as "cuda -libs=cublas".
-# You can use both cuDNN and cuBLAS with "cuda -libs=cudnn,cublas".
-#
-# For ROCm backend, we have support for MIOpen and rocBLAS.
-# They can be enabled with target "rocm -libs=miopen,rocblas".
-#
-# Being able to use external libraries is great, but we need to keep a few caveats in mind.
-#
-# First, the use of external libraries may restrict your usage of TVM and NNVM.
-# For example, MIOpen only supports the NCHW layout and the fp32 data type at the moment, so you cannot use other layouts or data types in TVM.
-#
-# Second, and more importantly, external libraries restrict the possibility of operator fusion during graph compilation, as shown above.
-# TVM and NNVM aim to achieve the best performance on a variety of hardware, with joint operator-level and graph-level optimization.
-# To achieve this goal, we should continue developing better optimizations for TVM and NNVM, while using external libraries as a nice way to fall back to existing implementations when necessary.
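-#
-# As a minimal sketch of the target strings mentioned above (reusing the
-# :code:`net`, :code:`params` and :code:`data_shape` defined earlier in this
-# tutorial), enabling both cuDNN and cuBLAS only requires changing the target:
-#
-# .. code-block:: python
-#
-#     # convolutions go to cuDNN, dense layers to cuBLAS; everything else is compiled by TVM
-#     target = "cuda -libs=cudnn,cublas"
-#     graph, lib, params = nnvm.compiler.build(
-#         net, target, shape={"data": data_shape}, params=params)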
+++ /dev/null
-<html>
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements. See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership. The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License. You may obtain a copy of the License at -->
-
-<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied. See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-
-<head>
- <meta charset="UTF-8">
- <title>NNVM WebGL Test Page</title>
-</head>
-
-<body>
- <h1>NNVM WebGL Test Page</h1>
-
- <!-- We will draw the input image here. -->
- <div>Input Image:</div>
-    <img id="image" src="data.png">
-
- <!-- We need a canvas to get the image pixel data. Hide this element. -->
- <canvas hidden id="image_canvas" width="224" height="224"></canvas>
-
-    <!-- We will write the prediction result here. -->
- <div id="prediction"></div>
-
- <!-- We will write all log messages here. -->
- <div id="log">Log:</div>
-
- <!-- The OpenGL canvas. -->
- <canvas id="canvas"></canvas>
-
- <script>
- var Module = {};
-
- // resnet.js would recognize Module["canvas"]
- Module["canvas"] = document.getElementById("canvas");
- </script>
-
- <script src="resnet.js"></script>
- <script src="tvm_runtime.js"></script>
-
- <script>
-
- /**
- * Load a text file synchronously.
- * @param {string} url The file path.
- * @return {string} The file content.
- */
- function load_file(url) {
- assert(typeof url == "string", "URL must be string");
-
- var req = new XMLHttpRequest();
- var result;
- req.addEventListener("load", function() {
- result = this.responseText;
- });
- req.open("get", url, false);
- req.send();
- return result;
- }
-
- /**
- * The index of the maximum element in an array.
- * @param {Array} The array.
- * @return {number} The index.
- */
- function argmax(arr) {
- assert(typeof arr.length == "number", "Input must be array-like");
-
- var res = 0;
- for (var i = 0; i < arr.length; i++) {
- if (arr[i] > arr[res]) {
- res = i;
- }
- }
- return res;
- }
-
- /**
- * Preprocess an image to fit resnet input format.
- * @param {ImageData} The input image data. Should be 224x224xRGBA.
- * @return {Float32Array} The preprocessed input array.
- */
- function preprocess_image(image_data) {
- assert(image_data instanceof ImageData, "Input must be ImageData.");
- assert(image_data.width == 224, "Width must be 224.");
- assert(image_data.height == 224, "Height must be 224.");
-
- var width = image_data.width;
- var height = image_data.height;
- var npixels = width * height;
-
- var rgba_uint8 = image_data.data;
- assert(rgba_uint8.length == npixels * 4, "Image should be RGBA.");
-
- // Drop alpha channel. Resnet does not need it.
- var rgb_uint8 = new Uint8Array(npixels * 3);
- for (var i = 0; i < npixels; i++) {
- rgb_uint8[i * 3] = rgba_uint8[i * 4];
- rgb_uint8[i * 3 + 1] = rgba_uint8[i * 4 + 1];
- rgb_uint8[i * 3 + 2] = rgba_uint8[i * 4 + 2];
- }
-
- // Cast to float and normalize.
- var rgb_float = new Float32Array(npixels * 3);
- for (var i = 0; i < npixels; i++) {
- rgb_float[i * 3] = (rgb_uint8[i * 3] - 123.0) / 58.395;
- rgb_float[i * 3 + 1] = (rgb_uint8[i * 3 + 1] - 117.0) / 57.12;
- rgb_float[i * 3 + 2] = (rgb_uint8[i * 3 + 2] - 104.0) / 57.375;
- }
-
- // Transpose. Resnet expects 3 greyscale images.
- var data = new Float32Array(npixels * 3);
- for (var i = 0; i < npixels; i++) {
- data[i] = rgb_float[i * 3];
- data[npixels + i] = rgb_float[i * 3 + 1];
- data[npixels * 2 + i] = rgb_float[i * 3 + 2];
- }
-
- return data;
- }
-
- // Set these variables at the global scope so that we can debug more easily.
- var tvm;
- var syslib;
- var graph_json_str;
- var loaded_module;
- var data_array;
- var data;
- var input;
- var base64_params;
- var output;
- Module["onRuntimeInitialized"] = function () {
- tvm = tvm_runtime.create(Module);
-
- tvm.logger = function (message) {
- console.log(message);
- var d = document.createElement("div");
- d.innerHTML = message;
- document.getElementById("log").appendChild(d);
- };
-
- tvm.logger("Loading SystemLib...");
- syslib = tvm.systemLib();
- tvm.logger("- SystemLib loaded!");
-
- tvm.logger("Loading resnet model...");
- graph_json_str = load_file("resnet.json");
- ctx = tvm.context("opengl", 0);
- loaded_module = tvm.createGraphRuntime(graph_json_str, syslib, ctx);
- tvm.logger("- Model loaded!");
-
- tvm.logger("Loading model parameters...");
- base64_params = load_file("resnet.params");
- loaded_module.load_base64_params(base64_params);
- tvm.logger("- Model parameters loaded!");
-
- tvm.logger("Loading input image...");
- var image = document.getElementById("image");
- var image_canvas = document.getElementById("image_canvas");
- var image_canvas_context = image_canvas.getContext("2d");
- image_canvas_context.drawImage(image, 0, 0);
- var image_data = image_canvas_context.getImageData(0, 0, 224, 224);
- data_array = preprocess_image(image_data);
- tvm.logger("- Input image loaded!");
-
- tvm.logger("Setting input data...");
- data_shape = JSON.parse(load_file("data_shape.json"));
- data = tvm.empty(data_shape, "float32", ctx);
- data.copyFrom(data_array);
- loaded_module.set_input("data", data);
- tvm.logger("- Input data set!");
-
- tvm.logger("Running model...");
- loaded_module.run();
- tvm.logger("- Model execution completed!");
-
- out_shape = JSON.parse(load_file("out_shape.json"));
- output = tvm.empty(out_shape, "float32", ctx);
- loaded_module.get_output(0, output);
-
- prediction = argmax(output.asArray());
-
- synset = JSON.parse(load_file("synset.json"));
- result_string = "Prediction: " + synset[prediction] + "\n";
- document.getElementById("prediction").innerHTML = result_string;
- };
-
- </script>
-</body>
-
-</html>
RELAY_REGISTER_OP("nn.contrib_conv2d_winograd_weight_transform")
.describe(R"code(Weight transformation of winograd fast convolution algorithm.
-Separate this into another nnvm symbol in order to enable Precompute Pass to compute the
+Separate this into another operator in order to enable Precompute Pass to compute the
weight transformation in advance.
- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
# sgx file
"apps/sgx/enclave/sgx-deps.diff",
# html for demo purposes
- "nnvm/tutorials/web/resnet.html",
"tests/webgl/test_static_webgl_library.html",
"web/example_rpc.html",
# images are normally not allowed
layer = LIB.make_connected_layer(1, 12, 2, 1, 1, 0)
for i in range(5):
layer.rolling_mean[i] = np.random.rand(1)
- layer.rolling_variance[i] = np.random.rand(1)
+ layer.rolling_variance[i] = np.random.rand(1) + 0.5
layer.scales[i] = np.random.rand(1)
net.layers[0] = layer
net.w = net.h = 2
layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0)
for i in range(32):
layer.rolling_mean[i] = np.random.rand(1)
- layer.rolling_variance[i] = np.random.rand(1)
+ layer.rolling_variance[i] = np.random.rand(1) + 0.5
net.layers[0] = layer
net.w = net.h = 224
LIB.resize_network(net, 224, 224)
gamma = np.random.uniform(size=(shape[axis])).astype("float32")
beta = np.random.uniform(size=(shape[axis])).astype("float32")
moving_mean = np.random.uniform(size=(shape[axis])).astype("float32")
- moving_var = np.random.uniform(size=(shape[axis])).astype("float32")
+ moving_var = np.abs(np.random.uniform(size=(shape[axis])).astype("float32")) + 0.5
ref_res = mx.nd.BatchNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta),
mx.nd.array(moving_mean), mx.nd.array(moving_var),
axis=axis, use_global_stats=True, fix_gamma=fix_gamma)
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test alter conv2d layout pass"""
-import tvm
-import nnvm
-
-from tvm import relay
-from tvm import autotvm
-from tvm.relay import transform
-from tvm.relay.analysis import alpha_equal
-
-
-def test_alter_layout_conv2d():
-    """Additional layout transformations should occur on the graph.
- """
-
- def convnet():
-        """Altering the layout of a simple convnet (from image super-resolution).
- """
- bias1 = relay.var('bias1', shape=(64,))
- bias2 = relay.var('bias2', shape=(64,))
- bias3 = relay.var('bias3', shape=(64,))
- bias4 = relay.var('bias4', shape=(64,))
- weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
- weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
- weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
- weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
- data = relay.var("x", shape=(1, 1, 224, 224))
- n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
- n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
- n02 = relay.add(n00, n01)
- n03 = relay.nn.relu(n02)
- n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
- n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
- n06 = relay.add(n04, n05)
- n07 = relay.nn.relu(n06)
- n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
- n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
- n10 = relay.add(n08, n09)
- n11 = relay.nn.relu(n10)
- n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
- n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
- n14 = relay.add(n12, n13)
- n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
- n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
- net = relay.reshape(n16, newshape=[1, 1, 672, 672])
- args = relay.analysis.free_vars(net)
- return relay.Function(args, net)
-
- # orig net
- N = convnet()
-
- # trigger a test
- # for each known alter_conv2d
- targets=['cuda',
- 'opencl -device=mali',
- 'opencl -device=intel_graphics',
-
- 'llvm -device=arm_cpu',
- 'llvm -device=core-avx-ii']
-
- for tgt in targets:
- with tvm.target.create(tgt) as target:
- with autotvm.tophub.context(target):
- mod = relay.Module.from_expr(N)
- mod = transform.AlterOpLayout()(mod)
- O = mod["main"]
-
- # graph should differ
- assert not relay.analysis.alpha_equal(N, O)
-
-if __name__ == "__main__":
- import numpy as np
- np.random.seed(42)
- test_alter_layout_conv2d()
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import numpy as np
-
-import tvm
-import nnvm
-import nnvm.testing
-from nnvm.to_relay import to_relay
-from tvm import relay
-from tvm.relay.testing.config import ctx_list
-from tvm.contrib import graph_runtime
-
-def verify_nnvm_to_relay(nnvm_sym, params, data_shape=(1, 3, 224, 224)):
- def get_nnvm_output(sym, x, params, target, ctx, dtype='float32'):
- shape_dict = {'data': x.shape}
- with nnvm.compiler.build_config(opt_level=3):
- graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input("data", tvm.nd.array(x.astype(dtype)))
- m.set_input(**params)
- m.run()
- return m.get_output(0).asnumpy()
-
- def get_relay_output(sym, x, params, target, ctx, dtype='float32'):
- shape_dict = {'data': x.shape}
- func, params = to_relay(sym, shape_dict, dtype, params)
- with relay.build_config(opt_level=3):
- graph, lib, params = relay.build(func, target=target, params=params)
- m = graph_runtime.create(graph, lib, ctx)
- m.set_input("data", tvm.nd.array(x.astype(dtype)))
- m.set_input(**params)
- m.run()
- return m.get_output(0).asnumpy()
-
- x = np.random.uniform(size=data_shape)
- for target, ctx in ctx_list():
- nnvm_out = get_nnvm_output(nnvm_sym, x, params, target, ctx)
- relay_out = get_relay_output(nnvm_sym, x, params, target, ctx)
- tvm.testing.assert_allclose(nnvm_out, relay_out, rtol=1e-5, atol=1e-5)
-
-
-def test_forward_mlp():
- model, params = nnvm.testing.mlp.get_workload(1)
- verify_nnvm_to_relay(model, params)
-
-
-def test_forward_vgg():
- model, params = nnvm.testing.vgg.get_workload(1)
- verify_nnvm_to_relay(model, params)
-
-
-def test_forward_resnet():
- model, params = nnvm.testing.resnet.get_workload(1)
- verify_nnvm_to_relay(model, params)
-
-
-def test_forward_squeezenet():
- model, params = nnvm.testing.squeezenet.get_workload(1)
- verify_nnvm_to_relay(model, params)
-
-
-def test_forward_inception_v3():
- model, params = nnvm.testing.inception_v3.get_workload(1)
- verify_nnvm_to_relay(model, params, data_shape=(1, 3, 299, 299))
-
-
-def test_forward_densenet():
-    model, params = nnvm.testing.densenet.get_workload(1)
- verify_nnvm_to_relay(model, params)
-
-
-def test_forward_dqn():
- model, params = nnvm.testing.dqn.get_workload(1)
- verify_nnvm_to_relay(model, params, data_shape=(1, 4, 84, 84))
-
-
-def test_forward_split_concatenate():
- shape = (2, 16)
-
- tensor = nnvm.sym.Variable("data", shape=shape)
-
- splited = nnvm.sym.split(tensor, indices_or_sections=2, axis=1)
-
- concatenated = nnvm.sym.concatenate(*splited, axis=1)
-
- params = {}
-
- verify_nnvm_to_relay(splited[0], params, data_shape=shape)
- verify_nnvm_to_relay(splited[1], params, data_shape=shape)
- verify_nnvm_to_relay(splited, params, data_shape=shape)
- verify_nnvm_to_relay(concatenated, params, data_shape=shape)
-
-
-if __name__ == '__main__':
- test_forward_mlp()
- test_forward_vgg()
- test_forward_resnet()
- test_forward_squeezenet()
- test_forward_inception_v3()
- test_forward_densenet()
- test_forward_dqn()
- test_forward_split_concatenate()
# Define Network
# --------------
# First we need to define the network in the Relay frontend API.
-# We can load some pre-defined network from :code:`nnvm.testing`.
+# We can load some pre-defined networks from :code:`tvm.relay.testing`.
# We can also load models from MXNet, ONNX and TensorFlow.
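# For a concrete illustration (a hedged sketch, not taken from this tutorial),
# a pre-defined ResNet-18 workload can typically be loaded as shown below; the
# exact keyword arguments of :code:`get_workload` are assumptions based on the
# :code:`tvm.relay.testing` API and may need adjusting for your TVM version.
#
# .. code-block:: python
#
#     from tvm.relay import testing
#
#     # Returns a Relay module plus randomly initialized parameters.
#     mod, params = testing.resnet.get_workload(
#         num_layers=18, batch_size=1, image_shape=(3, 224, 224))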
def get_network(name, batch_size):
#
# .. code-block:: bash
#
-# echo 'export PYTHONPATH=/workspace/python:/workspacem/topi/python:/workspace/nnvm/python/:/workspace/vta/python:${PYTHONPATH}' >> ~/.bashrc
+# echo 'export PYTHONPATH=/workspace/python:/workspace/topi/python:/workspace/vta/python:${PYTHONPATH}' >> ~/.bashrc
# source ~/.bashrc
#################################################################