1 // Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
2 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 // Version 0: Initial version.
18 // Version 1: Add subgraphs to schema.
19 // Version 2: Rename operators to conform to NN API.
20 // Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
22 // Change namespace to onert_tflite
23 namespace onert_tflite;
25 // This corresponds to the version.
26 file_identifier "TFL3";
27 // File extension of any written files.
28 file_extension "tflite";
30 // IMPORTANT: All new members of tables, enums and unions must be added at the
31 // end to ensure backwards compatibility.
33 // The type of data stored in a tensor.
34 enum TensorType : byte {
48 // Custom quantization parameters for experimenting with new quantization
50 table CustomQuantization {
51 custom:[ubyte] (force_align: 16);
54 // Represents a specific quantization technique's parameters.
55 union QuantizationDetails {
59 // Parameters for converting a quantized tensor back to float.
60 table QuantizationParameters {
61 // These four parameters are the asymmetric linear quantization parameters.
62 // Given a quantized value q, the corresponding float value f should be:
63 // f = scale * (q - zero_point)
64 // For other quantization types, the QuantizationDetails below is used.
65 min:[float]; // For importing back into tensorflow.
66 max:[float]; // For importing back into tensorflow.
67 scale:[float]; // For dequantizing the tensor's values.
70 // If this is not none, the other quantization parameters (i.e. min, max,
71 // scale, zero_point fields above) are ignored and the value of the
72 // QuantizationDetails union should be used.
73 details:QuantizationDetails;
75 // Specifies the dimension of the Tensor's shape that the scales and
76 // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
77 // with quantization params:
78 // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
79 // will be quantized across the second dimension of t.
80 // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
81 // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
82 // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
83 quantized_dimension:int;
87 // We use a modification of the TACO format.
88 // Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
90 // To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
91 // potentially with a k-dimensional block (0 <= k <= n) with dims
92 // (dn, ..., dn+k-1), the format needs to specify:
93 // 1. In what order to traverse these dimensions. For example, to store a 2-D
94 // matrix in row major order, the traversal order would be (d0, d1),
95 // whereas to store it in column major order, the traversal order would be
96 // (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
97 // could be (d0, d1, d2, d3).
98 // 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
99 // tensor dimension in (d0, ..., dn-1).
100 // 3. In the traversal order defined above, the format (dense vs. sparse) and
101 // index metadata for each dimension. For a dense dimension, this is just
102 // the size of that dimension. For a sparse dimension, it's the same as
103 // the compressed index defined in the Compressed Sparse Row (CSR) format.
104 // (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
106 // The storage type for a dimension. Currently we support:
107 // 1. DENSE: each coordinate in this dimension is stored implicitly.
108 // 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
109 // compression technique is the same as what CSR uses.
110 // More types like a sparse dimension with a different compression technique
111 // could be added to the list in the future.
112 enum DimensionType : byte {
122 values:[ushort] (force_align: 4);
126 values:[ubyte] (force_align: 4);
129 // Variable-typed buffer to store the index metadata for a sparse dimension.
130 // The widest type is Int32 instead of UInt32 because tensor's shape is a int32
131 // vector. We don't want the per-dimensional index to overflow that range.
132 union SparseIndexVector {
138 table DimensionMetadata {
139 // Whether a dimension is dense or sparse.
140 format:DimensionType;
141 // Index metadata used for a dimension.
142 // - If format is DimensionType.DENSE then we use the dense_size field to
143 // store the size of that dimension. Each index in that dimension is
144 // stored implicitly.
145 // - If format is DimensionType.SPARSE_CSR then we use array_segments and
146 // array_indices to encode that dimension. array_segments represents how
147 // to segment the indices array, each segment corresponds to one element
148 // in the previous dimension. array_indices represents the index of the
149 // non-zero elements within this dimension (as those in the CSR matrix
150 // format, where the first array is row pointers and the second array is
153 array_segments:SparseIndexVector;
154 array_indices:SparseIndexVector;
157 // Parameters to encode a sparse TfLite tensor.
158 table SparsityParameters {
159 // The traversal order of the dimensions defined in the `shape` field of the
160 // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
162 // - if not block sparse, the traversal_order is just a permutation of (d0,
163 // ..., dn-1). For example, a 2-D matrix stored in row-major order would
164 // have traversal_order = (d0, d1).
165 // - if block sparse with a k-dimensional block (0 <= k <= n), the
166 // traversal_order has n + k elements. The first n elements are still a
167 // permutation of (d0, ..., dn-1). The last k elements are a permutation
168 // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
169 // example, a 2-D matrix with 2-D blocks, both stored in row-major order
170 // would have traversal_order = (d0, d1, d2, d3).
171 traversal_order:[int];
172 // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
173 // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
174 // tensor dimension in (d0, ..., dn).
175 // It's stored in the order of (dn, ..., dn+k-1).
176 // If not block-sparse, this field is NULL.
178 // In the traversal order defined above, the metadata needed for
179 // each dimension to locate the non-zero values in the original dense tensor.
180 // The size of the dim_metadata array = the size of the traversal_order array
182 dim_metadata:[DimensionMetadata];
186 // The tensor shape. The meaning of each entry is operator-specific but
187 // builtin ops use: [batch size, height, width, number of channels] (That's
188 // Tensorflow's NHWC).
191 // An index that refers to the buffers table at the root of the model. Or,
192 // if there is no data buffer associated (i.e. intermediate results), then
193 // this is 0 (which refers to an always existent empty buffer).
195 // The data_buffer itself is an opaque container, with the assumption that the
196 // target device is little-endian. In addition, all builtin operators assume
197 // the memory is ordered such that if `shape` is [4, 3, 2], then index
198 // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
200 name:string; // For debugging and importing back into tensorflow.
201 quantization:QuantizationParameters; // Optional.
203 is_variable:bool = false;
205 // Parameters to encode a sparse tensor. See the example in
206 // tensorflow/lite/testdata/sparse_tensor.json.
207 sparsity:SparsityParameters; // Optional.
209 // Encodes `shape` with unknown dimensions. Unknown dimensions are
210 // represented with -1.
211 shape_signature:[int]; // Optional.
214 // A list of builtin operators. Builtin operators are slightly faster than custom
215 // ones, but not by much. Moreover, while custom operators accept an opaque
216 // object containing configuration parameters, builtins have a predetermined
217 // set of acceptable options.
219 enum BuiltinOperator : byte {
224 DEPTHWISE_CONV_2D = 4,
227 EMBEDDING_LOOKUP = 7,
230 HASHTABLE_LOOKUP = 10,
231 L2_NORMALIZATION = 11,
233 LOCAL_RESPONSE_NORMALIZATION = 13,
240 // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
241 // since different model developers use RELU1 in different ways. Never
242 // create another op called RELU1.
246 RESIZE_BILINEAR = 23,
252 // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
253 CONCAT_EMBEDDINGS = 29,
257 EMBEDDING_LOOKUP_SPARSE = 33,
259 UNIDIRECTIONAL_SEQUENCE_RNN = 35,
261 BATCH_TO_SPACE_ND = 37,
262 SPACE_TO_BATCH_ND = 38,
268 UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
270 BIDIRECTIONAL_SEQUENCE_RNN = 46,
275 // DELEGATE is a special op type for the operations which are delegated to
277 // WARNING: Experimental interface, subject to change
279 BIDIRECTIONAL_SEQUENCE_LSTM = 52,
295 SPARSE_TO_DENSE = 68,
324 RESIZE_NEAREST_NEIGHBOR = 97,
326 SQUARED_DIFFERENCE = 99,
339 REVERSE_SEQUENCE = 112,
342 MATRIX_SET_DIAG = 115,
347 NON_MAX_SUPPRESSION_V4 = 120,
348 NON_MAX_SUPPRESSION_V5 = 121,
357 // Options for the builtin operators.
358 union BuiltinOptions {
360 DepthwiseConv2DOptions,
361 ConcatEmbeddingsOptions,
362 LSHProjectionOptions,
366 FullyConnectedOptions,
368 ConcatenationOptions,
371 LocalResponseNormalizationOptions,
373 ResizeBilinearOptions,
378 EmbeddingLookupSparseOptions,
382 BatchToSpaceNDOptions,
383 SpaceToBatchNDOptions,
397 MaximumMinimumOptions,
407 TransposeConvOptions,
408 SparseToDenseOptions,
427 BidirectionalSequenceLSTMOptions,
428 BidirectionalSequenceRNNOptions,
429 UnidirectionalSequenceLSTMOptions,
432 ResizeNearestNeighborOptions,
434 SquaredDifferenceOptions,
445 ReverseSequenceOptions,
448 MatrixSetDiagOptions,
453 NonMaxSuppressionV4Options,
454 NonMaxSuppressionV5Options,
462 enum Padding : byte { SAME, VALID }
464 enum ActivationFunctionType : byte {
473 table Conv2DOptions {
477 fused_activation_function:ActivationFunctionType;
478 dilation_w_factor:int = 1;
479 dilation_h_factor:int = 1;
482 table Pool2DOptions {
488 fused_activation_function:ActivationFunctionType;
491 table DepthwiseConv2DOptions {
492 // Parameters for DepthwiseConv version 1 or above.
496 // `depth_multiplier` is redundant. It's used by CPU kernels in
497 // TensorFlow 2.0 or below, but ignored in versions above.
498 // See comments in lite/c/builtin_op_data.h for more details.
499 depth_multiplier:int;
500 fused_activation_function:ActivationFunctionType;
501 // Parameters for DepthwiseConv version 2 or above.
502 dilation_w_factor:int = 1;
503 dilation_h_factor:int = 1;
506 table ConcatEmbeddingsOptions {
508 num_columns_per_channel:[int];
509 embedding_dim_per_channel:[int]; // This could be inferred from parameters.
512 enum LSHProjectionType: byte {
518 table LSHProjectionOptions {
519 type: LSHProjectionType;
524 fused_activation_function:ActivationFunctionType;
525 // For weights-only quantization, use asymmetric quantization for non
526 // constant inputs at evaluation time.
527 asymmetric_quantize_inputs:bool;
530 // An implementation of TensorFlow RNNCell.
532 fused_activation_function:ActivationFunctionType;
533 asymmetric_quantize_inputs:bool;
536 // An implementation of TensorFlow dynamic_rnn with RNNCell.
537 table SequenceRNNOptions {
539 fused_activation_function:ActivationFunctionType;
540 asymmetric_quantize_inputs:bool;
543 // An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
544 table BidirectionalSequenceRNNOptions {
546 fused_activation_function:ActivationFunctionType;
548 asymmetric_quantize_inputs:bool;
551 enum FullyConnectedOptionsWeightsFormat: byte {
553 SHUFFLED4x16INT8 = 1,
556 // An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
557 table FullyConnectedOptions {
558 // Parameters for FullyConnected version 1 or above.
559 fused_activation_function:ActivationFunctionType;
561 // Parameters for FullyConnected version 2 or above.
562 weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
564 // Parameters for FullyConnected version 5 or above.
565 // If set to true, then the number of dimension is preserved. Furthermore,
566 // all but the last dimension of the input and output shapes will be equal.
569 // Parameters for FullyConnected version 7 or above.
570 // If set to true, then weights-only op will use asymmetric quantization for
572 asymmetric_quantize_inputs: bool;
575 table SoftmaxOptions {
579 // An implementation of TensorFlow concat.
580 table ConcatenationOptions {
582 fused_activation_function:ActivationFunctionType;
586 fused_activation_function:ActivationFunctionType;
590 fused_activation_function:ActivationFunctionType;
593 table L2NormOptions {
594 fused_activation_function:ActivationFunctionType;
597 table LocalResponseNormalizationOptions {
604 enum LSTMKernelType : byte {
605 // Full LSTM kernel which supports peephole and projection.
607 // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
611 // An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
613 // Parameters for LSTM version 1 or above.
614 fused_activation_function:ActivationFunctionType;
615 cell_clip: float; // Optional, 0.0 means no clipping
616 proj_clip: float; // Optional, 0.0 means no clipping
618 // Parameters for LSTM version 2 or above.
619 // Basic kernel is only supported in version 2 or above.
620 kernel_type: LSTMKernelType = FULL;
622 // Parameters for LSTM version 4 or above.
623 asymmetric_quantize_inputs: bool;
626 // An implementation of TensorFlow dynamic_rnn with LSTMCell.
627 table UnidirectionalSequenceLSTMOptions {
628 fused_activation_function:ActivationFunctionType;
629 cell_clip: float; // Optional, 0.0 means no clipping
630 proj_clip: float; // Optional, 0.0 means no clipping
632 // If true then first dimension is sequence, otherwise batch.
635 // Parameter for Unidirectional Sequence LSTM version 4.
636 asymmetric_quantize_inputs:bool;
639 table BidirectionalSequenceLSTMOptions {
640 // Parameters supported by version 1:
641 fused_activation_function:ActivationFunctionType;
642 cell_clip: float; // Optional, 0.0 means no clipping
643 proj_clip: float; // Optional, 0.0 means no clipping
645 // If true, store the outputs of both directions into the first output.
648 // Parameters supported by version 2:
649 // If true then first dimension is sequence, otherwise batch.
650 // Version 1 implementations assumed time_major to be true, so this default
651 // value should never change.
652 time_major: bool = true;
654 // Parameters for version 3 or above.
655 asymmetric_quantize_inputs:bool;
658 table ResizeBilinearOptions {
659 new_height: int (deprecated);
660 new_width: int (deprecated);
662 half_pixel_centers: bool;
665 table ResizeNearestNeighborOptions {
669 // Options for a call operation.
671 // The subgraph index that needs to be called.
681 table ReshapeOptions {
685 table SpaceToBatchNDOptions {
688 table BatchToSpaceNDOptions {
691 table SkipGramOptions {
694 include_all_ngrams: bool;
697 table SpaceToDepthOptions {
701 table DepthToSpaceOptions {
706 fused_activation_function:ActivationFunctionType;
710 fused_activation_function:ActivationFunctionType;
713 table TopKV2Options {
716 enum CombinerType : byte {
722 table EmbeddingLookupSparseOptions {
723 combiner:CombinerType;
726 table GatherOptions {
730 table TransposeOptions {
739 table ReducerOptions {
743 table SqueezeOptions {
751 table SplitVOptions {
755 table StridedSliceOptions {
760 shrink_axis_mask: int;
763 table LogSoftmaxOptions {
767 in_data_type: TensorType;
768 out_data_type: TensorType;
771 table DequantizeOptions {
774 table MaximumMinimumOptions {
780 table ArgMaxOptions {
781 output_type : TensorType;
784 table ArgMinOptions {
785 output_type : TensorType;
788 table GreaterOptions {
791 table GreaterEqualOptions {
797 table LessEqualOptions {
803 table SelectOptions {
809 table TransposeConvOptions {
815 table ExpandDimsOptions {
818 table SparseToDenseOptions {
819 validate_indices:bool;
825 table NotEqualOptions {
829 // Optional output type of the operation (int32 or int64). Defaults to int32.
830 out_type : TensorType;
839 table FakeQuantOptions {
840 // Parameters supported by version 1:
845 // Parameters supported by version 2:
854 table LogicalOrOptions {
857 table OneHotOptions {
865 table HardSwishOptions {
868 table LogicalAndOptions {
871 table LogicalNotOptions {
874 table UnpackOptions {
879 table FloorDivOptions {
882 table SquareOptions {
885 table ZerosLikeOptions {
891 table FloorModOptions {
897 table LeakyReluOptions {
901 table SquaredDifferenceOptions {
904 enum MirrorPadMode : byte {
905 // Doesn't include borders.
911 table MirrorPadOptions {
915 table UniqueOptions {
916 idx_out_type:TensorType = INT32;
919 table ReverseV2Options {
925 table GatherNdOptions {
931 table ReverseSequenceOptions {
936 table MatrixDiagOptions {
939 table QuantizeOptions {
942 table MatrixSetDiagOptions {
946 then_subgraph_index:int;
947 else_subgraph_index:int;
951 cond_subgraph_index:int;
952 body_subgraph_index:int;
955 table NonMaxSuppressionV4Options {
958 table NonMaxSuppressionV5Options {
961 table ScatterNdOptions {
964 table SelectV2Options {
967 table DensifyOptions {
970 table SegmentSumOptions {
973 table BatchMatMulOptions {
978 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
979 // builtin, or a string if the operator is custom.
981 builtin_code:BuiltinOperator;
984 // The version of the operator. The version need to be bumped whenever new
985 // parameters are introduced into an op.
989 enum CustomOptionsFormat : byte {
993 // An operator takes tensors as inputs and outputs. The type of operation being
994 // performed is determined by an index into the list of valid OperatorCodes,
995 // while the specifics of each operations is configured using builtin_options
996 // or custom_options.
998 // Index into the operator_codes array. Using an integer here avoids
999 // complicated map lookups.
1002 // Optional input are indicated by -1.
1006 builtin_options:BuiltinOptions;
1007 custom_options:[ubyte];
1008 custom_options_format:CustomOptionsFormat;
1010 // A list of booleans indicating the input tensors which are being mutated by
1011 // this operator.(e.g. used by RNN and LSTM).
1012 // For example, if the "inputs" array refers to 5 tensors and the second and
1013 // fifth are mutable variables, then this list will contain
1014 // [false, true, false, false, true].
1016 // If the list is empty, no variable is mutated in this operator.
1017 // The list either has the same length as `inputs`, or is empty.
1018 mutating_variable_inputs:[bool];
1020 // A list of indices to the subgraph's "tensors" that are internal to an Op.
1021 // Internal tensors are those that do not flow in or out of the operation,
1022 // but instead are part of internal computation. As such, the operation's
1023 // implementation may manage its memory more efficiently. They are needed
1024 // however (i.e. not just an implementation detail) since they are part of the
1025 // computation, which may require relevant metadata such as quantization
1027 intermediates:[int];
1030 // The root type, defining a subgraph, which typically represents an entire
1033 // A list of all tensors used in this subgraph.
1036 // Indices of the tensors that are inputs into this subgraph. Note this is
1037 // the list of non-static tensors that feed into the subgraph for inference.
1040 // Indices of the tensors that are outputs out of this subgraph. Note this is
1041 // the list of output tensors that are considered the product of the
1042 // subgraph's inference.
1045 // All operators, in execution order.
1046 operators:[Operator];
1048 // Name of this subgraph (used for debugging).
1052 // Table of raw data buffers (used for constant tensors). Referenced by tensors
1053 // by index. The generous alignment accommodates mmap-friendly data structures.
1055 data:[ubyte] (force_align: 16);
1059 // A human readable string to uniquely identify a Metadata.
1061 // An index to the buffers table.
1066 // Version of the schema.
1069 // A list of all operator codes used in this model. This is
1070 // kept in order because operators carry an index into this
1072 operator_codes:[OperatorCode];
1074 // All the subgraphs of the model. The 0th is assumed to be the main
1076 subgraphs:[SubGraph];
1078 // A description of the model.
1081 // Buffers of the model.
1082 // Note the 0th entry of this array must be an empty buffer (sentinel).
1083 // This is a convention so that tensors without a buffer can provide 0 as
1087 // Metadata about the model. Indirects into the existing buffers list.
1088 // Deprecated, prefer to use metadata field.
1089 metadata_buffer:[int];
1091 // Metadata about the model.
1092 metadata:[Metadata];