// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Version 0: Initial version.
// Version 1: Add subgraphs to schema.
// Version 2: Rename operators to conform to NN API.
// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.

namespace tflite;

// This corresponds to the version.
file_identifier "TFL3";
// File extension of any written files.
file_extension "tflite";
// IMPORTANT: All new members of tables, enums and unions must be added at the
// end to ensure backwards compatibility.

// The type of data stored in a tensor.
enum TensorType : byte {
// Custom quantization parameters for experimenting with new quantization
// techniques.
table CustomQuantization {
  custom:[ubyte] (force_align: 16);
}

// Represents a specific quantization technique's parameters.
union QuantizationDetails {
  CustomQuantization
}
// Parameters for converting a quantized tensor back to float.
table QuantizationParameters {
  // These four parameters are the asymmetric linear quantization parameters.
  // Given a quantized value q, the corresponding float value f should be:
  //   f = scale * (q - zero_point)
  // For other quantization types, the QuantizationDetails below is used.
  min:[float];  // For importing back into tensorflow.
  max:[float];  // For importing back into tensorflow.
  scale:[float];  // For dequantizing the tensor's values.
  zero_point:[long];

  // If this is not none, the other quantization parameters (i.e. min, max,
  // scale, zero_point fields above) are ignored and the value of the
  // QuantizationDetails union should be used.
  details:QuantizationDetails;

  // Specifies the dimension of the Tensor's shape that the scales and
  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
  // with quantization params:
  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
  // will be quantized across the second dimension of t.
  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
  quantized_dimension:int;
}
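// As an illustration (not part of the schema), the per-axis formula above can
// be applied with a small NumPy sketch, where `q`, `scale`, `zero_point` and
// `quantized_dimension` are assumed to come from this table:
//
//   import numpy as np
//
//   def dequantize(q, scale, zero_point, quantized_dimension):
//       # Reshape the per-axis parameters so they broadcast along
//       # `quantized_dimension`; every other axis gets size 1.
//       shape = [1] * q.ndim
//       shape[quantized_dimension] = -1
//       scale = np.asarray(scale, np.float32).reshape(shape)
//       zero_point = np.asarray(zero_point, np.int64).reshape(shape)
//       return scale * (q.astype(np.int64) - zero_point)
//
//   # E.g. q of shape [4, 3, 2, 1] with scale=[1.0, 2.0, 3.0],
//   # zero_point=[1, 2, 3] and quantized_dimension=1, as above.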
// We use a modification of the TACO format.
// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
//
// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
// potentially with a k-dimensional block (0 <= k <= n) with dims
// (dn, ..., dn+k-1), the format needs to specify:
//   1. In what order to traverse these dimensions. For example, to store a 2-D
//      matrix in row major order, the traversal order would be (d0, d1),
//      whereas to store it in column major order, the traversal order would be
//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
//      could be (d0, d1, d2, d3).
//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
//      tensor dimension in (d0, ..., dn-1).
//   3. In the traversal order defined above, the format (dense vs. sparse) and
//      index metadata for each dimension. For a dense dimension, this is just
//      the size of that dimension. For a sparse dimension, it's the same as
//      the compressed index defined in the Compressed Sparse Row (CSR) format.
//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
// The storage type for a dimension. Currently we support:
//   1. DENSE: each coordinate in this dimension is stored implicitly.
//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
//      compression technique is the same as what CSR uses.
// More types like a sparse dimension with a different compression technique
// could be added to the list in the future.
enum DimensionType : byte {
  DENSE = 0,
  SPARSE_CSR = 1,
}

table Int32Vector {
  values:[int];
}

table Uint16Vector {
  values:[ushort] (force_align: 4);
}

table Uint8Vector {
  values:[ubyte] (force_align: 4);
}

// Variable-typed buffer to store the index metadata for a sparse dimension.
// The widest type is Int32 instead of UInt32 because a tensor's shape is an
// int32 vector. We don't want the per-dimensional index to overflow that
// range.
union SparseIndexVector {
  Int32Vector,
  Uint16Vector,
  Uint8Vector
}
table DimensionMetadata {
  // Whether a dimension is dense or sparse.
  format:DimensionType;
  // Index metadata used for a dimension.
  //   - If format is DimensionType.DENSE then we use the dense_size field to
  //     store the size of that dimension. Each index in that dimension is
  //     stored implicitly.
  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
  //     array_indices to encode that dimension. array_segments represents how
  //     to segment the indices array, each segment corresponds to one element
  //     in the previous dimension. array_indices represents the index of the
  //     non-zero elements within this dimension (as those in the CSR matrix
  //     format, where the first array is row pointers and the second array is
  //     column indices).
  dense_size:int;
  array_segments:SparseIndexVector;
  array_indices:SparseIndexVector;
}
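// As a concrete (purely illustrative) example, take the 4x4 matrix
//
//   [[1, 0, 0, 0],
//    [0, 0, 2, 3],
//    [0, 0, 0, 0],
//    [0, 4, 0, 0]]
//
// traversed as (d0, d1) with d0 DENSE (dense_size = 4) and d1 SPARSE_CSR.
// The metadata for d1 would then be:
//
//   array_segments = [0, 1, 3, 3, 4]  // row r owns indices[segments[r]:segments[r+1]]
//   array_indices  = [0, 2, 3, 1]     // the column of each non-zero element
//
// and the tensor's data buffer holds only the non-zero values [1, 2, 3, 4].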
// Parameters to encode a sparse TfLite tensor.
table SparsityParameters {
  // The traversal order of the dimensions defined in the `shape` field of the
  // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
  // ..., dn-1):
  //   - if not block sparse, the traversal_order is just a permutation of (d0,
  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
  //     have traversal_order = (d0, d1).
  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
  //     traversal_order has n + k elements. The first n elements are still a
  //     permutation of (d0, ..., dn-1). The last k elements are a permutation
  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
  //     would have traversal_order = (d0, d1, d2, d3).
  traversal_order:[int];
  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
  // tensor dimension in (d0, ..., dn-1).
  // It's stored in the order of (dn, ..., dn+k-1).
  // If not block-sparse, this field is NULL.
  block_map:[int];
  // In the traversal order defined above, the metadata needed for
  // each dimension to locate the non-zero values in the original dense tensor.
  // The size of the dim_metadata array = the size of the traversal_order array
  // = n + k.
  dim_metadata:[DimensionMetadata];
}
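// A minimal decoding sketch (illustrative; `densify_2d` is a hypothetical
// helper, not a TFLite API) for the common 2-D case where the row dimension
// is DENSE and the column dimension is SPARSE_CSR:
//
//   def densify_2d(shape, segments, indices, values):
//       rows, cols = shape
//       out = [[0.0] * cols for _ in range(rows)]
//       for r in range(rows):
//           for k in range(segments[r], segments[r + 1]):
//               out[r][indices[k]] = values[k]
//       return out
//
//   # densify_2d((4, 4), [0, 1, 3, 3, 4], [0, 2, 3, 1], [1, 2, 3, 4])
//   # reproduces the matrix from the DimensionMetadata example above.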
table Tensor {
  // The tensor shape. The meaning of each entry is operator-specific but
  // builtin ops use: [batch size, height, width, number of channels] (That's
  // TensorFlow's NHWC).
  shape:[int];
  type:TensorType;

  // An index that refers to the buffers table at the root of the model. Or,
  // if there is no data buffer associated (i.e. intermediate results), then
  // this is 0 (which refers to an always existent empty buffer).
  //
  // The data_buffer itself is an opaque container, with the assumption that the
  // target device is little-endian. In addition, all builtin operators assume
  // the memory is ordered such that if `shape` is [4, 3, 2], then index
  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
  buffer:uint;
  name:string;  // For debugging and importing back into tensorflow.
  quantization:QuantizationParameters;  // Optional.

  is_variable:bool = false;

  // Parameters to encode a sparse tensor. See the example in
  // tensorflow/lite/testdata/sparse_tensor.json.
  sparsity:SparsityParameters;  // Optional.

  // Encodes `shape` with unknown dimensions. Unknown dimensions are
  // represented with -1.
  shape_signature:[int];  // Optional.
}
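// The row-major mapping above generalizes to any rank. A tiny sketch
// (illustrative, not a TFLite API):
//
//   def flat_index(shape, idx):
//       # C-order flattening: for shape [4, 3, 2], idx [i, j, k]
//       # yields i*3*2 + j*2 + k.
//       flat = 0
//       for dim, i in zip(shape, idx):
//           flat = flat * dim + i
//       return flat
//
//   # flat_index([4, 3, 2], [1, 2, 1]) == 1*6 + 2*2 + 1 == 11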
// A list of builtin operators. Builtin operators are slightly faster than custom
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.

enum BuiltinOperator : byte {
  DEPTHWISE_CONV_2D = 4,
  EMBEDDING_LOOKUP = 7,
  HASHTABLE_LOOKUP = 10,
  L2_NORMALIZATION = 11,
  LOCAL_RESPONSE_NORMALIZATION = 13,
  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
  // since different model developers use RELU1 in different ways. Never
  // create another op called RELU1.
  RESIZE_BILINEAR = 23,
  // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
  CONCAT_EMBEDDINGS = 29,
  EMBEDDING_LOOKUP_SPARSE = 33,
  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
  BATCH_TO_SPACE_ND = 37,
  SPACE_TO_BATCH_ND = 38,
  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
  BIDIRECTIONAL_SEQUENCE_RNN = 46,
  // DELEGATE is a special op type for the operations which are delegated to
  // other backends.
  // WARNING: Experimental interface, subject to change
  DELEGATE = 51,
  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
  SPARSE_TO_DENSE = 68,
  RESIZE_NEAREST_NEIGHBOR = 97,
  SQUARED_DIFFERENCE = 99,
  REVERSE_SEQUENCE = 112,
  MATRIX_SET_DIAG = 115,
  NON_MAX_SUPPRESSION_V4 = 120,
  NON_MAX_SUPPRESSION_V5 = 121,
// Options for the builtin operators.
union BuiltinOptions {
  DepthwiseConv2DOptions,
  ConcatEmbeddingsOptions,
  LSHProjectionOptions,
  FullyConnectedOptions,
  ConcatenationOptions,
  LocalResponseNormalizationOptions,
  ResizeBilinearOptions,
  EmbeddingLookupSparseOptions,
  BatchToSpaceNDOptions,
  SpaceToBatchNDOptions,
  MaximumMinimumOptions,
  TransposeConvOptions,
  SparseToDenseOptions,
  BidirectionalSequenceLSTMOptions,
  BidirectionalSequenceRNNOptions,
  UnidirectionalSequenceLSTMOptions,
  ResizeNearestNeighborOptions,
  SquaredDifferenceOptions,
  ReverseSequenceOptions,
  MatrixSetDiagOptions,
  NonMaxSuppressionV4Options,
  NonMaxSuppressionV5Options,
enum Padding : byte { SAME, VALID }

enum ActivationFunctionType : byte {

table Conv2DOptions {
  fused_activation_function:ActivationFunctionType;
  dilation_w_factor:int = 1;
  dilation_h_factor:int = 1;
}

table Pool2DOptions {
  fused_activation_function:ActivationFunctionType;
}
table DepthwiseConv2DOptions {
  // Parameters for DepthwiseConv version 1 or above.
  // `depth_multiplier` is redundant. It's used by CPU kernels in
  // TensorFlow 2.0 or below, but ignored in versions above.
  // See comments in lite/c/builtin_op_data.h for more details.
  depth_multiplier:int;
  fused_activation_function:ActivationFunctionType;
  // Parameters for DepthwiseConv version 2 or above.
  dilation_w_factor:int = 1;
  dilation_h_factor:int = 1;
}

table ConcatEmbeddingsOptions {
  num_columns_per_channel:[int];
  embedding_dim_per_channel:[int];  // This could be inferred from parameters.
}
enum LSHProjectionType: byte {

table LSHProjectionOptions {
  type: LSHProjectionType;
}

table SVDFOptions {
  fused_activation_function:ActivationFunctionType;
}

// An implementation of TensorFlow RNNCell.
table RNNOptions {
  fused_activation_function:ActivationFunctionType;
}

// An implementation of TensorFlow dynamic_rnn with RNNCell.
table SequenceRNNOptions {
  fused_activation_function:ActivationFunctionType;
}

// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
table BidirectionalSequenceRNNOptions {
  fused_activation_function:ActivationFunctionType;

enum FullyConnectedOptionsWeightsFormat: byte {
  DEFAULT = 0,
  SHUFFLED4x16INT8 = 1,
}
// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
table FullyConnectedOptions {
  // Parameters for FullyConnected version 1 or above.
  fused_activation_function:ActivationFunctionType;

  // Parameters for FullyConnected version 2 or above.
  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;

  // Parameters for FullyConnected version 5 or above.
  // If set to true, then the number of dimensions is preserved. Furthermore,
  // all but the last dimension of the input and output shapes will be equal.
  keep_num_dims: bool;
}

table SoftmaxOptions {

// An implementation of TensorFlow concat.
table ConcatenationOptions {
  fused_activation_function:ActivationFunctionType;
}

table AddOptions {
  fused_activation_function:ActivationFunctionType;
}

table MulOptions {
  fused_activation_function:ActivationFunctionType;
}
table L2NormOptions {
  fused_activation_function:ActivationFunctionType;
}

table LocalResponseNormalizationOptions {

enum LSTMKernelType : byte {
  // Full LSTM kernel which supports peephole and projection.
  FULL = 0,
  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
  BASIC = 1,
}

// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
table LSTMOptions {
  // Parameters for LSTM version 1 or above.
  fused_activation_function:ActivationFunctionType;
  cell_clip: float;  // Optional, 0.0 means no clipping
  proj_clip: float;  // Optional, 0.0 means no clipping

  // Parameters for LSTM version 2 or above.
  // Basic kernel is only supported in version 2 or above.
  kernel_type: LSTMKernelType = FULL;
}
// An implementation of TensorFlow dynamic_rnn with LSTMCell.
table UnidirectionalSequenceLSTMOptions {
  fused_activation_function:ActivationFunctionType;
  cell_clip: float;  // Optional, 0.0 means no clipping
  proj_clip: float;  // Optional, 0.0 means no clipping

  // If true then first dimension is sequence, otherwise batch.
  time_major:bool;
}

table BidirectionalSequenceLSTMOptions {
  // Parameters supported by version 1:
  fused_activation_function:ActivationFunctionType;
  cell_clip: float;  // Optional, 0.0 means no clipping
  proj_clip: float;  // Optional, 0.0 means no clipping

  // If true, store the outputs of both directions into the first output.
  merge_outputs: bool;

  // Parameters supported by version 2:
  // If true then first dimension is sequence, otherwise batch.
  // Version 1 implementations assumed time_major to be true, so this default
  // value should never change.
  time_major: bool = true;
}
table ResizeBilinearOptions {
  new_height: int (deprecated);
  new_width: int (deprecated);
  half_pixel_centers: bool;
}

table ResizeNearestNeighborOptions {

// Options for a call operation.
table CallOptions {
  // The subgraph index that needs to be called.
  subgraph:uint;
}

table ReshapeOptions {

table SpaceToBatchNDOptions {
}

table BatchToSpaceNDOptions {
}

table SkipGramOptions {
  include_all_ngrams: bool;
}

table SpaceToDepthOptions {

table DepthToSpaceOptions {

table SubOptions {
  fused_activation_function:ActivationFunctionType;
}

table DivOptions {
  fused_activation_function:ActivationFunctionType;
}

table TopKV2Options {
}
enum CombinerType : byte {

table EmbeddingLookupSparseOptions {
  combiner:CombinerType;
}

table GatherOptions {

table TransposeOptions {
}

table ReducerOptions {

table SqueezeOptions {

table SplitVOptions {

table StridedSliceOptions {
  shrink_axis_mask: int;
}
table LogSoftmaxOptions {
}

table CastOptions {
  in_data_type: TensorType;
  out_data_type: TensorType;
}

table DequantizeOptions {
}

table MaximumMinimumOptions {
}

table ArgMaxOptions {
  output_type : TensorType;
}

table ArgMinOptions {
  output_type : TensorType;
}

table GreaterOptions {
}

table GreaterEqualOptions {
}

table LessEqualOptions {
}

table SelectOptions {
}

table TransposeConvOptions {

table ExpandDimsOptions {
}

table SparseToDenseOptions {
  validate_indices:bool;
}

table NotEqualOptions {
}
table ShapeOptions {
  // Optional output type of the operation (int32 or int64). Defaults to int32.
  out_type : TensorType;
}
table FakeQuantOptions {
  // Parameters supported by version 1:
  min:float;
  max:float;
  num_bits:int;

  // Parameters supported by version 2:
  narrow_range:bool;
}
table LogicalOrOptions {
}

table OneHotOptions {

table HardSwishOptions {
}

table LogicalAndOptions {
}

table LogicalNotOptions {
}

table UnpackOptions {

table FloorDivOptions {
}

table SquareOptions {
}

table ZerosLikeOptions {
}

table FloorModOptions {
}

table LeakyReluOptions {

table SquaredDifferenceOptions {
}
enum MirrorPadMode : byte {
  // Doesn't include borders.
  REFLECT = 0,
  // Includes borders.
  SYMMETRIC = 1,
}

table MirrorPadOptions {
table UniqueOptions {
  idx_out_type:TensorType = INT32;
}

table ReverseV2Options {
}

table GatherNdOptions {
}

table ReverseSequenceOptions {

table MatrixDiagOptions {
}

table QuantizeOptions {
}

table MatrixSetDiagOptions {
}

table IfOptions {
  then_subgraph_index:int;
  else_subgraph_index:int;
}

table WhileOptions {
  cond_subgraph_index:int;
  body_subgraph_index:int;
}

table NonMaxSuppressionV4Options {
}

table NonMaxSuppressionV5Options {
}

table ScatterNdOptions {
}

table SelectV2Options {
}

table DensifyOptions {
}

table SegmentSumOptions {
}
// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
// builtin, or a string if the operator is custom.
table OperatorCode {
  builtin_code:BuiltinOperator;
  custom_code:string;

  // The version of the operator. The version needs to be bumped whenever new
  // parameters are introduced into an op.
  version:int = 1;
}

enum CustomOptionsFormat : byte {
// An operator takes tensors as inputs and outputs. The type of operation being
// performed is determined by an index into the list of valid OperatorCodes,
// while the specifics of each operation are configured using builtin_options
// or custom_options.
table Operator {
  // Index into the operator_codes array. Using an integer here avoids
  // complicated map lookups.
  opcode_index:uint;

  // Optional input and output tensors are indicated by -1.
  inputs:[int];
  outputs:[int];

  builtin_options:BuiltinOptions;
  custom_options:[ubyte];
  custom_options_format:CustomOptionsFormat;

  // A list of booleans indicating the input tensors which are being mutated by
  // this operator (e.g. used by RNN and LSTM).
  // For example, if the "inputs" array refers to 5 tensors and the second and
  // fifth are mutable variables, then this list will contain
  // [false, true, false, false, true].
  //
  // If the list is empty, no variable is mutated in this operator.
  // The list either has the same length as `inputs`, or is empty.
  mutating_variable_inputs:[bool];
  // A list of indices to the subgraph's "tensors" that are internal to an Op.
  // Internal tensors are those that do not flow in or out of the operation,
  // but instead are part of internal computation. As such, the operation's
  // implementation may manage its memory more efficiently. They are needed
  // however (i.e. not just an implementation detail) since they are part of the
  // computation, which may require relevant metadata such as quantization
  // parameters.
  intermediates:[int];
}
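// How a reader is expected to tie this together (an illustrative sketch, not
// the TFLite interpreter; `model` and `op` stand for deserialized Model and
// Operator objects, and BuiltinOperator.CUSTOM / OperatorCode.custom_code are
// the schema's mechanism for custom ops):
//
//   def resolve_op(model, op):
//       code = model.operator_codes[op.opcode_index]
//       if code.builtin_code == BuiltinOperator.CUSTOM:
//           return code.custom_code  # look up a registered custom op by name
//       return code.builtin_code     # one of the BuiltinOperator values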
// The root type, defining a subgraph, which typically represents an entire
// model.
table SubGraph {
  // A list of all tensors used in this subgraph.
  tensors:[Tensor];

  // Indices of the tensors that are inputs into this subgraph. Note this is
  // the list of non-static tensors that feed into the subgraph for inference.
  inputs:[int];

  // Indices of the tensors that are outputs out of this subgraph. Note this is
  // the list of output tensors that are considered the product of the
  // subgraph's inference.
  outputs:[int];

  // All operators, in execution order.
  operators:[Operator];

  // Name of this subgraph (used for debugging).
  name:string;
}
// Table of raw data buffers (used for constant tensors). Referenced by tensors
// by index. The generous alignment accommodates mmap-friendly data structures.
table Buffer {
  data:[ubyte] (force_align: 16);
}
table Metadata {
  // A human readable string to uniquely identify a Metadata.
  name:string;
  // An index to the buffers table.
  buffer:uint;
}
table Model {
  // Version of the schema.
  version:uint;

  // A list of all operator codes used in this model. This is
  // kept in order because operators carry an index into this
  // vector.
  operator_codes:[OperatorCode];

  // All the subgraphs of the model. The 0th is assumed to be the main
  // model.
  subgraphs:[SubGraph];

  // A description of the model.
  description:string;

  // Buffers of the model.
  // Note the 0th entry of this array must be an empty buffer (sentinel).
  // This is a convention so that tensors without a buffer can provide 0 as
  // their buffer.
  buffers:[Buffer];

  // Metadata about the model. Indirects into the existing buffers list.
  // Deprecated, prefer to use metadata field.
  metadata_buffer:[int];

  // Metadata about the model.
  metadata:[Metadata];
}

root_type Model;
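// A quick sanity check (illustrative Python; the file name is hypothetical):
// FlatBuffers places the 4-byte file_identifier at byte offset 4 of the
// serialized buffer, so a file written against this schema carries "TFL3"
// there.
//
//   with open("model.tflite", "rb") as f:
//       header = f.read(8)
//   assert header[4:8] == b"TFL3"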