1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
16 // Version 0: Initial version.
17 // Version 1: Add subgraphs to schema.
18 // Version 2: Rename operators to conform to NN API.
19 // Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
23 // This corresponds to the version.
24 file_identifier "TFL3";
25 // File extension of any written files.
26 file_extension "tflite";
28 // IMPORTANT: All new members of tables, enums and unions must be added at the
29 // end to ensure backwards compatibility.
31 // The type of data stored in a tensor.
32 enum TensorType : byte {
46 // Custom quantization parameters for experimenting with new quantization
48 table CustomQuantization {
49 custom:[ubyte] (force_align: 16);
52 // Represents a specific quantization technique's parameters.
53 union QuantizationDetails {
57 // Parameters for converting a quantized tensor back to float.
58 table QuantizationParameters {
59 // These four parameters are the asymmetric linear quantization parameters.
60 // Given a quantized value q, the corresponding float value f should be:
61 // f = scale * (q - zero_point)
62 // For other quantization types, the QuantizationDetails below is used.
63 min:[float]; // For importing back into tensorflow.
64 max:[float]; // For importing back into tensorflow.
65 scale:[float]; // For dequantizing the tensor's values.
68 // If this is not none, the other quantization parameters (i.e. min, max,
69 // scale, zero_point fields above) are ignored and the value of the
70 // QuantizationDetails union should be used.
71 details:QuantizationDetails;
73 // Specifies the dimension of the Tensor's shape that the scales and
74 // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
75 // with quantization params:
76 // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
77 // will be quantized across the second dimension of t.
78 // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
79 // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
80 // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
81 quantized_dimension:int;
85 // We use a modification of the TACO format.
86 // Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
88 // To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
89 // potentially with a k-dimensional block (0 <= k <= n) with dims
90 // (dn, ..., dn+k-1), the format needs to specify:
91 // 1. In what order to traverse these dimensions. For example, to store a 2-D
92 // matrix in row major order, the traversal order would be (d0, d1),
93 // whereas to store it in column major order, the traversal order would be
94 // (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
95 // could be (d0, d1, d2, d3).
96 // 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
97 // tensor dimension in (d0, ..., dn-1).
98 // 3. In the traversal order defined above, the format (dense vs. sparse) and
99 // index metadata for each dimension. For a dense dimension, this is just
100 // the size of that dimension. For a sparse dimension, it's the same as
101 // the compressed index defined in the Compressed Sparse Row (CSR) format.
102 // (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
104 // The storage type for a dimension. Currently we support:
105 // 1. DENSE: each coordinate in this dimension is stored implicitly.
106 // 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
107 // compression technique is the same as what CSR uses.
108 // More types like a sparse dimension with a different compression technique
109 // could be added to the list in the future.
110 enum DimensionType : byte {
120 values:[ushort] (force_align: 4);
124 values:[ubyte] (force_align: 4);
127 // Variable-typed buffer to store the index metadata for a sparse dimension.
128 // The widest type is Int32 instead of UInt32 because tensor's shape is an int32
129 // vector. We don't want the per-dimensional index to overflow that range.
130 union SparseIndexVector {
136 table DimensionMetadata {
137 // Whether a dimension is dense or sparse.
138 format:DimensionType;
139 // Index metadata used for a dimension.
140 // - If format is DimensionType.DENSE then we use the dense_size field to
141 // store the size of that dimension. Each index in that dimension is
142 // stored implicitly.
143 // - If format is DimensionType.SPARSE_CSR then we use array_segments and
144 // array_indices to encode that dimension. array_segments represents how
145 // to segment the indices array, each segment corresponds to one element
146 // in the previous dimension. array_indices represents the index of the
147 // non-zero elements within this dimension (as those in the CSR matrix
148 // format, where the first array is row pointers and the second array is
151 array_segments:SparseIndexVector;
152 array_indices:SparseIndexVector;
155 // Parameters to encode a sparse TfLite tensor.
156 table SparsityParameters {
157 // The traversal order of the dimensions defined in the `shape` field of the
157 // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
160 // - if not block sparse, the traversal_order is just a permutation of (d0,
161 // ..., dn-1). For example, a 2-D matrix stored in row-major order would
162 // have traversal_order = (d0, d1).
163 // - if block sparse with a k-dimensional block (0 <= k <= n), the
164 // traversal_order has n + k elements. The first n elements are still a
165 // permutation of (d0, ..., dn-1). The last k elements are a permutation
166 // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
167 // example, a 2-D matrix with 2-D blocks, both stored in row-major order
168 // would have traversal_order = (d0, d1, d2, d3).
169 traversal_order:[int];
170 // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
171 // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
172 // tensor dimension in (d0, ..., dn).
173 // It's stored in the order of (dn, ..., dn+k-1).
174 // If not block-sparse, this field is NULL.
176 // In the traversal order defined above, the metadata needed for
177 // each dimension to locate the non-zero values in the original dense tensor.
178 // The size of the dim_metadata array = the size of the traversal_order array
180 dim_metadata:[DimensionMetadata];
184 // The tensor shape. The meaning of each entry is operator-specific but
185 // builtin ops use: [batch size, height, width, number of channels] (That's
186 // Tensorflow's NHWC).
189 // An index that refers to the buffers table at the root of the model. Or,
190 // if there is no data buffer associated (i.e. intermediate results), then
191 // this is 0 (which refers to an always existent empty buffer).
193 // The data_buffer itself is an opaque container, with the assumption that the
194 // target device is little-endian. In addition, all builtin operators assume
195 // the memory is ordered such that if `shape` is [4, 3, 2], then index
196 // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
198 name:string; // For debugging and importing back into tensorflow.
199 quantization:QuantizationParameters; // Optional.
201 is_variable:bool = false;
203 // Parameters to encode a sparse tensor. See the example in
204 // tensorflow/lite/testdata/sparse_tensor.json.
205 sparsity:SparsityParameters; // Optional.
207 // Encodes `shape` with unknown dimensions. Unknown dimensions are
208 // represented with -1.
209 shape_signature:[int]; // Optional.
212 // A list of builtin operators. Builtin operators are slightly faster than custom
213 // ones, but not by much. Moreover, while custom operators accept an opaque
214 // object containing configuration parameters, builtins have a predetermined
215 // set of acceptable options.
217 enum BuiltinOperator : byte {
222 DEPTHWISE_CONV_2D = 4,
225 EMBEDDING_LOOKUP = 7,
228 HASHTABLE_LOOKUP = 10,
229 L2_NORMALIZATION = 11,
231 LOCAL_RESPONSE_NORMALIZATION = 13,
238 // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
239 // since different model developers use RELU1 in different ways. Never
240 // create another op called RELU1.
244 RESIZE_BILINEAR = 23,
250 // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
251 CONCAT_EMBEDDINGS = 29,
255 EMBEDDING_LOOKUP_SPARSE = 33,
257 UNIDIRECTIONAL_SEQUENCE_RNN = 35,
259 BATCH_TO_SPACE_ND = 37,
260 SPACE_TO_BATCH_ND = 38,
266 UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
268 BIDIRECTIONAL_SEQUENCE_RNN = 46,
273 // DELEGATE is a special op type for the operations which are delegated to
275 // WARNING: Experimental interface, subject to change
277 BIDIRECTIONAL_SEQUENCE_LSTM = 52,
293 SPARSE_TO_DENSE = 68,
322 RESIZE_NEAREST_NEIGHBOR = 97,
324 SQUARED_DIFFERENCE = 99,
337 REVERSE_SEQUENCE = 112,
340 MATRIX_SET_DIAG = 115,
345 NON_MAX_SUPPRESSION_V4 = 120,
346 NON_MAX_SUPPRESSION_V5 = 121,
355 // Options for the builtin operators.
356 union BuiltinOptions {
358 DepthwiseConv2DOptions,
359 ConcatEmbeddingsOptions,
360 LSHProjectionOptions,
364 FullyConnectedOptions,
366 ConcatenationOptions,
369 LocalResponseNormalizationOptions,
371 ResizeBilinearOptions,
376 EmbeddingLookupSparseOptions,
380 BatchToSpaceNDOptions,
381 SpaceToBatchNDOptions,
395 MaximumMinimumOptions,
405 TransposeConvOptions,
406 SparseToDenseOptions,
425 BidirectionalSequenceLSTMOptions,
426 BidirectionalSequenceRNNOptions,
427 UnidirectionalSequenceLSTMOptions,
430 ResizeNearestNeighborOptions,
432 SquaredDifferenceOptions,
443 ReverseSequenceOptions,
446 MatrixSetDiagOptions,
451 NonMaxSuppressionV4Options,
452 NonMaxSuppressionV5Options,
460 enum Padding : byte { SAME, VALID }
462 enum ActivationFunctionType : byte {
471 table Conv2DOptions {
475 fused_activation_function:ActivationFunctionType;
476 dilation_w_factor:int = 1;
477 dilation_h_factor:int = 1;
480 table Pool2DOptions {
486 fused_activation_function:ActivationFunctionType;
489 table DepthwiseConv2DOptions {
490 // Parameters for DepthwiseConv version 1 or above.
494 // `depth_multiplier` is redundant. It's used by CPU kernels in
495 // TensorFlow 2.0 or below, but ignored in versions above.
496 // See comments in lite/c/builtin_op_data.h for more details.
497 depth_multiplier:int;
498 fused_activation_function:ActivationFunctionType;
499 // Parameters for DepthwiseConv version 2 or above.
500 dilation_w_factor:int = 1;
501 dilation_h_factor:int = 1;
504 table ConcatEmbeddingsOptions {
506 num_columns_per_channel:[int];
507 embedding_dim_per_channel:[int]; // This could be inferred from parameters.
510 enum LSHProjectionType: byte {
516 table LSHProjectionOptions {
517 type: LSHProjectionType;
522 fused_activation_function:ActivationFunctionType;
523 // For weights-only quantization, use asymmetric quantization for non
524 // constant inputs at evaluation time.
525 asymmetric_quantize_inputs:bool;
528 // An implementation of TensorFlow RNNCell.
530 fused_activation_function:ActivationFunctionType;
531 asymmetric_quantize_inputs:bool;
534 // An implementation of TensorFlow dynamic_rnn with RNNCell.
535 table SequenceRNNOptions {
537 fused_activation_function:ActivationFunctionType;
538 asymmetric_quantize_inputs:bool;
541 // An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
542 table BidirectionalSequenceRNNOptions {
544 fused_activation_function:ActivationFunctionType;
546 asymmetric_quantize_inputs:bool;
549 enum FullyConnectedOptionsWeightsFormat: byte {
551 SHUFFLED4x16INT8 = 1,
554 // An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
555 table FullyConnectedOptions {
556 // Parameters for FullyConnected version 1 or above.
557 fused_activation_function:ActivationFunctionType;
559 // Parameters for FullyConnected version 2 or above.
560 weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
562 // Parameters for FullyConnected version 5 or above.
563 // If set to true, then the number of dimension is preserved. Furthermore,
564 // all but the last dimension of the input and output shapes will be equal.
567 // Parameters for FullyConnected version 7 or above.
568 // If set to true, then weights-only op will use asymmetric quantization for
570 asymmetric_quantize_inputs: bool;
573 table SoftmaxOptions {
577 // An implementation of TensorFlow concat.
578 table ConcatenationOptions {
580 fused_activation_function:ActivationFunctionType;
584 fused_activation_function:ActivationFunctionType;
588 fused_activation_function:ActivationFunctionType;
591 table L2NormOptions {
592 fused_activation_function:ActivationFunctionType;
595 table LocalResponseNormalizationOptions {
602 enum LSTMKernelType : byte {
603 // Full LSTM kernel which supports peephole and projection.
605 // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
609 // An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
611 // Parameters for LSTM version 1 or above.
612 fused_activation_function:ActivationFunctionType;
613 cell_clip: float; // Optional, 0.0 means no clipping
614 proj_clip: float; // Optional, 0.0 means no clipping
616 // Parameters for LSTM version 2 or above.
617 // Basic kernel is only supported in version 2 or above.
618 kernel_type: LSTMKernelType = FULL;
620 // Parameters for LSTM version 4 or above.
621 asymmetric_quantize_inputs: bool;
624 // An implementation of TensorFlow dynamic_rnn with LSTMCell.
625 table UnidirectionalSequenceLSTMOptions {
626 fused_activation_function:ActivationFunctionType;
627 cell_clip: float; // Optional, 0.0 means no clipping
628 proj_clip: float; // Optional, 0.0 means no clipping
630 // If true then first dimension is sequence, otherwise batch.
633 // Parameter for Unidirectional Sequence LSTM version 4.
634 asymmetric_quantize_inputs:bool;
637 table BidirectionalSequenceLSTMOptions {
638 // Parameters supported by version 1:
639 fused_activation_function:ActivationFunctionType;
640 cell_clip: float; // Optional, 0.0 means no clipping
641 proj_clip: float; // Optional, 0.0 means no clipping
643 // If true, store the outputs of both directions into the first output.
646 // Parameters supported by version 2:
647 // If true then first dimension is sequence, otherwise batch.
648 // Version 1 implementations assumed time_major to be true, so this default
649 // value should never change.
650 time_major: bool = true;
652 // Parameters for version 3 or above.
653 asymmetric_quantize_inputs:bool;
656 table ResizeBilinearOptions {
657 new_height: int (deprecated);
658 new_width: int (deprecated);
660 half_pixel_centers: bool;
663 table ResizeNearestNeighborOptions {
665 half_pixel_centers: bool;
668 // A call operation options
670 // The subgraph index that needs to be called.
680 table ReshapeOptions {
684 table SpaceToBatchNDOptions {
687 table BatchToSpaceNDOptions {
690 table SkipGramOptions {
693 include_all_ngrams: bool;
696 table SpaceToDepthOptions {
700 table DepthToSpaceOptions {
705 fused_activation_function:ActivationFunctionType;
709 fused_activation_function:ActivationFunctionType;
712 table TopKV2Options {
715 enum CombinerType : byte {
721 table EmbeddingLookupSparseOptions {
722 combiner:CombinerType;
725 table GatherOptions {
729 table TransposeOptions {
738 table ReducerOptions {
742 table SqueezeOptions {
750 table SplitVOptions {
754 table StridedSliceOptions {
759 shrink_axis_mask: int;
762 table LogSoftmaxOptions {
766 in_data_type: TensorType;
767 out_data_type: TensorType;
770 table DequantizeOptions {
773 table MaximumMinimumOptions {
779 table ArgMaxOptions {
780 output_type : TensorType;
783 table ArgMinOptions {
784 output_type : TensorType;
787 table GreaterOptions {
790 table GreaterEqualOptions {
796 table LessEqualOptions {
802 table SelectOptions {
808 table TransposeConvOptions {
814 table ExpandDimsOptions {
817 table SparseToDenseOptions {
818 validate_indices:bool;
824 table NotEqualOptions {
828 // Optional output type of the operation (int32 or int64). Defaults to int32.
829 out_type : TensorType;
838 table FakeQuantOptions {
839 // Parameters supported by version 1:
844 // Parameters supported by version 2:
853 table LogicalOrOptions {
856 table OneHotOptions {
864 table HardSwishOptions {
867 table LogicalAndOptions {
870 table LogicalNotOptions {
873 table UnpackOptions {
878 table FloorDivOptions {
881 table SquareOptions {
884 table ZerosLikeOptions {
890 table FloorModOptions {
896 table LeakyReluOptions {
900 table SquaredDifferenceOptions {
903 enum MirrorPadMode : byte {
904 // Doesn't include borders.
910 table MirrorPadOptions {
914 table UniqueOptions {
915 idx_out_type:TensorType = INT32;
918 table ReverseV2Options {
924 table GatherNdOptions {
930 table ReverseSequenceOptions {
935 table MatrixDiagOptions {
938 table QuantizeOptions {
941 table MatrixSetDiagOptions {
945 then_subgraph_index:int;
946 else_subgraph_index:int;
950 cond_subgraph_index:int;
951 body_subgraph_index:int;
954 table NonMaxSuppressionV4Options {
957 table NonMaxSuppressionV5Options {
960 table ScatterNdOptions {
963 table SelectV2Options {
966 table DensifyOptions {
969 table SegmentSumOptions {
972 table BatchMatMulOptions {
977 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
978 // builtin, or a string if the operator is custom.
980 builtin_code:BuiltinOperator;
983 // The version of the operator. The version needs to be bumped whenever new
984 // parameters are introduced into an op.
988 enum CustomOptionsFormat : byte {
992 // An operator takes tensors as inputs and outputs. The type of operation being
993 // performed is determined by an index into the list of valid OperatorCodes,
994 // while the specifics of each operation are configured using builtin_options
995 // or custom_options.
997 // Index into the operator_codes array. Using an integer here avoids
998 // complicated map lookups.
1001 // Optional input are indicated by -1.
1005 builtin_options:BuiltinOptions;
1006 custom_options:[ubyte];
1007 custom_options_format:CustomOptionsFormat;
1009 // A list of booleans indicating the input tensors which are being mutated by
1010 // this operator.(e.g. used by RNN and LSTM).
1011 // For example, if the "inputs" array refers to 5 tensors and the second and
1012 // fifth are mutable variables, then this list will contain
1013 // [false, true, false, false, true].
1015 // If the list is empty, no variable is mutated in this operator.
1016 // The list either has the same length as `inputs`, or is empty.
1017 mutating_variable_inputs:[bool];
1019 // A list of indices to the subgraph's "tensors" that are internal to an Op.
1020 // Internal tensors are those that do not flow in or out of the operation,
1021 // but instead are part of internal computation. As such, the operation's
1022 // implementation may manage its memory more efficiently. They are needed
1023 // however (i.e. not just an implementation detail) since they are part of the
1024 // computation, which may require relevant metadata such as quantization
1026 intermediates:[int];
1029 // The root type, defining a subgraph, which typically represents an entire
1032 // A list of all tensors used in this subgraph.
1035 // Indices of the tensors that are inputs into this subgraph. Note this is
1036 // the list of non-static tensors that feed into the subgraph for inference.
1039 // Indices of the tensors that are outputs out of this subgraph. Note this is
1040 // the list of output tensors that are considered the product of the
1041 // subgraph's inference.
1044 // All operators, in execution order.
1045 operators:[Operator];
1047 // Name of this subgraph (used for debugging).
1051 // Table of raw data buffers (used for constant tensors). Referenced by tensors
1052 // by index. The generous alignment accommodates mmap-friendly data structures.
1054 data:[ubyte] (force_align: 16);
1058 // A human readable string to uniquely identify a Metadata.
1060 // An index to the buffers table.
1065 // Version of the schema.
1068 // A list of all operator codes used in this model. This is
1069 // kept in order because operators carry an index into this
1071 operator_codes:[OperatorCode];
1073 // All the subgraphs of the model. The 0th is assumed to be the main
1075 subgraphs:[SubGraph];
1077 // A description of the model.
1080 // Buffers of the model.
1081 // Note the 0th entry of this array must be an empty buffer (sentinel).
1082 // This is a convention so that tensors without a buffer can provide 0 as
1086 // Metadata about the model. Indirects into the existing buffers list.
1087 // Deprecated, prefer to use metadata field.
1088 metadata_buffer:[int];
1090 // Metadata about the model.
1091 metadata:[Metadata];