model-optimizer/mo/front/caffe/proto/mo_caffe.proto

   1 syntax = "proto2";
   2
   3 package mo_caffe;
   4
   5 // Specifies the shape (dimensions) of a Blob.
   6 message BlobShape {
   7   repeated int64 dim = 1 [packed = true];  // one int64 per dimension, packed on the wire
   8 }
   9
  10 message BlobProto {  // A blob: optional shape plus float/double data and diff arrays.
  11   optional BlobShape shape = 7;  // preferred over the deprecated 4D fields below
  12   repeated float data = 5 [packed = true];
  13   repeated float diff = 6 [packed = true];
  14   repeated double double_data = 8 [packed = true];  // double-typed counterpart of data
  15   repeated double double_diff = 9 [packed = true];  // double-typed counterpart of diff
  16
  17   // 4D dimensions -- deprecated.  Use "shape" instead.
  18   optional int32 num = 1 [default = 0];
  19   optional int32 channels = 2 [default = 0];
  20   optional int32 height = 3 [default = 0];
  21   optional int32 width = 4 [default = 0];
  22 }
  23
  24 // The BlobProtoVector is simply a way to pass multiple BlobProto instances
  25 // around.
  26 message BlobProtoVector {
  27   repeated BlobProto blobs = 1;
  28 }
  29
  30 message CosineSimilarityBatchParameter {  // type of LayerParameter.cosine_similarity_batch_param
  31   optional double pos_label = 1 [default = 1];
  32   optional double neg_label = 2 [default = -1];
  33 }
  34
  35 message Datum {  // One data item: image dimensions, raw bytes and/or float data, and a label.
  36   optional int32 channels = 1;
  37   optional int32 height = 2;
  38   optional int32 width = 3;
  39   // the actual image data, in bytes
  40   optional bytes data = 4;
  41   optional int32 label = 5;
  42   // Optionally, the datum could also hold float data.
  43   repeated float float_data = 6;
  44   // If true, data contains an encoded image that needs to be decoded
  45   optional bool encoded = 7 [default = false];
  46 }
  47
  48 // The label (display) name and label id.
  49 message LabelMapItem {
  50   // Both name and label are described as required, but are declared optional here.
  51   optional string name = 1;
  52   optional int32 label = 2;
  53   // display_name is optional.
  54   optional string display_name = 3;
  55 }
  56
  57 message LabelMap {  // A list of LabelMapItem entries.
  58   repeated LabelMapItem item = 1;
  59 }
  60
  61 // A bounding box whose coordinates are normalized to [0, 1] w.r.t. the input image size.
  62 message NormalizedBBox {
  63   optional float xmin = 1;  // box extent: (xmin, ymin) to (xmax, ymax)
  64   optional float ymin = 2;
  65   optional float xmax = 3;
  66   optional float ymax = 4;
  67   optional int32 label = 5;
  68   optional bool difficult = 6;
  69   optional float score = 7;
  70   optional float size = 8;
  71 }
  72
  73 message FillerParameter {  // Selects a filler by type name and carries its per-type settings.
  74   // The filler type.
  75   optional string type = 1 [default = 'constant'];
  76   optional float value = 2 [default = 0]; // the value in constant filler
  77   optional float min = 3 [default = 0]; // the min value in uniform filler
  78   optional float max = 4 [default = 1]; // the max value in uniform filler
  79   optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  80   optional float std = 6 [default = 1]; // the std value in Gaussian filler
  81   // The expected number of non-zero output weights for a given input in
  82   // Gaussian filler -- the default -1 means don't perform sparsification.
  83   optional int32 sparse = 7 [default = -1];
  84   // Normalize the filler variance by fan_in, fan_out, or their average.
  85   // Applies to 'xavier' and 'msra' fillers.
  86   enum VarianceNorm {
  87     FAN_IN = 0;
  88     FAN_OUT = 1;
  89     AVERAGE = 2;
  90   }
  91   optional VarianceNorm variance_norm = 8 [default = FAN_IN];
  92
  93   // added by Kaichun Mo
  94   optional string file = 9;  // NOTE(review): purpose is not documented in this file
  95   repeated float diag_val = 10;  // NOTE(review): purpose is not documented in this file
  96 }
  97
  98 message NetParameter {  // A network: name, (deprecated) input declarations, state and layers.
  99   optional string name = 1; // consider giving the network a name
 100   // DEPRECATED. See InputParameter. The input blobs to the network.
 101   repeated string input = 3;
 102   // DEPRECATED. See InputParameter. The shape of the input blobs.
 103   repeated BlobShape input_shape = 8;
 104
 105   // 4D input dimensions -- deprecated.  Use "input_shape" instead.
 106   // If specified, for each input blob there should be four
 107   // values specifying the num, channels, height and width of the input blob.
 108   // Thus, there should be a total of (4 * #input) numbers.
 109   repeated int32 input_dim = 4;
 110
 111   // Whether the network will force every layer to carry out backward operation.
 112   // If set False, then whether to carry out backward is determined
 113   // automatically according to the net structure and learning rates.
 114   optional bool force_backward = 5 [default = false];
 115   // The current "state" of the network, including the phase, level, and stage.
 116   // Some layers may be included/excluded depending on this state and the states
 117   // specified in the layers' include and exclude fields.
 118   optional NetState state = 6;
 119
 120   // Print debugging information about results while running Net::Forward,
 121   // Net::Backward, and Net::Update.
 122   optional bool debug_info = 7 [default = false];
 123   // NOTE(review): the three profile_* options below are not documented in this file.
 124   optional bool profile_info = 9 [default = false];
 125   optional int32 profile_iter = 10 [default = 50];
 126   optional int32 profile_warmup = 11 [default = 10];
 127
 128   // The layers that make up the net.  Each of their configurations, including
 129   // connectivity and behavior, is specified as a LayerParameter.
 130   repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.
 131
 132   // DEPRECATED: use 'layer' instead.
 133   repeated V1LayerParameter layers = 2;
 134 }
 135
 136 // NOTE
 137 // Update the next available ID when you add a new SolverParameter field.
 138 //
 139 // SolverParameter next available ID: 43 (last added: plateau_winsize)
 140 message SolverParameter {
 141   //////////////////////////////////////////////////////////////////////////////
 142   // Specifying the train and test networks
 143   //
 144   // Exactly one train net must be specified using one of the following fields:
 145   //     train_net_param, train_net, net_param, net
 146   // One or more test nets may be specified using any of the following fields:
 147   //     test_net_param, test_net, net_param, net
 148   // If more than one test net field is specified (e.g., both net and
 149   // test_net are specified), they will be evaluated in the field order given
 150   // above: (1) test_net_param, (2) test_net, (3) net_param/net.
 151   // A test_iter must be specified for each test_net.
 152   // A test_level and/or a test_stage may also be specified for each test_net.
 153   //////////////////////////////////////////////////////////////////////////////
 154
 155   // Proto filename for the train net, possibly combined with one or more
 156   // test nets.
 157   optional string net = 24;
 158   // Inline train net param, possibly combined with one or more test nets.
 159   optional NetParameter net_param = 25;
 160
 161   optional string train_net = 1; // Proto filename for the train net.
 162   repeated string test_net = 2; // Proto filenames for the test nets.
 163   optional NetParameter train_net_param = 21; // Inline train net params.
 164   repeated NetParameter test_net_param = 22; // Inline test net params.
 165
 166   // The states for the train/test nets. Must be unspecified or
 167   // specified once per net.
 168   //
 169   // By default, all states will have solver = true;
 170   // train_state will have phase = TRAIN,
 171   // and all test_state's will have phase = TEST.
 172   // Other defaults are set according to the NetState defaults.
 173   optional NetState train_state = 26;
 174   repeated NetState test_state = 27;
 175
 176   // The number of iterations for each test net.
 177   repeated int32 test_iter = 3;
 178
 179   // The number of iterations between two testing phases.
 180   optional int32 test_interval = 4 [default = 0];
 181   optional bool test_compute_loss = 19 [default = false];
 182   // If true, run an initial test pass before the first iteration,
 183   // ensuring memory availability and printing the starting value of the loss.
 184   optional bool test_initialization = 32 [default = true];
 185   optional float base_lr = 5; // The base learning rate
 186   // the number of iterations between displaying info. If display = 0, no info
 187   // will be displayed.
 188   optional int32 display = 6;
 189   // Display the loss averaged over the last average_loss iterations
 190   optional int32 average_loss = 33 [default = 1];
 191   optional int32 max_iter = 7; // the maximum number of iterations
 192   // accumulate gradients over `iter_size` x `batch_size` instances
 193   optional int32 iter_size = 36 [default = 1];
 194
 195   // The learning rate decay policy. The currently implemented learning rate
 196   // policies are as follows:
 197   //    - fixed: always return base_lr.
 198   //    - step: return base_lr * gamma ^ (floor(iter / stepsize))
 199   //    - exp: return base_lr * gamma ^ iter
 200   //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
 201   //    - multistep: similar to step but it allows non uniform steps defined by
 202   //      stepvalue
 203   //    - poly: the effective learning rate follows a polynomial decay, to be
 204   //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
 205   //    - sigmoid: the effective learning rate follows a sigmoid decay
 206   //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
 207   //    - plateau: decreases lr
 208   //              if the minimum loss isn't updated for 'plateau_winsize' iters
 209   //
 210   // where base_lr, max_iter, gamma, stepsize, stepvalue and power are defined
 211   // in the solver parameter protocol buffer, and iter is the current iteration.
 212   optional string lr_policy = 8;
 213   optional float gamma = 9; // The parameter to compute the learning rate.
 214   optional float power = 10; // The parameter to compute the learning rate.
 215   optional float momentum = 11; // The momentum value.
 216   optional float weight_decay = 12; // The weight decay.
 217   // regularization types supported: L1 and L2
 218   // controlled by weight_decay
 219   optional string regularization_type = 29 [default = "L2"];
 220   // the stepsize for learning rate policy "step"
 221   optional int32 stepsize = 13;
 222   // the step values for learning rate policy "multistep"
 223   repeated int32 stepvalue = 34;
 224   // the window size(s), in iterations, for learning rate policy "plateau"
 225   repeated int32 plateau_winsize = 42;
 226
 227   // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
 228   // whenever their actual L2 norm is larger.
 229   optional float clip_gradients = 35 [default = -1];
 230
 231   optional int32 snapshot = 14 [default = 0]; // The snapshot interval
 232   optional string snapshot_prefix = 15; // The prefix for the snapshot.
 233   // whether to snapshot diff in the results or not. Snapshotting diff will help
 234   // debugging but the final protocol buffer size will be much larger.
 235   optional bool snapshot_diff = 16 [default = false];
 236   enum SnapshotFormat {
 237     HDF5 = 0;
 238     BINARYPROTO = 1;
 239   }
 240   optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
 241   // the mode solver will use: 0 for CPU and 1 for GPU. GPU is the default.
 242   enum SolverMode {
 243     CPU = 0;
 244     GPU = 1;
 245   }
 246   optional SolverMode solver_mode = 17 [default = GPU];
 247   // the device_id that will be used in GPU mode. device_id = 0 is the default.
 248   optional int32 device_id = 18 [default = 0];
 249   // If non-negative, the seed with which the Solver will initialize the Caffe
 250   // random number generator -- useful for reproducible results. Otherwise,
 251   // (and by default) initialize using a seed derived from the system clock.
 252   optional int64 random_seed = 20 [default = -1];
 253
 254   // type of the solver
 255   optional string type = 40 [default = "SGD"];
 256
 257   // numerical stability term for RMSProp, AdaGrad, AdaDelta and Adam
 258   optional float delta = 31 [default = 1e-8];
 259   // parameters for the Adam solver
 260   optional float momentum2 = 39 [default = 0.999];
 261
 262   // RMSProp decay value
 263   // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
 264   optional float rms_decay = 38 [default = 0.99];
 265
 266   // If true, print information about the state of the net that may help with
 267   // debugging learning problems.
 268   optional bool debug_info = 23 [default = false];
 269
 270   // If false, don't save a snapshot after training finishes.
 271   optional bool snapshot_after_train = 28 [default = true];
 272
 273   // DEPRECATED: old solver enum types, use string instead
 274   enum SolverType {
 275     SGD = 0;
 276     NESTEROV = 1;
 277     ADAGRAD = 2;
 278     RMSPROP = 3;
 279     ADADELTA = 4;
 280     ADAM = 5;
 281   }
 282   // DEPRECATED: use type instead of solver_type
 283   optional SolverType solver_type = 30 [default = SGD];
 284
 285   // Overlap compute and communication for data parallel training
 286   optional bool layer_wise_reduce = 41 [default = true];
 287 }
 288
 289 // A message that stores the solver snapshots
 290 message SolverState {
 291   optional int32 iter = 1; // The current iteration
 292   optional string learned_net = 2; // The file that stores the learned net.
 293   repeated BlobProto history = 3; // The history for sgd solvers
 294   optional int32 current_step = 4 [default = 0]; // The current step for learning rate
 295   optional float minimum_loss = 5 [default = 1E38]; // Historical minimum loss
 296   optional int32 iter_last_event = 6 [default = 0]; // The iteration when the last lr-update or min_loss-update happened
 297 }
 298
 299 enum Phase {  // Used by NetState.phase, NetStateRule.phase and LayerParameter.phase.
 300    TRAIN = 0;
 301    TEST = 1;
 302 }
 303
 304 message NetState {  // The state of a network: phase, level and stages (see NetParameter.state).
 305   optional Phase phase = 1 [default = TEST];  // note: TEST, not TRAIN, is the default
 306   optional int32 level = 2 [default = 0];
 307   repeated string stage = 3;
 308 }
 309
 310 message NetStateRule {  // A rule checked against a NetState; used by LayerParameter include/exclude.
 311   // Set phase to require the NetState have a particular phase (TRAIN or TEST)
 312   // to meet this rule.
 313   optional Phase phase = 1;
 314
 315   // Set the minimum and/or maximum levels in which the layer should be used.
 316   // Leave undefined to meet the rule regardless of level.
 317   optional int32 min_level = 2;
 318   optional int32 max_level = 3;
 319
 320   // Customizable sets of stages to include or exclude.
 321   // The net must have ALL of the specified stages and NONE of the specified
 322   // "not_stage"s to meet the rule.
 323   // (Use multiple NetStateRules to specify conjunctions of stages.)
 324   repeated string stage = 4;
 325   repeated string not_stage = 5;
 326 }
 327
 328
 329 // added by Kaichun Mo
 330 message SpatialTransformerParameter {  // type of LayerParameter.st_param
 331   // How to use the parameter passed by localisation network
 332   optional string transform_type = 1 [default = "affine"];
 333   // What is the sampling technique
 334   optional string sampler_type = 2 [default = "bilinear"];
 335
 336   // If not set, stay the same as the input dimensions H and W
 337   optional int32 output_H = 3;
 338   optional int32 output_W = 4;
 339
 340   // If false, only compute dTheta, DO NOT compute dU
 341   optional bool to_compute_dU = 5 [default = true];
 342
 343   // The default value for some parameters
 344   optional double theta_1_1 = 6;
 345   optional double theta_1_2 = 7;
 346   optional double theta_1_3 = 8;
 347   optional double theta_2_1 = 9;
 348   optional double theta_2_2 = 10;
 349   optional double theta_2_3 = 11;
 350
 351   optional bool de_transform = 12 [default = false];  // NOTE(review): not documented in this file
 352 }
 353
 354 // added by Kaichun Mo
 355 message PowerFileParameter {  // type of LayerParameter.power_file_param
 356
 357         optional string shift_file = 1;  // presumably a file path, judging by the name -- TODO confirm
 358 }
 359
 360 // added by Kaichun Mo
 361 message STLossParameter {  // type of LayerParameter.st_loss_param; both fields are proto2 `required`
 362
 363         // Indicate the resolution of the output images after ST transformation
 364         required int32 output_H = 1;
 365         required int32 output_W = 2;
 366 }
 367
 368 // added by Kaichun Mo
 369 message LocLossParameter {  // type of LayerParameter.loc_loss_param
 370
 371         required double threshold = 1;  // proto2 `required`: must be set whenever this message is present
 372 }
 373
 374 // Specifies training parameters (multipliers on global learning constants,
 375 // and the name and other settings used for weight sharing).
 376 message ParamSpec {
 377   // The names of the parameter blobs -- useful for sharing parameters among
 378   // layers, but never required otherwise.  To share a parameter between two
 379   // layers, give it a (non-empty) name.
 380   optional string name = 1;
 381
 382   // Whether to require shared weights to have the same shape, or just the same
 383   // count -- defaults to STRICT if unspecified.
 384   optional DimCheckMode share_mode = 2;  // no explicit default; proto2 falls back to the first enum value
 385   enum DimCheckMode {
 386     // STRICT (default) requires that num, channels, height, width each match.
 387     STRICT = 0;
 388     // PERMISSIVE requires only the count (num*channels*height*width) to match.
 389     PERMISSIVE = 1;
 390   }
 391
 392   // The multiplier on the global learning rate for this parameter.
 393   optional float lr_mult = 3 [default = 1.0];
 394
 395   // The multiplier on the global weight decay for this parameter.
 396   optional float decay_mult = 4 [default = 1.0];
 397 }
 398
 399 // NOTE
 400 // Update the next available ID when you add a new LayerParameter field.
 401 // (channel_permutation_param uses the out-of-sequence ID 8082; ID 206 is unused.)
 402 // LayerParameter next available layer-specific ID: 225 (last added: shuffle_channel_param)
 403 message LayerParameter {
 404   optional string name = 1; // the layer name
 405   optional string type = 2; // the layer type
 406   repeated string bottom = 3; // the name of each bottom blob
 407   repeated string top = 4; // the name of each top blob
 408
 409   // The train / test phase for computation.
 410   optional Phase phase = 10;
 411
 412   // The amount of weight to assign each top blob in the objective.
 413   // Each layer assigns a default value, usually of either 0 or 1,
 414   // to each top blob.
 415   repeated float loss_weight = 5;
 416
 417   // Specifies training parameters (multipliers on global learning constants,
 418   // and the name and other settings used for weight sharing).
 419   repeated ParamSpec param = 6;
 420
 421   // The blobs containing the numeric parameters of the layer.
 422   repeated BlobProto blobs = 7;
 423
 424   // Specifies whether to backpropagate to each bottom. If unspecified,
 425   // Caffe will automatically infer whether each input needs backpropagation
 426   // to compute parameter gradients. If set to true for some inputs,
 427   // backpropagation to those inputs is forced; if set false for some inputs,
 428   // backpropagation to those inputs is skipped.
 429   //
 430   // The size must be either 0 or equal to the number of bottoms.
 431   repeated bool propagate_down = 11;
 432
 433   // Rules controlling whether and when a layer is included in the network,
 434   // based on the current NetState.  You may specify a non-zero number of rules
 435   // to include OR exclude, but not both.  If no include or exclude rules are
 436   // specified, the layer is always included.  If the current NetState meets
 437   // ANY (i.e., one or more) of the specified rules, the layer is
 438   // included/excluded.
 439   repeated NetStateRule include = 8;
 440   repeated NetStateRule exclude = 9;
 441
 442   // Parameters for data pre-processing.
 443   optional TransformationParameter transform_param = 100;
 444
 445   // Parameters shared by loss layers.
 446   optional LossParameter loss_param = 101;
 447
 448   // Layer type-specific parameters.
 449   //
 450   // Note: certain layers may have more than one computational engine
 451   // for their implementation. These layers include an Engine type and
 452   // engine parameter for selecting the implementation.
 453   // The default for the engine is set by the ENGINE switch at compile-time.
 454   optional AccuracyParameter accuracy_param = 102;
 455   optional ArgMaxParameter argmax_param = 103;
 456   optional BatchNormParameter batch_norm_param = 139;
 457   optional BiasParameter bias_param = 141;
 458   optional ChannelPermutationParameter channel_permutation_param = 8082;
 459   optional ConcatParameter concat_param = 104;
 460   optional ContrastiveLossParameter contrastive_loss_param = 105;
 461   optional ConvolutionParameter convolution_param = 106;
 462   optional CropParameter crop_param = 144;
 463   optional CTCDecoderParameter ctc_decoder_param = 149;
 464   optional CTCLossParameter ctc_loss_param = 148;
 465   optional DataParameter data_param = 107;
 466   optional DropoutParameter dropout_param = 108;
 467   optional DummyDataParameter dummy_data_param = 109;
 468   optional EltwiseParameter eltwise_param = 110;
 469   optional ELUParameter elu_param = 140;
 470   optional EmbedParameter embed_param = 137;
 471   optional ExpParameter exp_param = 111;
 472   optional FlattenParameter flatten_param = 135;
 473   optional GRNParameter grn_param = 213;
 474   optional HDF5DataParameter hdf5_data_param = 112;
 475   optional HDF5OutputParameter hdf5_output_param = 113;
 476   optional HingeLossParameter hinge_loss_param = 114;
 477   optional ImageDataParameter image_data_param = 115;
 478   optional InfogainLossParameter infogain_loss_param = 116;
 479   optional InnerProductParameter inner_product_param = 117;
 480   optional InputParameter input_param = 143;
 481   optional LogParameter log_param = 134;
 482   optional LRNParameter lrn_param = 118;
 483   optional MemoryDataParameter memory_data_param = 119;
 484   optional MVNParameter mvn_param = 120;
 485   optional ParameterParameter parameter_param = 145;
 486   optional PoolingParameter pooling_param = 121;
 487   optional PermuteParameter permute_param = 154;
 488   optional PowerParameter power_param = 122;
 489   optional PReLUParameter prelu_param = 131;
 490   optional PythonParameter python_param = 130;
 491   optional RecurrentParameter recurrent_param = 146;
 492   optional ReductionParameter reduction_param = 136;
 493   optional ReLUParameter relu_param = 123;
 494   optional ReshapeParameter reshape_param = 133;
 495   optional ReverseParameter reverse_param = 147;
 496   optional ScaleParameter scale_param = 142;
 497   optional SigmoidParameter sigmoid_param = 124;
 498   optional SoftmaxParameter softmax_param = 125;
 499   optional SPPParameter spp_param = 132;
 500   optional SliceParameter slice_param = 126;
 501   optional TanHParameter tanh_param = 127;
 502   optional ThresholdParameter threshold_param = 128;
 503   optional TileParameter tile_param = 138;
 504   optional WindowDataParameter window_data_param = 129;
 505
 506   // added by Kaichun Mo
 507   optional SpatialTransformerParameter st_param = 150;
 508   optional STLossParameter st_loss_param = 151;
 509   optional PowerFileParameter power_file_param = 152;
 510   optional LocLossParameter loc_loss_param = 153;
 511
 512   optional ProposalParameter proposal_param = 201;
 513   optional CosineSimilarityBatchParameter cosine_similarity_batch_param = 202;
 514   optional RandomSamplingSoftmaxLossParameter rss_loss_param = 203;
 515   optional NormalizeParameter norm_param = 204;
 516   optional ROIWarpingParameter roi_warping_param = 205;
 517   optional PSROIPoolingParameter psroi_pooling_param = 207;
 518   optional ROIPoolingParameter roi_pooling_param = 208;
 519   optional SmoothL1LossParameter smooth_l1_loss_param = 209;
 520   optional BoxAnnotatorOHEMParameter box_annotator_ohem_param = 210;
 521   optional DetectionOutputParameter detection_output_param = 211;
 522   optional PriorBoxParameter prior_box_param = 212;
 523
 524   optional RegionYoloParameter region_yolo_param = 214;
 525   optional ReorgYoloParameter reorg_yolo_param = 215;
 526   optional ReLU6Parameter relu6_param = 216;
 527
 528   optional InterpParameter interp_param = 217;
 529
 530   // for FlowNet2
 531   optional AugmentationParameter augmentation_param = 218;
 532   optional CorrelationParameter correlation_param = 219;
 533   optional ResampleParameter resample_param = 220;
 534   optional FlowWarpParameter flow_warp_param = 221;
 535   optional AccumParameter accum_param = 222;
 536   optional CoeffScheduleParameter coeff_schedule_param = 223;
 537
 538   // for Shufflenet v2
 539   optional ShuffleChannelParameter shuffle_channel_param= 224;
 540 }
 541
 542 message InterpParameter {  // type of LayerParameter.interp_param
 543   optional int32 height = 1 [default = 0]; // Height of output
 544   optional int32 width = 2 [default = 0]; // Width of output
 545   optional int32 zoom_factor = 3 [default = 1]; // zoom factor
 546   optional int32 shrink_factor = 4 [default = 1]; // shrink factor
 547   optional int32 pad_beg = 5 [default = 0]; // padding at the beginning of the input
 548   optional int32 pad_end = 6 [default = 0]; // padding at the end of the input
 549 }
 550
 551 message RandomSamplingSoftmaxLossParameter {  // type of LayerParameter.rss_loss_param
 552   optional int32 random_sampling_num = 1 [default = 100];
 553   optional string random_sampling_policy = 2 [default = "random"];
 554 }
 555
 556 // Message that stores parameters used by ProposalLayer (LayerParameter.proposal_param)
 557 message ProposalParameter {
 558   optional uint32 feat_stride = 1 [default = 16];
 559   optional uint32 base_size = 2 [default = 16];
 560   optional uint32 min_size = 3 [default = 16];
 561   repeated float ratio = 4;  // repeated, no default
 562   repeated float scale = 5;  // repeated, no default
 563   optional uint32 pre_nms_topn = 6 [default = 6000];
 564   optional uint32 post_nms_topn = 7 [default = 300];
 565   optional float nms_thresh = 8 [default = 0.7];
 566 }
 567
 568 // Message that stores parameters used by NormalizeLayer
 569 message NormalizeParameter {
 570   optional bool across_spatial = 1 [default = true];
 571   // Initial value of scale. Default is 1.0 for all (this default is not expressed in the schema).
 572   optional FillerParameter scale_filler = 2;
 573   // Whether or not scale parameters are shared across channels.
 574   optional bool channel_shared = 3 [default = true];
 575   // Epsilon for not dividing by zero while normalizing variance
 576   optional float eps = 4 [default = 1e-10];
 577 }
 578
 579 message PermuteParameter {
 580   // The new order of the axes of the data. Note that each entry should be
 581   // within the axis range of the input data, and indexing starts from 0.
 582   // Do not repeat an axis in the order.
 583   repeated uint32 order = 1;
 584 }
 585
 586 // Message that stores parameters used to apply transformation
 587 // to the data layer's data
 588 message TransformationParameter {
 589   // For data pre-processing, we can do simple scaling and subtracting the
 590   // data mean, if provided. Note that the mean subtraction is always carried
 591   // out before scaling.
 592   optional float scale = 1 [default = 1];
 593   // Specify if we want to randomly mirror data.
 594   optional bool mirror = 2 [default = false];
 595   // Specify if we would like to randomly crop an image.
 596   optional uint32 crop_size = 3 [default = 0];
 597   // mean_file and mean_value cannot be specified at the same time
 598   optional string mean_file = 4;
 599   // if specified can be repeated once (would subtract it from all the channels)
 600   // or can be repeated the same number of times as channels
 601   // (would subtract them from the corresponding channel)
 602   repeated float mean_value = 5;
 603   // Force the decoded image to have 3 color channels.
 604   optional bool force_color = 6 [default = false];
 605   // Force the decoded image to have 1 color channel.
 606   optional bool force_gray = 7 [default = false];
 607 }
 608
 609 // Message that stores parameters shared by loss layers
 610 message LossParameter {
 611   // If specified, ignore instances with the given label.
 612   optional int32 ignore_label = 1;
 613   // How to normalize the loss for loss layers that aggregate across batches,
 614   // spatial dimensions, or other dimensions.  Currently only implemented in
 615   // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
 616   enum NormalizationMode {
 617     // Divide by the number of examples in the batch times spatial dimensions.
 618     // Outputs that receive the ignore label will NOT be ignored in computing
 619     // the normalization factor.
 620     FULL = 0;
 621     // Divide by the total number of output locations that do not take the
 622     // ignore_label.  If ignore_label is not set, this behaves like FULL.
 623     VALID = 1;
 624     // Divide by the batch size.
 625     BATCH_SIZE = 2;
 626     // Divide by pre-fixed normalizer
 627     PRE_FIXED = 3;
 628     // Do not normalize the loss.
 629     NONE = 4;
 630   }
 631   // For historical reasons, the default normalization for
 632   // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
 633   optional NormalizationMode normalization = 3 [default = VALID];
 634   // Deprecated.  Ignored if normalization is specified.  If normalization
 635   // is not specified, then setting this to false will be equivalent to
 636   // normalization = BATCH_SIZE to be consistent with previous behavior.
 637   optional bool normalize = 2;
 638   // The pre-fixed normalizer (see the PRE_FIXED normalization mode).
 639   optional float pre_fixed_normalizer = 4 [default = 1];
 640   // label frequencies
 641   optional bool weight_by_label_freqs = 5 [default = false];
 642   repeated float class_weighting = 6;  // NOTE(review): relation to weight_by_label_freqs is not documented here
 643 }
 644
 645 // Messages that store parameters used by individual layer types follow, in
 646 // alphabetical order.
 647
 648 message AccuracyParameter {  // type of LayerParameter.accuracy_param
 649   // When computing accuracy, count as correct by comparing the true label to
 650   // the top k scoring classes.  By default, only compare to the top scoring
 651   // class (i.e. argmax).
 652   optional uint32 top_k = 1 [default = 1];
 653
 654   // The "label" axis of the prediction blob, whose argmax corresponds to the
 655   // predicted label -- may be negative to index from the end (e.g., -1 for the
 656   // last axis).  For example, if axis == 1 and the predictions are
 657   // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
 658   // labels with integer values in {0, 1, ..., C-1}.
 659   optional int32 axis = 2 [default = 1];
 660
 661   // If specified, ignore instances with the given label.
 662   optional int32 ignore_label = 3;
 663 }
 664
 665 message ArgMaxParameter {  // type of LayerParameter.argmax_param
 666   // If true, produce pairs (argmax, maxval)
 667   optional bool out_max_val = 1 [default = false];
 668   optional uint32 top_k = 2 [default = 1];
 669   // The axis along which to maximise -- may be negative to index from the
 670   // end (e.g., -1 for the last axis).
 671   // By default ArgMaxLayer maximizes over the flattened trailing dimensions
 672   // for each index of the first / num dimension.
 673   optional int32 axis = 3;  // no explicit default declared
 674 }
 675
 676 message ChannelPermutationAction {  // One entry of ChannelPermutationParameter.action.
 677   // Destination channel.
 678   required uint32 chan = 1;
 679   // Source channel for channel copy operation.  No source channel shall be
 680   // used more than once.
 681   optional uint32 copy = 2;
 682   // Value for channel fill operation (float for both single- and
 683   // double-precision Caffe).
 684   optional float fill = 3;
 685 }
 686
 687 message ChannelPermutationParameter {
 688   // Sequence of actions ordered by increasing value of chan.
 689   // The missing values of chan (i.e. top channel indices) are assumed to be
 690   // copy operations from bottom channels with the same channel index.
 691   repeated ChannelPermutationAction action = 1;
 692
 693   // Number of output channels
 694   required uint32 num_output = 16;
 695   // When true, tells layer that copying/filling channels in-place in
 696   // the given order would give correct result.
 697   optional bool inplace_possible = 17 [default = false];
 698   // Version field is used to check compatibility between layer implementation
 699   // and layer parameters in model prototxt file.
 700   // Version number of this message format is 1. NOTE(review): the declared default is 0 -- confirm.
 701   optional int32 version = 18 [default = 0];
 702 }
 703
 704 message ConcatParameter {  // type of LayerParameter.concat_param
 705   // The axis along which to concatenate -- may be negative to index from the
 706   // end (e.g., -1 for the last axis).  Other axes must have the
 707   // same dimension for all the bottom blobs.
 708   // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
 709   optional int32 axis = 2 [default = 1];
 710
 711   // DEPRECATED: alias for "axis" -- does not support negative indexing.
 712   optional uint32 concat_dim = 1 [default = 1];
 713 }
 714
 715 message BatchNormParameter {
 716   // If false, accumulate global mean/variance values via a moving average. If
 717   // true, use those accumulated values instead of computing mean/variance
 718   // across the batch.
 719   optional bool use_global_stats = 1; // no default is declared in this schema
 720   // Decay factor applied to the moving average at each iteration.
 721   optional float moving_average_fraction = 2 [default = .999];
 722   // Small value added to the variance estimate so that we don't divide by
 723   // zero.
 724   optional float eps = 3 [default = 1e-5];
 725 }
 726
 727 message BoxAnnotatorOHEMParameter {
 728   required uint32 roi_per_img = 1; // number of RoIs for training (required field)
 729   optional int32 ignore_label = 2 [default = -1]; // label to ignore in scoring (default -1)
 730 }
 731
 732 message BiasParameter { // Parameters for BiasLayer.
 733   // The first axis of bottom[0] (the first input Blob) along which to apply
 734   // bottom[1] (the second input Blob).  May be negative to index from the end
 735   // (e.g., -1 for the last axis).
 736   //
 737   // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
 738   // top[0] will have the same shape, and bottom[1] may have any of the
 739   // following shapes (for the given value of axis):
 740   //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
 741   //    (axis == 1 == -3)          3;     3x40;     3x40x60
 742   //    (axis == 2 == -2)                   40;       40x60
 743   //    (axis == 3 == -1)                                60
 744   // Furthermore, bottom[1] may have the empty shape (regardless of the value of
 745   // "axis") -- a scalar bias.
 746   optional int32 axis = 1 [default = 1];
 747
 748   // (num_axes is ignored unless just one bottom is given and the bias is
 749   // a learned parameter of the layer.  Otherwise, num_axes is determined by the
 750   // number of axes of the second bottom.)
 751   // The number of axes of the input (bottom[0]) covered by the bias
 752   // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
 753   // Set num_axes := 0 to add a zero-axis Blob: a scalar.
 754   optional int32 num_axes = 2 [default = 1];
 755
 756   // (filler is ignored unless just one bottom is given and the bias is
 757   // a learned parameter of the layer.)
 758   // The initialization for the learned bias parameter.
 759   // Default is the zero (0) initialization, resulting in the BiasLayer
 760   // initially performing the identity operation.
 761   optional FillerParameter filler = 3;
 762 }
 763
 764 message ContrastiveLossParameter {
 765   // Margin for dissimilar pairs.
 766   optional float margin = 1 [default = 1.0];
 767   // The first implementation of this cost did not exactly match the cost of
 768   // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
 769   // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
 770   // Hadsell paper. New models should probably use this version.
 771   // legacy_version = true uses (margin - d^2). This is kept to support /
 772   // reproduce existing models and results.
 773   optional bool legacy_version = 2 [default = false];
 774 }
 775
 776 message ConvolutionParameter { // Parameters for convolution layers.
 777   optional uint32 num_output = 1; // The number of outputs for the layer
 778   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 779
 780   // Pad, kernel size, and stride are all given as a single value for equal
 781   // dimensions in all spatial dimensions, or once per spatial dimension.
 782   repeated uint32 pad = 3; // The padding size; defaults to 0
 783   repeated uint32 kernel_size = 4; // The kernel size
 784   repeated uint32 stride = 6; // The stride; defaults to 1
 785   // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
 786   // holes. (Kernel dilation is sometimes referred to by its use in the
 787   // algorithme à trous from Holschneider et al. 1987.)
 788   repeated uint32 dilation = 18; // The dilation; defaults to 1
 789
 790   // For 2D convolution only, the *_h and *_w versions may also be used to
 791   // specify both spatial dimensions.
 792   optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
 793   optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
 794   optional uint32 kernel_h = 11; // The kernel height (2D only)
 795   optional uint32 kernel_w = 12; // The kernel width (2D only)
 796   optional uint32 stride_h = 13; // The stride height (2D only)
 797   optional uint32 stride_w = 14; // The stride width (2D only)
 798
 799   optional uint32 group = 5 [default = 1]; // The group size for group conv
 800
 801   optional FillerParameter weight_filler = 7; // The filler for the weight
 802   optional FillerParameter bias_filler = 8; // The filler for the bias
 803   enum Engine { // NOTE(review): presumably selects the compute backend -- confirm
 804     DEFAULT = 0;
 805     CAFFE = 1;
 806     CUDNN = 2;
 807   }
 808   optional Engine engine = 15 [default = DEFAULT];
 809
 810   // The axis to interpret as "channels" when performing convolution.
 811   // Preceding dimensions are treated as independent inputs;
 812   // succeeding dimensions are treated as "spatial".
 813   // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
 814   // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
 815   // groups g>1) filters across the spatial axes (H, W) of the input.
 816   // With (N, C, D, H, W) inputs, and axis == 1, we perform
 817   // N independent 3D convolutions, sliding (C/g)-channels
 818   // filters across the spatial axes (D, H, W) of the input.
 819   optional int32 axis = 16 [default = 1];
 820
 821   // Whether to force use of the general ND convolution, even if a specific
 822   // implementation for blobs of the appropriate number of spatial dimensions
 823   // is available. (Currently, there is only a 2D-specific convolution
 824   // implementation; for input blobs with num_axes != 2, this option is
 825   // ignored and the ND implementation will be used.)
 826   optional bool force_nd_im2col = 17 [default = false];
 827 }
 828
 829 message CropParameter {
 830   // To crop, elements of the first bottom are selected to fit the dimensions
 831   // of the second, reference bottom. The crop is configured by
 832   // - the crop `axis` to pick the dimensions for cropping
 833   // - the crop `offset` to set the shift for all/each dimension
 834   // to align the cropped bottom with the reference bottom.
 835   // All dimensions up to but excluding `axis` are preserved, while
 836   // the dimensions including and trailing `axis` are cropped.
 837   // If only one `offset` is set, then all dimensions are offset by this amount.
 838   // Otherwise, the number of offsets must equal the number of cropped axes to
 839   // shift the crop in each dimension accordingly.
 840   // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
 841   // and `axis` may be negative to index from the end (e.g., -1 for the last
 842   // axis).
 843   optional int32 axis = 1 [default = 2];
 844   repeated uint32 offset = 2;
 845   repeated uint32 dimsize = 3; // NOTE(review): not described above; presumably explicit sizes of the cropped axes -- confirm
 846 }
 847
 848 message CTCDecoderParameter {
 849   // The index of the blank label within the labels. A negative (default)
 850   // value will use the last index.
 851   optional int32 blank_index = 1 [default = -1];
 852
 853   // Collapse the repeated labels during the CTC calculation,
 854   // e.g. collapse [0bbb11bb11bb0b2] to [01102] instead of [0111102],
 855   // where b means blank label.
 856   // The default behaviour is to merge repeated labels.
 857   // Note: blank labels will be removed in any case.
 858   optional bool ctc_merge_repeated = 2 [default = true];
 859 }
 860
 861 message CTCLossParameter {
 862   // Adds delayed output to the CTC loss calculation (untested!)
 863   optional int32 output_delay = 1 [default = 0];
 864
 865   // The index of the blank label within the labels. A negative (default)
 866   // value will use the last index.
 867   optional int32 blank_index = 2 [default = -1];
 868
 869   // Collapse repeating labels of the target sequence before calculating
 870   // the loss and the gradients (e.g. collapse [01102] to [0102]).
 871   // The default behaviour is to keep repeated labels. Otherwise the
 872   // network will not learn to predict repetitions.
 873   optional bool preprocess_collapse_repeated = 3 [default = false];
 874
 875   // Collapse the repeated labels during the CTC calculation,
 876   // e.g. collapse [0bbb11bb11bb0b2] to [01102] instead of [0111102],
 877   // where b means blank label.
 878   // The default behaviour is to merge repeated labels.
 879   // Note: blank labels will be removed in any case.
 880   optional bool ctc_merge_repeated = 4 [default = true];
 881
 882   // This parameter is for test cases only!
 883   // The time for which to calculate the loss (see Graves Eq. (7.27)).
 884   // Note that the result must be the same for each 0 <= t < T.
 885   // Therefore you can choose an arbitrary value; the default is 0.
 886   optional int32 loss_calculation_t = 5 [default = 0];
 887 }
 888
 889 message DataParameter {
 890   enum DB { // Supported database backends.
 891     LEVELDB = 0;
 892     LMDB = 1;
 893   }
 894   // Specify the data source.
 895   optional string source = 1;
 896   // Specify the batch size.
 897   optional uint32 batch_size = 4;
 898   // The rand_skip variable is for the data layer to skip a few data points
 899   // so that asynchronous SGD clients do not all start at the same point. The skip
 900   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 901   // be larger than the number of keys in the database.
 902   // DEPRECATED. Each solver accesses a different subset of the database.
 903   optional uint32 rand_skip = 7 [default = 0];
 904   optional DB backend = 8 [default = LEVELDB];
 905   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 906   // simple scaling and subtracting the data mean, if provided. Note that the
 907   // mean subtraction is always carried out before scaling.
 908   optional float scale = 2 [default = 1];
 909   optional string mean_file = 3;
 910   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 911   // crop an image.
 912   optional uint32 crop_size = 5 [default = 0];
 913   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 914   // data.
 915   optional bool mirror = 6 [default = false];
 916   // Force the encoded image to have 3 color channels.
 917   optional bool force_encoded_color = 9 [default = false];
 918   // Prefetch queue (increase if data feeding bandwidth varies, within the
 919   // limit of device memory for GPU training).
 920   optional uint32 prefetch = 10 [default = 4];
 921 }
 922
 923 message NonMaximumSuppressionParameter {
 924   // Threshold to be used in NMS.
 925   optional float nms_threshold = 1 [default = 0.3];
 926   // Maximum number of results to be kept (no default declared).
 927   optional int32 top_k = 2;
 928   // Parameter for adaptive NMS.
 929   optional float eta = 3 [default = 1.0];
 930 }
 931
 932 // Message that stores parameters used by the data transformer for its resize policy
 933 message ResizeParameter {
 934   // Probability of using this resize policy.
 935   optional float prob = 1 [default = 1];
 936
 937   enum Resize_mode {
 938     WARP = 1;
 939     FIT_SMALL_SIZE = 2;
 940     FIT_LARGE_SIZE_AND_PAD = 3;
 941   }
 942   optional Resize_mode resize_mode = 2 [default = WARP];
 943   optional uint32 height = 3 [default = 0];
 944   optional uint32 width = 4 [default = 0];
 945   // Parameters used to update the bbox in FIT_SMALL_SIZE mode.
 946   optional uint32 height_scale = 8 [default = 0];
 947   optional uint32 width_scale = 9 [default = 0];
 948
 949   enum Pad_mode {
 950     CONSTANT = 1;
 951     MIRRORED = 2;
 952     REPEAT_NEAREST = 3;
 953   }
 954   // Padding mode for the *_AND_PAD resize mode (FIT_LARGE_SIZE_AND_PAD) and object centering
 955   optional Pad_mode pad_mode = 5 [default = CONSTANT];
 956   // If specified, can be given once (the value fills all the channels)
 957   // or can be repeated the same number of times as there are channels
 958   // (each value is then applied to the corresponding channel).
 959   repeated float pad_value = 6;
 960
 961   enum Interp_mode { // Same as in OpenCV
 962     LINEAR = 1;
 963     AREA = 2;
 964     NEAREST = 3;
 965     CUBIC = 4;
 966     LANCZOS4 = 5;
 967   }
 968   // Interpolation mode(s) for resizing (repeated field).
 969   repeated Interp_mode interp_mode = 7;
 970 }
 971
 972 message SaveOutputParameter {
 973   // Output directory. If not empty, we will save the results.
 974   optional string output_directory = 1;
 975   // Output name prefix.
 976   optional string output_name_prefix = 2;
 977   // Output format.
 978   //    VOC - PASCAL VOC output format.
 979   //    COCO - MS COCO output format.
 980   optional string output_format = 3;
 981   // To output results, the following two files must also be provided;
 982   // otherwise saving of results is skipped.
 983   // Label map file.
 984   optional string label_map_file = 4;
 985   // A file which contains a list of names and sizes in the same order
 986   // as the input DB. The file is in the following format:
 987   //    name height width
 988   //    ...
 989   optional string name_size_file = 5;
 990   // Number of test images. It can be less than the number of lines in
 991   // name_size_file, for example when we only want to evaluate on part
 992   // of the test images.
 993   optional uint32 num_test_image = 6;
 994   // The resize parameter used in saving the data.
 995   optional ResizeParameter resize_param = 7;
 996 }
 997
 998 // Message that stores parameters used by DetectionOutputLayer
 999 message DetectionOutputParameter {
1000   // Number of classes to be predicted. Documented as required, although declared optional here.
1001   optional uint32 num_classes = 1;
1002   // If true, bounding boxes are shared among different classes.
1003   optional bool share_location = 2 [default = true];
1004   // Background label id. If there is no background class,
1005   // set it as -1.
1006   optional int32 background_label_id = 3 [default = 0];
1007   // Parameters used for non maximum suppression.
1008   optional NonMaximumSuppressionParameter nms_param = 4;
1009   // Parameters used for saving detection results.
1010   optional SaveOutputParameter save_output_param = 5;
1011   // Type of coding method for bbox.
1012   optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
1013   // If true, variance is encoded in target; otherwise we need to adjust the
1014   // predicted offset accordingly.
1015   optional bool variance_encoded_in_target = 8 [default = false];
1016   // Number of total bboxes to be kept per image after nms step.
1017   // -1 means keeping all bboxes after nms step.
1018   optional int32 keep_top_k = 7 [default = -1];
1019   // Only consider detections whose confidences are larger than a threshold.
1020   // If not provided, consider all boxes.
1021   optional float confidence_threshold = 9;
1022   // If true, visualize the detection results.
1023   optional bool visualize = 10 [default = false];
1024   // The threshold used to visualize the detection results.
1025   optional float visualize_threshold = 11;
1026   // If provided, save outputs to video file.
1027   optional string save_file = 12;
1028   // Input width
1029   optional int32 input_width = 13 [default = -1];
1030   // Input height
1031   optional int32 input_height = 14 [default = -1];
1032   // If false, bboxes need to be normalized
1033   optional bool normalized = 15 [default = true];
1034   // The objectness score is used by the anchor refinement module to filter easy negative anchors.
1035   optional float objectness_score = 16 [default = 0.01];
1036 }
1037
1038 message DropoutParameter {
1039   optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio (defaults to 0.5)
1040 }
1041
1042 // DummyDataLayer fills any number of arbitrarily shaped blobs with random
1043 // (or constant) data generated by "Fillers" (see "message FillerParameter").
1044 message DummyDataParameter {
1045   // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
1046   // shape fields, and 0, 1 or N data_fillers.
1047   //
1048   // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
1049   // If 1 data_filler is specified, it is applied to all top blobs.  If N are
1050   // specified, the i-th is applied to the i-th top blob.
1051   repeated FillerParameter data_filler = 1;
1052   repeated BlobShape shape = 6;
1053
1054   // 4D dimensions -- deprecated.  Use "shape" instead.
1055   repeated uint32 num = 2;
1056   repeated uint32 channels = 3;
1057   repeated uint32 height = 4;
1058   repeated uint32 width = 5;
1059 }
1060
1061 message EltwiseParameter {
1062   enum EltwiseOp { // Supported element-wise operations.
1063     PROD = 0;
1064     SUM = 1;
1065     MAX = 2;
1066   }
1067   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
1068   repeated float coeff = 2; // blob-wise coefficient for SUM operation
1069
1070   // Whether to use an asymptotically slower (for >2 inputs) but more stable method
1071   // of computing the gradient for the PROD operation. (No effect for SUM op.)
1072   optional bool stable_prod_grad = 3 [default = true];
1073 }
1074
1075 // Message that stores parameters used by ELULayer
1076 message ELUParameter {
1077   // Described in:
1078   // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
1079   // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
1080   optional float alpha = 1 [default = 1]; // presumably the ELU alpha coefficient from the paper above -- TODO confirm
1081 }
1082
1083 // Message that stores parameters used by EmbedLayer
1084 message EmbedParameter {
1085   optional uint32 num_output = 1; // The number of outputs for the layer
1086   // The input is given as integers to be interpreted as one-hot
1087   // vector indices with dimension input_dim.  Hence input_dim should be
1088   // 1 greater than the maximum possible input value.
1089   optional uint32 input_dim = 2;
1090
1091   optional bool bias_term = 3 [default = true]; // Whether to use a bias term
1092   optional FillerParameter weight_filler = 4; // The filler for the weight
1093   optional FillerParameter bias_filler = 5; // The filler for the bias
1094
1095 }
1096
1097 // Message that stores parameters used by ExpLayer
1098 message ExpParameter {
1099   // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
1100   // If base is left at the default (-1), base is set to e,
1101   // so y = exp(shift + scale * x).
1102   optional float base = 1 [default = -1.0];
1103   optional float scale = 2 [default = 1.0];
1104   optional float shift = 3 [default = 0.0];
1105 }
1106
1107 // Message that stores parameters used by FlattenLayer
1108 message FlattenParameter {
1109   // The first axis to flatten: all preceding axes are retained in the output.
1110   // May be negative to index from the end (e.g., -1 for the last axis).
1111   optional int32 axis = 1 [default = 1];
1112
1113   // The last axis to flatten: all following axes are retained in the output.
1114   // May be negative to index from the end (e.g., the default -1 for the last
1115   // axis).
1116   optional int32 end_axis = 2 [default = -1];
1117 }
1118
1119 // Message that stores parameters used by HDF5DataLayer
1120 message HDF5DataParameter {
1121   // Specify the data source.
1122   optional string source = 1;
1123   // Specify the batch size.
1124   optional uint32 batch_size = 2;
1125
1126   // Specify whether to shuffle the data.
1127   // If shuffle == true, the ordering of the HDF5 files is shuffled,
1128   // and the ordering of data within any given HDF5 file is shuffled,
1129   // but data between different files are not interleaved; all of a file's
1130   // data are output (in a random order) before moving on to another file.
1131   optional bool shuffle = 3 [default = false];
1132 }
1133
1134 message HDF5OutputParameter {
1135   optional string file_name = 1; // presumably the path of the HDF5 file to write -- TODO confirm
1136 }
1137
1138 message HingeLossParameter {
1139   enum Norm {
1140     L1 = 1;
1141     L2 = 2;
1142   }
1143   // Specify the norm to use: L1 (the default) or L2.
1144   optional Norm norm = 1 [default = L1];
1145 }
1146
1147 message ImageDataParameter {
1148   // Specify the data source.
1149   optional string source = 1;
1150   // Specify the batch size.
1151   optional uint32 batch_size = 4 [default = 1];
1152   // The rand_skip variable is for the data layer to skip a few data points
1153   // so that asynchronous SGD clients do not all start at the same point. The skip
1154   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1155   // be larger than the number of keys in the database.
1156   optional uint32 rand_skip = 7 [default = 0];
1157   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1158   optional bool shuffle = 8 [default = false];
1159   // Images are resized if new_height or new_width is not zero.
1160   optional uint32 new_height = 9 [default = 0];
1161   optional uint32 new_width = 10 [default = 0];
1162   // Specify if the images are color or gray.
1163   optional bool is_color = 11 [default = true];
1164   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
1165   // simple scaling and subtracting the data mean, if provided. Note that the
1166   // mean subtraction is always carried out before scaling.
1167   optional float scale = 2 [default = 1];
1168   optional string mean_file = 3;
1169   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
1170   // crop an image.
1171   optional uint32 crop_size = 5 [default = 0];
1172   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
1173   // data.
1174   optional bool mirror = 6 [default = false];
1175   optional string root_folder = 12 [default = ""]; // presumably a prefix for paths listed in `source` -- TODO confirm
1176 }
1177
1178 message InfogainLossParameter {
1179   // Specify the infogain matrix source (a string; no default declared).
1180   optional string source = 1;
1181 }
1182
1183 message InnerProductParameter {
1184   optional uint32 num_output = 1; // The number of outputs for the layer
1185   optional bool bias_term = 2 [default = true]; // whether to have bias terms
1186   optional FillerParameter weight_filler = 3; // The filler for the weight
1187   optional FillerParameter bias_filler = 4; // The filler for the bias
1188
1189   // The first axis to be lumped into a single inner product computation;
1190   // all preceding axes are retained in the output.
1191   // May be negative to index from the end (e.g., -1 for the last axis).
1192   optional int32 axis = 5 [default = 1];
1193   // Specify whether to transpose the weight matrix or not.
1194   // If transpose == true, any operations will be performed on the transpose
1195   // of the weight matrix. The weight matrix itself is not going to be transposed
1196   // but rather the transpose flag of operations will be toggled accordingly.
1197   optional bool transpose = 6 [default = false];
1198 }
1199
1200 message InputParameter {
1201   // This layer produces N >= 1 top blob(s) to be assigned manually.
1202   // Define N shapes to set a shape for each top.
1203   // Define 1 shape to set the same shape for every top.
1204   // Define no shape to defer to manual reshaping.
1205   repeated BlobShape shape = 1;
1206 }
1207
1208 // Message that stores parameters used by LogLayer
1209 message LogParameter {
1210   // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
1211   // If base is left at the default (-1), base is set to e,
1212   // so y = ln(shift + scale * x) = log_e(shift + scale * x).
1213   optional float base = 1 [default = -1.0];
1214   optional float scale = 2 [default = 1.0];
1215   optional float shift = 3 [default = 0.0];
1216 }
1217
1218 // Message that stores parameters used by LRNLayer
1219 message LRNParameter {
1220   optional uint32 local_size = 1 [default = 5];
1221   optional float alpha = 2 [default = 1.];
1222   optional float beta = 3 [default = 0.75];
1223   enum NormRegion { // presumably the region over which normalisation is computed -- TODO confirm
1224     ACROSS_CHANNELS = 0;
1225     WITHIN_CHANNEL = 1;
1226   }
1227   optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
1228   optional float k = 5 [default = 1.];
1229   enum Engine { // NOTE(review): presumably selects the compute backend -- confirm
1230     DEFAULT = 0;
1231     CAFFE = 1;
1232     CUDNN = 2;
1233   }
1234   optional Engine engine = 6 [default = DEFAULT];
1235 }
1236
1237 // Message that stores parameters used by GRNLayer (across channels only)
1238 message GRNParameter {
1239   optional float bias = 1 [default = 1.]; // NOTE(review): role of bias is not described here -- confirm against the layer
1240 }
1241
1242 message MemoryDataParameter { // NOTE(review): fields are undocumented; presumably the shape of each in-memory batch -- confirm
1243   optional uint32 batch_size = 1;
1244   optional uint32 channels = 2;
1245   optional uint32 height = 3;
1246   optional uint32 width = 4;
1247 }
1248
1249 message MVNParameter {
1250   // This parameter can be set to false to normalize the mean only.
1251   optional bool normalize_variance = 1 [default = true];
1252
1253   // This parameter can be set to true to perform DNN-like MVN.
1254   optional bool across_channels = 2 [default = false];
1255
1256   // Epsilon that prevents division by zero while normalizing the variance.
1257   optional float eps = 3 [default = 1e-9];
1258 }
1259
1260 message ParameterParameter {
1261   optional BlobShape shape = 1; // presumably the shape of the parameter blob -- TODO confirm
1262 }
1263
1264 message PoolingParameter {
1265   enum PoolMethod { // Supported pooling methods.
1266     MAX = 0;
1267     AVE = 1;
1268     STOCHASTIC = 2;
1269   }
1270   optional PoolMethod pool = 1 [default = MAX]; // The pooling method
1271   // Pad, kernel size, and stride are all given as a single value for equal
1272   // dimensions in height and width or as Y, X pairs.
1273   optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
1274   optional uint32 pad_h = 9 [default = 0]; // The padding height
1275   optional uint32 pad_w = 10 [default = 0]; // The padding width
1276   optional uint32 kernel_size = 2; // The kernel size (square)
1277   optional uint32 kernel_h = 5; // The kernel height
1278   optional uint32 kernel_w = 6; // The kernel width
1279   optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
1280   optional uint32 stride_h = 7; // The stride height
1281   optional uint32 stride_w = 8; // The stride width
1282   enum Engine { // NOTE(review): presumably selects the compute backend -- confirm
1283     DEFAULT = 0;
1284     CAFFE = 1;
1285     CUDNN = 2;
1286   }
1287   optional Engine engine = 11 [default = DEFAULT];
1288   // If global_pooling is true, the layer pools over the whole bottom by using
1289   // kernel_h = bottom->height and kernel_w = bottom->width.
1290   optional bool global_pooling = 12 [default = false];
1291   optional bool ceil_mode = 13 [default = true]; // NOTE(review): undocumented; presumably ceil vs. floor rounding of the output size -- confirm
1292 }
1293
1294 message PowerParameter {
1295   // PowerLayer computes outputs y = (shift + scale * x) ^ power.
1296   optional float power = 1 [default = 1.0]; // with all three defaults, y = x
1297   optional float scale = 2 [default = 1.0];
1298   optional float shift = 3 [default = 0.0];
1299 }
1300
1301 // Message that stores parameters used by PriorBoxLayer
1302 message PriorBoxParameter {
1303   // Encode/decode type.
1304   enum CodeType {
1305     CORNER = 1;
1306     CENTER_SIZE = 2;
1307     CORNER_SIZE = 3;
1308   }
1309   // Minimum box size (in pixels). Required!
1310   repeated float min_size = 1;
1311   // Maximum box size (in pixels). Required!
1312   repeated float max_size = 2;
1313   // Various aspect ratios. Duplicate ratios will be ignored.
1314   // If none is provided, we use default ratio 1.
1315   repeated float aspect_ratio = 3;
1316   // If true, will flip each aspect ratio.
1317   // For example, if there is aspect ratio "r",
1318   // we will generate aspect ratio "1.0/r" as well.
1319   optional bool flip = 4 [default = true];
1320   // If true, will clip the prior so that it is within [0, 1]
1321   optional bool clip = 5 [default = false];
1322   // Variance for adjusting the prior bboxes.
1323   repeated float variance = 6;
1324   // By default, we calculate img_height, img_width, step_x, step_y based on
1325   // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
1326   // provided.
1327   // Explicitly provide the img_size.
1328   optional uint32 img_size = 7;
1329   // Either img_size or img_h/img_w should be specified; not both.
1330   optional uint32 img_h = 8;
1331   optional uint32 img_w = 9;
1332
1333   // Explicitly provide the step size.
1334   optional float step = 10;
1335   // Either step or step_h/step_w should be specified; not both.
1336   optional float step_h = 11;
1337   optional float step_w = 12;
1338
1339   // Offset to the top left corner of each cell.
1340   optional float offset = 13 [default = 0.5];
1341
1342   // Width (in pixels).
1343   repeated float width = 14;
1344   // Height (in pixels).
1345   repeated float height = 15;
1346 }
1347
1348 message PSROIPoolingParameter { // All three fields are required.
1349    required float spatial_scale = 1; // NOTE(review): undocumented; presumably the RoI-to-feature-map coordinate scale -- confirm
1350    required int32 output_dim = 2; // output channel number
1351    required int32 group_size = 3; // number of groups to encode position-sensitive score maps
1352  }
1353
1354 message PythonParameter {
1355   optional string module = 1; // presumably the Python module that defines the layer -- TODO confirm
1356   optional string layer = 2; // presumably the layer class name inside `module` -- TODO confirm
1357   // This value is set to the attribute `param_str` of the `PythonLayer` object
1358   // in Python before calling the `setup()` method. This could be a number,
1359   // string, dictionary in Python dict format, JSON, etc. You may parse this
1360   // string in the `setup` method and use it in `forward` and `backward`.
1361   optional string param_str = 3 [default = ''];
1362   // Whether this PythonLayer is shared among worker solvers during data parallelism.
1363   // If true, each worker solver sequentially runs forward from this layer.
1364   // This value should be set to true if you are using it as a data layer.
1365   optional bool share_in_parallel = 4 [default = false];
1366 }
1367
1368 // Message that stores parameters used by RecurrentLayer
1369 message RecurrentParameter {
1370   // The dimension of the output (and usually hidden state) representation --
1371   // must be explicitly set to non-zero (the declared default of 0 is not a usable value).
1372   optional uint32 num_output = 1 [default = 0];
1373
1374   optional FillerParameter weight_filler = 2; // The filler for the weight
1375   optional FillerParameter bias_filler = 3; // The filler for the bias
1376
1377   // Whether to enable displaying debug_info in the unrolled recurrent net.
1378   optional bool debug_info = 4 [default = false];
1379
1380   // Whether to add as additional inputs (bottoms) the initial hidden state
1381   // blobs, and add as additional outputs (tops) the final timestep hidden state
1382   // blobs.  The number of additional bottom/top blobs required depends on the
1383   // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
1384   optional bool expose_hidden = 5 [default = false];
1385 }
1386
1387 // Message that stores parameters used by ReductionLayer
1388 message ReductionParameter {
1389   enum ReductionOp { // Supported reduction operations (values start at 1).
1390     SUM = 1;
1391     ASUM = 2;
1392     SUMSQ = 3;
1393     MEAN = 4;
1394   }
1395
1396   optional ReductionOp operation = 1 [default = SUM]; // reduction operation
1397
1398   // The first axis to reduce to a scalar -- may be negative to index from the
1399   // end (e.g., -1 for the last axis).
1400   // (Currently, only reduction along ALL "tail" axes is supported; reduction
1401   // of axis M through N, where N < num_axes - 1, is unsupported.)
1402   // Suppose we have an n-axis bottom Blob with shape:
1403   //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
1404   // If axis == m, the output Blob will have shape
1405   //     (d0, d1, d2, ..., d(m-1)),
1406   // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
1407   // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
1408   // If axis == 0 (the default), the output Blob always has the empty shape
1409   // (count 1), performing reduction across the entire input --
1410   // often useful for creating new loss functions.
1411   optional int32 axis = 2 [default = 0];
1412
1413   optional float coeff = 3 [default = 1.0]; // coefficient for output
1414 }
1415
1416 // Message that stores parameters used by ReLULayer.
1417 message ReLUParameter {
1418   // Allow a non-zero slope for negative inputs to speed up optimization.
1419   // Described in:
1420   // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
1421   // improve neural network acoustic models. In ICML Workshop on Deep Learning
1422   // for Audio, Speech, and Language Processing.
1423   optional float negative_slope = 1 [default = 0];
1424   enum Engine {  // Candidate values for the engine field below.
1425     DEFAULT = 0;
1426     CAFFE = 1;
1427     CUDNN = 2;
1428   }
1429   optional Engine engine = 2 [default = DEFAULT];
1430 }
1431
1432 message ReLU6Parameter {
1433   // Upper limit for the ReLU activation: the output is limited from the top
1434   // and clipped at the value given by n (6 unless overridden).
1435   optional float n = 1 [default = 6];
1436 }
1437
1438 message ReshapeParameter {
1439   // Specify the output dimensions. If some of the dimensions are set to 0,
1440   // the corresponding dimension from the bottom layer is used (unchanged).
1441   // At most one dimension may be set to -1, in which case its value is
1442   // inferred from the count of the bottom blob and the remaining dimensions.
1443   // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
1444   //
1445   //   layer {
1446   //     type: "Reshape" bottom: "input" top: "output"
1447   //     reshape_param { ... }
1448   //   }
1449   //
1450   // If "input" is 2D with shape 2 x 8, then the following reshape_param
1451   // specifications are all equivalent, producing a 3D blob "output" with shape
1452   // 2 x 2 x 4:
1453   //
1454   //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
1455   //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
1456   //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
1457   //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
1458   //
1459   optional BlobShape shape = 1;
1460
1461   // axis and num_axes control the portion of the bottom blob's shape that is
1462   // replaced by (included in) the reshape. By default (axis == 0 and
1463   // num_axes == -1), the entire bottom blob shape is included in the reshape,
1464   // and hence the shape field must specify the entire output shape.
1465   //
1466   // axis may be non-zero to retain some portion of the beginning of the input
1467   // shape (and may be negative to index from the end; e.g., -1 to begin the
1468   // reshape after the last axis, including nothing in the reshape,
1469   // -2 to include only the last axis, etc.).
1470   //
1471   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1472   // Then the following ReshapeLayer specifications are all equivalent,
1473   // producing a blob "output" with shape 2 x 2 x 4:
1474   //
1475   //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
1476   //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
1477   //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
1478   //
1479   // num_axes specifies the extent of the reshape.
1480   // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
1481   // input axes in the half-open range [axis, axis+num_axes).
1482   // num_axes may also be -1, the default, to include all remaining axes
1483   // (starting from axis).
1484   //
1485   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1486   // Then the following ReshapeLayer specifications are equivalent,
1487   // producing a blob "output" with shape 1 x 2 x 8.
1488   //
1489   //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
1490   //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
1491   //   reshape_param { shape { dim:  1  }  num_axes: 0 }
1492   //
1493   // On the other hand, these would produce output blob shape 2 x 1 x 8:
1494   //
1495   //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
1496   //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
1497   //
1498   optional int32 axis = 2 [default = 0];
1499   optional int32 num_axes = 3 [default = -1];
1500 }
1501
1502 message ReverseParameter {
1503   // axis selects the data axis that is inverted.
1504   // The layout of the content itself is not inverted.
1505   //
1506   // The default axis is 0, which means:
1507   //   data_previous[n] == data_afterwards[N - n -1]
1508   // where N is the size of the reversed axis and n is an index along it.
1509   //
1510   // Usually this layer will be used with recurrent layers to invert the
1511   // time axis, which is axis 0.
1512   // This layer will therefore swap the order in time but not the
1513   // order of the actual data.
1514   optional int32 axis = 1 [default = 0];
1515 }
1516
1517 // Message that stores parameters used by ROIPoolingLayer.
1518 message ROIPoolingParameter {
1519   // Pooled output size, given as separate height and width values.
1520   // NOTE(review): no pad, kernel size or stride field is declared in this message.
1521   optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
1522   optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
1523   // Multiplicative spatial scale factor to translate ROI coords from their
1524   // input scale to the scale used when pooling.
1525   optional float spatial_scale = 3 [default = 1];
1526 }
1527
1528 message ROIWarpingTestParameter {  // Declares the same three fields as ROIPoolingParameter.
1529   // Pooled output size, given as separate height and width values.
1530   // NOTE(review): no pad, kernel size or stride field is declared in this message.
1531   optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
1532   optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
1533   // Multiplicative spatial scale factor to translate ROI coords from their
1534   // input scale to the scale used when pooling.
1535   optional float spatial_scale = 3 [default = 1];
1536 }
1537 message ROIWarpingParameter {  // Declares the same three fields as ROIPoolingParameter.
1538   // Pooled output size, given as separate height and width values.
1539   // NOTE(review): no pad, kernel size or stride field is declared in this message.
1540   optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
1541   optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
1542   // Multiplicative spatial scale factor to translate ROI coords from their
1543   // input scale to the scale used when pooling.
1544   optional float spatial_scale = 3 [default = 1];
1545 }
1546
1547 message ScaleParameter {
1548   // The first axis of bottom[0] (the first input Blob) along which to apply
1549   // bottom[1] (the second input Blob).  May be negative to index from the end
1550   // (e.g., -1 for the last axis).
1551   //
1552   // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
1553   // top[0] will have the same shape, and bottom[1] may have any of the
1554   // following shapes (for the given value of axis):
1555   //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
1556   //    (axis == 1 == -3)          3;     3x40;     3x40x60
1557   //    (axis == 2 == -2)                   40;       40x60
1558   //    (axis == 3 == -1)                                60
1559   // Furthermore, bottom[1] may have the empty shape (regardless of the value of
1560   // "axis") -- a scalar multiplier.
1561   optional int32 axis = 1 [default = 1];
1562
1563   // (num_axes is ignored unless just one bottom is given and the scale is
1564   // a learned parameter of the layer.  Otherwise, num_axes is determined by the
1565   // number of axes of the second bottom.)
1566   // The number of axes of the input (bottom[0]) covered by the scale
1567   // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
1568   // Set num_axes := 0 to multiply with a zero-axis Blob: a scalar.
1569   optional int32 num_axes = 2 [default = 1];
1570
1571   // (filler is ignored unless just one bottom is given and the scale is
1572   // a learned parameter of the layer.)
1573   // The initialization for the learned scale parameter.
1574   // Default is the unit (1) initialization, resulting in the ScaleLayer
1575   // initially performing the identity operation.
1576   optional FillerParameter filler = 3;
1577
1578   // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
1579   // may be more efficient).  Initialized with bias_filler (defaults to 0).
1580   optional bool bias_term = 4 [default = false];
1581   optional FillerParameter bias_filler = 5;
1582 }
1583
1584 message SigmoidParameter {  // Holds a single field: the engine selection.
1585   enum Engine {  // Candidate values for the engine field below.
1586     DEFAULT = 0;
1587     CAFFE = 1;
1588     CUDNN = 2;
1589   }
1590   optional Engine engine = 1 [default = DEFAULT];
1591 }
1592
1593 message SliceParameter {
1594   // The axis along which to slice -- may be negative to index from the end
1595   // (e.g., -1 for the last axis).
1596   // By default, SliceLayer slices blobs along the "channels" axis (1).
1597   optional int32 axis = 3 [default = 1];
1598   repeated uint32 slice_point = 2;
1599
1600   // DEPRECATED: alias for "axis" -- does not support negative indexing.
1601   optional uint32 slice_dim = 1 [default = 1];
1602 }
1603
1604 message SmoothL1LossParameter {  // NOTE(review): the first case below presumably tests |x|, not x -- confirm against the layer code.
1605   // SmoothL1Loss(x) =
1606   //   0.5 * (sigma * x) ** 2    -- if x < 1.0 / sigma / sigma
1607   //   |x| - 0.5 / sigma / sigma -- otherwise
1608   optional float sigma = 1 [default = 1];  // The sigma used in the formula above.
1609 }
1610
1611 // Message that stores parameters used by SoftmaxLayer and SoftmaxWithLossLayer.
1612 message SoftmaxParameter {
1613   enum Engine {  // Candidate values for the engine field below.
1614     DEFAULT = 0;
1615     CAFFE = 1;
1616     CUDNN = 2;
1617   }
1618   optional Engine engine = 1 [default = DEFAULT];
1619
1620   // The axis along which to perform the softmax -- may be negative to index
1621   // from the end (e.g., -1 for the last axis).
1622   // All other axes are evaluated as independent softmaxes.
1623   optional int32 axis = 2 [default = 1];
1624 }
1625
1626 message TanHParameter {  // Holds a single field: the engine selection.
1627   enum Engine {  // Candidate values for the engine field below.
1628     DEFAULT = 0;
1629     CAFFE = 1;
1630     CUDNN = 2;
1631   }
1632   optional Engine engine = 1 [default = DEFAULT];
1633 }
1634
1635 // Message that stores parameters used by TileLayer.
1636 message TileParameter {
1637   // The index of the axis to tile (1 unless overridden).
1638   optional int32 axis = 1 [default = 1];
1639
1640   // The number of copies (tiles) of the blob to output; no default is declared.
1641   optional int32 tiles = 2;
1642 }
1643
1644 // Message that stores parameters used by ThresholdLayer.
1645 message ThresholdParameter {
1646   optional float threshold = 1 [default = 0]; // Strictly positive values. NOTE(review): the default of 0 is not strictly positive -- confirm what this note constrains.
1647 }
1648
1649 message WindowDataParameter {  // Data-source and window-sampling settings (see the window_data_layer note on context_pad).
1650   // Specify the data source.
1651   optional string source = 1;
1652   // For data pre-processing, we can do simple scaling and subtracting the
1653   // data mean, if provided. Note that the mean subtraction is always carried
1654   // out before scaling.
1655   optional float scale = 2 [default = 1];
1656   optional string mean_file = 3;
1657   // Specify the batch size.
1658   optional uint32 batch_size = 4;
1659   // Specify if we would like to randomly crop an image.
1660   optional uint32 crop_size = 5 [default = 0];
1661   // Specify if we want to randomly mirror data.
1662   optional bool mirror = 6 [default = false];
1663   // Foreground (object) overlap threshold.
1664   optional float fg_threshold = 7 [default = 0.5];
1665   // Background (non-object) overlap threshold.
1666   optional float bg_threshold = 8 [default = 0.5];
1667   // Fraction of the batch that should be foreground objects.
1668   optional float fg_fraction = 9 [default = 0.25];
1669   // Amount of contextual padding to add around a window
1670   // (used only by the window_data_layer).
1671   optional uint32 context_pad = 10 [default = 0];
1672   // Mode for cropping out a detection window:
1673   //   warp: cropped window is warped to a fixed size and aspect ratio
1674   //   square: the tightest square around the window is cropped
1675   optional string crop_mode = 11 [default = "warp"];
1676   // cache_images: will load all images in memory for faster access.
1677   optional bool cache_images = 12 [default = false];
1678   // Append root_folder to locate images (empty string by default).
1679   optional string root_folder = 13 [default = ""];
1680 }
1681
1682 message SPPParameter {  // Note: field numbers 3-5 are not declared in this message.
1683   enum PoolMethod {  // Candidate values for the pool field below.
1684     MAX = 0;
1685     AVE = 1;
1686     STOCHASTIC = 2;
1687   }
1688   optional uint32 pyramid_height = 1;  // No default is declared.
1689   optional PoolMethod pool = 2 [default = MAX]; // The pooling method
1690   enum Engine {  // Candidate values for the engine field below.
1691     DEFAULT = 0;
1692     CAFFE = 1;
1693     CUDNN = 2;
1694   }
1695   optional Engine engine = 6 [default = DEFAULT];
1696 }
1697
1698 // DEPRECATED: use LayerParameter instead.
1699 message V1LayerParameter {
1700   repeated string bottom = 2;
1701   repeated string top = 3;
1702   optional string name = 4;
1703   repeated NetStateRule include = 32;
1704   repeated NetStateRule exclude = 33;
1705   enum LayerType {  // Entries are not listed in numeric order; each name keeps its declared number.
1706     NONE = 0;
1707     ABSVAL = 35;
1708     ACCURACY = 1;
1709     ARGMAX = 30;
1710     BNLL = 2;
1711     CONCAT = 3;
1712     CONTRASTIVE_LOSS = 37;
1713     CONVOLUTION = 4;
1714     DATA = 5;
1715     DECONVOLUTION = 39;
1716     DROPOUT = 6;
1717     DUMMY_DATA = 32;
1718     EUCLIDEAN_LOSS = 7;
1719     ELTWISE = 25;
1720     EXP = 38;
1721     FLATTEN = 8;
1722     HDF5_DATA = 9;
1723     HDF5_OUTPUT = 10;
1724     HINGE_LOSS = 28;
1725     IM2COL = 11;
1726     IMAGE_DATA = 12;
1727     INFOGAIN_LOSS = 13;
1728     INNER_PRODUCT = 14;
1729     LRN = 15;
1730     MEMORY_DATA = 29;
1731     MULTINOMIAL_LOGISTIC_LOSS = 16;
1732     MVN = 34;
1733     POOLING = 17;
1734     POWER = 26;
1735     RELU = 18;
1736     SIGMOID = 19;
1737     SIGMOID_CROSS_ENTROPY_LOSS = 27;
1738     SILENCE = 36;
1739     SOFTMAX = 20;
1740     SOFTMAX_LOSS = 21;
1741     SPLIT = 22;
1742     SLICE = 33;
1743     TANH = 23;
1744     WINDOW_DATA = 24;
1745     THRESHOLD = 31;
1746   }
1747   optional LayerType type = 5;
1748   repeated BlobProto blobs = 6;
1749   repeated string param = 1001;
1750   repeated DimCheckMode blob_share_mode = 1002;  // Uses the enum declared immediately below.
1751   enum DimCheckMode {
1752     STRICT = 0;
1753     PERMISSIVE = 1;
1754   }
1755   repeated float blobs_lr = 7;
1756   repeated float weight_decay = 8;
1757   repeated float loss_weight = 35;
1758   optional AccuracyParameter accuracy_param = 27;  // Per-layer-type parameter messages follow.
1759   optional ArgMaxParameter argmax_param = 23;
1760   optional ConcatParameter concat_param = 9;
1761   optional ContrastiveLossParameter contrastive_loss_param = 40;
1762   optional ConvolutionParameter convolution_param = 10;
1763   optional DataParameter data_param = 11;
1764   optional DropoutParameter dropout_param = 12;
1765   optional DummyDataParameter dummy_data_param = 26;
1766   optional EltwiseParameter eltwise_param = 24;
1767   optional ExpParameter exp_param = 41;
1768   optional HDF5DataParameter hdf5_data_param = 13;
1769   optional HDF5OutputParameter hdf5_output_param = 14;
1770   optional HingeLossParameter hinge_loss_param = 29;
1771   optional ImageDataParameter image_data_param = 15;
1772   optional InfogainLossParameter infogain_loss_param = 16;
1773   optional InnerProductParameter inner_product_param = 17;
1774   optional LRNParameter lrn_param = 18;
1775   optional MemoryDataParameter memory_data_param = 22;
1776   optional MVNParameter mvn_param = 34;
1777   optional PoolingParameter pooling_param = 19;
1778   optional PowerParameter power_param = 21;
1779   optional ReLUParameter relu_param = 30;
1780   optional SigmoidParameter sigmoid_param = 38;
1781   optional SoftmaxParameter softmax_param = 39;
1782   optional SliceParameter slice_param = 31;
1783   optional TanHParameter tanh_param = 37;
1784   optional ThresholdParameter threshold_param = 25;
1785   optional WindowDataParameter window_data_param = 20;
1786   optional TransformationParameter transform_param = 36;
1787   optional LossParameter loss_param = 42;
1788   optional V0LayerParameter layer = 1;  // Embedded V0LayerParameter message.
1789 }
1790
1791 // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
1792 // in Caffe.  We keep this message type around for legacy support.
1793 message V0LayerParameter {
1794   optional string name = 1; // The layer name
1795   optional string type = 2; // The string specifying the layer type
1796
1797   // Parameters to specify layers with inner products.
1798   optional uint32 num_output = 3; // The number of outputs for the layer
1799   optional bool biasterm = 4 [default = true]; // Whether to have bias terms
1800   optional FillerParameter weight_filler = 5; // The filler for the weight
1801   optional FillerParameter bias_filler = 6; // The filler for the bias
1802
1803   optional uint32 pad = 7 [default = 0]; // The padding size
1804   optional uint32 kernelsize = 8; // The kernel size
1805   optional uint32 group = 9 [default = 1]; // The group size for group conv
1806   optional uint32 stride = 10 [default = 1]; // The stride
1807   enum PoolMethod {  // Candidate values for the pool field below.
1808     MAX = 0;
1809     AVE = 1;
1810     STOCHASTIC = 2;
1811   }
1812   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
1813   optional float dropout_ratio = 12 [default = 0.5]; // Dropout ratio
1814
1815   optional uint32 local_size = 13 [default = 5]; // For local response norm
1816   optional float alpha = 14 [default = 1.]; // For local response norm
1817   optional float beta = 15 [default = 0.75]; // For local response norm
1818   optional float k = 22 [default = 1.];  // Presumably also for local response norm -- TODO confirm
1819
1820   // For data layers, specify the data source.
1821   optional string source = 16;
1822   // For data pre-processing, we can do simple scaling and subtracting the
1823   // data mean, if provided. Note that the mean subtraction is always carried
1824   // out before scaling.
1825   optional float scale = 17 [default = 1];
1826   optional string meanfile = 18;
1827   // For data layers, specify the batch size.
1828   optional uint32 batchsize = 19;
1829   // For data layers, specify if we would like to randomly crop an image.
1830   optional uint32 cropsize = 20 [default = 0];
1831   // For data layers, specify if we want to randomly mirror data.
1832   optional bool mirror = 21 [default = false];
1833
1834   // The blobs containing the numeric parameters of the layer.
1835   repeated BlobProto blobs = 50;
1836   // The ratio that is multiplied on the global learning rate. If you want to
1837   // set the learning ratio for one blob, you need to set it for all blobs.
1838   repeated float blobs_lr = 51;
1839   // The weight decay that is multiplied on the global weight decay.
1840   repeated float weight_decay = 52;
1841
1842   // The rand_skip variable is for the data layer to skip a few data points
1843   // to keep all asynchronous sgd clients from starting at the same point. The
1844   // skip point would be set as rand_skip * rand(0,1). Note that rand_skip should
1845   // not be larger than the number of keys in the database.
1846   optional uint32 rand_skip = 53 [default = 0];
1847
1848   // Fields related to detection (det_*)
1849   // Foreground (object) overlap threshold.
1850   optional float det_fg_threshold = 54 [default = 0.5];
1851   // Background (non-object) overlap threshold.
1852   optional float det_bg_threshold = 55 [default = 0.5];
1853   // Fraction of the batch that should be foreground objects.
1854   optional float det_fg_fraction = 56 [default = 0.25];
1855
1856   // optional bool OBSOLETE_can_clobber = 57 [default = true];
1857
1858   // Amount of contextual padding to add around a window
1859   // (used only by the window_data_layer).
1860   optional uint32 det_context_pad = 58 [default = 0];
1861
1862   // Mode for cropping out a detection window:
1863   //   warp: cropped window is warped to a fixed size and aspect ratio
1864   //   square: the tightest square around the window is cropped
1865   optional string det_crop_mode = 59 [default = "warp"];
1866
1867   // For ReshapeLayer, one needs to specify the new dimensions.
1868   optional int32 new_num = 60 [default = 0];
1869   optional int32 new_channels = 61 [default = 0];
1870   optional int32 new_height = 62 [default = 0];
1871   optional int32 new_width = 63 [default = 0];
1872
1873   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1874   // It will also resize images if new_height or new_width are not zero.
1875   optional bool shuffle_images = 64 [default = false];
1876
1877   // For ConcatLayer, one needs to specify the dimension for concatenation, and
1878   // the other dimensions must be the same for all the bottom blobs.
1879   // By default it will concatenate blobs along the channels dimension.
1880   optional uint32 concat_dim = 65 [default = 1];
1881
1882   optional HDF5OutputParameter hdf5_output_param = 1001;
1883 }
1884
1885 message PReLUParameter {
1886   // Parametric ReLU, described in K. He et al., Delving Deep into Rectifiers:
1887   // Surpassing Human-Level Performance on ImageNet Classification, 2015.
1888
1889   // Initial value of a_i. Default is a_i=0.25 for all i.
1890   optional FillerParameter filler = 1;
1891   // Whether or not slope parameters are shared across channels (not shared by default).
1892   optional bool channel_shared = 2 [default = false];
1893 }
1894
1895 message RegionYoloParameter {  // NOTE(review): the original carries no field documentation; notes below are assumptions to confirm.
1896   optional int32 coords = 1 [default = 4];  // Presumably the number of box coordinates -- TODO confirm
1897   optional int32 classes = 2 [default = 20];  // Presumably the number of classes -- TODO confirm
1898   optional int32 num = 3 [default = 1];  // Meaning not stated here -- TODO confirm
1899   optional bool do_softmax = 4 [default = true];  // Enabled unless explicitly set to false
1900   repeated float anchors = 5;  // Empty unless provided
1901   repeated int32 mask = 6;  // Empty unless provided
1902 }
1903
1904 message ReorgYoloParameter {  // Holds a single field: stride.
1905   optional int32 stride = 1 [default = 1];  // Defaults to 1
1906 }
1907
1908 // Message used by AugmentationParameter to describe how augmentation parameters are generated.
1909 message RandomGeneratorParameter {  // Note: field number 3 is not declared in this message.
1910     optional string rand_type = 1 [default = "uniform" ]; // Can be uniform, gaussian or bernoulli
1911     optional bool exp = 2 [default = false ]; // Whether to exponentiate the random number after generating it
1912     optional float mean = 4 [default = 0. ]; // Mean of the random variable
1913     optional float spread = 5 [default = 0. ]; // Half of the interval length for uniform; standard deviation for gaussian
1914     optional float prob = 6 [default = 1.]; // Presumably a probability used by the generator -- TODO confirm
1915     optional bool apply_schedule = 7 [default = true]; // Presumably ties into CoeffScheduleParameter -- TODO confirm
1916     optional bool discretize = 8 [default = false]; // Discretize (round) the value from the rng to INT
1917     optional float multiplier = 9 [default = 1.]; // The final random value is multiplied by this (useful for discrete distributions)
1918 }
1919
1920 message CoeffScheduleParameter {  // Three float fields, each defaulting to 1; semantics are not documented here -- TODO confirm.
1921     optional float half_life = 1 [default = 1];
1922     optional float initial_coeff = 2 [default = 1];  // Presumably the starting coefficient -- TODO confirm
1923     optional float final_coeff = 3 [default = 1];  // Presumably the ending coefficient -- TODO confirm
1924 }
1925
1926 // Message storing the actual coefficients of a transformation.
1927 // IMPORTANT: default values should be 0 or 1.
1928 message AugmentationCoeff {
1929     // Spatial coefficients
1930     optional float mirror = 1 [default = 0];
1931     optional float dx = 2 [default = 0];
1932     optional float dy = 3 [default = 0];
1933     optional float angle = 4 [default = 0];
1934     optional float zoom_x = 5 [default = 1];
1935     optional float zoom_y = 6 [default = 1];
1936
1937     // Chromatic coefficients (field numbers 100-105)
1938     optional float gamma = 100 [default = 1];
1939     optional float brightness = 101 [default = 0];
1940     optional float contrast = 102 [default = 1];
1941     optional float color1 = 103 [default = 1];
1942     optional float color2 = 104 [default = 1];
1943     optional float color3 = 105 [default = 1];
1944
1945     // Chromatic-Eigen coefficients (field numbers 10-31)
1946     optional float pow_nomean0 = 10 [default = 1];
1947     optional float pow_nomean1 = 11 [default = 1];
1948     optional float pow_nomean2 = 12 [default = 1];
1949     optional float add_nomean0 = 13 [default = 0];
1950     optional float add_nomean1 = 14 [default = 0];
1951     optional float add_nomean2 = 15 [default = 0];
1952     optional float mult_nomean0 = 16 [default = 1];
1953     optional float mult_nomean1 = 17 [default = 1];
1954     optional float mult_nomean2 = 18 [default = 1];
1955     optional float pow_withmean0 = 19 [default = 1];
1956     optional float pow_withmean1 = 20 [default = 1];
1957     optional float pow_withmean2 = 21 [default = 1];
1958     optional float add_withmean0 = 22 [default = 0];
1959     optional float add_withmean1 = 23 [default = 0];
1960     optional float add_withmean2 = 24 [default = 0];
1961     optional float mult_withmean0 = 25 [default = 1];
1962     optional float mult_withmean1 = 26 [default = 1];
1963     optional float mult_withmean2 = 27 [default = 1];
1964     optional float lmult_pow = 28 [default = 1];
1965     optional float lmult_add = 29 [default = 0];
1966     optional float lmult_mult = 30 [default = 1];
1967     optional float col_angle = 31 [default = 0];
1968
1969     // Effect coefficients (field numbers 38-45)
1970     optional float fog_amount = 38 [default = 0];
1971     optional float fog_size = 39 [default = 0];
1972     optional float motion_blur_angle = 40 [default = 0];
1973     optional float motion_blur_size = 41 [default = 0];
1974     optional float shadow_angle = 42 [default = 0];
1975     optional float shadow_distance = 43 [default = 0];
1976     optional float shadow_strength = 44 [default = 0];
1977     optional float noise = 45 [default = 0];
1978 }
1979
1980 message AugmentationParameter {  // General settings first, then one RandomGeneratorParameter per augmentation.
1981     optional uint32 crop_width = 33 [default = 0];
1982     optional uint32 crop_height = 34 [default = 0];
1983     optional string write_augmented = 2 [default = ""];
1984     optional float max_multiplier = 3 [default = 255.];
1985     optional bool augment_during_test = 4 [default = false];
1986     optional uint32 recompute_mean = 5 [default = 0]; // Number of iterations to recompute mean (0 - do not recompute)
1987     optional string write_mean = 6 [default = ""];
1988     optional bool mean_per_pixel = 7 [default = true]; // Whether the mean is computed for each pixel or for the whole channel
1989     repeated float mean = 18; // Eddy: Per pixel RGB mean to subtract
1990     optional string mode = 8 [default = "add"]; // Can be "add", "replace" or "regenerate"
1991     optional uint32 bottomwidth = 80 [default = 0];
1992     optional uint32 bottomheight = 81 [default = 0];
1993     optional uint32 num = 82 [default = 0];
1994
1995     repeated float chromatic_eigvec = 83;
1996
1997     // Spatial generators
1998     optional RandomGeneratorParameter mirror = 10;
1999     optional RandomGeneratorParameter translate = 11 ;
2000     optional RandomGeneratorParameter rotate = 12 ;
2001     optional RandomGeneratorParameter zoom = 13 ;
2002     optional RandomGeneratorParameter squeeze = 14 ;
2003     optional RandomGeneratorParameter translate_x = 15 ;
2004     optional RandomGeneratorParameter translate_y = 16 ;
2005
2006
2007     // Chromatic generators
2008     optional RandomGeneratorParameter gamma = 35 ;
2009     optional RandomGeneratorParameter brightness = 36 ;
2010     optional RandomGeneratorParameter contrast = 37 ;
2011     optional RandomGeneratorParameter color = 38 ;
2012
2013     // Chromatic-Eigen generators
2014     optional RandomGeneratorParameter lmult_pow = 20 ;
2015     optional RandomGeneratorParameter lmult_mult = 21 ;
2016     optional RandomGeneratorParameter lmult_add = 22 ;
2017     optional RandomGeneratorParameter sat_pow = 23 ;
2018     optional RandomGeneratorParameter sat_mult = 24 ;
2019     optional RandomGeneratorParameter sat_add = 25 ;
2020     optional RandomGeneratorParameter col_pow = 26 ;
2021     optional RandomGeneratorParameter col_mult = 27 ;
2022     optional RandomGeneratorParameter col_add = 28 ;
2023     optional RandomGeneratorParameter ladd_pow = 29 ;
2024     optional RandomGeneratorParameter ladd_mult = 30 ;
2025     optional RandomGeneratorParameter ladd_add = 31 ;
2026     optional RandomGeneratorParameter col_rotate = 32 ;
2027
2028     // Effect generators
2029     optional RandomGeneratorParameter fog_amount = 100 ;
2030     optional RandomGeneratorParameter fog_size = 101 ;
2031     optional RandomGeneratorParameter motion_blur_angle = 102 ;
2032     optional RandomGeneratorParameter motion_blur_size = 103 ;
2033     optional RandomGeneratorParameter shadow_angle = 104 ;
2034     optional RandomGeneratorParameter shadow_distance = 105 ;
2035     optional RandomGeneratorParameter shadow_strength = 106 ;
2036     optional RandomGeneratorParameter noise = 107 ;
2037 }
2038
2039 message FlowWarpParameter {  // Holds a single field: fill_value.
2040     enum FillParameter {  // Values start at 1; no zero-valued entry is declared.
2041         ZERO = 1;
2042         NOT_A_NUMBER = 2;
2043     }
2044
2045     optional FillParameter fill_value = 1 [ default = ZERO ];  // ZERO unless overridden
2046 }
2047
2048 message CorrelationParameter {  // Note: field number 1 and numbers 9-14 are not declared in this message.
2049     optional uint32 pad = 2 [default = 0]; // The padding size (equal in Y, X)
2050     optional uint32 kernel_size = 3; // The kernel size (square)
2051     optional uint32 max_displacement = 4; // The maximum displacement (square)
2052     optional uint32 stride_1 = 5 [default = 1]; // The stride in blob 1 (equal in Y, X)
2053     optional uint32 stride_2 = 6 [default = 1]; // The stride in blob 2 (equal in Y, X)
2054
2055     // For Correlation1D:
2056     optional int32 single_direction = 8 [default = 0]; // Correlate only to the left (-1) or right (1); defaults to 0
2057
2058     optional bool do_abs = 7 [default = false]; // Use the absolute value of the result
2059     enum CorrelationType {  // Candidate values for the correlation_type field below.
2060         MULTIPLY = 0;
2061         SUBTRACT = 1;
2062     }
2063     optional CorrelationType correlation_type = 15 [default = MULTIPLY]; // Multiplicative is normal correlation
2064 }
2065
2066 message ResampleParameter {
2067     enum ResampleType {  // Values start at 1; no zero-valued entry is declared.
2068         NEAREST = 1;
2069         LINEAR = 2;
2070         CUBIC = 3;
2071         AREA = 4;
2072     };
2073     optional bool antialias = 4 [ default = true ];  // Enabled unless explicitly set to false
2074     optional uint32 width  = 1;  // No default is declared
2075     optional uint32 height = 2;  // No default is declared
2076     optional ResampleType type = 3 [ default = LINEAR ];  // LINEAR unless overridden
2077     optional float factor = 5 [ default = 1.0 ];  // Presumably a scale factor used in place of width/height -- TODO confirm
2078 }
2079
2080 message AccumParameter {
2081     optional uint32 top_height = 1 [default = 0]; // The output height
2082     optional uint32 top_width = 2 [default = 0]; // The output width
2083     optional uint32 size_divisible_by = 3 [default = 0]; // Upscales to the minimal size divisible by the given number
2084     optional bool have_reference = 4 [ default = false ];  // False unless explicitly set; meaning not documented here -- TODO confirm
2085 }
2086
2087 message ShuffleChannelParameter {  // Holds a single field: group.
2088     required uint32 group = 1;  // Declared `required` (proto2), so it has no default and must be set.
2089 }