[Application] Rename yolo -> yolo v2
author    Seungbaek Hong <sb92.hong@samsung.com>
Wed, 8 May 2024 04:05:32 +0000 (13:05 +0900)
committer MyungJoo Ham <myungjoo.ham@samsung.com>
Thu, 16 May 2024 06:16:47 +0000 (15:16 +0900)
To prevent confusion, the name of the YOLOv2 implementation was changed from
YOLO to YOLOv2.

Signed-off-by: Seungbaek Hong <sb92.hong@samsung.com>
29 files changed:
Applications/YOLO/PyTorch/dataset.py [deleted file]
Applications/YOLO/PyTorch/main.py [deleted file]
Applications/YOLO/PyTorch/yolo.py [deleted file]
Applications/YOLO/PyTorch/yolo_loss.py [deleted file]
Applications/YOLO/jni/Android.mk [deleted file]
Applications/YOLO/jni/Application.mk [deleted file]
Applications/YOLO/jni/det_dataloader.cpp [deleted file]
Applications/YOLO/jni/det_dataloader.h [deleted file]
Applications/YOLO/jni/main.cpp [deleted file]
Applications/YOLO/jni/meson.build [deleted file]
Applications/YOLO/jni/reorg_layer.cpp [deleted file]
Applications/YOLO/jni/reorg_layer.h [deleted file]
Applications/YOLO/jni/yolo_v2_loss.cpp [deleted file]
Applications/YOLO/jni/yolo_v2_loss.h [deleted file]
Applications/YOLOv2/PyTorch/dataset.py [new file with mode: 0644]
Applications/YOLOv2/PyTorch/main.py [new file with mode: 0644]
Applications/YOLOv2/PyTorch/yolo.py [new file with mode: 0644]
Applications/YOLOv2/PyTorch/yolo_loss.py [new file with mode: 0644]
Applications/YOLOv2/jni/Android.mk [new file with mode: 0644]
Applications/YOLOv2/jni/Application.mk [new file with mode: 0644]
Applications/YOLOv2/jni/det_dataloader.cpp [new file with mode: 0644]
Applications/YOLOv2/jni/det_dataloader.h [new file with mode: 0644]
Applications/YOLOv2/jni/main.cpp [new file with mode: 0644]
Applications/YOLOv2/jni/meson.build [new file with mode: 0644]
Applications/YOLOv2/jni/reorg_layer.cpp [new file with mode: 0644]
Applications/YOLOv2/jni/reorg_layer.h [new file with mode: 0644]
Applications/YOLOv2/jni/yolo_v2_loss.cpp [new file with mode: 0644]
Applications/YOLOv2/jni/yolo_v2_loss.h [new file with mode: 0644]
Applications/meson.build

diff --git a/Applications/YOLO/PyTorch/dataset.py b/Applications/YOLO/PyTorch/dataset.py
deleted file mode 100644 (file)
index a02971a..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file dataset.py
-# @date 8 March 2023
-# @brief Define dataset class for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import glob
-import numpy as np
-import torch
-from torch.utils.data import Dataset
-from torch.utils.data.dataloader import default_collate
-from PIL import Image
-
-##
-# @brief dataset class for yolo
-# @note Needs annotation text files corresponding to the names of the images.
-class YOLODataset(Dataset):
-    def __init__(self, img_dir, ann_dir):
-        super().__init__()
-        img_list = glob.glob(img_dir)
-        ann_list = glob.glob(ann_dir)
-        img_list.sort(), ann_list.sort()
-
-        self.length = len(img_list)
-        self.input_images = []
-        self.bbox_gt = []
-        self.cls_gt = []
-
-        for i in range(len(img_list)):
-            img = np.array(Image.open(img_list[i]).resize((416, 416))) / 255
-            label_bbox = []
-            label_cls = []
-            with open(ann_list[i], 'rt') as f:
-                for line in f.readlines():
-                    line = [float(i) for i in line.split()]
-                    label_bbox.append(np.array(line[1:], dtype=np.float32) / 416)
-                    label_cls.append(int(line[0]))
-
-            self.input_images.append(img)
-            self.bbox_gt.append(label_bbox)
-            self.cls_gt.append(label_cls)
-
-        self.input_images = np.array(self.input_images)
-        self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2))
-
-    def __len__(self):
-        return self.length
-    
-    def __getitem__(self, idx):
-        return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx]
-    
-##
-# @brief collate db function for yolo
-def collate_db(batch):
-    """
-    @param batch list of samples, each a tuple of (img, bbox, cls)
-    @return collated batch: (stacked imgs, list of bboxes, list of classes)
-    """
-    items = list(zip(*batch))
-    items[0] = default_collate(items[0])
-    items[1] = list(items[1])
-    items[2] = list(items[2])
-    return items
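
For reference, the YOLODataset above expects one annotation text file per image,
one object per line: a class index followed by four box values given in pixels of
the 416x416 input (the loader divides them by 416). A minimal sketch of that
parsing, with a hypothetical file name and box values:

# Hypothetical annotation layout parsed by YOLODataset.
# Each line: "<class> <x> <y> <w> <h>", in pixels of the 416x416 image.
with open("sample.txt", "w") as f:
    f.write("0 104.0 104.0 52.0 52.0\n")  # class 0, 52x52 box at (104, 104)

with open("sample.txt", "rt") as f:
    for line in f.readlines():
        line = [float(v) for v in line.split()]
        cls_idx = int(line[0])              # -> 0
        bbox = [v / 416 for v in line[1:]]  # -> [0.25, 0.25, 0.125, 0.125]
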
diff --git a/Applications/YOLO/PyTorch/main.py b/Applications/YOLO/PyTorch/main.py
deleted file mode 100644 (file)
index b831e1e..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file main.py
-# @date 8 March 2023
-# @brief Implement training for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import torch.nn.functional as F
-from torch.utils.data import DataLoader
-
-from yolo import YoloV2
-from yolo_loss import YoloV2_LOSS
-from dataset import YOLODataset, collate_db
-
-import sys
-import os
-
-# get pyutils path using relative path
-def get_util_path():
-    current_path = os.path.abspath(os.path.dirname(__file__))
-    parent_path = os.path.abspath(os.path.dirname(current_path))
-    target_path = os.path.abspath(os.path.dirname(parent_path))
-    return os.path.dirname(target_path) + '/tools/pyutils/'
-
-# add pyutils path to sys.path
-sys.path.append(get_util_path())
-from torchconverter import save_bin
-
-# set config
-out_size = 13
-num_classes = 4
-num_anchors = 5
-
-epochs = 3
-batch_size = 4
-
-train_img_dir = '/home/user/TRAIN_DIR/images/*'
-train_ann_dir = '/home/user/TRAIN_DIR/annotations/*'
-valid_img_dir = '/home/user/VALID_DIR/images/*'
-valid_ann_dir = '/home/user/VALID_DIR/annotations/*'
-
-# load data
-train_dataset = YOLODataset(train_img_dir, train_ann_dir)
-train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_db, shuffle=True, drop_last=True)
-valid_dataset = YOLODataset(valid_img_dir, valid_ann_dir)
-valid_loader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate_db, shuffle=False, drop_last=True)
-
-# set model, loss and optimizer
-model = YoloV2(num_classes=num_classes)
-criterion = YoloV2_LOSS(num_classes=num_classes)
-optimizer = optim.Adam(model.parameters(), lr=1e-3)
-# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
-
-# save init model
-save_bin(model, 'init_model')
-torch.save(model.state_dict(), './init_model.pt')
-
-# train model
-best_loss = 1e+10
-for epoch in range(epochs):
-    epoch_train_loss = 0
-    epoch_valid_loss = 0
-    for idx, (img, bbox, cls) in enumerate(train_loader):
-        model.train()
-        optimizer.zero_grad()
-        # model prediction
-        hypothesis = model(img).permute((0, 2, 3, 1))
-        hypothesis = hypothesis.reshape((batch_size, out_size**2, num_anchors, 5+num_classes))
-        # split each prediction(bbox, iou, class prob)
-        bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
-        bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
-        bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
-        iou_pred = torch.sigmoid(hypothesis[..., 4:5])
-        score_pred = hypothesis[..., 5:].contiguous()
-        prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
-        # calc loss
-        loss = criterion(torch.FloatTensor(bbox_pred),
-                         torch.FloatTensor(iou_pred),
-                         torch.FloatTensor(prob_pred),
-                         bbox,
-                         cls)
-        # back prop
-        loss.backward()
-        optimizer.step()  
-        # scheduler.step()
-        epoch_train_loss += loss.item()
-
-    for idx, (img, bbox, cls) in enumerate(valid_loader):
-        model.eval()
-        with torch.no_grad():
-            # model prediction
-            hypothesis = model(img).permute((0, 2, 3, 1))
-            hypothesis = hypothesis.reshape((hypothesis.shape[0], out_size**2, num_anchors, 5+num_classes))        
-            # split each prediction(bbox, iou, class prob)
-            bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
-            bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
-            bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
-            iou_pred = torch.sigmoid(hypothesis[..., 4:5])
-            score_pred = hypothesis[..., 5:].contiguous()
-            prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
-            # calc loss
-            loss = criterion(torch.FloatTensor(bbox_pred),
-                            torch.FloatTensor(iou_pred),
-                            torch.FloatTensor(prob_pred),
-                            bbox,
-                            cls)
-            epoch_valid_loss += loss.item()
-        
-    if epoch_valid_loss < best_loss:
-        best_loss = epoch_valid_loss
-        torch.save(model.state_dict(), './best_model.pt')
-        save_bin(model, 'best_model')
-        
-    print("{}epoch, train loss: {:.4f}, valid loss: {:.4f}".format(
-        epoch, epoch_train_loss / len(train_loader), epoch_valid_loss / len(valid_loader)))
-
-##
-# @brief bbox post process function for inference
-def post_process_for_bbox(bbox_pred):
-    """
-    @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-    @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-    """
-    anchors = torch.FloatTensor(
-        [(1.3221, 1.73145),
-        (3.19275, 4.00944),
-        (5.05587, 8.09892),
-        (9.47112, 4.84053),
-        (11.2364, 10.0071)]
-    )
-
-    outsize = (13, 13)
-    width, height = outsize
-    
-    # restore cell pos to x, y
-    for w in range(width):
-        for h in range(height):
-            bbox_pred[:, height*h + w, :, 0] += w
-            bbox_pred[:, height*h + w, :, 1] += h
-    bbox_pred[:, :, :, :2] /= 13
-    
-    # apply anchors to w, h
-    anchor_w = anchors[:, 0].contiguous().view(-1, 1)
-    anchor_h = anchors[:, 1].contiguous().view(-1, 1)
-    bbox_pred[:, :, :, 2:3] *= anchor_w
-    bbox_pred[:, :, :, 3:4] *= anchor_h
-
-    return bbox_pred
-
-# inference example using trained model
-hypothesis = model(img).permute((0, 2, 3, 1))
-hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5+num_classes))
-
-# transform output
-bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
-bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
-bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
-bbox_pred = post_process_for_bbox(bbox_pred)
-iou_pred = torch.sigmoid(hypothesis[..., 4:5])
-score_pred = hypothesis[..., 5:].contiguous()
-prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
-
-# result of inference (data range 0~1)
-iou_mask = (iou_pred > 0.5)
-print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask)
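
As a sanity check on post_process_for_bbox above, the following sketch (assuming
the function and the anchor priors defined in this file) shows how the cell
offsets and anchors are applied to a dummy prediction:

import torch

# Dummy prediction: batch 1, 13*13 grid cells, 5 anchors, 4 box values.
bbox_pred = torch.full((1, 13 * 13, 5, 4), 0.5)
out = post_process_for_bbox(bbox_pred)

# x, y are shifted by the cell position, then normalized by the grid size:
# cell (0, 0) keeps x = 0.5 / 13.
assert torch.isclose(out[0, 0, 0, 0], torch.tensor(0.5 / 13))
# w, h are scaled by the anchor priors, e.g. 0.5 * 1.3221 for anchor 0.
assert torch.isclose(out[0, 0, 0, 2], torch.tensor(0.5 * 1.3221))
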
diff --git a/Applications/YOLO/PyTorch/yolo.py b/Applications/YOLO/PyTorch/yolo.py
deleted file mode 100644 (file)
index 53763f1..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file yolo.py
-# @date 8 March 2023
-# @brief Define a simple yolo model (not the original darknet).
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import torch
-import torch.nn as nn
-
-##
-# @brief define yolo model (except for re-organization module)
-class YoloV2(nn.Module): 
-    def __init__(self, num_classes, num_anchors=5):
-        
-        super(YoloV2, self).__init__()
-        self.num_classes = num_classes
-        self.num_anchors = num_anchors
-        self.conv1 = nn.Sequential(nn.Conv2d(3, 32, 3, 1, 1), nn.BatchNorm2d(32, eps=1e-3),
-                                   nn.LeakyReLU(), nn.MaxPool2d(2, 2))
-        self.conv2 = nn.Sequential(nn.Conv2d(32, 64, 3, 1, 1), nn.BatchNorm2d(64, eps=1e-3),
-                                   nn.LeakyReLU(), nn.MaxPool2d(2, 2))
-        self.conv3 = nn.Sequential(nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv4 = nn.Sequential(nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv5 = nn.Sequential(nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3),
-                                   nn.LeakyReLU(), nn.MaxPool2d(2, 2))
-        self.conv6 = nn.Sequential(nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv7 = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv8 = nn.Sequential(nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3),
-                                   nn.LeakyReLU(), nn.MaxPool2d(2, 2))
-        self.conv9 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv10 = nn.Sequential(nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3),
-                                    nn.LeakyReLU())
-        self.conv11 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
-                                    nn.LeakyReLU())
-        self.conv12 = nn.Sequential(nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3),
-                                    nn.LeakyReLU())
-        self.conv13 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
-                                           nn.LeakyReLU())
-
-        self.conv_b = nn.Sequential(nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3),
-                                    nn.LeakyReLU())        
-
-        self.maxpool_a = nn.MaxPool2d(2, 2)
-        self.conv_a1 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv_a2 = nn.Sequential(nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv_a3 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv_a4 = nn.Sequential(nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv_a5 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                   nn.LeakyReLU())        
-        self.conv_a6 = nn.Sequential(nn.Conv2d(1024, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                   nn.LeakyReLU())
-        self.conv_a7 = nn.Sequential(nn.Conv2d(1024, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                            nn.LeakyReLU())
-
-        self.conv_out1 = nn.Sequential(nn.Conv2d(1280, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
-                                          nn.LeakyReLU())
-
-        self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0)
-
-    def forward(self, input):
-        output = self.conv1(input)
-        output = self.conv2(output)
-        output = self.conv3(output)
-        output = self.conv4(output)
-        output = self.conv5(output)
-        output = self.conv6(output)
-        output = self.conv7(output)
-        output = self.conv8(output)
-        output = self.conv9(output)
-        output = self.conv10(output)
-        output = self.conv11(output)
-        output = self.conv12(output)
-        output = self.conv13(output)
-
-        output_a = self.maxpool_a(output)
-        output_a = self.conv_a1(output_a)
-        output_a = self.conv_a2(output_a)
-        output_a = self.conv_a3(output_a)
-        output_a = self.conv_a4(output_a)
-        output_a = self.conv_a5(output_a)
-        output_a = self.conv_a6(output_a)
-        output_a = self.conv_a7(output_a)
-
-        output_b = self.conv_b(output)
-        b, c, h, w = output_b.size()
-        output_b = output_b.view(b, int(c / 4), h, 2, w, 2).contiguous()
-        output_b = output_b.permute(0, 3, 5, 1, 2, 4).contiguous()
-        output_b = output_b.view(b, -1, int(h / 2), int(w / 2))
-
-        output = torch.cat((output_a, output_b), 1)
-        output = self.conv_out1(output)
-        output = self.conv_out2(output)
-        return output
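
Assuming the YoloV2 class above is importable, a quick shape check confirms the
design: the four stride-2 max-poolings plus maxpool_a reduce a 416x416 input to
a 13x13 grid, and the head emits num_anchors * (5 + num_classes) channels:

import torch

model = YoloV2(num_classes=4, num_anchors=5)
with torch.no_grad():
    out = model(torch.zeros(1, 3, 416, 416))
assert out.shape == (1, 5 * (5 + 4), 13, 13)  # (1, 45, 13, 13)
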
diff --git a/Applications/YOLO/PyTorch/yolo_loss.py b/Applications/YOLO/PyTorch/yolo_loss.py
deleted file mode 100644 (file)
index 12f9557..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file yolo_loss.py
-# @date 8 March 2023
-# @brief Define loss class for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import torch
-import torch.nn as nn
-import torch.functional as F
-import numpy as np
-
-##
-# @brief calculate iou between two lists of boxes
-def calculate_iou(bbox1, bbox2):
-    """
-    @param bbox1 shape(numb_of_bbox, 4), it contains x, y, w, h
-    @param bbox2 shape(numb_of_bbox, 4), it contains x, y, w, h
-    @return result shape(numb_of_bbox, 1)
-    """
-    # bbox coordinates
-    b1x1, b1y1 = (bbox1[:, :2]).split(1, 1)
-    b1x2, b1y2 = (bbox1[:, :2] + (bbox1[:, 2:4])).split(1, 1)
-    b2x1, b2y1 = (bbox2[:, :2]).split(1, 1)
-    b2x2, b2y2 = (bbox2[:, :2] + (bbox2[:, 2:4])).split(1, 1)
-    
-    # box areas
-    areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
-    areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
-    
-    # intersections
-    min_x_of_max_x, max_x_of_min_x = torch.min(b1x2, b2x2), torch.max(b1x1, b2x1)
-    min_y_of_max_y, max_y_of_min_y = torch.min(b1y2, b2y2), torch.max(b1y1, b2y1)
-    intersection_width = (min_x_of_max_x - max_x_of_min_x).clamp(min=0)
-    intersection_height = (min_y_of_max_y - max_y_of_min_y).clamp(min=0)
-    intersections = intersection_width * intersection_height
-    
-    # unions        
-    unions = (areas1 + areas2) - intersections
-    
-    result = intersections / unions    
-    return result
-
-##
-# @brief find the best-matching anchor index by w/h ratio
-def find_best_ratio(anchors, bbox):    
-    """
-    @param anchors shape(numb_of_anchors, 2), it contains w, h
-    @param bbox shape(numb_of_bbox, 2), it contains w, h
-    @return best_match index of best match, shape(numb_of_bbox, 1)
-    """
-    b1 = np.divide(anchors[:, 0], anchors[:, 1])
-    b2 = np.divide(bbox[:, 0], bbox[:, 1])
-    similarities = np.abs(b1.reshape(-1, 1) - b2)
-    best_match = np.argmin(similarities, axis=0)
-    return best_match
-
-##
-# @brief loss class for yolo
-class YoloV2_LOSS(nn.Module):
-    """Yolo v2 loss"""
-    def __init__(self, num_classes, img_shape = (416, 416), outsize = (13, 13)):
-        super().__init__()
-        self.num_classes = num_classes
-        self.img_shape = img_shape
-        self.outsize = outsize
-        self.hook = dict()
-        
-        self.anchors = torch.FloatTensor(
-            [(1.3221, 1.73145),
-            (3.19275, 4.00944),
-            (5.05587, 8.09892),
-            (9.47112, 4.84053),
-            (11.2364, 10.0071)]
-        )
-                
-        self.mse = nn.MSELoss()
-        self.bbox_loss, self.iou_loss, self.cls_loss = None, None, None
-    
-    ##
-    # @brief function to track gradients of non-leaf variables.
-    def hook_variable(self, name, var):
-        """ Do not use this function when training. It is for debugging. """
-        self.hook[name] = var
-        self.hook[name].requires_grad_().retain_grad()
-
-    ##
-    # @brief function to print gradients of non-leaf variables.
-    def print_hook_variables(self):
-        """ Do not use this function when training. It is for debugging. """
-        for k, var in self.hook.items():
-            print("gradients of variable {}:".format(k))
-            batch, channel, height, width = var.grad.shape
-            for b in range(batch):
-                for c in range(channel):
-                    for h in range(height):
-                        for w in range(width):
-                            if torch.abs(var.grad[b, c, h, w]).item() >= 1e-3:
-                                print("(b: {}, c: {}, h: {}, w: {}) = {}"\
-                                      .format(b, c, h, w, var.grad[b, c, h, w]))
-            print("=" * 20)
-        
-    def forward(self, bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt):        
-        """
-        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-        @param iou_pred shape(batch_size, cell_h x cell_w, 1)
-        @param prob_pred shape(batch_size, cell_h x cell_w, num_anchors, num_classes)
-        @param bbox_gt shape(batch_size, num_bbox, 4), data range(0~1)
-        @param cls_gt shape(batch_size, num_bbox, 1)
-        @return loss shape(1,)
-        """
-        self.hook_variable("bbox_pred", bbox_pred)
-        bbox_pred = self.apply_anchors_to_bbox(bbox_pred)
-
-        bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask =\
-            self._build_target(bbox_pred, bbox_gt, cls_gt)
-        
-        self.bbox_loss = self.mse(bbox_pred * bbox_mask,
-                                        bbox_built * bbox_mask)
-        self.iou_loss = self.mse(iou_pred * iou_mask,
-                                       iou_built * iou_mask)
-        self.cls_loss = self.mse(prob_pred * cls_mask,
-                                       cls_built * cls_mask)
-        
-        return self.bbox_loss * 5 + self.iou_loss + self.cls_loss
-        
-    def apply_anchors_to_bbox(self, bbox_pred):
-        """
-        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-        @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)    
-        """
-        anchor_w = self.anchors[:, 0].contiguous().view(-1, 1)
-        anchor_h = self.anchors[:, 1].contiguous().view(-1, 1)
-        bbox_pred_tmp = bbox_pred.clone()
-        bbox_pred_tmp[:, :, :, 2:3] = torch.sqrt(bbox_pred[:, :, :, 2:3] * anchor_w)
-        bbox_pred_tmp[:, :, :, 3:4] = torch.sqrt(bbox_pred[:, :, :, 3:4] * anchor_h)
-        return bbox_pred_tmp
-    
-    def _build_target(self, bbox_pred, bbox_gt, cls_gt):
-        """
-        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-        @param bbox_gt shape(batch_size, num_bbox, 4)
-        @param cls_gt shape(batch_size, num_bbox, 1)
-        @return tuple of (bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask)
-        """    
-        bbox_built, bbox_mask = [], []
-        iou_built, iou_mask = [], []
-        cls_built, cls_mask = [], []
-        
-        batch_size = bbox_pred.shape[0]
-                
-        for i in range(batch_size):
-            _bbox_built, _iou_built, _cls_built,\
-                _bbox_mask, _iou_mask, _cls_mask =\
-                    self._make_target_per_sample(
-                        torch.FloatTensor(bbox_pred[i]),
-                        torch.FloatTensor(np.array(bbox_gt[i])),
-                        torch.LongTensor(cls_gt[i])
-                    )
-            
-            bbox_built.append(_bbox_built)
-            bbox_mask.append(_bbox_mask)
-            iou_built.append(_iou_built)
-            iou_mask.append(_iou_mask)
-            cls_built.append(_cls_built)
-            cls_mask.append(_cls_mask)
-
-        bbox_built = torch.stack(bbox_built)
-        bbox_mask = torch.stack(bbox_mask)
-        iou_built = torch.stack(iou_built)
-        iou_mask = torch.stack(iou_mask)
-        cls_built = torch.stack(cls_built)
-        cls_mask = torch.stack(cls_mask)
-                    
-        return bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask
-        
-    def _make_target_per_sample(self, _bbox_pred, _bbox_gt, _cls_gt):
-        """
-        @param _bbox_pred shape(cell_h x cell_w, num_anchors, 4)
-        @param _bbox_gt shape(num_bbox, 4)
-        @param _cls_gt shape(num_bbox,)
-        @return tuple of (_bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask)
-        """
-        hw, num_anchors, _  = _bbox_pred.shape
-        
-        # set result template
-        _bbox_built = torch.zeros((hw, num_anchors, 4))
-        _bbox_mask = torch.zeros((hw, num_anchors, 1))
-        
-        _iou_built = torch.zeros((hw, num_anchors, 1))
-        _iou_mask = torch.ones((hw, num_anchors, 1)) * 0.5
-        
-        _cls_built = torch.zeros((hw, num_anchors, self.num_classes))
-        _cls_mask = torch.zeros((hw, num_anchors, 1))
-                        
-        # find best anchors
-        _bbox_gt_wh = _bbox_gt.clone()[:, 2:]        
-        best_anchors = find_best_ratio(self.anchors, _bbox_gt_wh)
-        
-        # normalize x, y pos based on cell coordinates
-        cx = _bbox_gt[:, 0] * self.outsize[0]
-        cy = _bbox_gt[:, 1] * self.outsize[1]
-        # calculate cell pos and normalize x, y
-        cell_idx = np.floor(cy) * self.outsize[0] + np.floor(cx)
-        cell_idx = np.array(cell_idx, dtype=np.int16)
-        cx -= np.floor(cx)
-        cy -= np.floor(cy)
-                
-        # set bbox of gt
-        _bbox_built[cell_idx, best_anchors, 0] = cx 
-        _bbox_built[cell_idx, best_anchors, 1] = cy
-        _bbox_built[cell_idx, best_anchors, 2] = torch.sqrt(_bbox_gt[:, 2]) 
-        _bbox_built[cell_idx, best_anchors, 3] = torch.sqrt(_bbox_gt[:, 3]) 
-        _bbox_mask[cell_idx, best_anchors, :] = 1
-        
-        # set cls of gt       
-        _cls_built[cell_idx, best_anchors, _cls_gt] = 1
-        _cls_mask[cell_idx, best_anchors, :] = 1
-        
-        # set confidence score of gt
-        _iou_built = calculate_iou(_bbox_pred.reshape(-1, 4), _bbox_built.view(-1, 4)).detach()
-        _iou_built = _iou_built.view(hw, num_anchors, 1)
-        _iou_mask[cell_idx, best_anchors, :] = 1
-        
-        return _bbox_built, _iou_built, _cls_built,\
-                _bbox_mask, _iou_mask, _cls_mask  
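
The two helpers above can be exercised in isolation; a small sketch with
hand-checkable boxes (x, y, w, h format):

import numpy as np
import torch

a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
b = torch.tensor([[1.0, 1.0, 2.0, 2.0]])
print(calculate_iou(a, a))  # tensor([[1.]])
print(calculate_iou(a, b))  # tensor([[0.1429]]), i.e. 1 / (4 + 4 - 1)

# find_best_ratio matches each box to the anchor with the closest w/h ratio.
anchors = np.array([[1.3221, 1.73145], [11.2364, 10.0071]])
print(find_best_ratio(anchors, np.array([[1.0, 1.0]])))  # [1]
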
diff --git a/Applications/YOLO/jni/Android.mk b/Applications/YOLO/jni/Android.mk
deleted file mode 100644 (file)
index 9f0dfb7..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-# ndk path
-ifndef ANDROID_NDK
-$(error ANDROID_NDK is not defined!)
-endif
-
-ifndef NNTRAINER_ROOT
-NNTRAINER_ROOT := $(LOCAL_PATH)/../../..
-endif
-
-ML_API_COMMON_INCLUDES := ${NNTRAINER_ROOT}/ml_api_common/include
-NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
-       $(NNTRAINER_ROOT)/nntrainer/dataset \
-       $(NNTRAINER_ROOT)/nntrainer/models \
-       $(NNTRAINER_ROOT)/nntrainer/layers \
-       $(NNTRAINER_ROOT)/nntrainer/compiler \
-       $(NNTRAINER_ROOT)/nntrainer/graph \
-       $(NNTRAINER_ROOT)/nntrainer/optimizers \
-       $(NNTRAINER_ROOT)/nntrainer/tensor \
-       $(NNTRAINER_ROOT)/nntrainer/utils \
-       $(NNTRAINER_ROOT)/api \
-       $(NNTRAINER_ROOT)/api/ccapi/include \
-       ${ML_API_COMMON_INCLUDES}
-
-LOCAL_MODULE := nntrainer
-LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libnntrainer.so
-
-include $(PREBUILT_SHARED_LIBRARY)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := ccapi-nntrainer
-LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libccapi-nntrainer.so
-
-include $(PREBUILT_SHARED_LIBRARY)
-
-include $(CLEAR_VARS)
-
-LOCAL_ARM_NEON := true
-LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
-LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
-LOCAL_CXXFLAGS += -std=c++17 -frtti
-LOCAL_CFLAGS += -pthread -fexceptions -fopenmp
-LOCAL_LDFLAGS += -fexceptions
-LOCAL_MODULE_TAGS := optional
-LOCAL_ARM_MODE := arm
-LOCAL_MODULE := nntrainer_yolo
-LOCAL_LDLIBS := -llog -landroid -fopenmp
-
-LOCAL_SRC_FILES := main.cpp det_dataloader.cpp yolo_v2_loss.cpp reorg_layer.cpp
-LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
-
-LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)
-
-include $(BUILD_EXECUTABLE)
diff --git a/Applications/YOLO/jni/Application.mk b/Applications/YOLO/jni/Application.mk
deleted file mode 100644 (file)
index 659caaf..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-APP_ABI := arm64-v8a
-APP_STL := c++_shared
-APP_PLATFORM := android-29
diff --git a/Applications/YOLO/jni/det_dataloader.cpp b/Applications/YOLO/jni/det_dataloader.cpp
deleted file mode 100644 (file)
index b48d0da..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file   det_dataloader.cpp
- * @date   22 March 2023
- * @brief  dataloader for object detection dataset
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug    No known bugs except for NYI items
- */
-
-#include "det_dataloader.h"
-
-#include <cstring>
-#include <filesystem>
-#include <fstream>
-#include <nntrainer_error.h>
-#include <random>
-
-namespace nntrainer::util {
-
-// Only bmp image files are supported for now.
-DirDataLoader::DirDataLoader(const char *directory_, unsigned int max_num_label,
-                             unsigned int c, unsigned int w, unsigned int h,
-                             bool is_train_) :
-  max_num_label(max_num_label),
-  channel(c),
-  height(h),
-  width(w),
-  is_train(is_train_) {
-  dir_path.assign(directory_);
-
-  // set data list
-  std::filesystem::directory_iterator itr(dir_path + "images");
-  while (itr != std::filesystem::end(itr)) {
-    // get image file name
-    std::string img_file = itr->path().string();
-
-    // check if it is bmp image file
-    if (img_file.find(".bmp") == std::string::npos) {
-      itr++;
-      continue;
-    }
-
-    // set label file name
-    std::string label_file = img_file;
-    label_file.replace(label_file.find(".bmp"), 4, ".txt");
-    label_file.replace(label_file.find("/images"), 7, "/annotations");
-
-    // check if there is paired label file
-    if (!std::filesystem::exists(label_file)) {
-      itr++;
-      continue;
-    }
-
-    // set data list
-    data_list.push_back(make_pair(img_file, label_file));
-    itr++;
-  }
-
-  // set index and shuffle data
-  idxes = std::vector<unsigned int>(data_list.size());
-  std::iota(idxes.begin(), idxes.end(), 0);
-  if (is_train)
-    std::shuffle(idxes.begin(), idxes.end(), rng);
-
-  data_size = data_list.size();
-  count = 0;
-}
-
-void read_image(const std::string path, float *input, uint &width,
-                uint &height) {
-  FILE *f = fopen(path.c_str(), "rb");
-
-  if (f == nullptr)
-    throw std::invalid_argument("Cannot open file: " + path);
-
-  unsigned char info[54];
-  size_t s = fread(info, sizeof(unsigned char), 54, f);
-
-  unsigned int w = *(int *)&info[18];
-  unsigned int h = *(int *)&info[22];
-
-  if (w != width or h != height) {
-    fclose(f);
-    throw std::invalid_argument("the dimension of image file does not match" +
-                                std::to_string(s));
-  }
-
-  int row_padded = (width * 3 + 3) & (~3);
-  unsigned char *data = new unsigned char[row_padded];
-
-  for (uint i = 0; i < height; i++) {
-    s = fread(data, sizeof(unsigned char), row_padded, f);
-    for (uint j = 0; j < width; j++) {
-      input[height * (height - i - 1) + j] = (float)data[j * 3 + 2] / 255;
-      input[(height * width) + height * (height - i - 1) + j] =
-        (float)data[j * 3 + 1] / 255;
-      input[(height * width) * 2 + height * (height - i - 1) + j] =
-        (float)data[j * 3] / 255;
-    }
-  }
-
-  delete[] data;
-  fclose(f);
-}
-
-void DirDataLoader::next(float **input, float **label, bool *last) {
-  auto fill_one_sample = [this](float *input_, float *label_, int index) {
-    // set input data
-    std::string img_file = data_list[index].first;
-    read_image(img_file, input_, width, height);
-
-    // set label data
-    std::string label_file = data_list[index].second;
-    std::memset(label_, 0.0, 5 * sizeof(float) * max_num_label);
-
-    std::ifstream file(label_file);
-    std::string cur_line;
-
-    int line_idx = 0;
-    while (getline(file, cur_line)) {
-      std::stringstream ss(cur_line);
-      std::string cur_value;
-
-      int row_idx = 0;
-      while (getline(ss, cur_value, ' ')) {
-        if (row_idx == 0) {
-          label_[line_idx * 5 + 4] = std::stof(cur_value);
-        } else {
-          label_[line_idx * 5 + row_idx - 1] = std::stof(cur_value) / 416;
-        }
-        row_idx++;
-      }
-
-      line_idx++;
-    }
-
-    file.close();
-  };
-
-  fill_one_sample(*input, *label, idxes[count]);
-
-  count++;
-
-  if (count < data_size) {
-    *last = false;
-  } else {
-    *last = true;
-    count = 0;
-    std::shuffle(idxes.begin(), idxes.end(), rng);
-  }
-}
-
-} // namespace nntrainer::util
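
One detail of fill_one_sample above is easy to miss: each annotation line
"<class> <x> <y> <w> <h>" is written into the label buffer with the class id
moved to the end, as [x/416, y/416, w/416, h/416, class]. A Python re-statement
of that loop (fill_labels is a hypothetical helper, shown for illustration):

def fill_labels(lines, max_num_label):
    # Each object occupies 5 floats; unused slots stay zero.
    label = [0.0] * (5 * max_num_label)
    for line_idx, line in enumerate(lines):
        for row_idx, value in enumerate(line.split(" ")):
            if row_idx == 0:
                label[line_idx * 5 + 4] = float(value)           # class id
            else:
                label[line_idx * 5 + row_idx - 1] = float(value) / 416
    return label

print(fill_labels(["1 104 104 52 52"], 4)[:5])
# -> [0.25, 0.25, 0.125, 0.125, 1.0]
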
diff --git a/Applications/YOLO/jni/det_dataloader.h b/Applications/YOLO/jni/det_dataloader.h
deleted file mode 100644 (file)
index 468148d..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file   det_dataloader.h
- * @date   22 March 2023
- * @brief  dataloader for object detection dataset
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug    No known bugs except for NYI items
- */
-
-#include <random>
-#include <string>
-#include <tensor_dim.h>
-#include <vector>
-
-namespace nntrainer::util {
-
-using TensorDim = ml::train::TensorDim;
-
-/**
- * @brief user data object
- */
-class DirDataLoader {
-public:
-  /**
-   * @brief Construct a new Dir Data Loader object
-   */
-  DirDataLoader(const char *directory_, unsigned int max_num_label,
-                unsigned int c, unsigned int w, unsigned int h, bool is_train_);
-  /**
-   * @brief Destroy the Dir Data Loader object
-   */
-  ~DirDataLoader(){};
-
-  /**
-   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
-   */
-  void next(float **input, float **label, bool *last);
-
-  /**
-   * @brief getter for current file name
-   * @return current file name
-   */
-  std::string getCurFileName() { return cur_file_name; };
-
-  /**
-   * @brief setter for current file name
-   */
-  void setCurFileName(std::string s) { cur_file_name = s; };
-
-private:
-  std::string dir_path;
-  unsigned int data_size;
-  unsigned int max_num_label;
-  unsigned int channel;
-  unsigned int height;
-  unsigned int width;
-  bool is_train;
-
-  std::vector<std::pair<std::string, std::string>> data_list;
-  std::vector<unsigned int> idxes;
-  unsigned int count;
-  std::string cur_file_name;
-
-  // random number generator
-  std::mt19937 rng;
-};
-
-} // namespace nntrainer::util
diff --git a/Applications/YOLO/jni/main.cpp b/Applications/YOLO/jni/main.cpp
deleted file mode 100644 (file)
index bc3985a..0000000
+++ /dev/null
@@ -1,333 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file   main.cpp
- * @date   03 March 2023
- * @todo   replace backbone with the original darknet of yolo v2
- * @brief  application example for YOLO v2
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug    No known bugs except for NYI items
- */
-
-#include <array>
-#include <chrono>
-#include <ctime>
-#include <iostream>
-#include <memory>
-#include <sstream>
-#include <vector>
-
-#include <app_context.h>
-#include <det_dataloader.h>
-#include <layer.h>
-#include <model.h>
-#include <optimizer.h>
-
-#include "yolo_v2_loss.h"
-
-#include <reorg_layer.h>
-
-using LayerHandle = std::shared_ptr<ml::train::Layer>;
-using ModelHandle = std::unique_ptr<ml::train::Model>;
-using UserDataType = std::unique_ptr<nntrainer::util::DirDataLoader>;
-
-const unsigned int ANCHOR_NUMBER = 5;
-
-const unsigned int MAX_OBJECT_NUMBER = 4;
-const unsigned int CLASS_NUMBER = 4;
-const unsigned int GRID_HEIGHT_NUMBER = 13;
-const unsigned int GRID_WIDTH_NUMBER = 13;
-const unsigned int IMAGE_HEIGHT_SIZE = 416;
-const unsigned int IMAGE_WIDTH_SIZE = 416;
-const unsigned int BATCH_SIZE = 4;
-const unsigned int EPOCHS = 3;
-const char *TRAIN_DIR_PATH = "/TRAIN_DIR/";
-const char *VALIDATION_DIR_PATH = "/VALID_DIR/";
-// const std::string MODEL_INIT_BIN_PATH = "/home/user/MODEL_INIT_BIN_PATH.bin";
-
-int trainData_cb(float **input, float **label, bool *last, void *user_data) {
-  auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
-
-  data->next(input, label, last);
-  return 0;
-}
-
-int validData_cb(float **input, float **label, bool *last, void *user_data) {
-  auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
-
-  data->next(input, label, last);
-  return 0;
-}
-
-std::array<UserDataType, 2> createDetDataGenerator(const char *train_dir,
-                                                   const char *valid_dir,
-                                                   int max_num_label, int c,
-                                                   int h, int w) {
-  UserDataType train_data(new nntrainer::util::DirDataLoader(
-    train_dir, max_num_label, c, h, w, true));
-  UserDataType valid_data(new nntrainer::util::DirDataLoader(
-    valid_dir, max_num_label, c, h, w, false));
-
-  return {std::move(train_data), std::move(valid_data)};
-}
-
-/**
- * @brief make "key=value" from key and value
- *
- * @tparam T type of a value
- * @param key key
- * @param value value
- * @return std::string with "key=value"
- */
-template <typename T>
-static std::string withKey(const std::string &key, const T &value) {
-  std::stringstream ss;
-  ss << key << "=" << value;
-  return ss.str();
-}
-
-template <typename T>
-static std::string withKey(const std::string &key,
-                           std::initializer_list<T> value) {
-  if (std::empty(value)) {
-    throw std::invalid_argument("empty data cannot be converted");
-  }
-
-  std::stringstream ss;
-  ss << key << "=";
-
-  auto iter = value.begin();
-  for (; iter != value.end() - 1; ++iter) {
-    ss << *iter << ',';
-  }
-  ss << *iter;
-
-  return ss.str();
-}
-
-/**
- * @brief yolo block
- *
- * @param block_name name of the block
- * @param input_name name of the input
- * @param filters number of filters
- * @param kernel_size number of kernel_size
- * @param downsample downsample to halve the output size
- * @return std::vector<LayerHandle> vector of layers
- */
-std::vector<LayerHandle> yoloBlock(const std::string &block_name,
-                                   const std::string &input_name, int filters,
-                                   int kernel_size, bool downsample) {
-  using ml::train::createLayer;
-
-  auto scoped_name = [&block_name](const std::string &layer_name) {
-    return block_name + "/" + layer_name;
-  };
-  auto with_name = [&scoped_name](const std::string &layer_name) {
-    return withKey("name", scoped_name(layer_name));
-  };
-
-  auto createConv = [&with_name, filters](const std::string &name,
-                                          int kernel_size, int stride,
-                                          const std::string &padding,
-                                          const std::string &input_layer) {
-    std::vector<std::string> props{
-      with_name(name),
-      withKey("stride", {stride, stride}),
-      withKey("filters", filters),
-      withKey("kernel_size", {kernel_size, kernel_size}),
-      withKey("padding", padding),
-      withKey("input_layers", input_layer)};
-
-    return createLayer("conv2d", props);
-  };
-
-  /** construct basic layer **/
-  LayerHandle a1 = createConv("a1", kernel_size, 1, "same", input_name);
-
-  if (downsample) {
-    LayerHandle a2 = createLayer("batch_normalization",
-                                 {with_name("a2"), withKey("momentum", "0.9"),
-                                  withKey("activation", "leaky_relu")});
-
-    LayerHandle a3 = createLayer(
-      "pooling2d", {withKey("name", block_name), withKey("stride", {2, 2}),
-                    withKey("pooling", "max"), withKey("pool_size", {2, 2})});
-
-    return {a1, a2, a3};
-  } else {
-    LayerHandle a2 =
-      createLayer("batch_normalization",
-                  {withKey("name", block_name), withKey("momentum", "0.9"),
-                   withKey("activation", "leaky_relu")});
-
-    return {a1, a2};
-  }
-}
-
-/**
- * @brief Create yolo v2 light
- *
- * @return ModelHandle containing the full graph of yolo v2 light
- */
-ModelHandle YOLO() {
-  using ml::train::createLayer;
-
-  ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
-
-  std::vector<LayerHandle> layers;
-
-  layers.push_back(createLayer(
-    "input",
-    {withKey("name", "input0"),
-     withKey("input_shape", "3:" + std::to_string(IMAGE_HEIGHT_SIZE) + ":" +
-                              std::to_string(IMAGE_WIDTH_SIZE))}));
-
-  std::vector<std::vector<LayerHandle>> blocks;
-
-  blocks.push_back(yoloBlock("conv1", "input0", 32, 3, true));
-  blocks.push_back(yoloBlock("conv2", "conv1", 64, 3, true));
-  blocks.push_back(yoloBlock("conv3", "conv2", 128, 3, false));
-  blocks.push_back(yoloBlock("conv4", "conv3", 64, 1, false));
-  blocks.push_back(yoloBlock("conv5", "conv4", 128, 3, true));
-  blocks.push_back(yoloBlock("conv6", "conv5", 256, 3, false));
-  blocks.push_back(yoloBlock("conv7", "conv6", 128, 1, false));
-  blocks.push_back(yoloBlock("conv8", "conv7", 256, 3, true));
-  blocks.push_back(yoloBlock("conv9", "conv8", 512, 3, false));
-  blocks.push_back(yoloBlock("conv10", "conv9", 256, 1, false));
-  blocks.push_back(yoloBlock("conv11", "conv10", 512, 3, false));
-  blocks.push_back(yoloBlock("conv12", "conv11", 256, 1, false));
-  blocks.push_back(yoloBlock("conv13", "conv12", 512, 3, false));
-
-  blocks.push_back({createLayer(
-    "pooling2d", {withKey("name", "conv_a_pool"), withKey("stride", {2, 2}),
-                  withKey("pooling", "max"), withKey("pool_size", {2, 2}),
-                  withKey("input_layers", "conv13")})});
-  blocks.push_back(yoloBlock("conv_a1", "conv_a_pool", 1024, 3, false));
-  blocks.push_back(yoloBlock("conv_a2", "conv_a1", 512, 1, false));
-  blocks.push_back(yoloBlock("conv_a3", "conv_a2", 1024, 3, false));
-  blocks.push_back(yoloBlock("conv_a4", "conv_a3", 512, 1, false));
-  blocks.push_back(yoloBlock("conv_a5", "conv_a4", 1024, 3, false));
-  blocks.push_back(yoloBlock("conv_a6", "conv_a5", 1024, 3, false));
-  blocks.push_back(yoloBlock("conv_a7", "conv_a6", 1024, 3, false));
-
-  blocks.push_back(yoloBlock("conv_b", "conv13", 64, 1, false));
-
-  blocks.push_back(
-    {createLayer("reorg_layer", {withKey("name", "re_organization"),
-                                 withKey("input_layers", "conv_b")})});
-
-  blocks.push_back(
-    {createLayer("concat", {withKey("name", "concat"),
-                            withKey("input_layers", "conv_a7, re_organization"),
-                            withKey("axis", 1)})});
-
-  blocks.push_back(yoloBlock("conv_out1", "concat", 1024, 3, false));
-
-  blocks.push_back(
-    {createLayer("conv2d", {
-                             withKey("name", "conv_out2"),
-                             withKey("filters", 5 * (5 + CLASS_NUMBER)),
-                             withKey("kernel_size", {1, 1}),
-                             withKey("stride", {1, 1}),
-                             withKey("padding", "same"),
-                             withKey("input_layers", "conv_out1"),
-                           })});
-
-  for (auto &block : blocks) {
-    layers.insert(layers.end(), block.begin(), block.end());
-  }
-
-  layers.push_back(createLayer("permute", {
-                                            withKey("name", "permute"),
-                                            withKey("direction", {2, 3, 1}),
-                                          }));
-
-  layers.push_back(createLayer(
-    "reshape",
-    {
-      withKey("name", "reshape"),
-      withKey("target_shape",
-              std::to_string(GRID_HEIGHT_NUMBER * GRID_WIDTH_NUMBER) + ":" +
-                std::to_string(ANCHOR_NUMBER) + ":" +
-                std::to_string(5 + CLASS_NUMBER)),
-    }));
-
-  layers.push_back(createLayer(
-    "yolo_v2_loss", {
-                      withKey("name", "yolo_v2_loss"),
-                      withKey("max_object_number", MAX_OBJECT_NUMBER),
-                      withKey("class_number", CLASS_NUMBER),
-                      withKey("grid_height_number", GRID_HEIGHT_NUMBER),
-                      withKey("grid_width_number", GRID_WIDTH_NUMBER),
-                    }));
-
-  for (auto &layer : layers) {
-    model->addLayer(layer);
-  }
-
-  return model;
-}
-
-int main(int argc, char *argv[]) {
-  // print start time
-  auto start = std::chrono::system_clock::now();
-  std::time_t start_time = std::chrono::system_clock::to_time_t(start);
-  std::cout << "started computation at " << std::ctime(&start_time)
-            << std::endl;
-
-  // set training config and print it
-  std::cout << "batch_size: " << BATCH_SIZE << " epochs: " << EPOCHS
-            << std::endl;
-
-  try {
-    // create YOLO v2 model
-    ModelHandle model = YOLO();
-    model->setProperty({withKey("batch_size", BATCH_SIZE),
-                        withKey("epochs", EPOCHS),
-                        withKey("save_path", "yolov2.bin")});
-
-    // create optimizer
-    auto optimizer = ml::train::createOptimizer(
-      "adam", {"learning_rate=0.001", "epsilon=1e-8", "torch_ref=true"});
-    model->setOptimizer(std::move(optimizer));
-
-    // compile and initialize model
-    model->compile();
-    model->initialize();
-    model->save("./yolov2.ini", ml::train::ModelFormat::MODEL_FORMAT_INI);
-    // model->load(MODEL_INIT_BIN_PATH);
-
-    // create train and validation data
-    std::array<UserDataType, 2> user_datas;
-    user_datas = createDetDataGenerator(TRAIN_DIR_PATH, VALIDATION_DIR_PATH,
-                                        MAX_OBJECT_NUMBER, 3, IMAGE_HEIGHT_SIZE,
-                                        IMAGE_WIDTH_SIZE);
-    auto &[train_user_data, valid_user_data] = user_datas;
-
-    auto dataset_train = ml::train::createDataset(
-      ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
-    auto dataset_valid = ml::train::createDataset(
-      ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());
-
-    model->setDataset(ml::train::DatasetModeType::MODE_TRAIN,
-                      std::move(dataset_train));
-    model->setDataset(ml::train::DatasetModeType::MODE_VALID,
-                      std::move(dataset_valid));
-
-    model->train();
-  } catch (const std::exception &e) {
-    std::cerr << "uncaught error while running! details: " << e.what()
-              << std::endl;
-    return EXIT_FAILURE;
-  }
-
-  // print end time and duration
-  auto end = std::chrono::system_clock::now();
-  std::chrono::duration<double> elapsed_seconds = end - start;
-  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
-  std::cout << "finished computation at " << std::ctime(&end_time)
-            << "elapsed time: " << elapsed_seconds.count() << "s\n";
-}
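
The withKey helper above only renders nntrainer property strings ("key=value",
with list values joined by commas). A tiny Python sketch of the same formatting,
for illustration only:

def with_key(key, value):
    # Mirrors the C++ withKey helper used throughout main.cpp.
    if isinstance(value, (list, tuple)):
        value = ",".join(str(v) for v in value)
    return f"{key}={value}"

print(with_key("filters", 32))     # filters=32
print(with_key("stride", (2, 2)))  # stride=2,2
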
diff --git a/Applications/YOLO/jni/meson.build b/Applications/YOLO/jni/meson.build
deleted file mode 100644 (file)
index 310b08d..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-# build command for lib_yolov2_loss_layer.so
-yolov2_loss_src = files('yolo_v2_loss.cpp')
-yolov2_loss_layer = shared_library('yolov2_loss_layer',
-  yolov2_loss_src,
-  dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
-  include_directories: include_directories('./'),
-  install: true,
-  install_dir: nntrainer_libdir/'nntrainer'/'layers',
-  cpp_args: '-DPLUGGABLE'
-)
-yolov2_loss_layer_dep = declare_dependency(
-  link_with: yolov2_loss_layer,
-  include_directories: include_directories('./')
-)
-
-# build command for lib_reorg_layer.so
-layer_reorg_src = files('reorg_layer.cpp')
-reorg_layer = shared_library('reorg_layer',
-  layer_reorg_src,
-  dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
-  include_directories: include_directories('./'),
-  install: true,
-  install_dir: nntrainer_libdir/'nntrainer'/'layers',
-  cpp_args: '-DPLUGGABLE'
-)
-reorg_layer_dep = declare_dependency(
-  link_with: reorg_layer,
-  include_directories: include_directories('./')
-)
-
-yolo_sources = [
-  'main.cpp',
-  'det_dataloader.cpp',
-  'yolo_v2_loss.cpp',
-  'reorg_layer.cpp',
-]
-
-yolo_dependencies = [app_utils_dep,
-  nntrainer_dep,
-  nntrainer_ccapi_dep,
-  yolov2_loss_layer_dep,
-  reorg_layer_dep
-]
-
-e = executable('nntrainer_yolov2',
-  yolo_sources,
-  include_directories: [include_directories('.')],
-  dependencies: yolo_dependencies,
-  install: get_option('install-app'),
-  install_dir: application_install_dir
-)
diff --git a/Applications/YOLO/jni/reorg_layer.cpp b/Applications/YOLO/jni/reorg_layer.cpp
deleted file mode 100644 (file)
index e05be1e..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file   reorg_layer.cpp
- * @date   06 April 2023
- * @todo   support in-place operation: the channel, height, and width
- * coordinates can be derived from the buffer index, then the reorganizePos
- * and restorePos functions can be used
- * @brief  This file contains the re-organization layer for yolo v2
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug    No known bugs except for NYI items
- */
-
-#include <iostream>
-
-#include "reorg_layer.h"
-
-namespace custom {
-
-static constexpr size_t SINGLE_INOUT_IDX = 0;
-
-namespace ReorgOp {
-
-/**
- * @brief re-organize tensor
- * @return output coordinate of reorganized tensor
- */
-int reorg(int b, int c, int h, int w, int batch, int channel, int height,
-          int width) {
-  int out_c = channel / 4;
-  int c2 = c % out_c;
-  int offset = c / out_c;
-  int w2 = w * 2 + offset % 2;
-  int h2 = h * 2 + offset / 2;
-  int out_index = w2 + width * 2 * (h2 + height * 2 * (c2 + out_c * b));
-  return out_index;
-}
-} // namespace ReorgOp
-
-void ReorgLayer::finalize(nntrainer::InitLayerContext &context) {
-  std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
-
-  for (unsigned int i = 0; i < dim.size(); ++i) {
-    if (dim[i].getDataLen() == 0) {
-      throw std::invalid_argument("Input dimension is not set");
-    } else {
-      dim[i].channel(dim[i].channel() * 4);
-      dim[i].height(dim[i].height() / 2);
-      dim[i].width(dim[i].width() / 2);
-    }
-  }
-
-  context.setOutputDimensions(dim);
-}
-
-void ReorgLayer::forwarding(nntrainer::RunLayerContext &context,
-                            bool training) {
-  nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
-  nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
-
-  for (int b = 0; b < (int)in.batch(); b++) {
-    for (int c = 0; c < (int)in.channel(); c++) {
-      for (int h = 0; h < (int)in.height(); h++) {
-        for (int w = 0; w < (int)in.width(); w++) {
-          int out_idx =
-            w + in.width() * (h + in.height() * (c + in.channel() * b));
-          int in_idx = ReorgOp::reorg(b, c, h, w, in.batch(), in.channel(),
-                                      in.height(), in.width());
-          out.getData()[out_idx] = in.getValue(in_idx);
-        }
-      }
-    }
-  }
-}
-
-void ReorgLayer::calcDerivative(nntrainer::RunLayerContext &context) {
-  const nntrainer::Tensor &derivative_ =
-    context.getIncomingDerivative(SINGLE_INOUT_IDX);
-
-  nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
-
-  for (int b = 0; b < (int)derivative_.batch(); b++) {
-    for (int c = 0; c < (int)derivative_.channel(); c++) {
-      for (int h = 0; h < (int)derivative_.height(); h++) {
-        for (int w = 0; w < (int)derivative_.width(); w++) {
-          int in_idx =
-            w + derivative_.width() *
-                  (h + derivative_.height() * (c + derivative_.channel() * b));
-          int out_idx = ReorgOp::reorg(
-            b, c, h, w, derivative_.batch(), derivative_.channel(),
-            derivative_.height(), derivative_.width());
-          dx.getData()[out_idx] = derivative_.getValue(in_idx);
-        }
-      }
-    }
-  }
-}
-
-#ifdef PLUGGABLE
-
-nntrainer::Layer *create_reorg_layer() {
-  auto layer = new ReorgLayer();
-  std::cout << "reorg created\n";
-  return layer;
-}
-
-void destroy_reorg_layer(nntrainer::Layer *layer) {
-  std::cout << "reorg deleted\n";
-  delete layer;
-}
-
-extern "C" {
-nntrainer::LayerPluggable ml_train_layer_pluggable{create_reorg_layer,
-                                                   destroy_reorg_layer};
-}
-
-#endif
-
-} // namespace custom
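
ReorgLayer above implements the re-organization that the PyTorch reference
expresses with view/permute in yolo.py: a (b, c, h, w) feature map becomes
(b, 4c, h/2, w/2), matching ReorgLayer::finalize. A shape-level sketch of that
equivalence (the exact element interleave is defined by ReorgOp::reorg):

import torch

def reorg_torch(x):
    # Same view/permute sequence as the deleted yolo.py forward pass.
    b, c, h, w = x.size()
    x = x.view(b, c // 4, h, 2, w, 2).contiguous()
    x = x.permute(0, 3, 5, 1, 2, 4).contiguous()
    return x.view(b, -1, h // 2, w // 2)

out = reorg_torch(torch.randn(1, 64, 26, 26))
assert out.shape == (1, 256, 13, 13)
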
diff --git a/Applications/YOLO/jni/reorg_layer.h b/Applications/YOLO/jni/reorg_layer.h
deleted file mode 100644 (file)
index e13cc36..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file   reorg_layer.h
- * @date   4 April 2023
- * @brief  This file contains the re-organization layer for yolo v2
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug    No known bugs except for NYI items
- *
- */
-
-#ifndef __REORGANIZATION_LAYER_H__
-#define __REORGANIZATION_LAYER_H__
-
-#include <layer_context.h>
-#include <layer_devel.h>
-#include <node_exporter.h>
-#include <utility>
-
-namespace custom {
-
-/**
- * @brief A re-organization layer for yolo v2.
- *
- */
-class ReorgLayer final : public nntrainer::Layer {
-public:
-  /**
-   * @brief Construct a new Reorg Layer object
-   *
-   */
-  ReorgLayer() : Layer() {}
-
-  /**
-   * @brief Destroy the Reorg Layer object
-   *
-   */
-  ~ReorgLayer() {}
-
-  /**
-   * @copydoc Layer::finalize(InitLayerContext &context)
-   */
-  void finalize(nntrainer::InitLayerContext &context) override;
-
-  /**
-   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
-   */
-  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
-
-  /**
-   * @copydoc Layer::calcDerivative(RunLayerContext &context)
-   */
-  void calcDerivative(nntrainer::RunLayerContext &context) override;
-
-  /**
-   * @copydoc bool supportBackwarding() const
-   */
-  bool supportBackwarding() const override { return true; };
-
-  /**
-   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
-   */
-  void exportTo(nntrainer::Exporter &exporter,
-                const ml::train::ExportMethods &method) const override{};
-
-  /**
-   * @copydoc Layer::getType()
-   */
-  const std::string getType() const override { return ReorgLayer::type; };
-
-  /**
-   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
-   */
-  void setProperty(const std::vector<std::string> &values) override{};
-
-  inline static const std::string type = "reorg_layer";
-};
-
-} // namespace custom
-
-#endif /* __REORGANIZATION_LAYER_H__ */
diff --git a/Applications/YOLO/jni/yolo_v2_loss.cpp b/Applications/YOLO/jni/yolo_v2_loss.cpp
deleted file mode 100644 (file)
index 8421dd2..0000000
+++ /dev/null
@@ -1,949 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
- *
- * @file   yolo_v2_loss.cpp
- * @date   07 March 2023
- * @brief  This file contains the yolo v2 loss layer
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Hyeonseok Lee <hs89.lee@samsung.com>
- * @bug    No known bugs except for NYI items
- *
- */
-
-#include "yolo_v2_loss.h"
-#include <nntrainer_log.h>
-
-namespace custom {
-
-static constexpr size_t SINGLE_INOUT_IDX = 0;
-
-enum YoloV2LossParams {
-  bbox_x_pred,
-  bbox_y_pred,
-  bbox_w_pred,
-  bbox_h_pred,
-  confidence_pred,
-  class_pred,
-  bbox_w_pred_anchor,
-  bbox_h_pred_anchor,
-  bbox_x_gt,
-  bbox_y_gt,
-  bbox_w_gt,
-  bbox_h_gt,
-  confidence_gt,
-  class_gt,
-  bbox_class_mask,
-  iou_mask,
-  bbox1_width,
-  bbox1_height,
-  is_xy_min_max,
-  intersection_width,
-  intersection_height,
-  unions,
-};
-
-namespace props {
-MaxObjectNumber::MaxObjectNumber(const unsigned &value) { set(value); }
-ClassNumber::ClassNumber(const unsigned &value) { set(value); }
-GridHeightNumber::GridHeightNumber(const unsigned &value) { set(value); }
-GridWidthNumber::GridWidthNumber(const unsigned &value) { set(value); }
-} // namespace props
-
-/**
- * @brief mse
- *
- * @param pred prediction
- * @param ground_truth ground truth
- * @return float loss
- * @todo make loss behave like acti_func
- */
-float mse(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth) {
-  nntrainer::Tensor residual;
-  pred.subtract(ground_truth, residual);
-
-  float l2norm = residual.l2norm();
-  l2norm *= l2norm / residual.size();
-
-  return l2norm;
-}
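For reference: dividing the squared L2 norm of the residual by its element count is exactly the mean squared error. A minimal NumPy sketch of that identity (illustrative values, not part of this diff):

import numpy as np

pred = np.array([0.2, 0.8, 0.5], dtype=np.float32)
gt = np.array([0.0, 1.0, 0.5], dtype=np.float32)

residual = pred - gt
l2norm = np.linalg.norm(residual)  # ||pred - gt||_2
assert np.isclose(l2norm * l2norm / residual.size, np.mean(residual**2))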
-
-/**
- * @brief backwarding of mse
- *
- * @param pred prediction
- * @param ground_truth ground truth
- * @param outgoing_derivative outgoing derivative
- */
-void msePrime(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth,
-              nntrainer::Tensor &outgoing_derivative) {
-  pred.subtract(ground_truth, outgoing_derivative);
-  float divider = ((float)pred.size()) / 2;
-  if (outgoing_derivative.divide_i(divider) != ML_ERROR_NONE) {
-    throw std::runtime_error(
-      "[YoloV2LossLayer::calcDerivative] Error when calculating loss");
-  }
-}
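Because the divider is size/2, the computed gradient is 2 * (pred - gt) / n, which is exactly the derivative of mean((pred - gt)^2) with respect to pred. A quick numerical check of this, as a sketch with toy values:

import numpy as np

pred = np.array([0.2, 0.8, 0.5])
gt = np.array([0.0, 1.0, 0.5])
n = pred.size

analytic = (pred - gt) / (n / 2.0)  # what msePrime computes

# central-difference check of d mse / d pred
eps = 1e-5
numeric = np.zeros(n)
for i in range(n):
    hi, lo = pred.copy(), pred.copy()
    hi[i] += eps
    lo[i] -= eps
    numeric[i] = (np.mean((hi - gt) ** 2) - np.mean((lo - gt) ** 2)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-6)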
-
-/**
- * @brief calculate iou
- *
- * @param bbox1_x1 bbox1_x1
- * @param bbox1_y1 bbox1_y1
- * @param bbox1_w bbox1_w
- * @param bbox1_h bbox1_h
- * @param bbox2_x1 bbox2_x1
- * @param bbox2_y1 bbox2_y1
- * @param bbox2_w bbox2_w
- * @param bbox2_h bbox2_h
- * @param[out] bbox1_width bbox1 width
- * @param[out] bbox1_height bbox1 height
- * @param[out] is_xy_min_max For x1, y1 this value is 1 if x1 > x2, y1 > y2, and
- * for x2, y2 this value is 1 if x2 < x1, y2 < y1; 0 otherwise.
- * @param[out] intersection_width intersection width
- * @param[out] intersection_height intersection height
- * @param[out] unions unions
- * @return nntrainer::Tensor iou
- */
-nntrainer::Tensor
-calc_iou(nntrainer::Tensor &bbox1_x1, nntrainer::Tensor &bbox1_y1,
-         nntrainer::Tensor &bbox1_w, nntrainer::Tensor &bbox1_h,
-         nntrainer::Tensor &bbox2_x1, nntrainer::Tensor &bbox2_y1,
-         nntrainer::Tensor &bbox2_w, nntrainer::Tensor &bbox2_h,
-         nntrainer::Tensor &bbox1_width, nntrainer::Tensor &bbox1_height,
-         nntrainer::Tensor &is_xy_min_max,
-         nntrainer::Tensor &intersection_width,
-         nntrainer::Tensor &intersection_height, nntrainer::Tensor &unions) {
-  nntrainer::Tensor bbox1_x2 = bbox1_x1.add(bbox1_w);
-  nntrainer::Tensor bbox1_y2 = bbox1_y1.add(bbox1_h);
-  nntrainer::Tensor bbox2_x2 = bbox2_x1.add(bbox2_w);
-  nntrainer::Tensor bbox2_y2 = bbox2_y1.add(bbox2_h);
-
-  bbox1_x2.subtract(bbox1_x1, bbox1_width);
-  bbox1_y2.subtract(bbox1_y1, bbox1_height);
-  nntrainer::Tensor bbox1 = bbox1_width.multiply(bbox1_height);
-
-  nntrainer::Tensor bbox2_width = bbox2_x2.subtract(bbox2_x1);
-  nntrainer::Tensor bbox2_height = bbox2_y2.subtract(bbox2_y1);
-  nntrainer::Tensor bbox2 = bbox2_width.multiply(bbox2_height);
-
-  auto min_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
-                      nntrainer::Tensor &intersection_xy) {
-    std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
-                   bbox2_xy.getData(), intersection_xy.getData(),
-                   [](float x1, float x2) { return std::min(x1, x2); });
-  };
-  auto max_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
-                      nntrainer::Tensor &intersection_xy) {
-    std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
-                   bbox2_xy.getData(), intersection_xy.getData(),
-                   [](float x1, float x2) { return std::max(x1, x2); });
-  };
-
-  nntrainer::Tensor intersection_x1(bbox1_x1.getDim());
-  nntrainer::Tensor intersection_x2(bbox1_x1.getDim());
-  nntrainer::Tensor intersection_y1(bbox1_y1.getDim());
-  nntrainer::Tensor intersection_y2(bbox1_y1.getDim());
-  max_func(bbox1_x1, bbox2_x1, intersection_x1);
-  min_func(bbox1_x2, bbox2_x2, intersection_x2);
-  max_func(bbox1_y1, bbox2_y1, intersection_y1);
-  min_func(bbox1_y2, bbox2_y2, intersection_y2);
-
-  auto is_min_max_func = [&](nntrainer::Tensor &xy,
-                             nntrainer::Tensor &intersection,
-                             nntrainer::Tensor &is_min_max) {
-    std::transform(xy.getData(), xy.getData() + xy.size(),
-                   intersection.getData(), is_min_max.getData(),
-                   [](float x, float m) {
-                     return nntrainer::absFloat(x - m) < 1e-4 ? 1.0 : 0.0;
-                   });
-  };
-
-  nntrainer::Tensor is_bbox1_x1_max(bbox1_x1.getDim());
-  nntrainer::Tensor is_bbox1_y1_max(bbox1_x1.getDim());
-  nntrainer::Tensor is_bbox1_x2_min(bbox1_x1.getDim());
-  nntrainer::Tensor is_bbox1_y2_min(bbox1_x1.getDim());
-  is_min_max_func(bbox1_x1, intersection_x1, is_bbox1_x1_max);
-  is_min_max_func(bbox1_y1, intersection_y1, is_bbox1_y1_max);
-  is_min_max_func(bbox1_x2, intersection_x2, is_bbox1_x2_min);
-  is_min_max_func(bbox1_y2, intersection_y2, is_bbox1_y2_min);
-
-  nntrainer::Tensor is_bbox_min_max = nntrainer::Tensor::cat(
-    {is_bbox1_x1_max, is_bbox1_y1_max, is_bbox1_x2_min, is_bbox1_y2_min}, 3);
-  is_xy_min_max.copyData(is_bbox_min_max);
-
-  intersection_x2.subtract(intersection_x1, intersection_width);
-
-  auto type_intersection_width = intersection_width.getDataType();
-  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
-    intersection_width.apply_i<float>(nntrainer::ActiFunc::relu<float>);
-  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    intersection_width.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  intersection_y2.subtract(intersection_y1, intersection_height);
-
-  auto type_intersection_height = intersection_height.getDataType();
-  if (type_intersection_height == ml::train::TensorDim::DataType::FP32) {
-    intersection_height.apply_i<float>(nntrainer::ActiFunc::relu<float>);
-  } else if (type_intersection_height == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    intersection_height.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  nntrainer::Tensor intersection =
-    intersection_width.multiply(intersection_height);
-  bbox1.add(bbox2, unions);
-  unions.subtract_i(intersection);
-
-  return intersection.divide(unions);
-}
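The boxes here are in (x1, y1, w, h) form, so x2 = x1 + w, and relu plays the role of max(0, .) when the boxes do not overlap. A scalar sketch of the same arithmetic:

def iou_xywh(b1, b2):
    # boxes are (x1, y1, w, h); corner form is (x1, y1, x1 + w, y1 + h)
    ix1 = max(b1[0], b2[0])
    iy1 = max(b1[1], b2[1])
    ix2 = min(b1[0] + b1[2], b2[0] + b2[2])
    iy2 = min(b1[1] + b1[3], b2[1] + b2[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)  # relu clamp
    union = b1[2] * b1[3] + b2[2] * b2[3] - inter
    return inter / union

print(iou_xywh((0, 0, 2, 2), (1, 1, 2, 2)))  # 1/7 ~ 0.1429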
-
-/**
- * @brief calculate iou gradient
- * @details Let bbox_pred be x, the intersection f(x), the union g(x), and the
- * iou y. Then y = f(x)/g(x), with g(x) = bbox1 + bbox2 - f(x). The partial
- * derivative of y with respect to x is (f'(x)g(x) - f(x)g'(x))/(g(x)^2), and
- * the partial derivative of g(x) with respect to x is bbox1'(x) - f'(x).
- * @param confidence_gt_grad incoming derivative for iou
- * @param bbox1_width bbox1_width
- * @param bbox1_height bbox1_height
- * @param is_xy_min_max For x1, y1 this value is 1 if x1 > x2, y1 > y2, and for
- * x2, y2 this value is 1 if x2 < x1, y2 < y1; 0 otherwise.
- * @param intersection_width intersection width
- * @param intersection_height intersection height
- * @param unions unions
- * @return std::vector<nntrainer::Tensor> iou_grad
- */
-std::vector<nntrainer::Tensor> calc_iou_grad(
-  nntrainer::Tensor &confidence_gt_grad, nntrainer::Tensor &bbox1_width,
-  nntrainer::Tensor &bbox1_height, nntrainer::Tensor &is_xy_min_max,
-  nntrainer::Tensor &intersection_width, nntrainer::Tensor &intersection_height,
-  nntrainer::Tensor &unions) {
-  nntrainer::Tensor intersection =
-    intersection_width.multiply(intersection_height);
-
-  // 1. calculate intersection local gradient [f'(x)]
-  nntrainer::Tensor intersection_width_relu_prime;
-  nntrainer::Tensor intersection_height_relu_prime;
-  auto type_intersection_width = intersection_width.getDataType();
-  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
-    intersection_width_relu_prime =
-      intersection_width.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
-    intersection_height_relu_prime =
-      intersection_height.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
-  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    intersection_width_relu_prime =
-      intersection_width.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
-    intersection_height_relu_prime =
-      intersection_height.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  nntrainer::Tensor intersection_x2_local_grad =
-    intersection_width_relu_prime.multiply(intersection_height);
-  nntrainer::Tensor intersection_y2_local_grad =
-    intersection_height_relu_prime.multiply(intersection_width);
-  nntrainer::Tensor intersection_x1_local_grad =
-    intersection_x2_local_grad.multiply(-1.0);
-  nntrainer::Tensor intersection_y1_local_grad =
-    intersection_y2_local_grad.multiply(-1.0);
-
-  nntrainer::Tensor intersection_local_grad = nntrainer::Tensor::cat(
-    {intersection_x1_local_grad, intersection_y1_local_grad,
-     intersection_x2_local_grad, intersection_y2_local_grad},
-    3);
-  intersection_local_grad.multiply_i(is_xy_min_max);
-
-  // 2. calculate union local gradient [g'(x)]
-  nntrainer::Tensor bbox1_x1_grad = bbox1_height.multiply(-1.0);
-  nntrainer::Tensor bbox1_y1_grad = bbox1_width.multiply(-1.0);
-  nntrainer::Tensor bbox1_x2_grad = bbox1_height;
-  nntrainer::Tensor bbox1_y2_grad = bbox1_width;
-  nntrainer::Tensor bbox1_grad = nntrainer::Tensor::cat(
-    {bbox1_x1_grad, bbox1_y1_grad, bbox1_x2_grad, bbox1_y2_grad}, 3);
-
-  nntrainer::Tensor unions_local_grad =
-    bbox1_grad.subtract(intersection_local_grad);
-
-  // 3. calculate iou local gradient [(f'(x)g(x) - f(x)g'(x))/(g(x)^2)]
-  nntrainer::Tensor lhs = intersection_local_grad.multiply(unions);
-  nntrainer::Tensor rhs = unions_local_grad.multiply(intersection);
-  nntrainer::Tensor iou_grad = lhs.subtract(rhs);
-  iou_grad.divide_i(unions);
-  iou_grad.divide_i(unions);
-
-  // 4. multiply with incoming derivative
-  iou_grad.multiply_i(confidence_gt_grad);
-
-  auto split_iou_grad = iou_grad.split({1, 1, 1, 1}, 3);
-  std::vector<nntrainer::Tensor> ret = {
-    split_iou_grad[0].add(split_iou_grad[2]),
-    split_iou_grad[1].add(split_iou_grad[3]), split_iou_grad[2],
-    split_iou_grad[3]};
-  return ret;
-}
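In LaTeX, the relations implemented by the numbered steps above are

  y = \frac{f(x)}{g(x)}, \qquad g(x) = b_1(x) + b_2 - f(x),

  \frac{\partial y}{\partial x} = \frac{f'(x)\,g(x) - f(x)\,g'(x)}{g(x)^2}, \qquad g'(x) = b_1'(x) - f'(x),

where b_2, the ground-truth box area, is constant with respect to the prediction.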
-
-YoloV2LossLayer::YoloV2LossLayer() :
-  anchors_w({1, 1, NUM_ANCHOR, 1}, anchors_w_buf),
-  anchors_h({1, 1, NUM_ANCHOR, 1}, anchors_h_buf),
-  sigmoid(nntrainer::ActivationType::ACT_SIGMOID, true),
-  softmax(nntrainer::ActivationType::ACT_SOFTMAX, true),
-  yolo_v2_loss_props(props::MaxObjectNumber(), props::ClassNumber(),
-                     props::GridHeightNumber(), props::GridWidthNumber()) {
-  anchors_ratio = anchors_w.divide(anchors_h);
-  wt_idx.fill(std::numeric_limits<unsigned>::max());
-}
-
-void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) {
-  nntrainer::TensorDim input_dim =
-    context.getInputDimensions()[SINGLE_INOUT_IDX];
-  const unsigned int batch_size = input_dim.batch();
-  const unsigned int class_number =
-    std::get<props::ClassNumber>(yolo_v2_loss_props).get();
-  const unsigned int grid_height_number =
-    std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
-  const unsigned int grid_width_number =
-    std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
-  const unsigned int max_object_number =
-    std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
-  nntrainer::TensorDim label_dim(batch_size, 1, max_object_number, 5);
-  context.setOutputDimensions({label_dim});
-
-  nntrainer::TensorDim bbox_x_pred_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_x_pred] = context.requestTensor(
-    bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_y_pred_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_y_pred] = context.requestTensor(
-    bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_w_pred_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_w_pred] = context.requestTensor(
-    bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_h_pred_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_h_pred] = context.requestTensor(
-    bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim confidence_pred_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::confidence_pred] =
-    context.requestTensor(confidence_pred_dim, "confidence_pred",
-                          nntrainer::Tensor::Initializer::NONE, true,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim class_pred_dim(batch_size,
-                                      grid_height_number * grid_width_number,
-                                      NUM_ANCHOR, class_number);
-  wt_idx[YoloV2LossParams::class_pred] = context.requestTensor(
-    class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_w_pred_anchor_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_w_pred_anchor] =
-    context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor",
-                          nntrainer::Tensor::Initializer::NONE, false,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_h_pred_anchor_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_h_pred_anchor] =
-    context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor",
-                          nntrainer::Tensor::Initializer::NONE, false,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_x_gt_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_x_gt] = context.requestTensor(
-    bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_y_gt_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_y_gt] = context.requestTensor(
-    bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_w_gt_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_w_gt] = context.requestTensor(
-    bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_h_gt_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_h_gt] = context.requestTensor(
-    bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim confidence_gt_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::confidence_gt] = context.requestTensor(
-    confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE,
-    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim class_gt_dim(batch_size,
-                                    grid_height_number * grid_width_number,
-                                    NUM_ANCHOR, class_number);
-  wt_idx[YoloV2LossParams::class_gt] = context.requestTensor(
-    class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox_class_mask_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox_class_mask] =
-    context.requestTensor(bbox_class_mask_dim, "bbox_class_mask",
-                          nntrainer::Tensor::Initializer::NONE, false,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim iou_mask_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::iou_mask] = context.requestTensor(
-    iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox1_width_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox1_width] = context.requestTensor(
-    bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim bbox1_height_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::bbox1_height] = context.requestTensor(
-    bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE,
-    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim is_xy_min_max_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4);
-  wt_idx[YoloV2LossParams::is_xy_min_max] = context.requestTensor(
-    is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE,
-    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim intersection_width_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::intersection_width] =
-    context.requestTensor(intersection_width_dim, "intersection_width",
-                          nntrainer::Tensor::Initializer::NONE, false,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim intersection_height_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::intersection_height] =
-    context.requestTensor(intersection_height_dim, "intersection_height",
-                          nntrainer::Tensor::Initializer::NONE, false,
-                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
-  nntrainer::TensorDim unions_dim(
-    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
-  wt_idx[YoloV2LossParams::unions] = context.requestTensor(
-    unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false,
-    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-}
-
-void YoloV2LossLayer::forwarding(nntrainer::RunLayerContext &context,
-                                 bool training) {
-  const unsigned int max_object_number =
-    std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
-
-  nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
-
-  std::vector<nntrainer::Tensor> split_input =
-    input.split({1, 1, 1, 1, 1, max_object_number}, 3);
-  nntrainer::Tensor bbox_x_pred_ = split_input[0];
-  nntrainer::Tensor bbox_y_pred_ = split_input[1];
-  nntrainer::Tensor bbox_w_pred_ = split_input[2];
-  nntrainer::Tensor bbox_h_pred_ = split_input[3];
-  nntrainer::Tensor confidence_pred_ = split_input[4];
-  nntrainer::Tensor class_pred_ = split_input[5];
-
-  nntrainer::Tensor &bbox_x_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
-  nntrainer::Tensor &bbox_y_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
-  nntrainer::Tensor &bbox_w_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
-  nntrainer::Tensor &bbox_h_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
-
-  nntrainer::Tensor &confidence_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
-  nntrainer::Tensor &class_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
-
-  nntrainer::Tensor &bbox_w_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
-  nntrainer::Tensor &bbox_h_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
-  bbox_x_pred.copyData(bbox_x_pred_);
-  bbox_y_pred.copyData(bbox_y_pred_);
-  bbox_w_pred.copyData(bbox_w_pred_);
-  bbox_h_pred.copyData(bbox_h_pred_);
-
-  confidence_pred.copyData(confidence_pred_);
-  class_pred.copyData(class_pred_);
-
-  nntrainer::Tensor &bbox_x_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
-  nntrainer::Tensor &bbox_y_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
-  nntrainer::Tensor &bbox_w_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
-  nntrainer::Tensor &bbox_h_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
-  nntrainer::Tensor &confidence_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
-  nntrainer::Tensor &class_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
-  nntrainer::Tensor &bbox_class_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
-  nntrainer::Tensor &iou_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
-  bbox_x_gt.setValue(0);
-  bbox_y_gt.setValue(0);
-  bbox_w_gt.setValue(0);
-  bbox_h_gt.setValue(0);
-
-  confidence_gt.setValue(0);
-  class_gt.setValue(0);
-
-  // init mask
-  bbox_class_mask.setValue(0);
-  iou_mask.setValue(0.5);
-
-  // activate pred
-  sigmoid.run_fn(bbox_x_pred, bbox_x_pred);
-  sigmoid.run_fn(bbox_y_pred, bbox_y_pred);
-
-  auto type_bbox_w_pred = bbox_w_pred.getDataType();
-  if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP32) {
-    bbox_w_pred.apply_i<float>(nntrainer::exp_util<float>);
-  } else if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    bbox_w_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  auto type_bbox_h_pred = bbox_h_pred.getDataType();
-  if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP32) {
-    bbox_h_pred.apply_i<float>(nntrainer::exp_util<float>);
-  } else if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    bbox_h_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  sigmoid.run_fn(confidence_pred, confidence_pred);
-  softmax.run_fn(class_pred, class_pred);
-
-  bbox_w_pred_anchor.copyData(bbox_w_pred);
-  bbox_h_pred_anchor.copyData(bbox_h_pred);
-
-  // apply anchors to bounding box
-  bbox_w_pred_anchor.multiply_i(anchors_w);
-  auto type_bbox_w_pred_anchor = bbox_w_pred_anchor.getDataType();
-  if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP32) {
-    bbox_w_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
-  } else if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    bbox_w_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  bbox_h_pred_anchor.multiply_i(anchors_h);
-  auto type_bbox_h_pred_anchor = bbox_h_pred_anchor.getDataType();
-  if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP32) {
-    bbox_h_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
-  } else if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    bbox_h_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
-  generate_ground_truth(context);
-
-  nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
-    {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
-  nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
-  nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
-  nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
-
-  nntrainer::Tensor bbox_gt =
-    nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
-  nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
-  nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
-  nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
-
-  float bbox_loss = mse(masked_bbox_pred, masked_bbox_gt);
-  float confidence_loss = mse(masked_confidence_pred, masked_confidence_gt);
-  float class_loss = mse(masked_class_pred, masked_class_gt);
-
-  float loss = 5 * bbox_loss + confidence_loss + class_loss;
-  ml_logd("Current iteration loss: %f", loss);
-}
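Condensed, the per-anchor decoding above (sigmoid on x/y, exp on w/h, anchor scaling plus sqrt) feeds the weighted sum loss = 5 * bbox_loss + confidence_loss + class_loss. A NumPy sketch of the width path, using the anchor widths hard-coded in yolo_v2_loss.h and placeholder inputs:

import numpy as np

anchors_w = np.array([1.3221, 3.19275, 5.05587, 9.47112, 11.2364])

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

raw = np.random.randn(5, 2)                  # (anchor, [tx, tw]) placeholders
bbox_x = sigmoid(raw[:, 0])                  # offset inside the grid cell, in (0, 1)
bbox_w = np.exp(raw[:, 1])                   # positive width scale
bbox_w_anchor = np.sqrt(bbox_w * anchors_w)  # sqrt(c * x), compared to sqrt(w_gt)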
-
-void YoloV2LossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
-  nntrainer::Tensor &bbox_x_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
-  nntrainer::Tensor &bbox_x_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_x_pred]);
-  nntrainer::Tensor &bbox_y_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
-  nntrainer::Tensor &bbox_y_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_y_pred]);
-  nntrainer::Tensor &bbox_w_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
-  nntrainer::Tensor &bbox_w_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_w_pred]);
-  nntrainer::Tensor &bbox_h_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
-  nntrainer::Tensor &bbox_h_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_h_pred]);
-
-  nntrainer::Tensor &confidence_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
-  nntrainer::Tensor &confidence_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::confidence_pred]);
-  nntrainer::Tensor &class_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
-  nntrainer::Tensor &class_pred_grad =
-    context.getTensorGrad(wt_idx[YoloV2LossParams::class_pred]);
-
-  nntrainer::Tensor &bbox_w_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
-  nntrainer::Tensor &bbox_h_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
-  nntrainer::Tensor &bbox_x_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
-  nntrainer::Tensor &bbox_y_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
-  nntrainer::Tensor &bbox_w_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
-  nntrainer::Tensor &bbox_h_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
-  nntrainer::Tensor &confidence_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
-  nntrainer::Tensor &class_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
-  nntrainer::Tensor &bbox_class_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
-  nntrainer::Tensor &iou_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
-  nntrainer::Tensor &bbox1_width =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
-  nntrainer::Tensor &bbox1_height =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
-  nntrainer::Tensor &is_xy_min_max =
-    context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
-  nntrainer::Tensor &intersection_width =
-    context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
-  nntrainer::Tensor &intersection_height =
-    context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
-  nntrainer::Tensor &unions =
-    context.getTensor(wt_idx[YoloV2LossParams::unions]);
-
-  nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
-    {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
-  nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
-  nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
-  nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
-
-  nntrainer::Tensor bbox_gt =
-    nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
-  nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
-  nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
-  nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
-
-  nntrainer::Tensor masked_bbox_pred_grad;
-  nntrainer::Tensor masked_confidence_pred_grad;
-  nntrainer::Tensor masked_confidence_gt_grad;
-  nntrainer::Tensor masked_class_pred_grad;
-
-  nntrainer::Tensor confidence_gt_grad;
-
-  msePrime(masked_bbox_pred, masked_bbox_gt, masked_bbox_pred_grad);
-  msePrime(masked_confidence_pred, masked_confidence_gt,
-           masked_confidence_pred_grad);
-  msePrime(masked_confidence_gt, masked_confidence_pred,
-           masked_confidence_gt_grad);
-  msePrime(masked_class_pred, masked_class_gt, masked_class_pred_grad);
-
-  masked_bbox_pred_grad.multiply_i(5);
-
-  nntrainer::Tensor bbox_pred_grad;
-
-  masked_bbox_pred_grad.multiply(bbox_class_mask, bbox_pred_grad);
-  masked_confidence_pred_grad.multiply(iou_mask, confidence_pred_grad);
-  masked_confidence_gt_grad.multiply(iou_mask, confidence_gt_grad);
-  masked_class_pred_grad.multiply(bbox_class_mask, class_pred_grad);
-
-  std::vector<nntrainer::Tensor> split_bbox_pred_grad =
-    bbox_pred_grad.split({1, 1, 1, 1}, 3);
-  bbox_x_pred_grad.copyData(split_bbox_pred_grad[0]);
-  bbox_y_pred_grad.copyData(split_bbox_pred_grad[1]);
-  bbox_w_pred_grad.copyData(split_bbox_pred_grad[2]);
-  bbox_h_pred_grad.copyData(split_bbox_pred_grad[3]);
-
-  // std::vector<nntrainer::Tensor> bbox_pred_iou_grad =
-  //   calc_iou_grad(confidence_gt_grad, bbox1_width, bbox1_height,
-  //   is_xy_min_max,
-  //                 intersection_width, intersection_height, unions);
-  // bbox_x_pred_grad.add_i(bbox_pred_iou_grad[0]);
-  // bbox_y_pred_grad.add_i(bbox_pred_iou_grad[1]);
-  // bbox_w_pred_grad.add_i(bbox_pred_iou_grad[2]);
-  // bbox_h_pred_grad.add_i(bbox_pred_iou_grad[3]);
-
-  /**
-   * @brief calculate gradient for applying anchors to bounding box
-   * @details Let bbox_pred be x, the anchor c (a constant with respect to
-   * bbox_pred), and bbox_pred_anchor y, so that y = sqrt(cx). The partial
-   * derivative of y with respect to x is sqrt(c)/(2*sqrt(x)), which is
-   * equivalent to sqrt(cx)/(2x), where sqrt(cx) can be replaced with y.
-   * @note The division by bbox_pred(x) is not executed here because
-   * bbox_pred_grad will be multiplied by bbox_pred(x) soon after.
-   */
-  bbox_w_pred_grad.multiply_i(bbox_w_pred_anchor);
-  bbox_h_pred_grad.multiply_i(bbox_h_pred_anchor);
-  /** intended comment */
-  // bbox_w_pred_grad.divide_i(bbox_w_pred);
-  // bbox_h_pred_grad.divide_i(bbox_h_pred);
-  bbox_w_pred_grad.divide_i(2);
-  bbox_h_pred_grad.divide_i(2);
-
-  sigmoid.run_prime_fn(bbox_x_pred, bbox_x_pred, bbox_x_pred_grad,
-                       bbox_x_pred_grad);
-  sigmoid.run_prime_fn(bbox_y_pred, bbox_y_pred, bbox_y_pred_grad,
-                       bbox_y_pred_grad);
-  /** intended comment */
-  // bbox_w_pred_grad.multiply_i(bbox_w_pred);
-  // bbox_h_pred_grad.multiply_i(bbox_h_pred);
-  sigmoid.run_prime_fn(confidence_pred, confidence_pred, confidence_pred_grad,
-                       confidence_pred_grad);
-  softmax.run_prime_fn(class_pred, class_pred, class_pred_grad,
-                       class_pred_grad);
-
-  nntrainer::Tensor outgoing_derivative_ = nntrainer::Tensor::cat(
-    {bbox_x_pred_grad, bbox_y_pred_grad, bbox_w_pred_grad, bbox_h_pred_grad,
-     confidence_pred_grad, class_pred_grad},
-    3);
-  nntrainer::Tensor &outgoing_derivative =
-    context.getOutgoingDerivative(SINGLE_INOUT_IDX);
-  outgoing_derivative.copyData(outgoing_derivative_);
-}
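Spelling out the cancellation the two "intended comment" blocks rely on: with y = sqrt(c x) for the anchor-scaled width and x = exp(t) the activated network output,

  \frac{dy}{dx} = \frac{\sqrt{c}}{2\sqrt{x}} = \frac{\sqrt{cx}}{2x} = \frac{y}{2x}, \qquad \frac{dx}{dt} = e^{t} = x,

so the chained factor is (y / 2x) * x = y / 2: the division by bbox_pred and the multiplication by bbox_pred cancel, which is why the code only multiplies by bbox_pred_anchor and divides by 2.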
-
-void YoloV2LossLayer::exportTo(nntrainer::Exporter &exporter,
-                               const ml::train::ExportMethods &method) const {
-  exporter.saveResult(yolo_v2_loss_props, method, this);
-}
-
-void YoloV2LossLayer::setProperty(const std::vector<std::string> &values) {
-  auto remain_props = loadProperties(values, yolo_v2_loss_props);
-  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
-    << "[YoloV2LossLayer] Unknown Layer Properties count " +
-         std::to_string(values.size());
-}
-
-void YoloV2LossLayer::setBatch(nntrainer::RunLayerContext &context,
-                               unsigned int batch) {
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::confidence_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::class_pred], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor], batch);
-
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::confidence_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::class_gt], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox_class_mask], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::iou_mask], batch);
-
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox1_width], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::bbox1_height], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::is_xy_min_max], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::intersection_width], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::intersection_height], batch);
-  context.updateTensor(wt_idx[YoloV2LossParams::unions], batch);
-}
-
-unsigned int YoloV2LossLayer::find_responsible_anchors(float bbox_ratio) {
-  nntrainer::Tensor similarity = anchors_ratio.subtract(bbox_ratio);
-  auto data_type = similarity.getDataType();
-  if (data_type == ml::train::TensorDim::DataType::FP32) {
-    similarity.apply_i<float>(nntrainer::absFloat<float>);
-  } else if (data_type == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    similarity.apply_i<_FP16>(nntrainer::absFloat<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-  auto data = similarity.getData();
-
-  auto min_iter = std::min_element(data, data + NUM_ANCHOR);
-  return std::distance(data, min_iter);
-}
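Anchor matching uses only the w/h aspect ratio of the labeled box. A NumPy sketch of the same selection, with the anchors from yolo_v2_loss.h:

import numpy as np

anchors_w = np.array([1.3221, 3.19275, 5.05587, 9.47112, 11.2364])
anchors_h = np.array([1.73145, 4.00944, 8.09892, 4.84053, 10.0071])
anchors_ratio = anchors_w / anchors_h

def find_responsible_anchor(bbox_w, bbox_h):
    # index of the anchor whose aspect ratio is closest to the box's
    return int(np.argmin(np.abs(anchors_ratio - bbox_w / bbox_h)))

print(find_responsible_anchor(2.0, 2.0))  # ratio 1.0 -> anchor 4 (ratio ~ 1.123)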
-
-void YoloV2LossLayer::generate_ground_truth(
-  nntrainer::RunLayerContext &context) {
-  const unsigned int max_object_number =
-    std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
-  const unsigned int grid_height_number =
-    std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
-  const unsigned int grid_width_number =
-    std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
-
-  nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX);
-
-  nntrainer::Tensor &bbox_x_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
-  nntrainer::Tensor &bbox_y_pred =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
-  nntrainer::Tensor &bbox_w_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
-  nntrainer::Tensor &bbox_h_pred_anchor =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
-  nntrainer::Tensor &bbox_x_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
-  nntrainer::Tensor &bbox_y_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
-  nntrainer::Tensor &bbox_w_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
-  nntrainer::Tensor &bbox_h_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
-  nntrainer::Tensor &confidence_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
-  nntrainer::Tensor &class_gt =
-    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
-  nntrainer::Tensor &bbox_class_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
-  nntrainer::Tensor &iou_mask =
-    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
-  nntrainer::Tensor &bbox1_width =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
-  nntrainer::Tensor &bbox1_height =
-    context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
-  nntrainer::Tensor &is_xy_min_max =
-    context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
-  nntrainer::Tensor &intersection_width =
-    context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
-  nntrainer::Tensor &intersection_height =
-    context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
-  nntrainer::Tensor &unions =
-    context.getTensor(wt_idx[YoloV2LossParams::unions]);
-
-  const unsigned int batch_size = bbox_x_pred.getDim().batch();
-
-  std::vector<nntrainer::Tensor> split_label =
-    label.split({1, 1, 1, 1, 1}, 3);
-  nntrainer::Tensor bbox_x_label = split_label[0];
-  nntrainer::Tensor bbox_y_label = split_label[1];
-  nntrainer::Tensor bbox_w_label = split_label[2];
-  nntrainer::Tensor bbox_h_label = split_label[3];
-  nntrainer::Tensor class_label = split_label[4];
-
-  bbox_x_label.multiply_i(grid_width_number);
-  bbox_y_label.multiply_i(grid_height_number);
-
-  for (unsigned int batch = 0; batch < batch_size; ++batch) {
-    for (unsigned int object = 0; object < max_object_number; ++object) {
-      if (!bbox_w_label.getValue(batch, 0, object, 0) &&
-          !bbox_h_label.getValue(batch, 0, object, 0)) {
-        break;
-      }
-      unsigned int grid_x_index = bbox_x_label.getValue(batch, 0, object, 0);
-      unsigned int grid_y_index = bbox_y_label.getValue(batch, 0, object, 0);
-      unsigned int grid_index = grid_y_index * grid_width_number + grid_x_index;
-      unsigned int responsible_anchor =
-        find_responsible_anchors(bbox_w_label.getValue(batch, 0, object, 0) /
-                                 bbox_h_label.getValue(batch, 0, object, 0));
-
-      bbox_x_gt.setValue(batch, grid_index, responsible_anchor, 0,
-                         bbox_x_label.getValue(batch, 0, object, 0) -
-                           grid_x_index);
-      bbox_y_gt.setValue(batch, grid_index, responsible_anchor, 0,
-                         bbox_y_label.getValue(batch, 0, object, 0) -
-                           grid_y_index);
-      bbox_w_gt.setValue(
-        batch, grid_index, responsible_anchor, 0,
-        nntrainer::sqrtFloat(bbox_w_label.getValue(batch, 0, object, 0)));
-      bbox_h_gt.setValue(
-        batch, grid_index, responsible_anchor, 0,
-        nntrainer::sqrtFloat(bbox_h_label.getValue(batch, 0, object, 0)));
-
-      class_gt.setValue(batch, grid_index, responsible_anchor,
-                        class_label.getValue(batch, 0, object, 0), 1);
-      bbox_class_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
-      iou_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
-    }
-  }
-
-  nntrainer::Tensor iou = calc_iou(
-    bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor, bbox_x_gt,
-    bbox_y_gt, bbox_w_gt, bbox_h_gt, bbox1_width, bbox1_height, is_xy_min_max,
-    intersection_width, intersection_height, unions);
-  confidence_gt.copyData(iou);
-}
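A worked example of the grid arithmetic above, assuming the 13x13 grid configured in main.py:

grid_w, grid_h = 13, 13

# a labeled box center at (0.53, 0.27) in normalized image coordinates
x_label = 0.53 * grid_w                      # 6.89
y_label = 0.27 * grid_h                      # 3.51
grid_x, grid_y = int(x_label), int(y_label)  # cell (6, 3)
grid_index = grid_y * grid_w + grid_x        # 3 * 13 + 6 = 45

x_gt = x_label - grid_x                      # ~0.89, offset within the cell
y_gt = y_label - grid_y                      # ~0.51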
-
-#ifdef PLUGGABLE
-
-nntrainer::Layer *create_yolo_v2_loss_layer() {
-  auto layer = new YoloV2LossLayer();
-  return layer;
-}
-
-void destroy_yolo_v2_loss_layer(nntrainer::Layer *layer) { delete layer; }
-
-/**
- * @note ml_train_layer_pluggable defines the entry point for nntrainer to
- * register a plugin layer
- */
-extern "C" {
-nntrainer::LayerPluggable ml_train_layer_pluggable{create_yolo_v2_loss_layer,
-                                                   destroy_yolo_v2_loss_layer};
-}
-
-#endif
-} // namespace custom
diff --git a/Applications/YOLO/jni/yolo_v2_loss.h b/Applications/YOLO/jni/yolo_v2_loss.h
deleted file mode 100644 (file)
index fd1f2fa..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
- *
- * @file   yolo_v2_loss.h
- * @date   07 March 2023
- * @brief  This file contains the yolo v2 loss layer
- * @see    https://github.com/nnstreamer/nntrainer
- * @author Hyeonseok Lee <hs89.lee@samsung.com>
- * @bug    No known bugs except for NYI items
- *
- */
-#ifndef __YOLO_V2_LOSS_LAYER_H__
-#define __YOLO_V2_LOSS_LAYER_H__
-
-#include <string>
-
-#include <acti_func.h>
-#include <base_properties.h>
-#include <layer_context.h>
-#include <layer_devel.h>
-#include <node_exporter.h>
-
-namespace custom {
-
-namespace props {
-
-/**
- * @brief maximum number of objects in a single image for the given dataset
- *
- */
-class MaxObjectNumber final : public nntrainer::PositiveIntegerProperty {
-public:
-  MaxObjectNumber(const unsigned &value = 1);
-  static constexpr const char *key = "max_object_number";
-  using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief number of classes in the given dataset
- *
- */
-class ClassNumber final : public nntrainer::PositiveIntegerProperty {
-public:
-  ClassNumber(const unsigned &value = 1);
-  static constexpr const char *key = "class_number";
-  using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief grid height number
- *
- */
-class GridHeightNumber final : public nntrainer::PositiveIntegerProperty {
-public:
-  GridHeightNumber(const unsigned &value = 1);
-  static constexpr const char *key = "grid_height_number";
-  using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief grid width number
- *
- */
-class GridWidthNumber final : public nntrainer::PositiveIntegerProperty {
-public:
-  GridWidthNumber(const unsigned &value = 1);
-  static constexpr const char *key = "grid_width_number";
-  using prop_tag = nntrainer::uint_prop_tag;
-};
-
-} // namespace props
-
-/**
- * @brief Yolo V2 loss layer
- *
- */
-class YoloV2LossLayer final : public nntrainer::Layer {
-public:
-  /**
-   * @brief Construct a new YoloV2Loss Layer object
-   *
-   */
-  YoloV2LossLayer();
-
-  /**
-   * @brief Destroy the YoloV2Loss Layer object
-   *
-   */
-  ~YoloV2LossLayer() {}
-
-  /**
-   * @copydoc Layer::finalize(InitLayerContext &context)
-   */
-  void finalize(nntrainer::InitLayerContext &context) override;
-
-  /**
-   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
-   */
-  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
-
-  /**
-   * @copydoc Layer::calcDerivative(RunLayerContext &context)
-   */
-  void calcDerivative(nntrainer::RunLayerContext &context) override;
-
-  /**
-   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
-   */
-  void exportTo(nntrainer::Exporter &exporter,
-                const ml::train::ExportMethods &method) const override;
-
-  /**
-   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
-   */
-  void setProperty(const std::vector<std::string> &values) override;
-
-  /**
-   * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
-   */
-  void setBatch(nntrainer::RunLayerContext &context,
-                unsigned int batch) override;
-
-  /**
-   * @copydoc bool supportBackwarding() const
-   */
-  bool supportBackwarding() const override { return true; };
-
-  /**
-   * @copydoc Layer::requireLabel()
-   */
-  bool requireLabel() const { return true; }
-
-  /**
-   * @copydoc Layer::getType()
-   */
-  const std::string getType() const override { return YoloV2LossLayer::type; };
-
-  inline static const std::string type = "yolo_v2_loss";
-
-private:
-  static constexpr unsigned int NUM_ANCHOR = 5;
-  const float anchors_w_buf[NUM_ANCHOR] = {1.3221, 3.19275, 5.05587, 9.47112,
-                                           11.2364};
-  const float anchors_h_buf[NUM_ANCHOR] = {1.73145, 4.00944, 8.09892, 4.84053,
-                                           10.0071};
-  const nntrainer::Tensor anchors_w;
-  const nntrainer::Tensor anchors_h;
-  nntrainer::Tensor anchors_ratio;
-
-  nntrainer::ActiFunc sigmoid; /** sigmoid activation operation */
-  nntrainer::ActiFunc softmax; /** softmax activation operation */
-
-  std::tuple<props::MaxObjectNumber, props::ClassNumber,
-             props::GridHeightNumber, props::GridWidthNumber>
-    yolo_v2_loss_props;
-  std::array<unsigned int, 22> wt_idx; /**< indices of the weights */
-
-  /**
-   * @brief find responsible anchors per object
-   */
-  unsigned int find_responsible_anchors(float bbox_ratio);
-
-  /**
-   * @brief generate ground truth, mask from labels
-   */
-  void generate_ground_truth(nntrainer::RunLayerContext &context);
-};
-
-} // namespace custom
-
-#endif /* __YOLO_V2_LOSS_LAYER_H__ */
diff --git a/Applications/YOLOv2/PyTorch/dataset.py b/Applications/YOLOv2/PyTorch/dataset.py
new file mode 100644 (file)
index 0000000..855ba19
--- /dev/null
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file dataset.py
+# @date 8 March 2023
+# @brief Define dataset class for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import glob
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data.dataloader import default_collate
+from PIL import Image
+
+
+##
+# @brief dataset class for yolo
+# @note Annotation text files corresponding to the image file names are required.
+class YOLODataset(Dataset):
+    def __init__(self, img_dir, ann_dir):
+        super().__init__()
+        img_list = glob.glob(img_dir)
+        ann_list = glob.glob(ann_dir)
+        img_list.sort()
+        ann_list.sort()
+
+        self.length = len(img_list)
+        self.input_images = []
+        self.bbox_gt = []
+        self.cls_gt = []
+
+        for i in range(len(img_list)):
+            img = np.array(Image.open(img_list[i]).resize((416, 416))) / 255
+            label_bbox = []
+            label_cls = []
+            with open(ann_list[i], "rt", encoding="utf-8") as f:
+                for line in f.readlines():
+                    line = [float(i) for i in line.split()]
+                    label_bbox.append(np.array(line[1:], dtype=np.float32) / 416)
+                    label_cls.append(int(line[0]))
+
+            self.input_images.append(img)
+            self.bbox_gt.append(label_bbox)
+            self.cls_gt.append(label_cls)
+
+        self.input_images = np.array(self.input_images)
+        self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2))
+
+    def __len__(self):
+        return self.length
+
+    def __getitem__(self, idx):
+        return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx]
+
+
+##
+# @brief collate db function for yolo
+def collate_db(batch):
+    """
+    @param batch list of (img, bbox, cls) samples
+    @return collated batch, (imgs, bboxes, classes)
+    """
+    items = list(zip(*batch))
+    items[0] = default_collate(items[0])
+    items[1] = list(items[1])
+    items[2] = list(items[2])
+    return items
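default_collate is applied only to the image field because images stack into one fixed-shape tensor, while each sample may carry a different number of boxes. A usage sketch of collate_db with dummy data:

import torch

batch = [
    (torch.zeros(3, 416, 416), [[0.1, 0.2, 0.3, 0.4]], [1]),
    (torch.zeros(3, 416, 416), [[0.5, 0.5, 0.2, 0.2], [0.2, 0.2, 0.1, 0.1]], [0, 2]),
]
imgs, bboxes, classes = collate_db(batch)
print(imgs.shape)      # torch.Size([2, 3, 416, 416]), stacked by default_collate
print(len(bboxes[1]))  # 2, per-image box lists keep their ragged lengths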
diff --git a/Applications/YOLOv2/PyTorch/main.py b/Applications/YOLOv2/PyTorch/main.py
new file mode 100644 (file)
index 0000000..11fe1e5
--- /dev/null
@@ -0,0 +1,203 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file main.py
+# @date 8 March 2023
+# @brief Implement training for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import sys
+import os
+
+from torchconverter import save_bin
+import torch
+from torch import optim
+from torch.utils.data import DataLoader
+
+from yolo import YoloV2
+from yolo_loss import YoloV2_LOSS
+from dataset import YOLODataset, collate_db
+
+
+# get pyutils path using relative path
+def get_util_path():
+    current_path = os.path.abspath(os.path.dirname(__file__))
+    parent_path = os.path.abspath(os.path.dirname(current_path))
+    target_path = os.path.abspath(os.path.dirname(parent_path))
+    return os.path.dirname(target_path) + "/tools/pyutils/"
+
+
+# add pyutils path to sys.path
+sys.path.append(get_util_path())
+
+# set config
+out_size = 13
+num_classes = 4
+num_anchors = 5
+
+epochs = 3
+batch_size = 4
+
+train_img_dir = "/home/user/TRAIN_DIR/images/*"
+train_ann_dir = "/home/user/TRAIN_DIR/annotations/*"
+valid_img_dir = "/home/user/VALID_DIR/images/*"
+valid_ann_dir = "/home/user/VALID_DIR/annotations/*"
+
+# load data
+train_dataset = YOLODataset(train_img_dir, train_ann_dir)
+train_loader = DataLoader(
+    train_dataset,
+    batch_size=batch_size,
+    collate_fn=collate_db,
+    shuffle=True,
+    drop_last=True,
+)
+valid_dataset = YOLODataset(valid_img_dir, valid_ann_dir)
+valid_loader = DataLoader(
+    valid_dataset,
+    batch_size=batch_size,
+    collate_fn=collate_db,
+    shuffle=False,
+    drop_last=True,
+)
+
+# set model, loss and optimizer
+model = YoloV2(num_classes=num_classes)
+criterion = YoloV2_LOSS(num_classes=num_classes)
+optimizer = optim.Adam(model.parameters(), lr=1e-3)
+# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
+
+# save init model
+save_bin(model, "init_model")
+torch.save(model.state_dict(), "./init_model.pt")
+
+# train model
+best_loss = 1e10
+for epoch in range(epochs):
+    epoch_train_loss = 0
+    epoch_valid_loss = 0
+    for idx, (img, bbox, cls) in enumerate(train_loader):
+        model.train()
+        optimizer.zero_grad()
+        # model prediction
+        hypothesis = model(img).permute((0, 2, 3, 1))
+        hypothesis = hypothesis.reshape(
+            (batch_size, out_size**2, num_anchors, 5 + num_classes)
+        )
+        # split each prediction(bbox, iou, class prob)
+        bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+        bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+        bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+        iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+        score_pred = hypothesis[..., 5:].contiguous()
+        prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+            score_pred.shape
+        )
+        # calc loss; the predictions are already float tensors, and
+        # re-wrapping them in torch.FloatTensor would detach them from the
+        # autograd graph and block backpropagation
+        loss = criterion(bbox_pred, iou_pred, prob_pred, bbox, cls)
+        # back prop
+        loss.backward()
+        optimizer.step()
+        # scheduler.step()
+        epoch_train_loss += loss.item()
+
+    for idx, (img, bbox, cls) in enumerate(valid_loader):
+        model.eval()
+        with torch.no_grad():
+            # model prediction
+            hypothesis = model(img).permute((0, 2, 3, 1))
+            hypothesis = hypothesis.reshape(
+                (hypothesis.shape[0], out_size**2, num_anchors, 5 + num_classes)
+            )
+            # split each prediction(bbox, iou, class prob)
+            bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+            bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+            bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+            iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+            score_pred = hypothesis[..., 5:].contiguous()
+            prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+                score_pred.shape
+            )
+            # calc loss (same as above: no torch.FloatTensor re-wrapping)
+            loss = criterion(bbox_pred, iou_pred, prob_pred, bbox, cls)
+            epoch_valid_loss += loss.item()
+
+    if epoch_valid_loss < best_loss:
+        best_loss = epoch_valid_loss
+        torch.save(model.state_dict(), "./best_model.pt")
+        save_bin(model, "best_model")
+
+    print(
+        f"epoch {epoch}, train loss: {epoch_train_loss / len(train_loader):.4f},\
+          valid loss: {epoch_valid_loss / len(valid_loader):.4f}"
+    )
+
+
+##
+# @brief bbox post process function for inference
+def post_process_for_bbox(bbox_p):
+    """
+    @param bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4)
+    @return bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4)
+    """
+    anchors = torch.FloatTensor(
+        [
+            (1.3221, 1.73145),
+            (3.19275, 4.00944),
+            (5.05587, 8.09892),
+            (9.47112, 4.84053),
+            (11.2364, 10.0071),
+        ]
+    )
+
+    outsize = (13, 13)
+    width, height = outsize
+
+    # restore cell pos to x, y
+    for w in range(width):
+        for h in range(height):
+            bbox_p[:, height * h + w, :, 0] += w
+            bbox_p[:, height * h + w, :, 1] += h
+    bbox_p[:, :, :, :2] /= 13
+
+    # apply anchors to w, h
+    anchor_w = anchors[:, 0].contiguous().view(-1, 1)
+    anchor_h = anchors[:, 1].contiguous().view(-1, 1)
+    bbox_p[:, :, :, 2:3] *= anchor_w
+    bbox_p[:, :, :, 3:4] *= anchor_h
+
+    return bbox_p
+
+
+# inference example using the trained model (img is reused from the last validation batch)
+hypothesis = model(img).permute((0, 2, 3, 1))
+hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5 + num_classes))
+
+# transform output
+bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+bbox_pred = post_process_for_bbox(bbox_pred)
+iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+score_pred = hypothesis[..., 5:].contiguous()
+prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+    score_pred.shape
+)
+
+# result of inference (data range 0~1)
+iou_mask = iou_pred > 0.5
+print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask)
diff --git a/Applications/YOLOv2/PyTorch/yolo.py b/Applications/YOLOv2/PyTorch/yolo.py
new file mode 100644 (file)
index 0000000..b2a535c
--- /dev/null
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file yolo.py
+# @date 8 March 2023
+# @brief Define a simple yolo model (not the original darknet backbone).
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import torch
+from torch import nn
+
+
+##
+# @brief define yolo model (except for re-organization module)
+class YoloV2(nn.Module):
+    def __init__(self, num_classes, num_anchors=5):
+
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_anchors = num_anchors
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(3, 32, 3, 1, 1),
+            nn.BatchNorm2d(32, eps=1e-3),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(2, 2),
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(32, 64, 3, 1, 1),
+            nn.BatchNorm2d(64, eps=1e-3),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(2, 2),
+        )
+        self.conv3 = nn.Sequential(
+            nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv4 = nn.Sequential(
+            nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv5 = nn.Sequential(
+            nn.Conv2d(64, 128, 3, 1, 1),
+            nn.BatchNorm2d(128, eps=1e-3),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(2, 2),
+        )
+        self.conv6 = nn.Sequential(
+            nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv7 = nn.Sequential(
+            nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv8 = nn.Sequential(
+            nn.Conv2d(128, 256, 3, 1, 1),
+            nn.BatchNorm2d(256, eps=1e-3),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(2, 2),
+        )
+        self.conv9 = nn.Sequential(
+            nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv10 = nn.Sequential(
+            nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv11 = nn.Sequential(
+            nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv12 = nn.Sequential(
+            nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv13 = nn.Sequential(
+            nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+        )
+
+        self.conv_b = nn.Sequential(
+            nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
+        )
+
+        self.maxpool_a = nn.MaxPool2d(2, 2)
+        self.conv_a1 = nn.Sequential(
+            nn.Conv2d(512, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+        self.conv_a2 = nn.Sequential(
+            nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv_a3 = nn.Sequential(
+            nn.Conv2d(512, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+        self.conv_a4 = nn.Sequential(
+            nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+        )
+        self.conv_a5 = nn.Sequential(
+            nn.Conv2d(512, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+        self.conv_a6 = nn.Sequential(
+            nn.Conv2d(1024, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+        self.conv_a7 = nn.Sequential(
+            nn.Conv2d(1024, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+
+        self.conv_out1 = nn.Sequential(
+            nn.Conv2d(1280, 1024, 3, 1, 1),
+            nn.BatchNorm2d(1024, eps=1e-3),
+            nn.LeakyReLU(),
+        )
+
+        self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0)
+
+    def forward(self, x):
+        output = self.conv1(x)
+        output = self.conv2(output)
+        output = self.conv3(output)
+        output = self.conv4(output)
+        output = self.conv5(output)
+        output = self.conv6(output)
+        output = self.conv7(output)
+        output = self.conv8(output)
+        output = self.conv9(output)
+        output = self.conv10(output)
+        output = self.conv11(output)
+        output = self.conv12(output)
+        output = self.conv13(output)
+
+        output_a = self.maxpool_a(output)
+        output_a = self.conv_a1(output_a)
+        output_a = self.conv_a2(output_a)
+        output_a = self.conv_a3(output_a)
+        output_a = self.conv_a4(output_a)
+        output_a = self.conv_a5(output_a)
+        output_a = self.conv_a6(output_a)
+        output_a = self.conv_a7(output_a)
+
+        output_b = self.conv_b(output)
+        b, c, h, w = output_b.size()
+        output_b = output_b.view(b, int(c / 4), h, 2, w, 2).contiguous()
+        output_b = output_b.permute(0, 3, 5, 1, 2, 4).contiguous()
+        output_b = output_b.view(b, -1, int(h / 2), int(w / 2))
+
+        output = torch.cat((output_a, output_b), 1)
+        output = self.conv_out1(output)
+        output = self.conv_out2(output)
+        return output
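A quick shape check for the model above (a sketch; num_classes=4 mirrors the C++ application later in this commit, but any positive value works). The five stride-2 max pools reduce 416 to 13, and conv_out2 emits num_anchors * (5 + num_classes) channels:

    import torch

    model = YoloV2(num_classes=4)
    out = model(torch.zeros((1, 3, 416, 416)))
    # 416 / 2**5 = 13 grid cells per side; 5 * (5 + 4) = 45 channels
    assert out.shape == (1, 45, 13, 13)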
diff --git a/Applications/YOLOv2/PyTorch/yolo_loss.py b/Applications/YOLOv2/PyTorch/yolo_loss.py
new file mode 100644 (file)
index 0000000..b1949a6
--- /dev/null
@@ -0,0 +1,234 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file yolo_loss.py
+# @date 8 March 2023
+# @brief Define loss class for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import torch
+from torch import nn
+import numpy as np
+
+
+##
+# @brief calculate iou between two lists of boxes
+def calculate_iou(bbox1, bbox2):
+    """
+    @param bbox1 shape(num_of_bbox, 4); contains x, y, w, h
+    @param bbox2 shape(num_of_bbox, 4); contains x, y, w, h
+    @return result shape(num_of_bbox, 1)
+    """
+    # bbox coordinates
+    b1x1, b1y1 = (bbox1[:, :2]).split(1, 1)
+    b1x2, b1y2 = (bbox1[:, :2] + (bbox1[:, 2:4])).split(1, 1)
+    b2x1, b2y1 = (bbox2[:, :2]).split(1, 1)
+    b2x2, b2y2 = (bbox2[:, :2] + (bbox2[:, 2:4])).split(1, 1)
+
+    # box areas
+    areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
+    areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
+
+    # intersections
+    min_x_of_max_x, max_x_of_min_x = torch.min(b1x2, b2x2), torch.max(b1x1, b2x1)
+    min_y_of_max_y, max_y_of_min_y = torch.min(b1y2, b2y2), torch.max(b1y1, b2y1)
+    intersection_width = (min_x_of_max_x - max_x_of_min_x).clamp(min=0)
+    intersection_height = (min_y_of_max_y - max_y_of_min_y).clamp(min=0)
+    intersections = intersection_width * intersection_height
+
+    # unions
+    unions = (areas1 + areas2) - intersections
+
+    result = intersections / unions
+    return result
+
+
+##
+# @brief find the anchor with the best-matching w/h ratio for each bbox
+def find_best_ratio(anchors, bbox):
+    """
+    @param anchors shape(num_of_anchors, 2); contains w, h
+    @param bbox shape(num_of_bbox, 2); contains w, h
+    @return best_match index of the best-matching anchor, shape(num_of_bbox, 1)
+    """
+    b1 = np.divide(anchors[:, 0], anchors[:, 1])
+    b2 = np.divide(bbox[:, 0], bbox[:, 1])
+    similarities = np.abs(b1.reshape(-1, 1) - b2)
+    best_match = np.argmin(similarities, axis=0)
+    return best_match
+
+
+##
+# @brief loss class for yolo
+class YoloV2_LOSS(nn.Module):
+    """Yolo v2 loss"""
+
+    def __init__(self, num_classes, img_shape=(416, 416), outsize=(13, 13)):
+        super().__init__()
+        self.num_classes = num_classes
+        self.img_shape = img_shape
+        self.outsize = outsize
+        self.hook = {}
+
+        self.anchors = torch.FloatTensor(
+            [
+                (1.3221, 1.73145),
+                (3.19275, 4.00944),
+                (5.05587, 8.09892),
+                (9.47112, 4.84053),
+                (11.2364, 10.0071),
+            ]
+        )
+
+        self.mse = nn.MSELoss()
+        self.bbox_loss, self.iou_loss, self.cls_loss = None, None, None
+
+    ##
+    # @brief function to track gradients of non-leaf variables.
+    def hook_variable(self, name, var):
+        """Do not use this function when training. It is for debugging."""
+        self.hook[name] = var
+        self.hook[name].requires_grad_().retain_grad()
+
+    ##
+    # @brief function to print gradients of non-leaf variables.
+    def print_hook_variables(self):
+        """Do not use this function when training. It is for debugging."""
+        for k, var in self.hook.items():
+            print(f"gradients of variable {k}:")
+            batch, channel, height, width = var.grad.shape
+            for b in range(batch):
+                for c in range(channel):
+                    for h in range(height):
+                        for w in range(width):
+                            if torch.abs(var.grad[b, c, h, w]).item() >= 1e-3:
+                                print(
+                                    f"(b: {b}, c: {c}, h: {h}, w: {w}) =\
+                                          {var.grad[b, c, h, w]}"
+                                )
+            print("=" * 20)
+
+    def forward(self, bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt):
+        """
+        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+        @param iou_pred shape(batch_size, cell_h x cell_w, num_anchors, 1)
+        @param prob_pred shape(batch_size, cell_h x cell_w, num_anchors, num_classes)
+        @param bbox_gt shape(batch_size, num_bbox, 4), data range(0~1)
+        @param cls_gt shape(batch_size, num_bbox, 1)
+        @return loss shape(1,)
+        """
+        self.hook_variable("bbox_pred", bbox_pred)
+        bbox_pred = self.apply_anchors_to_bbox(bbox_pred)
+
+        bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask = (
+            self._build_target(bbox_pred, bbox_gt, cls_gt)
+        )
+
+        self.bbox_loss = self.mse(bbox_pred * bbox_mask, bbox_built * bbox_mask)
+        self.iou_loss = self.mse(iou_pred * iou_mask, iou_built * iou_mask)
+        self.cls_loss = self.mse(prob_pred * cls_mask, cls_built * cls_mask)
+
+        return self.bbox_loss * 5 + self.iou_loss + self.cls_loss
+
+    def apply_anchors_to_bbox(self, bbox_pred):
+        """
+        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+        @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+        """
+        anchor_w = self.anchors[:, 0].contiguous().view(-1, 1)
+        anchor_h = self.anchors[:, 1].contiguous().view(-1, 1)
+        bbox_pred_tmp = bbox_pred.clone()
+        bbox_pred_tmp[:, :, :, 2:3] = torch.sqrt(bbox_pred[:, :, :, 2:3] * anchor_w)
+        bbox_pred_tmp[:, :, :, 3:4] = torch.sqrt(bbox_pred[:, :, :, 3:4] * anchor_h)
+        return bbox_pred_tmp
+
+    def _build_target(self, bbox_pred, bbox_gt, cls_gt):
+        """
+        @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+        @param bbox_gt shape(batch_size, num_bbox, 4)
+        @param cls_gt shape(batch_size, num_bbox, 1)
+        @return tuple of (bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask)
+        """
+        bbox_built, bbox_mask = [], []
+        iou_built, iou_mask = [], []
+        cls_built, cls_mask = [], []
+
+        batch_size = bbox_pred.shape[0]
+
+        for i in range(batch_size):
+            _bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask = (
+                self._make_target_per_sample(
+                    torch.FloatTensor(bbox_pred[i]),
+                    torch.FloatTensor(np.array(bbox_gt[i])),
+                    torch.LongTensor(cls_gt[i]),
+                )
+            )
+
+            bbox_built.append(_bbox_built)
+            bbox_mask.append(_bbox_mask)
+            iou_built.append(_iou_built)
+            iou_mask.append(_iou_mask)
+            cls_built.append(_cls_built)
+            cls_mask.append(_cls_mask)
+
+        bbox_built = torch.stack(bbox_built)
+        bbox_mask = torch.stack(bbox_mask)
+        iou_built = torch.stack(iou_built)
+        iou_mask = torch.stack(iou_mask)
+        cls_built = torch.stack(cls_built)
+        cls_mask = torch.stack(cls_mask)
+
+        return bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask
+
+    def _make_target_per_sample(self, _bbox_pred, _bbox_gt, _cls_gt):
+        """
+        @param _bbox_pred shape(cell_h x cell_w, num_anchors, 4)
+        @param _bbox_gt shape(num_bbox, 4)
+        @param _cls_gt shape(num_bbox,)
+        @return tuple of (_bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask)
+        """
+        hw, num_anchors, _ = _bbox_pred.shape
+
+        # set result template
+        _bbox_built = torch.zeros((hw, num_anchors, 4))
+        _bbox_mask = torch.zeros((hw, num_anchors, 1))
+
+        _iou_built = torch.zeros((hw, num_anchors, 1))
+        _iou_mask = torch.ones((hw, num_anchors, 1)) * 0.5
+
+        _cls_built = torch.zeros((hw, num_anchors, self.num_classes))
+        _cls_mask = torch.zeros((hw, num_anchors, 1))
+
+        # find best anchors
+        _bbox_gt_wh = _bbox_gt.clone()[:, 2:]
+        best_anchors = find_best_ratio(self.anchors, _bbox_gt_wh)
+
+        # normalize x, y pos based on cell coordinates
+        cx = _bbox_gt[:, 0] * self.outsize[0]
+        cy = _bbox_gt[:, 1] * self.outsize[1]
+        # calculate cell pos and normalize x, y
+        cell_idx = np.floor(cy) * self.outsize[0] + np.floor(cx)
+        cell_idx = np.array(cell_idx, dtype=np.int16)
+        cx -= np.floor(cx)
+        cy -= np.floor(cy)
+
+        # set bbox of gt
+        _bbox_built[cell_idx, best_anchors, 0] = cx
+        _bbox_built[cell_idx, best_anchors, 1] = cy
+        _bbox_built[cell_idx, best_anchors, 2] = torch.sqrt(_bbox_gt[:, 2])
+        _bbox_built[cell_idx, best_anchors, 3] = torch.sqrt(_bbox_gt[:, 3])
+        _bbox_mask[cell_idx, best_anchors, :] = 1
+
+        # set cls of gt
+        _cls_built[cell_idx, best_anchors, _cls_gt] = 1
+        _cls_mask[cell_idx, best_anchors, :] = 1
+
+        # set confidence score of gt
+        _iou_built = calculate_iou(
+            _bbox_pred.reshape(-1, 4), _bbox_built.view(-1, 4)
+        ).detach()
+        _iou_built = _iou_built.view(hw, num_anchors, 1)
+        _iou_mask[cell_idx, best_anchors, :] = 1
+
+        return _bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask
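A minimal smoke test for YoloV2_LOSS (a sketch with random values; shapes follow the docstrings above, and the single ground-truth box is hypothetical):

    import torch

    num_classes, hw, num_anchors = 4, 13 * 13, 5
    loss_fn = YoloV2_LOSS(num_classes)

    bbox_pred = torch.rand((1, hw, num_anchors, 4))
    iou_pred = torch.rand((1, hw, num_anchors, 1))
    prob_pred = torch.rand((1, hw, num_anchors, num_classes))
    bbox_gt = [[[0.1, 0.2, 0.2, 0.3]]]  # one box per sample, 0~1 range
    cls_gt = [[0]]

    loss = loss_fn(bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt)
    print(loss, loss_fn.bbox_loss, loss_fn.iou_loss, loss_fn.cls_loss)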
diff --git a/Applications/YOLOv2/jni/Android.mk b/Applications/YOLOv2/jni/Android.mk
new file mode 100644 (file)
index 0000000..9f0dfb7
--- /dev/null
@@ -0,0 +1,58 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+# ndk path
+ifndef ANDROID_NDK
+$(error ANDROID_NDK is not defined!)
+endif
+
+ifndef NNTRAINER_ROOT
+NNTRAINER_ROOT := $(LOCAL_PATH)/../../..
+endif
+
+ML_API_COMMON_INCLUDES := ${NNTRAINER_ROOT}/ml_api_common/include
+NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
+       $(NNTRAINER_ROOT)/nntrainer/dataset \
+       $(NNTRAINER_ROOT)/nntrainer/models \
+       $(NNTRAINER_ROOT)/nntrainer/layers \
+       $(NNTRAINER_ROOT)/nntrainer/compiler \
+       $(NNTRAINER_ROOT)/nntrainer/graph \
+       $(NNTRAINER_ROOT)/nntrainer/optimizers \
+       $(NNTRAINER_ROOT)/nntrainer/tensor \
+       $(NNTRAINER_ROOT)/nntrainer/utils \
+       $(NNTRAINER_ROOT)/api \
+       $(NNTRAINER_ROOT)/api/ccapi/include \
+       ${ML_API_COMMON_INCLUDES}
+
+LOCAL_MODULE := nntrainer
+LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libnntrainer.so
+
+include $(PREBUILT_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := ccapi-nntrainer
+LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libccapi-nntrainer.so
+
+include $(PREBUILT_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_NEON := true
+LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
+LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
+LOCAL_CXXFLAGS += -std=c++17 -frtti
+LOCAL_CFLAGS += -pthread -fexceptions -fopenmp
+LOCAL_LDFLAGS += -fexceptions
+LOCAL_MODULE_TAGS := optional
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE := nntrainer_yolo
+LOCAL_LDLIBS := -llog -landroid -fopenmp
+
+LOCAL_SRC_FILES := main.cpp det_dataloader.cpp yolo_v2_loss.cpp reorg_layer.cpp
+LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
+
+LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)
+
+include $(BUILD_EXECUTABLE)
diff --git a/Applications/YOLOv2/jni/Application.mk b/Applications/YOLOv2/jni/Application.mk
new file mode 100644 (file)
index 0000000..659caaf
--- /dev/null
@@ -0,0 +1,3 @@
+APP_ABI := arm64-v8a
+APP_STL := c++_shared
+APP_PLATFORM := android-29
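A typical build invocation for the two files above (an assumption about the workflow, not stated in this commit): export ANDROID_NDK to point at an NDK installation, place the prebuilt libnntrainer.so and libccapi-nntrainer.so under libs/<TARGET_ARCH_ABI> as Android.mk expects, then run $ANDROID_NDK/ndk-build from the Applications/YOLOv2/jni directory.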
diff --git a/Applications/YOLOv2/jni/det_dataloader.cpp b/Applications/YOLOv2/jni/det_dataloader.cpp
new file mode 100644 (file)
index 0000000..b48d0da
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   det_dataloader.cpp
+ * @date   22 March 2023
+ * @brief  dataloader for object detection dataset
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include "det_dataloader.h"
+
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <nntrainer_error.h>
+#include <random>
+
+namespace nntrainer::util {
+
+// It only supports bmp image files for now.
+DirDataLoader::DirDataLoader(const char *directory_, unsigned int max_num_label,
+                             unsigned int c, unsigned int w, unsigned int h,
+                             bool is_train_) :
+  max_num_label(max_num_label),
+  channel(c),
+  height(h),
+  width(w),
+  is_train(is_train_) {
+  dir_path.assign(directory_);
+
+  // set data list
+  std::filesystem::directory_iterator itr(dir_path + "images");
+  while (itr != std::filesystem::end(itr)) {
+    // get image file name
+    std::string img_file = itr->path().string();
+
+    // check if it is bmp image file
+    if (img_file.find(".bmp") == std::string::npos) {
+      itr++;
+      continue;
+    }
+
+    // set label file name
+    std::string label_file = img_file;
+    label_file.replace(label_file.find(".bmp"), 4, ".txt");
+    label_file.replace(label_file.find("/images"), 7, "/annotations");
+
+    // check if there is paired label file
+    if (!std::filesystem::exists(label_file)) {
+      itr++;
+      continue;
+    }
+
+    // set data list
+    data_list.push_back(make_pair(img_file, label_file));
+    itr++;
+  }
+
+  // set index and shuffle data
+  idxes = std::vector<unsigned int>(data_list.size());
+  std::iota(idxes.begin(), idxes.end(), 0);
+  if (is_train)
+    std::shuffle(idxes.begin(), idxes.end(), rng);
+
+  data_size = data_list.size();
+  count = 0;
+}
+
+void read_image(const std::string path, float *input, uint &width,
+                uint &height) {
+  FILE *f = fopen(path.c_str(), "rb");
+
+  if (f == nullptr)
+    throw std::invalid_argument("Cannot open file: " + path);
+
+  unsigned char info[54];
+  size_t s = fread(info, sizeof(unsigned char), 54, f);
+
+  unsigned int w = *(int *)&info[18];
+  unsigned int h = *(int *)&info[22];
+
+  if (w != width or h != height) {
+    fclose(f);
+    throw std::invalid_argument("the dimension of image file does not match" +
+                                std::to_string(s));
+  }
+
+  int row_padded = (width * 3 + 3) & (~3);
+  unsigned char *data = new unsigned char[row_padded];
+
+  for (uint i = 0; i < height; i++) {
+    s = fread(data, sizeof(unsigned char), row_padded, f);
+    for (uint j = 0; j < width; j++) {
+      input[height * (height - i - 1) + j] = (float)data[j * 3 + 2] / 255;
+      input[(height * width) + height * (height - i - 1) + j] =
+        (float)data[j * 3 + 1] / 255;
+      input[(height * width) * 2 + height * (height - i - 1) + j] =
+        (float)data[j * 3] / 255;
+    }
+  }
+
+  delete[] data;
+  fclose(f);
+}
+
+void DirDataLoader::next(float **input, float **label, bool *last) {
+  auto fill_one_sample = [this](float *input_, float *label_, int index) {
+    // set input data
+    std::string img_file = data_list[index].first;
+    read_image(img_file, input_, width, height);
+
+    // set label data
+    std::string label_file = data_list[index].second;
+    std::memset(label_, 0.0, 5 * sizeof(float) * max_num_label);
+
+    std::ifstream file(label_file);
+    std::string cur_line;
+
+    int line_idx = 0;
+    while (getline(file, cur_line)) {
+      std::stringstream ss(cur_line);
+      std::string cur_value;
+
+      int row_idx = 0;
+      while (getline(ss, cur_value, ' ')) {
+        if (row_idx == 0) {
+          label_[line_idx * 5 + 4] = std::stof(cur_value);
+        } else {
+          label_[line_idx * 5 + row_idx - 1] = std::stof(cur_value) / 416;
+        }
+        row_idx++;
+      }
+
+      line_idx++;
+    }
+
+    file.close();
+  };
+
+  fill_one_sample(*input, *label, idxes[count]);
+
+  count++;
+
+  if (count < data_size) {
+    *last = false;
+  } else {
+    *last = true;
+    count = 0;
+    std::shuffle(idxes.begin(), idxes.end(), rng);
+  }
+}
+
+} // namespace nntrainer::util
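For reference, the annotation format fill_one_sample parses above: one object per line, class id first, then x, y, w, h in pixels; the loader divides the coordinates by 416 and stores each object as [x, y, w, h, class] in the label buffer. A sketch with hypothetical file names:

    # one annotation line: class 0 at x=104, y=104 with w=208, h=104 (pixels)
    with open("annotations/sample.txt", "w") as f:
        f.write("0 104 104 208 104\n")
    # the paired image must be "images/sample.bmp", a 416x416 24-bit BMP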
diff --git a/Applications/YOLOv2/jni/det_dataloader.h b/Applications/YOLOv2/jni/det_dataloader.h
new file mode 100644 (file)
index 0000000..468148d
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   det_dataloader.h
+ * @date   22 March 2023
+ * @brief  dataloader for object detection dataset
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include <random>
+#include <string>
+#include <tensor_dim.h>
+#include <vector>
+
+namespace nntrainer::util {
+
+using TensorDim = ml::train::TensorDim;
+
+/**
+ * @brief user data object
+ */
+class DirDataLoader {
+public:
+  /**
+   * @brief Construct a new Dir Data Loader object
+   */
+  DirDataLoader(const char *directory_, unsigned int max_num_label,
+                unsigned int c, unsigned int w, unsigned int h, bool is_train_);
+  /**
+   * @brief Destroy the Dir Data Loader object
+   */
+  ~DirDataLoader(){};
+
+  /**
+   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+   */
+  void next(float **input, float **label, bool *last);
+
+  /**
+   * @brief getter for current file name
+   * @return current file name
+   */
+  std::string getCurFileName() { return cur_file_name; };
+
+  /**
+   * @brief setter for current file name
+   */
+  void setCurFileName(std::string s) { cur_file_name = s; };
+
+private:
+  std::string dir_path;
+  unsigned int data_size;
+  unsigned int max_num_label;
+  unsigned int channel;
+  unsigned int height;
+  unsigned int width;
+  bool is_train;
+
+  std::vector<std::pair<std::string, std::string>> data_list;
+  std::vector<unsigned int> idxes;
+  unsigned int count;
+  std::string cur_file_name;
+
+  // random number generator
+  std::mt19937 rng;
+};
+
+} // namespace nntrainer::util
diff --git a/Applications/YOLOv2/jni/main.cpp b/Applications/YOLOv2/jni/main.cpp
new file mode 100644 (file)
index 0000000..bc3985a
--- /dev/null
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   main.cpp
+ * @date   03 March 2023
+ * @todo   replace backbone to original darknet of yolo v2
+ * @brief  application example for YOLO v2
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include <array>
+#include <chrono>
+#include <ctime>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include <app_context.h>
+#include <det_dataloader.h>
+#include <layer.h>
+#include <model.h>
+#include <optimizer.h>
+
+#include "yolo_v2_loss.h"
+
+#include <reorg_layer.h>
+
+using LayerHandle = std::shared_ptr<ml::train::Layer>;
+using ModelHandle = std::unique_ptr<ml::train::Model>;
+using UserDataType = std::unique_ptr<nntrainer::util::DirDataLoader>;
+
+const unsigned int ANCHOR_NUMBER = 5;
+
+const unsigned int MAX_OBJECT_NUMBER = 4;
+const unsigned int CLASS_NUMBER = 4;
+const unsigned int GRID_HEIGHT_NUMBER = 13;
+const unsigned int GRID_WIDTH_NUMBER = 13;
+const unsigned int IMAGE_HEIGHT_SIZE = 416;
+const unsigned int IMAGE_WIDTH_SIZE = 416;
+const unsigned int BATCH_SIZE = 4;
+const unsigned int EPOCHS = 3;
+const char *TRAIN_DIR_PATH = "/TRAIN_DIR/";
+const char *VALIDATION_DIR_PATH = "/VALID_DIR/";
+// const std::string MODEL_INIT_BIN_PATH = "/home/user/MODEL_INIT_BIN_PATH.bin";
+
+int trainData_cb(float **input, float **label, bool *last, void *user_data) {
+  auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
+
+  data->next(input, label, last);
+  return 0;
+}
+
+int validData_cb(float **input, float **label, bool *last, void *user_data) {
+  auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
+
+  data->next(input, label, last);
+  return 0;
+}
+
+std::array<UserDataType, 2> createDetDataGenerator(const char *train_dir,
+                                                   const char *valid_dir,
+                                                   int max_num_label, int c,
+                                                   int h, int w) {
+  UserDataType train_data(new nntrainer::util::DirDataLoader(
+    train_dir, max_num_label, c, h, w, true));
+  UserDataType valid_data(new nntrainer::util::DirDataLoader(
+    valid_dir, max_num_label, c, h, w, false));
+
+  return {std::move(train_data), std::move(valid_data)};
+}
+
+/**
+ * @brief make "key=value" from key and value
+ *
+ * @tparam T type of a value
+ * @param key key
+ * @param value value
+ * @return std::string with "key=value"
+ */
+template <typename T>
+static std::string withKey(const std::string &key, const T &value) {
+  std::stringstream ss;
+  ss << key << "=" << value;
+  return ss.str();
+}
+
+template <typename T>
+static std::string withKey(const std::string &key,
+                           std::initializer_list<T> value) {
+  if (std::empty(value)) {
+    throw std::invalid_argument("empty data cannot be converted");
+  }
+
+  std::stringstream ss;
+  ss << key << "=";
+
+  auto iter = value.begin();
+  for (; iter != value.end() - 1; ++iter) {
+    ss << *iter << ',';
+  }
+  ss << *iter;
+
+  return ss.str();
+}
+
+/**
+ * @brief yolo block
+ *
+ * @param block_name name of the block
+ * @param input_name name of the input
+ * @param filters number of filters
+ * @param kernel_size size of the convolution kernel
+ * @param downsample whether to halve the output size with max pooling
+ * @return std::vector<LayerHandle> vector of layers
+ */
+std::vector<LayerHandle> yoloBlock(const std::string &block_name,
+                                   const std::string &input_name, int filters,
+                                   int kernel_size, bool downsample) {
+  using ml::train::createLayer;
+
+  auto scoped_name = [&block_name](const std::string &layer_name) {
+    return block_name + "/" + layer_name;
+  };
+  auto with_name = [&scoped_name](const std::string &layer_name) {
+    return withKey("name", scoped_name(layer_name));
+  };
+
+  auto createConv = [&with_name, filters](const std::string &name,
+                                          int kernel_size, int stride,
+                                          const std::string &padding,
+                                          const std::string &input_layer) {
+    std::vector<std::string> props{
+      with_name(name),
+      withKey("stride", {stride, stride}),
+      withKey("filters", filters),
+      withKey("kernel_size", {kernel_size, kernel_size}),
+      withKey("padding", padding),
+      withKey("input_layers", input_layer)};
+
+    return createLayer("conv2d", props);
+  };
+
+  /** construct basic layer **/
+  LayerHandle a1 = createConv("a1", kernel_size, 1, "same", input_name);
+
+  if (downsample) {
+    LayerHandle a2 = createLayer("batch_normalization",
+                                 {with_name("a2"), withKey("momentum", "0.9"),
+                                  withKey("activation", "leaky_relu")});
+
+    LayerHandle a3 = createLayer(
+      "pooling2d", {withKey("name", block_name), withKey("stride", {2, 2}),
+                    withKey("pooling", "max"), withKey("pool_size", {2, 2})});
+
+    return {a1, a2, a3};
+  } else {
+    LayerHandle a2 =
+      createLayer("batch_normalization",
+                  {withKey("name", block_name), withKey("momentum", "0.9"),
+                   withKey("activation", "leaky_relu")});
+
+    return {a1, a2};
+  }
+}
+
+/**
+ * @brief Create yolo v2 light
+ *
+ * @return ModelHandle containing the full graph of yolo v2 light
+ */
+ModelHandle YOLO() {
+  using ml::train::createLayer;
+
+  ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
+
+  std::vector<LayerHandle> layers;
+
+  layers.push_back(createLayer(
+    "input",
+    {withKey("name", "input0"),
+     withKey("input_shape", "3:" + std::to_string(IMAGE_HEIGHT_SIZE) + ":" +
+                              std::to_string(IMAGE_WIDTH_SIZE))}));
+
+  std::vector<std::vector<LayerHandle>> blocks;
+
+  blocks.push_back(yoloBlock("conv1", "input0", 32, 3, true));
+  blocks.push_back(yoloBlock("conv2", "conv1", 64, 3, true));
+  blocks.push_back(yoloBlock("conv3", "conv2", 128, 3, false));
+  blocks.push_back(yoloBlock("conv4", "conv3", 64, 1, false));
+  blocks.push_back(yoloBlock("conv5", "conv4", 128, 3, true));
+  blocks.push_back(yoloBlock("conv6", "conv5", 256, 3, false));
+  blocks.push_back(yoloBlock("conv7", "conv6", 128, 1, false));
+  blocks.push_back(yoloBlock("conv8", "conv7", 256, 3, true));
+  blocks.push_back(yoloBlock("conv9", "conv8", 512, 3, false));
+  blocks.push_back(yoloBlock("conv10", "conv9", 256, 1, false));
+  blocks.push_back(yoloBlock("conv11", "conv10", 512, 3, false));
+  blocks.push_back(yoloBlock("conv12", "conv11", 256, 1, false));
+  blocks.push_back(yoloBlock("conv13", "conv12", 512, 3, false));
+
+  blocks.push_back({createLayer(
+    "pooling2d", {withKey("name", "conv_a_pool"), withKey("stride", {2, 2}),
+                  withKey("pooling", "max"), withKey("pool_size", {2, 2}),
+                  withKey("input_layers", "conv13")})});
+  blocks.push_back(yoloBlock("conv_a1", "conv_a_pool", 1024, 3, false));
+  blocks.push_back(yoloBlock("conv_a2", "conv_a1", 512, 1, false));
+  blocks.push_back(yoloBlock("conv_a3", "conv_a2", 1024, 3, false));
+  blocks.push_back(yoloBlock("conv_a4", "conv_a3", 512, 1, false));
+  blocks.push_back(yoloBlock("conv_a5", "conv_a4", 1024, 3, false));
+  blocks.push_back(yoloBlock("conv_a6", "conv_a5", 1024, 3, false));
+  blocks.push_back(yoloBlock("conv_a7", "conv_a6", 1024, 3, false));
+
+  blocks.push_back(yoloBlock("conv_b", "conv13", 64, 1, false));
+
+  blocks.push_back(
+    {createLayer("reorg_layer", {withKey("name", "re_organization"),
+                                 withKey("input_layers", "conv_b")})});
+
+  blocks.push_back(
+    {createLayer("concat", {withKey("name", "concat"),
+                            withKey("input_layers", "conv_a7, re_organization"),
+                            withKey("axis", 1)})});
+
+  blocks.push_back(yoloBlock("conv_out1", "concat", 1024, 3, false));
+
+  blocks.push_back(
+    {createLayer("conv2d", {
+                             withKey("name", "conv_out2"),
+                             withKey("filters", 5 * (5 + CLASS_NUMBER)),
+                             withKey("kernel_size", {1, 1}),
+                             withKey("stride", {1, 1}),
+                             withKey("padding", "same"),
+                             withKey("input_layers", "conv_out1"),
+                           })});
+
+  for (auto &block : blocks) {
+    layers.insert(layers.end(), block.begin(), block.end());
+  }
+
+  layers.push_back(createLayer("permute", {
+                                            withKey("name", "permute"),
+                                            withKey("direction", {2, 3, 1}),
+                                          }));
+
+  layers.push_back(createLayer(
+    "reshape",
+    {
+      withKey("name", "reshape"),
+      withKey("target_shape",
+              std::to_string(GRID_HEIGHT_NUMBER * GRID_WIDTH_NUMBER) + ":" +
+                std::to_string(ANCHOR_NUMBER) + ":" +
+                std::to_string(5 + CLASS_NUMBER)),
+    }));
+
+  layers.push_back(createLayer(
+    "yolo_v2_loss", {
+                      withKey("name", "yolo_v2_loss"),
+                      withKey("max_object_number", MAX_OBJECT_NUMBER),
+                      withKey("class_number", CLASS_NUMBER),
+                      withKey("grid_height_number", GRID_HEIGHT_NUMBER),
+                      withKey("grid_width_number", GRID_WIDTH_NUMBER),
+                    }));
+
+  for (auto &layer : layers) {
+    model->addLayer(layer);
+  }
+
+  return model;
+}
+
+int main(int argc, char *argv[]) {
+  // print start time
+  auto start = std::chrono::system_clock::now();
+  std::time_t start_time = std::chrono::system_clock::to_time_t(start);
+  std::cout << "started computation at " << std::ctime(&start_time)
+            << std::endl;
+
+  // set training config and print it
+  std::cout << "batch_size: " << BATCH_SIZE << " epochs: " << EPOCHS
+            << std::endl;
+
+  try {
+    // create YOLO v2 model
+    ModelHandle model = YOLO();
+    model->setProperty({withKey("batch_size", BATCH_SIZE),
+                        withKey("epochs", EPOCHS),
+                        withKey("save_path", "yolov2.bin")});
+
+    // create optimizer
+    auto optimizer = ml::train::createOptimizer(
+      "adam", {"learning_rate=0.001", "epsilon=1e-8", "torch_ref=true"});
+    model->setOptimizer(std::move(optimizer));
+
+    // compile and initialize model
+    model->compile();
+    model->initialize();
+    model->save("./yolov2.ini", ml::train::ModelFormat::MODEL_FORMAT_INI);
+    // model->load(MODEL_INIT_BIN_PATH);
+
+    // create train and validation data
+    std::array<UserDataType, 2> user_datas;
+    user_datas = createDetDataGenerator(TRAIN_DIR_PATH, VALIDATION_DIR_PATH,
+                                        MAX_OBJECT_NUMBER, 3, IMAGE_HEIGHT_SIZE,
+                                        IMAGE_WIDTH_SIZE);
+    auto &[train_user_data, valid_user_data] = user_datas;
+
+    auto dataset_train = ml::train::createDataset(
+      ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
+    auto dataset_valid = ml::train::createDataset(
+      ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());
+
+    model->setDataset(ml::train::DatasetModeType::MODE_TRAIN,
+                      std::move(dataset_train));
+    model->setDataset(ml::train::DatasetModeType::MODE_VALID,
+                      std::move(dataset_valid));
+
+    model->train();
+  } catch (const std::exception &e) {
+    std::cerr << "uncaught error while running! details: " << e.what()
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // print end time and duration
+  auto end = std::chrono::system_clock::now();
+  std::chrono::duration<double> elapsed_seconds = end - start;
+  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
+  std::cout << "finished computation at " << std::ctime(&end_time)
+            << "elapsed time: " << elapsed_seconds.count() << "s\n";
+}
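The permute and reshape layers near the end of YOLO() mirror the PyTorch inference snippet in main.py earlier in this commit. A sketch of the bookkeeping, using the constants above:

    import torch

    ANCHOR_NUMBER, CLASS_NUMBER, GRID = 5, 4, 13
    head = torch.zeros((1, ANCHOR_NUMBER * (5 + CLASS_NUMBER), GRID, GRID))
    head = head.permute(0, 2, 3, 1)  # direction {2, 3, 1}: channels last
    head = head.reshape((1, GRID * GRID, ANCHOR_NUMBER, 5 + CLASS_NUMBER))
    assert head.shape == (1, 169, 5, 9)  # matches target_shape "169:5:9"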
diff --git a/Applications/YOLOv2/jni/meson.build b/Applications/YOLOv2/jni/meson.build
new file mode 100644 (file)
index 0000000..310b08d
--- /dev/null
@@ -0,0 +1,51 @@
+# build command for lib_yolov2_loss_layer.so
+yolov2_loss_src = files('yolo_v2_loss.cpp')
+yolov2_loss_layer = shared_library('yolov2_loss_layer',
+  yolov2_loss_src,
+  dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
+  include_directories: include_directories('./'),
+  install: true,
+  install_dir: nntrainer_libdir/'nntrainer'/'layers',
+  cpp_args: '-DPLUGGABLE'
+)
+yolov2_loss_layer_dep = declare_dependency(
+  link_with: yolov2_loss_layer,
+  include_directories: include_directories('./')
+)
+
+# build command for lib_reorg_layer.so
+layer_reorg_src = files('reorg_layer.cpp')
+reorg_layer = shared_library('reorg_layer',
+  layer_reorg_src,
+  dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
+  include_directories: include_directories('./'),
+  install: true,
+  install_dir: nntrainer_libdir/'nntrainer'/'layers',
+  cpp_args: '-DPLUGGABLE'
+)
+reorg_layer_dep = declare_dependency(
+  link_with: reorg_layer,
+  include_directories: include_directories('./')
+)
+
+yolo_sources = [
+  'main.cpp',
+  'det_dataloader.cpp',
+  'yolo_v2_loss.cpp',
+  'reorg_layer.cpp',
+]
+
+yolo_dependencies = [app_utils_dep,
+  nntrainer_dep,
+  nntrainer_ccapi_dep,
+  yolov2_loss_layer_dep,
+  reorg_layer_dep
+]
+
+e = executable('nntrainer_yolov2',
+  yolo_sources,
+  include_directories: [include_directories('.')],
+  dependencies: yolo_dependencies,
+  install: get_option('install-app'),
+  install_dir: application_install_dir
+)
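Usage note (an assumption based on the get_option('install-app') call above; the top-level option wiring is outside this diff): configuring the project with meson setup build -Dinstall-app=true and building with meson compile -C build should produce the nntrainer_yolov2 executable along with the pluggable yolov2_loss_layer and reorg_layer shared libraries.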
diff --git a/Applications/YOLOv2/jni/reorg_layer.cpp b/Applications/YOLOv2/jni/reorg_layer.cpp
new file mode 100644 (file)
index 0000000..e05be1e
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   reorg_layer.cpp
+ * @date   06 April 2023
+ * @todo   support in-place operation. We can get the channel, height, and
+ * width coordinates from the buffer memory index, then use the reorganizePos
+ * and restorePos functions.
+ * @brief  This file contains the re-organization layer for yolo v2
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ */
+
+#include <iostream>
+
+#include "reorg_layer.h"
+
+namespace custom {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+namespace ReorgOp {
+
+/**
+ * @brief re-organize tensor
+ * @return output coordinate of reorganized tensor
+ */
+int reorg(int b, int c, int h, int w, int batch, int channel, int height,
+          int width) {
+  int out_c = channel / 4;
+  int c2 = c % out_c;
+  int offset = c / out_c;
+  int w2 = w * 2 + offset % 2;
+  int h2 = h * 2 + offset / 2;
+  int out_index = w2 + width * 2 * (h2 + height * 2 * (c2 + out_c * b));
+  return out_index;
+}
+} // namespace ReorgOp
+
+void ReorgLayer::finalize(nntrainer::InitLayerContext &context) {
+  std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
+
+  for (unsigned int i = 0; i < dim.size(); ++i) {
+    if (dim[i].getDataLen() == 0) {
+      throw std::invalid_argument("Input dimension is not set");
+    } else {
+      dim[i].channel(dim[i].channel() * 4);
+      dim[i].height(dim[i].height() / 2);
+      dim[i].width(dim[i].width() / 2);
+    }
+  }
+
+  context.setOutputDimensions(dim);
+}
+
+void ReorgLayer::forwarding(nntrainer::RunLayerContext &context,
+                            bool training) {
+  nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
+  nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
+
+  for (int b = 0; b < (int)in.batch(); b++) {
+    for (int c = 0; c < (int)in.channel(); c++) {
+      for (int h = 0; h < (int)in.height(); h++) {
+        for (int w = 0; w < (int)in.width(); w++) {
+          int out_idx =
+            w + in.width() * (h + in.height() * (c + in.channel() * b));
+          int in_idx = ReorgOp::reorg(b, c, h, w, in.batch(), in.channel(),
+                                      in.height(), in.width());
+          out.getData()[out_idx] = in.getValue(in_idx);
+        }
+      }
+    }
+  }
+}
+
+void ReorgLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+  const nntrainer::Tensor &derivative_ =
+    context.getIncomingDerivative(SINGLE_INOUT_IDX);
+
+  nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
+  for (int b = 0; b < (int)derivative_.batch(); b++) {
+    for (int c = 0; c < (int)derivative_.channel(); c++) {
+      for (int h = 0; h < (int)derivative_.height(); h++) {
+        for (int w = 0; w < (int)derivative_.width(); w++) {
+          int in_idx =
+            w + derivative_.width() *
+                  (h + derivative_.height() * (c + derivative_.channel() * b));
+          int out_idx = ReorgOp::reorg(
+            b, c, h, w, derivative_.batch(), derivative_.channel(),
+            derivative_.height(), derivative_.width());
+          dx.getData()[out_idx] = derivative_.getValue(in_idx);
+        }
+      }
+    }
+  }
+}
+
+#ifdef PLUGGABLE
+
+nntrainer::Layer *create_reorg_layer() {
+  auto layer = new ReorgLayer();
+  std::cout << "reorg created\n";
+  return layer;
+}
+
+void destroy_reorg_layer(nntrainer::Layer *layer) {
+  std::cout << "reorg deleted\n";
+  delete layer;
+}
+
+extern "C" {
+nntrainer::LayerPluggable ml_train_layer_pluggable{create_reorg_layer,
+                                                   destroy_reorg_layer};
+}
+
+#endif
+
+} // namespace custom
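To picture what this layer computes: finalize above declares the (C, H, W) -> (4C, H/2, W/2) shape change, and ReorgOp::reorg realizes it as a space-to-depth index remap. A sketch of the same shape transform with PyTorch's pixel_unshuffle (an illustration of the shape change only; the exact channel ordering of this layer may differ):

    import torch
    import torch.nn.functional as F

    x = torch.arange(2 * 4 * 4, dtype=torch.float32).reshape(1, 2, 4, 4)
    y = F.pixel_unshuffle(x, downscale_factor=2)  # space to depth, factor 2
    assert y.shape == (1, 2 * 4, 4 // 2, 4 // 2)  # (C, H, W) -> (4C, H/2, W/2)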
diff --git a/Applications/YOLOv2/jni/reorg_layer.h b/Applications/YOLOv2/jni/reorg_layer.h
new file mode 100644 (file)
index 0000000..e13cc36
--- /dev/null
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file   reorg_layer.h
+ * @date   4 April 2023
+ * @brief  This file contains the re-organization layer for yolo v2
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __REORGANIZATION_LAYER_H__
+#define __REORGANIZATION_LAYER_H__
+
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+#include <utility>
+
+namespace custom {
+
+/**
+ * @brief A re-organization layer for yolo v2.
+ *
+ */
+class ReorgLayer final : public nntrainer::Layer {
+public:
+  /**
+   * @brief Construct a new Reorg Layer object
+   *
+   */
+  ReorgLayer() : Layer() {}
+
+  /**
+   * @brief Destroy the Reorg Layer object
+   *
+   */
+  ~ReorgLayer() {}
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(nntrainer::InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+   */
+  void exportTo(nntrainer::Exporter &exporter,
+                const ml::train::ExportMethods &method) const override{};
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return ReorgLayer::type; };
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override{};
+
+  inline static const std::string type = "reorg_layer";
+};
+
+} // namespace custom
+
+#endif /* __REORGANIZATION_LAYER_H__ */
diff --git a/Applications/YOLOv2/jni/yolo_v2_loss.cpp b/Applications/YOLOv2/jni/yolo_v2_loss.cpp
new file mode 100644 (file)
index 0000000..8421dd2
--- /dev/null
@@ -0,0 +1,949 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   yolo_v2_loss.cpp
+ * @date   07 March 2023
+ * @brief  This file contains the yolo v2 loss layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include "yolo_v2_loss.h"
+#include <nntrainer_log.h>
+
+namespace custom {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+enum YoloV2LossParams {
+  bbox_x_pred,
+  bbox_y_pred,
+  bbox_w_pred,
+  bbox_h_pred,
+  confidence_pred,
+  class_pred,
+  bbox_w_pred_anchor,
+  bbox_h_pred_anchor,
+  bbox_x_gt,
+  bbox_y_gt,
+  bbox_w_gt,
+  bbox_h_gt,
+  confidence_gt,
+  class_gt,
+  bbox_class_mask,
+  iou_mask,
+  bbox1_width,
+  bbox1_height,
+  is_xy_min_max,
+  intersection_width,
+  intersection_height,
+  unions,
+};
+
+namespace props {
+MaxObjectNumber::MaxObjectNumber(const unsigned &value) { set(value); }
+ClassNumber::ClassNumber(const unsigned &value) { set(value); }
+GridHeightNumber::GridHeightNumber(const unsigned &value) { set(value); }
+GridWidthNumber::GridWidthNumber(const unsigned &value) { set(value); }
+} // namespace props
+
+/**
+ * @brief mse
+ *
+ * @param pred prediction
+ * @param ground_truth ground truth
+ * @return float loss
+ * @todo make loss behaves like acti_func
+ */
+float mse(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth) {
+  nntrainer::Tensor residual;
+  pred.subtract(ground_truth, residual);
+
+  float l2norm = residual.l2norm();
+  l2norm *= l2norm / residual.size();
+
+  return l2norm;
+}
+
+/**
+ * @brief backwarding of mse
+ *
+ * @param pred prediction
+ * @param ground_truth ground truth
+ * @param outgoing_derivative outgoing derivative
+ */
+void msePrime(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth,
+              nntrainer::Tensor &outgoing_derivative) {
+  pred.subtract(ground_truth, outgoing_derivative);
+  float divider = ((float)pred.size()) / 2;
+  if (outgoing_derivative.divide_i(divider) != ML_ERROR_NONE) {
+    throw std::runtime_error(
+      "[YoloV2LossLayer::calcDerivative] Error when calculating loss");
+  }
+}
+
+/**
+ * @brief calculate iou
+ *
+ * @param bbox1_x1 bbox1_x1
+ * @param bbox1_y1 bbox1_y1
+ * @param bbox1_w bbox1_w
+ * @param bbox1_h bbox1_h
+ * @param bbox2_x1 bbox2_x1
+ * @param bbox2_y1 bbox2_y1
+ * @param bbox2_w bbox2_w
+ * @param bbox2_h bbox2_h
+ * @param[out] bbox1_width bbox1 width
+ * @param[out] bbox1_height bbox1 height
+ * @param[out] is_xy_min_max For x1, y1 this value is 1 if x1 > x2, y1 > y2;
+ * for x2, y2 it is 1 if x2 < x1, y2 < y1; else 0.
+ * @param[out] intersection_width intersection width
+ * @param[out] intersection_height intersection height
+ * @param[out] unions unions
+ * @return nntrainer::Tensor iou
+ */
+nntrainer::Tensor
+calc_iou(nntrainer::Tensor &bbox1_x1, nntrainer::Tensor &bbox1_y1,
+         nntrainer::Tensor &bbox1_w, nntrainer::Tensor &bbox1_h,
+         nntrainer::Tensor &bbox2_x1, nntrainer::Tensor &bbox2_y1,
+         nntrainer::Tensor &bbox2_w, nntrainer::Tensor &bbox2_h,
+         nntrainer::Tensor &bbox1_width, nntrainer::Tensor &bbox1_height,
+         nntrainer::Tensor &is_xy_min_max,
+         nntrainer::Tensor &intersection_width,
+         nntrainer::Tensor &intersection_height, nntrainer::Tensor &unions) {
+  nntrainer::Tensor bbox1_x2 = bbox1_x1.add(bbox1_w);
+  nntrainer::Tensor bbox1_y2 = bbox1_y1.add(bbox1_h);
+  nntrainer::Tensor bbox2_x2 = bbox2_x1.add(bbox2_w);
+  nntrainer::Tensor bbox2_y2 = bbox2_y1.add(bbox2_h);
+
+  bbox1_x2.subtract(bbox1_x1, bbox1_width);
+  bbox1_y2.subtract(bbox1_y1, bbox1_height);
+  nntrainer::Tensor bbox1 = bbox1_width.multiply(bbox1_height);
+
+  nntrainer::Tensor bbox2_width = bbox2_x2.subtract(bbox2_x1);
+  nntrainer::Tensor bbox2_height = bbox2_y2.subtract(bbox2_y1);
+  nntrainer::Tensor bbox2 = bbox2_width.multiply(bbox2_height);
+
+  auto min_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
+                      nntrainer::Tensor &intersection_xy) {
+    std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
+                   bbox2_xy.getData(), intersection_xy.getData(),
+                   [](float x1, float x2) { return std::min(x1, x2); });
+  };
+  auto max_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
+                      nntrainer::Tensor &intersection_xy) {
+    std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
+                   bbox2_xy.getData(), intersection_xy.getData(),
+                   [](float x1, float x2) { return std::max(x1, x2); });
+  };
+
+  nntrainer::Tensor intersection_x1(bbox1_x1.getDim());
+  nntrainer::Tensor intersection_x2(bbox1_x1.getDim());
+  nntrainer::Tensor intersection_y1(bbox1_y1.getDim());
+  nntrainer::Tensor intersection_y2(bbox1_y1.getDim());
+  max_func(bbox1_x1, bbox2_x1, intersection_x1);
+  min_func(bbox1_x2, bbox2_x2, intersection_x2);
+  max_func(bbox1_y1, bbox2_y1, intersection_y1);
+  min_func(bbox1_y2, bbox2_y2, intersection_y2);
+
+  auto is_min_max_func = [&](nntrainer::Tensor &xy,
+                             nntrainer::Tensor &intersection,
+                             nntrainer::Tensor &is_min_max) {
+    std::transform(xy.getData(), xy.getData() + xy.size(),
+                   intersection.getData(), is_min_max.getData(),
+                   [](float x, float m) {
+                     return nntrainer::absFloat(x - m) < 1e-4 ? 1.0 : 0.0;
+                   });
+  };
+
+  nntrainer::Tensor is_bbox1_x1_max(bbox1_x1.getDim());
+  nntrainer::Tensor is_bbox1_y1_max(bbox1_x1.getDim());
+  nntrainer::Tensor is_bbox1_x2_min(bbox1_x1.getDim());
+  nntrainer::Tensor is_bbox1_y2_min(bbox1_x1.getDim());
+  is_min_max_func(bbox1_x1, intersection_x1, is_bbox1_x1_max);
+  is_min_max_func(bbox1_y1, intersection_y1, is_bbox1_y1_max);
+  is_min_max_func(bbox1_x2, intersection_x2, is_bbox1_x2_min);
+  is_min_max_func(bbox1_y2, intersection_y2, is_bbox1_y2_min);
+
+  nntrainer::Tensor is_bbox_min_max = nntrainer::Tensor::cat(
+    {is_bbox1_x1_max, is_bbox1_y1_max, is_bbox1_x2_min, is_bbox1_y2_min}, 3);
+  is_xy_min_max.copyData(is_bbox_min_max);
+
+  intersection_x2.subtract(intersection_x1, intersection_width);
+
+  auto type_intersection_width = intersection_width.getDataType();
+  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
+    intersection_width.apply_i<float>(nntrainer::ActiFunc::relu<float>);
+  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    intersection_width.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  intersection_y2.subtract(intersection_y1, intersection_height);
+
+  auto type_intersection_height = intersection_height.getDataType();
+  if (type_intersection_height == ml::train::TensorDim::DataType::FP32) {
+    intersection_height.apply_i<float>(nntrainer::ActiFunc::relu<float>);
+  } else if (type_intersection_height == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    intersection_height.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  nntrainer::Tensor intersection =
+    intersection_width.multiply(intersection_height);
+  bbox1.add(bbox2, unions);
+  unions.subtract_i(intersection);
+
+  return intersection.divide(unions);
+}
+
+/**
+ * @brief calculate iou gradient
+ * @details Let say bbox_pred as x, intersection as f(x), union as g(x) and iou
+ * as y. Then y = f(x)/g(x). Also g(x) = bbox1 + bbox2 - f(x). Partial
+ * derivative of y with respect to x will be (f'(x)g(x) - f(x)g'(x))/(g(x)^2).
+ * Partial derivative of g(x) with respect to x will be bbox1'(x) - f'(x).
+ * @param confidence_gt_grad incoming derivative for iou
+ * @param bbox1_width bbox1_width
+ * @param bbox1_height bbox1_height
+ * @param is_xy_min_max For x1, y1 this value is 1 if x1 > x2, y1 > y2; for
+ * x2, y2 it is 1 if x2 < x1, y2 < y1; else 0.
+ * @param intersection_width intersection width
+ * @param intersection_height intersection height
+ * @param unions unions
+ * @return std::vector<nntrainer::Tensor> iou_grad
+ */
+std::vector<nntrainer::Tensor> calc_iou_grad(
+  nntrainer::Tensor &confidence_gt_grad, nntrainer::Tensor &bbox1_width,
+  nntrainer::Tensor &bbox1_height, nntrainer::Tensor &is_xy_min_max,
+  nntrainer::Tensor &intersection_width, nntrainer::Tensor &intersection_height,
+  nntrainer::Tensor &unions) {
+  nntrainer::Tensor intersection =
+    intersection_width.multiply(intersection_height);
+
+  // 1. calculate intersection local gradient [f'(x)]
+  nntrainer::Tensor intersection_width_relu_prime;
+  nntrainer::Tensor intersection_height_relu_prime;
+  auto type_intersection_width = intersection_width.getDataType();
+  // compute relu' for both intersection width and height in the active dtype
+  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
+    intersection_width_relu_prime =
+      intersection_width.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
+    intersection_height_relu_prime =
+      intersection_height.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
+  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    intersection_width_relu_prime =
+      intersection_width.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
+    intersection_height_relu_prime =
+      intersection_height.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  nntrainer::Tensor intersection_x2_local_grad =
+    intersection_width_relu_prime.multiply(intersection_height);
+  nntrainer::Tensor intersection_y2_local_grad =
+    intersection_height_relu_prime.multiply(intersection_width);
+  nntrainer::Tensor intersection_x1_local_grad =
+    intersection_x2_local_grad.multiply(-1.0);
+  nntrainer::Tensor intersection_y1_local_grad =
+    intersection_y2_local_grad.multiply(-1.0);
+
+  nntrainer::Tensor intersection_local_grad = nntrainer::Tensor::cat(
+    {intersection_x1_local_grad, intersection_y1_local_grad,
+     intersection_x2_local_grad, intersection_y2_local_grad},
+    3);
+  intersection_local_grad.multiply_i(is_xy_min_max);
+
+  // 2. calculate union local gradient [g'(x)]
+  nntrainer::Tensor bbox1_x1_grad = bbox1_height.multiply(-1.0);
+  nntrainer::Tensor bbox1_y1_grad = bbox1_width.multiply(-1.0);
+  nntrainer::Tensor bbox1_x2_grad = bbox1_height;
+  nntrainer::Tensor bbox1_y2_grad = bbox1_width;
+  nntrainer::Tensor bbox1_grad = nntrainer::Tensor::cat(
+    {bbox1_x1_grad, bbox1_y1_grad, bbox1_x2_grad, bbox1_y2_grad}, 3);
+
+  nntrainer::Tensor unions_local_grad =
+    bbox1_grad.subtract(intersection_local_grad);
+
+  // 3. calculate iou local gradient [(f'(x)g(x) - f(x)g'(x))/(g(x)^2)]
+  nntrainer::Tensor lhs = intersection_local_grad.multiply(unions);
+  nntrainer::Tensor rhs = unions_local_grad.multiply(intersection);
+  nntrainer::Tensor iou_grad = lhs.subtract(rhs);
+  iou_grad.divide_i(unions);
+  iou_grad.divide_i(unions);
+
+  // 4. multiply with incoming derivative
+  iou_grad.multiply_i(confidence_gt_grad);
+
+  auto splitted_iou_grad = iou_grad.split({1, 1, 1, 1}, 3);
+  std::vector<nntrainer::Tensor> ret = {
+    splitted_iou_grad[0].add(splitted_iou_grad[2]),
+    splitted_iou_grad[1].add(splitted_iou_grad[3]), splitted_iou_grad[2],
+    splitted_iou_grad[3]};
+  return ret;
+}
+
+YoloV2LossLayer::YoloV2LossLayer() :
+  anchors_w({1, 1, NUM_ANCHOR, 1}, anchors_w_buf),
+  anchors_h({1, 1, NUM_ANCHOR, 1}, anchors_h_buf),
+  sigmoid(nntrainer::ActivationType::ACT_SIGMOID, true),
+  softmax(nntrainer::ActivationType::ACT_SOFTMAX, true),
+  yolo_v2_loss_props(props::MaxObjectNumber(), props::ClassNumber(),
+                     props::GridHeightNumber(), props::GridWidthNumber()) {
+  anchors_ratio = anchors_w.divide(anchors_h);
+  wt_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) {
+  nntrainer::TensorDim input_dim =
+    context.getInputDimensions()[SINGLE_INOUT_IDX];
+  const unsigned int batch_size = input_dim.batch();
+  const unsigned int class_number =
+    std::get<props::ClassNumber>(yolo_v2_loss_props).get();
+  const unsigned int grid_height_number =
+    std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
+  const unsigned int grid_width_number =
+    std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
+  const unsigned int max_object_number =
+    std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
+  nntrainer::TensorDim label_dim(batch_size, 1, max_object_number, 5);
+  context.setOutputDimensions({label_dim});
+
+  nntrainer::TensorDim bbox_x_pred_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_x_pred] = context.requestTensor(
+    bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_y_pred_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_y_pred] = context.requestTensor(
+    bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_w_pred_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_w_pred] = context.requestTensor(
+    bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_h_pred_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_h_pred] = context.requestTensor(
+    bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim confidence_pred_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::confidence_pred] =
+    context.requestTensor(confidence_pred_dim, "confidence_pred",
+                          nntrainer::Tensor::Initializer::NONE, true,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim class_pred_dim(batch_size,
+                                      grid_height_number * grid_width_number,
+                                      NUM_ANCHOR, class_number);
+  wt_idx[YoloV2LossParams::class_pred] = context.requestTensor(
+    class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_w_pred_anchor_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_w_pred_anchor] =
+    context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_h_pred_anchor_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_h_pred_anchor] =
+    context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_x_gt_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_x_gt] = context.requestTensor(
+    bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_y_gt_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_y_gt] = context.requestTensor(
+    bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_w_gt_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_w_gt] = context.requestTensor(
+    bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_h_gt_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_h_gt] = context.requestTensor(
+    bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim confidence_gt_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::confidence_gt] = context.requestTensor(
+    confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE,
+    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim class_gt_dim(batch_size,
+                                    grid_height_number * grid_width_number,
+                                    NUM_ANCHOR, class_number);
+  wt_idx[YoloV2LossParams::class_gt] = context.requestTensor(
+    class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox_class_mask_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox_class_mask] =
+    context.requestTensor(bbox_class_mask_dim, "bbox_class_mask",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim iou_mask_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::iou_mask] = context.requestTensor(
+    iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox1_width_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox1_width] = context.requestTensor(
+    bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim bbox1_height_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::bbox1_height] = context.requestTensor(
+    bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE,
+    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim is_xy_min_max_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4);
+  wt_idx[YoloV2LossParams::is_xy_min_max] = context.requestTensor(
+    is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE,
+    false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim intersection_width_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::intersection_width] =
+    context.requestTensor(intersection_width_dim, "intersection_width",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim intersection_height_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::intersection_height] =
+    context.requestTensor(intersection_height_dim, "intersection_height",
+                          nntrainer::Tensor::Initializer::NONE, false,
+                          nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+  nntrainer::TensorDim unions_dim(
+    batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+  wt_idx[YoloV2LossParams::unions] = context.requestTensor(
+    unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false,
+    nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+}
+
+void YoloV2LossLayer::forwarding(nntrainer::RunLayerContext &context,
+                                 bool training) {
+  const unsigned int class_number =
+    std::get<props::ClassNumber>(yolo_v2_loss_props).get();
+
+  nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+
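+  // the last axis of the prediction is laid out as
+  // (x, y, w, h, confidence, per-class scores)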
+  std::vector<nntrainer::Tensor> split_input =
+    input.split({1, 1, 1, 1, 1, class_number}, 3);
+  nntrainer::Tensor bbox_x_pred_ = split_input[0];
+  nntrainer::Tensor bbox_y_pred_ = split_input[1];
+  nntrainer::Tensor bbox_w_pred_ = split_input[2];
+  nntrainer::Tensor bbox_h_pred_ = split_input[3];
+  nntrainer::Tensor confidence_pred_ = split_input[4];
+  nntrainer::Tensor class_pred_ = split_input[5];
+
+  nntrainer::Tensor &bbox_x_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+  nntrainer::Tensor &bbox_y_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+  nntrainer::Tensor &bbox_w_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
+  nntrainer::Tensor &bbox_h_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
+
+  nntrainer::Tensor &confidence_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
+  nntrainer::Tensor &class_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
+
+  nntrainer::Tensor &bbox_w_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+  nntrainer::Tensor &bbox_h_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+  bbox_x_pred.copyData(bbox_x_pred_);
+  bbox_y_pred.copyData(bbox_y_pred_);
+  bbox_w_pred.copyData(bbox_w_pred_);
+  bbox_h_pred.copyData(bbox_h_pred_);
+
+  confidence_pred.copyData(confidence_pred_);
+  class_pred.copyData(class_pred_);
+
+  nntrainer::Tensor &bbox_x_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+  nntrainer::Tensor &bbox_y_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+  nntrainer::Tensor &bbox_w_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+  nntrainer::Tensor &bbox_h_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+  nntrainer::Tensor &confidence_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+  nntrainer::Tensor &class_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+  nntrainer::Tensor &bbox_class_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+  nntrainer::Tensor &iou_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+  bbox_x_gt.setValue(0);
+  bbox_y_gt.setValue(0);
+  bbox_w_gt.setValue(0);
+  bbox_h_gt.setValue(0);
+
+  confidence_gt.setValue(0);
+  class_gt.setValue(0);
+
+  // init mask
+  bbox_class_mask.setValue(0);
+  iou_mask.setValue(0.5);
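+  // anchors with no assigned object keep iou_mask = 0.5, which halves their
+  // no-object confidence loss (a lambda_noobj-style down-weighting);
+  // generate_ground_truth raises the mask to 1 for responsible anchors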
+
+  // activate predictions: sigmoid keeps x, y in (0, 1) as offsets within a
+  // grid cell; exp keeps w, h positive as scale factors for the anchors
+  sigmoid.run_fn(bbox_x_pred, bbox_x_pred);
+  sigmoid.run_fn(bbox_y_pred, bbox_y_pred);
+
+  auto type_bbox_w_pred = bbox_w_pred.getDataType();
+  if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP32) {
+    bbox_w_pred.apply_i<float>(nntrainer::exp_util<float>);
+  } else if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    bbox_w_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  auto type_bbox_h_pred = bbox_h_pred.getDataType();
+  if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP32) {
+    bbox_h_pred.apply_i<float>(nntrainer::exp_util<float>);
+  } else if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    bbox_h_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  sigmoid.run_fn(confidence_pred, confidence_pred);
+  softmax.run_fn(class_pred, class_pred);
+
+  bbox_w_pred_anchor.copyData(bbox_w_pred);
+  bbox_h_pred_anchor.copyData(bbox_h_pred);
+
+  // apply anchors to bounding box
+  bbox_w_pred_anchor.multiply_i(anchors_w);
+  auto type_bbox_w_pred_anchor = bbox_w_pred_anchor.getDataType();
+  if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP32) {
+    bbox_w_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
+  } else if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    bbox_w_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  bbox_h_pred_anchor.multiply_i(anchors_h);
+  auto type_bbox_h_pred_anchor = bbox_h_pred_anchor.getDataType();
+  if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP32) {
+    bbox_h_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
+  } else if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    bbox_h_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  generate_ground_truth(context);
+
+  nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
+    {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
+  nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
+  nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
+  nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
+
+  nntrainer::Tensor bbox_gt =
+    nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
+  nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
+  nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
+  nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
+
+  float bbox_loss = mse(masked_bbox_pred, masked_bbox_gt);
+  float confidence_loss = mse(masked_confidence_pred, masked_confidence_gt);
+  float class_loss = mse(masked_class_pred, masked_class_gt);
+
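+  // the coordinate term is weighted by 5, in the spirit of lambda_coord = 5
+  // from the original YOLO paper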
+  float loss = 5 * bbox_loss + confidence_loss + class_loss;
+  ml_logd("Current iteration loss: %f", loss);
+}
+
+void YoloV2LossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+  nntrainer::Tensor &bbox_x_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+  nntrainer::Tensor &bbox_x_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_x_pred]);
+  nntrainer::Tensor &bbox_y_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+  nntrainer::Tensor &bbox_y_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_y_pred]);
+  nntrainer::Tensor &bbox_w_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
+  nntrainer::Tensor &bbox_w_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_w_pred]);
+  nntrainer::Tensor &bbox_h_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
+  nntrainer::Tensor &bbox_h_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_h_pred]);
+
+  nntrainer::Tensor &confidence_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
+  nntrainer::Tensor &confidence_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::confidence_pred]);
+  nntrainer::Tensor &class_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
+  nntrainer::Tensor &class_pred_grad =
+    context.getTensorGrad(wt_idx[YoloV2LossParams::class_pred]);
+
+  nntrainer::Tensor &bbox_w_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+  nntrainer::Tensor &bbox_h_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+  nntrainer::Tensor &bbox_x_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+  nntrainer::Tensor &bbox_y_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+  nntrainer::Tensor &bbox_w_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+  nntrainer::Tensor &bbox_h_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+  nntrainer::Tensor &confidence_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+  nntrainer::Tensor &class_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+  nntrainer::Tensor &bbox_class_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+  nntrainer::Tensor &iou_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+  nntrainer::Tensor &bbox1_width =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
+  nntrainer::Tensor &bbox1_height =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
+  nntrainer::Tensor &is_xy_min_max =
+    context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
+  nntrainer::Tensor &intersection_width =
+    context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
+  nntrainer::Tensor &intersection_height =
+    context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
+  nntrainer::Tensor &unions =
+    context.getTensor(wt_idx[YoloV2LossParams::unions]);
+
+  nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
+    {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
+  nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
+  nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
+  nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
+
+  nntrainer::Tensor bbox_gt =
+    nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
+  nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
+  nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
+  nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
+
+  nntrainer::Tensor masked_bbox_pred_grad;
+  nntrainer::Tensor masked_confidence_pred_grad;
+  nntrainer::Tensor masked_confidence_gt_grad;
+  nntrainer::Tensor masked_class_pred_grad;
+
+  nntrainer::Tensor confidence_gt_grad;
+
+  msePrime(masked_bbox_pred, masked_bbox_gt, masked_bbox_pred_grad);
+  msePrime(masked_confidence_pred, masked_confidence_gt,
+           masked_confidence_pred_grad);
+  msePrime(masked_confidence_gt, masked_confidence_pred,
+           masked_confidence_gt_grad);
+  msePrime(masked_class_pred, masked_class_gt, masked_class_pred_grad);
+
+  masked_bbox_pred_grad.multiply_i(5);
+
+  nntrainer::Tensor bbox_pred_grad;
+
+  masked_bbox_pred_grad.multiply(bbox_class_mask, bbox_pred_grad);
+  masked_confidence_pred_grad.multiply(iou_mask, confidence_pred_grad);
+  masked_confidence_gt_grad.multiply(iou_mask, confidence_gt_grad);
+  masked_class_pred_grad.multiply(bbox_class_mask, class_pred_grad);
+
+  std::vector<nntrainer::Tensor> split_bbox_pred_grad =
+    bbox_pred_grad.split({1, 1, 1, 1}, 3);
+  bbox_x_pred_grad.copyData(split_bbox_pred_grad[0]);
+  bbox_y_pred_grad.copyData(split_bbox_pred_grad[1]);
+  bbox_w_pred_grad.copyData(split_bbox_pred_grad[2]);
+  bbox_h_pred_grad.copyData(split_bbox_pred_grad[3]);
+
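+  // (disabled) gradient of the IoU-based confidence target with respect to
+  // the predicted box coordinates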
+  // std::vector<nntrainer::Tensor> bbox_pred_iou_grad =
+  //   calc_iou_grad(confidence_gt_grad, bbox1_width, bbox1_height,
+  //   is_xy_min_max,
+  //                 intersection_width, intersection_height, unions);
+  // bbox_x_pred_grad.add_i(bbox_pred_iou_grad[0]);
+  // bbox_y_pred_grad.add_i(bbox_pred_iou_grad[1]);
+  // bbox_w_pred_grad.add_i(bbox_pred_iou_grad[2]);
+  // bbox_h_pred_grad.add_i(bbox_pred_iou_grad[3]);
+
+  /**
+   * @brief calculate the gradient of applying the anchors to the bounding box
+   * @details Let bbox_pred be x, the anchor be c (a constant with respect to
+   * bbox_pred), and bbox_pred_anchor be y, so that y = sqrt(c * x). The
+   * partial derivative of y with respect to x is sqrt(c) / (2 * sqrt(x)),
+   * which is equivalent to sqrt(c * x) / (2 * x), so sqrt(c * x) can be
+   * replaced by y: dy/dx = y / (2 * x).
+   * @note the division by bbox_pred (x) is not executed here because
+   * bbox_pred_grad would be multiplied by bbox_pred (x) again soon after;
+   * the two operations cancel out.
+   */
+  bbox_w_pred_grad.multiply_i(bbox_w_pred_anchor);
+  bbox_h_pred_grad.multiply_i(bbox_h_pred_anchor);
+  /** the division by bbox_pred is intentionally skipped; see the note above */
+  // bbox_w_pred_grad.divide_i(bbox_w_pred);
+  // bbox_h_pred_grad.divide_i(bbox_h_pred);
+  bbox_w_pred_grad.divide_i(2);
+  bbox_h_pred_grad.divide_i(2);
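+  /**
+   * quick numeric check of the note above: with c = 4 and x = 0.25,
+   * y = sqrt(4 * 0.25) = 1 and dy/dx = sqrt(4) / (2 * sqrt(0.25)) = 2,
+   * which equals y / (2 * x) = 1 / 0.5 = 2 -- hence multiply by y and
+   * divide by 2, while the division by x cancels as described.
+   */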
+
+  sigmoid.run_prime_fn(bbox_x_pred, bbox_x_pred, bbox_x_pred_grad,
+                       bbox_x_pred_grad);
+  sigmoid.run_prime_fn(bbox_y_pred, bbox_y_pred, bbox_y_pred_grad,
+                       bbox_y_pred_grad);
+  /** the exp backward multiplication is intentionally skipped; it cancels
+   *  with the division skipped above */
+  // bbox_w_pred_grad.multiply_i(bbox_w_pred);
+  // bbox_h_pred_grad.multiply_i(bbox_h_pred);
+  sigmoid.run_prime_fn(confidence_pred, confidence_pred, confidence_pred_grad,
+                       confidence_pred_grad);
+  softmax.run_prime_fn(class_pred, class_pred, class_pred_grad,
+                       class_pred_grad);
+
+  nntrainer::Tensor outgoing_derivative_ = nntrainer::Tensor::cat(
+    {bbox_x_pred_grad, bbox_y_pred_grad, bbox_w_pred_grad, bbox_h_pred_grad,
+     confidence_pred_grad, class_pred_grad},
+    3);
+  nntrainer::Tensor &outgoing_derivative =
+    context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  outgoing_derivative.copyData(outgoing_derivative_);
+}
+
+void YoloV2LossLayer::exportTo(nntrainer::Exporter &exporter,
+                               const ml::train::ExportMethods &method) const {
+  exporter.saveResult(yolo_v2_loss_props, method, this);
+}
+
+void YoloV2LossLayer::setProperty(const std::vector<std::string> &values) {
+  auto remain_props = loadProperties(values, yolo_v2_loss_props);
+  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
+    << "[YoloV2LossLayer] Unknown Layer Properties count " +
+         std::to_string(values.size());
+}
+
+void YoloV2LossLayer::setBatch(nntrainer::RunLayerContext &context,
+                               unsigned int batch) {
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::confidence_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::class_pred], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor], batch);
+
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::confidence_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::class_gt], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox_class_mask], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::iou_mask], batch);
+
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox1_width], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::bbox1_height], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::is_xy_min_max], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::intersection_width], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::intersection_height], batch);
+  context.updateTensor(wt_idx[YoloV2LossParams::unions], batch);
+}
+
+unsigned int YoloV2LossLayer::find_responsible_anchors(float bbox_ratio) {
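+  // the responsible anchor is the one whose stored w/h ratio is closest
+  // (smallest absolute difference) to the object's w/h ratio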
+  nntrainer::Tensor similarity = anchors_ratio.subtract(bbox_ratio);
+  auto data_type = similarity.getDataType();
+  if (data_type == ml::train::TensorDim::DataType::FP32) {
+    similarity.apply_i<float>(nntrainer::absFloat<float>);
+  } else if (data_type == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    similarity.apply_i<_FP16>(nntrainer::absFloat<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+  auto data = similarity.getData();
+
+  auto min_iter = std::min_element(data, data + NUM_ANCHOR);
+  return std::distance(data, min_iter);
+}
+
+void YoloV2LossLayer::generate_ground_truth(
+  nntrainer::RunLayerContext &context) {
+  const unsigned int max_object_number =
+    std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
+  const unsigned int grid_height_number =
+    std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
+  const unsigned int grid_width_number =
+    std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
+
+  nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX);
+
+  nntrainer::Tensor &bbox_x_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+  nntrainer::Tensor &bbox_y_pred =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+  nntrainer::Tensor &bbox_w_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+  nntrainer::Tensor &bbox_h_pred_anchor =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+  nntrainer::Tensor &bbox_x_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+  nntrainer::Tensor &bbox_y_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+  nntrainer::Tensor &bbox_w_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+  nntrainer::Tensor &bbox_h_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+  nntrainer::Tensor &confidence_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+  nntrainer::Tensor &class_gt =
+    context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+  nntrainer::Tensor &bbox_class_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+  nntrainer::Tensor &iou_mask =
+    context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+  nntrainer::Tensor &bbox1_width =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
+  nntrainer::Tensor &bbox1_height =
+    context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
+  nntrainer::Tensor &is_xy_min_max =
+    context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
+  nntrainer::Tensor &intersection_width =
+    context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
+  nntrainer::Tensor &intersection_height =
+    context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
+  nntrainer::Tensor &unions =
+    context.getTensor(wt_idx[YoloV2LossParams::unions]);
+
+  const unsigned int batch_size = bbox_x_pred.getDim().batch();
+
+  std::vector<nntrainer::Tensor> split_label =
+    label.split({1, 1, 1, 1, 1}, 3);
+  nntrainer::Tensor bbox_x_label = split_label[0];
+  nntrainer::Tensor bbox_y_label = split_label[1];
+  nntrainer::Tensor bbox_w_label = split_label[2];
+  nntrainer::Tensor bbox_h_label = split_label[3];
+  nntrainer::Tensor class_label = split_label[4];
+
+  bbox_x_label.multiply_i(grid_width_number);
+  bbox_y_label.multiply_i(grid_height_number);
+
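+  // for each labeled object: the integer part of the scaled center picks the
+  // grid cell, the fractional part becomes the x/y target, and the anchor
+  // with the closest aspect ratio is made responsible; a zero-sized box is
+  // treated as the end of the object list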
+  for (unsigned int batch = 0; batch < batch_size; ++batch) {
+    for (unsigned int object = 0; object < max_object_number; ++object) {
+      if (!bbox_w_label.getValue(batch, 0, object, 0) &&
+          !bbox_h_label.getValue(batch, 0, object, 0)) {
+        break;
+      }
+      unsigned int grid_x_index = bbox_x_label.getValue(batch, 0, object, 0);
+      unsigned int grid_y_index = bbox_y_label.getValue(batch, 0, object, 0);
+      unsigned int grid_index = grid_y_index * grid_width_number + grid_x_index;
+      unsigned int responsible_anchor =
+        find_responsible_anchors(bbox_w_label.getValue(batch, 0, object, 0) /
+                                 bbox_h_label.getValue(batch, 0, object, 0));
+
+      bbox_x_gt.setValue(batch, grid_index, responsible_anchor, 0,
+                         bbox_x_label.getValue(batch, 0, object, 0) -
+                           grid_x_index);
+      bbox_y_gt.setValue(batch, grid_index, responsible_anchor, 0,
+                         bbox_y_label.getValue(batch, 0, object, 0) -
+                           grid_y_index);
+      bbox_w_gt.setValue(
+        batch, grid_index, responsible_anchor, 0,
+        nntrainer::sqrtFloat(bbox_w_label.getValue(batch, 0, object, 0)));
+      bbox_h_gt.setValue(
+        batch, grid_index, responsible_anchor, 0,
+        nntrainer::sqrtFloat(bbox_h_label.getValue(batch, 0, object, 0)));
+
+      class_gt.setValue(batch, grid_index, responsible_anchor,
+                        class_label.getValue(batch, 0, object, 0), 1);
+      bbox_class_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
+      iou_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
+    }
+  }
+
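+  // following YOLOv2, the confidence target is the IoU between the predicted
+  // and ground-truth boxes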
+  nntrainer::Tensor iou = calc_iou(
+    bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor, bbox_x_gt,
+    bbox_y_gt, bbox_w_gt, bbox_h_gt, bbox1_width, bbox1_height, is_xy_min_max,
+    intersection_width, intersection_height, unions);
+  confidence_gt.copyData(iou);
+}
+
+#ifdef PLUGGABLE
+
+nntrainer::Layer *create_yolo_v2_loss_layer() {
+  auto layer = new YoloV2LossLayer();
+  return layer;
+}
+
+void destroy_yolo_v2_loss_layer(nntrainer::Layer *layer) { delete layer; }
+
+/**
+ * @note ml_train_layer_pluggable defines the entry point for nntrainer to
+ * register a plugin layer
+ */
+extern "C" {
+nntrainer::LayerPluggable ml_train_layer_pluggable{create_yolo_v2_loss_layer,
+                                                   destroy_yolo_v2_loss_layer};
+}
+
+#endif
+} // namespace custom
diff --git a/Applications/YOLOv2/jni/yolo_v2_loss.h b/Applications/YOLOv2/jni/yolo_v2_loss.h
new file mode 100644 (file)
index 0000000..fd1f2fa
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   yolo_v2_loss.h
+ * @date   07 March 2023
+ * @brief  This file contains the yolo v2 loss layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+#ifndef __YOLO_V2_LOSS_LAYER_H__
+#define __YOLO_V2_LOSS_LAYER_H__
+
+#include <string>
+
+#include <acti_func.h>
+#include <base_properties.h>
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+
+namespace custom {
+
+namespace props {
+
+/**
+ * @brief maximum number of objects in a single image of the given dataset
+ *
+ */
+class MaxObjectNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  MaxObjectNumber(const unsigned &value = 1);
+  static constexpr const char *key = "max_object_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of object classes in the given dataset
+ *
+ */
+class ClassNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  ClassNumber(const unsigned &value = 1);
+  static constexpr const char *key = "class_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of grid cells along the image height
+ *
+ */
+class GridHeightNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  GridHeightNumber(const unsigned &value = 1);
+  static constexpr const char *key = "grid_height_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of grid cells along the image width
+ *
+ */
+class GridWidthNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  GridWidthNumber(const unsigned &value = 1);
+  static constexpr const char *key = "grid_width_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+} // namespace props
+
+/**
+ * @brief Yolo V2 loss layer
+ *
+ */
+class YoloV2LossLayer final : public nntrainer::Layer {
+public:
+  /**
+   * @brief Construct a new YoloV2Loss Layer object
+   *
+   */
+  YoloV2LossLayer();
+
+  /**
+   * @brief Destroy the YoloV2Loss Layer object
+   *
+   */
+  ~YoloV2LossLayer() {}
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(nntrainer::InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+   */
+  void exportTo(nntrainer::Exporter &exporter,
+                const ml::train::ExportMethods &method) const override;
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  /**
+   * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
+   */
+  void setBatch(nntrainer::RunLayerContext &context,
+                unsigned int batch) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::requireLabel()
+   */
+  bool requireLabel() const override { return true; }
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return YoloV2LossLayer::type; };
+
+  inline static const std::string type = "yolo_v2_loss";
+
+private:
+  static constexpr unsigned int NUM_ANCHOR = 5;
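+  /** anchor priors as (width, height) pairs in grid-cell units; these values
+   *  match the standard YOLOv2 anchors from k-means clustering on VOC */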
+  const float anchors_w_buf[NUM_ANCHOR] = {1.3221, 3.19275, 5.05587, 9.47112,
+                                           11.2364};
+  const float anchors_h_buf[NUM_ANCHOR] = {1.73145, 4.00944, 8.09892, 4.84053,
+                                           10.0071};
+  const nntrainer::Tensor anchors_w;
+  const nntrainer::Tensor anchors_h;
+  nntrainer::Tensor anchors_ratio;
+
+  nntrainer::ActiFunc sigmoid; /**< sigmoid activation operation */
+  nntrainer::ActiFunc softmax; /**< softmax activation operation */
+
+  std::tuple<props::MaxObjectNumber, props::ClassNumber,
+             props::GridHeightNumber, props::GridWidthNumber>
+    yolo_v2_loss_props;
+  std::array<unsigned int, 22> wt_idx; /**< indices of the weights */
+
+  /**
+   * @brief find responsible anchors per object
+   */
+  unsigned int find_responsible_anchors(float bbox_ratio);
+
+  /**
+   * @brief generate ground truth, mask from labels
+   */
+  void generate_ground_truth(nntrainer::RunLayerContext &context);
+};
+
+} // namespace custom
+
+#endif /* __YOLO_V2_LOSS_LAYER_H__ */
index 2e3f59fdf2c4adfc75015698e016baa1ea12245f..7c8ef63cd45e6feab700750492a36be02c519b23 100644 (file)
@@ -9,7 +9,7 @@ if enable_ccapi
 endif
 subdir('VGG/jni')
 subdir('Resnet/jni')
-subdir('YOLO/jni')
+subdir('YOLOv2/jni')
 subdir('YOLOv3/jni')
 subdir('LLaMA/jni')
 subdir('Multi_input/jni')