+++ /dev/null
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file dataset.py
-# @date 8 March 2023
-# @brief Define dataset class for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import glob
-import numpy as np
-import torch
-from torch.utils.data import Dataset
-from torch.utils.data.dataloader import default_collate
-from PIL import Image
-
-##
-# @brief dataset class for yolo
-# @note Requires an annotation text file matching each image file name.
-class YOLODataset(Dataset):
- def __init__(self, img_dir, ann_dir):
- super().__init__()
- img_list = glob.glob(img_dir)
- ann_list = glob.glob(ann_dir)
-        img_list.sort()
-        ann_list.sort()
-
- self.length = len(img_list)
- self.input_images = []
- self.bbox_gt = []
- self.cls_gt = []
-
- for i in range(len(img_list)):
- img = np.array(Image.open(img_list[i]).resize((416, 416))) / 255
- label_bbox = []
- label_cls = []
- with open(ann_list[i], 'rt') as f:
- for line in f.readlines():
-                    line = [float(x) for x in line.split()]
- label_bbox.append(np.array(line[1:], dtype=np.float32) / 416)
- label_cls.append(int(line[0]))
-
- self.input_images.append(img)
- self.bbox_gt.append(label_bbox)
- self.cls_gt.append(label_cls)
-
- self.input_images = np.array(self.input_images)
- self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2))
-
- def __len__(self):
- return self.length
-
- def __getitem__(self, idx):
- return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx]
-
-##
-# @brief collate db function for yolo
-def collate_db(batch):
-    """
-    @param batch list of (img, bbox, cls) samples
-    @return collated batch: img stacked into one tensor, bbox and cls kept as
-            per-image lists
-    """
- items = list(zip(*batch))
- items[0] = default_collate(items[0])
- items[1] = list(items[1])
- items[2] = list(items[2])
- return items
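-
-# Minimal usage sketch (illustrative; the paths and batch size below are
-# assumptions, not part of the training script):
-#
-#   from torch.utils.data import DataLoader
-#   dataset = YOLODataset('./images/*', './annotations/*')
-#   loader = DataLoader(dataset, batch_size=2, collate_fn=collate_db)
-#   img, bbox, cls = next(iter(loader))
-#
-# img is a (2, 3, 416, 416) FloatTensor, while bbox and cls remain per-image
-# lists because each image may contain a different number of boxes.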
+++ /dev/null
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file main.py
-# @date 8 March 2023
-# @brief Implement training for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import torch.nn.functional as F
-from torch.utils.data import DataLoader
-
-from yolo import YoloV2
-from yolo_loss import YoloV2_LOSS
-from dataset import YOLODataset, collate_db
-
-import sys
-import os
-
-# get pyutils path using relative path
-def get_util_path():
- current_path = os.path.abspath(os.path.dirname(__file__))
- parent_path = os.path.abspath(os.path.dirname(current_path))
- target_path = os.path.abspath(os.path.dirname(parent_path))
- return os.path.dirname(target_path) + '/tools/pyutils/'
-
-# add pyutils path to sys.path
-sys.path.append(get_util_path())
-from torchconverter import save_bin
-
-# set config
-out_size = 13
-num_classes = 4
-num_anchors = 5
-
-epochs = 3
-batch_size = 4
-
-train_img_dir = '/home/user/TRAIN_DIR/images/*'
-train_ann_dir = '/home/user/TRAIN_DIR/annotations/*'
-valid_img_dir = '/home/user/VALID_DIR/images/*'
-valid_ann_dir = '/home/user/VALID_DIR/annotations/*'
-
-# load data
-train_dataset = YOLODataset(train_img_dir, train_ann_dir)
-train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_db, shuffle=True, drop_last=True)
-valid_dataset = YOLODataset(valid_img_dir, valid_ann_dir)
-valid_loader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate_db, shuffle=False, drop_last=True)
-
-# set model, loss and optimizer
-model = YoloV2(num_classes=num_classes)
-criterion = YoloV2_LOSS(num_classes=num_classes)
-optimizer = optim.Adam(model.parameters(), lr=1e-3)
-# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
-
-# save init model
-save_bin(model, 'init_model')
-torch.save(model.state_dict(), './init_model.pt')
-
-# train model
-best_loss = 1e+10
-for epoch in range(epochs):
- epoch_train_loss = 0
- epoch_valid_loss = 0
- for idx, (img, bbox, cls) in enumerate(train_loader):
- model.train()
- optimizer.zero_grad()
- # model prediction
- hypothesis = model(img).permute((0, 2, 3, 1))
- hypothesis = hypothesis.reshape((batch_size, out_size**2, num_anchors, 5+num_classes))
- # split each prediction(bbox, iou, class prob)
- bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
- bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
- bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
- iou_pred = torch.sigmoid(hypothesis[..., 4:5])
- score_pred = hypothesis[..., 5:].contiguous()
- prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
-        # calc loss
-        # note: wrapping the predictions in torch.FloatTensor() would detach
-        # them from the autograd graph and block training, so pass them as-is
-        loss = criterion(bbox_pred, iou_pred, prob_pred, bbox, cls)
- # back prop
- loss.backward()
- optimizer.step()
- # scheduler.step()
- epoch_train_loss += loss.item()
-
- for idx, (img, bbox, cls) in enumerate(valid_loader):
- model.eval()
- with torch.no_grad():
- # model prediction
- hypothesis = model(img).permute((0, 2, 3, 1))
- hypothesis = hypothesis.reshape((hypothesis.shape[0], out_size**2, num_anchors, 5+num_classes))
- # split each prediction(bbox, iou, class prob)
- bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
- bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
- bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
- iou_pred = torch.sigmoid(hypothesis[..., 4:5])
- score_pred = hypothesis[..., 5:].contiguous()
- prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
- # calc loss
-                loss = criterion(bbox_pred, iou_pred, prob_pred, bbox, cls)
- epoch_valid_loss += loss.item()
-
- if epoch_valid_loss < best_loss:
- best_loss = epoch_valid_loss
- torch.save(model.state_dict(), './best_model.pt')
- save_bin(model, 'best_model')
-
-    print("epoch: {}, train loss: {:.4f}, valid loss: {:.4f}".format(
-        epoch, epoch_train_loss / len(train_loader), epoch_valid_loss / len(valid_loader)))
-
-##
-# @brief bbox post process function for inference
-def post_process_for_bbox(bbox_pred):
- """
- @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
- @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
- """
- anchors = torch.FloatTensor(
- [(1.3221, 1.73145),
- (3.19275, 4.00944),
- (5.05587, 8.09892),
- (9.47112, 4.84053),
- (11.2364, 10.0071)]
- )
-
- outsize = (13, 13)
- width, height = outsize
-
-    # restore cell offsets to absolute x, y (cell index h * width + w is row-major)
-    for w in range(width):
-        for h in range(height):
-            bbox_pred[:, h * width + w, :, 0] += w
-            bbox_pred[:, h * width + w, :, 1] += h
-    bbox_pred[:, :, :, :2] /= width  # width == height == 13
-
- # apply anchors to w, h
- anchor_w = anchors[:, 0].contiguous().view(-1, 1)
- anchor_h = anchors[:, 1].contiguous().view(-1, 1)
- bbox_pred[:, :, :, 2:3] *= anchor_w
- bbox_pred[:, :, :, 3:4] *= anchor_h
-
- return bbox_pred
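-
-# Illustrative example: a prediction in grid cell (row 2, col 3) with sigmoid
-# offsets (0.5, 0.5) is restored to x = (3 + 0.5) / 13, y = (2 + 0.5) / 13,
-# i.e. the center of that cell in normalized image coordinates.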
-
-# inference example using the trained model
-# (img here reuses the last batch drawn from valid_loader above)
-hypothesis = model(img).permute((0, 2, 3, 1))
-hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5+num_classes))
-
-# transform output
-bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
-bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
-bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
-bbox_pred = post_process_for_bbox(bbox_pred)
-iou_pred = torch.sigmoid(hypothesis[..., 4:5])
-score_pred = hypothesis[..., 5:].contiguous()
-prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(score_pred.shape)
-
-# result of inference (data range 0~1)
-iou_mask = (iou_pred > 0.5)
-print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask)
+++ /dev/null
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file yolo.py
-# @date 8 March 2023
-# @brief Define a simple YOLO v2 model (a lighter backbone than the original darknet).
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import torch
-import torch.nn as nn
-
-##
-# @brief define yolo model (except for re-organization module)
-class YoloV2(nn.Module):
-    def __init__(self, num_classes, num_anchors=5):
-        super(YoloV2, self).__init__()
- self.num_classes = num_classes
- self.num_anchors = num_anchors
- self.conv1 = nn.Sequential(nn.Conv2d(3, 32, 3, 1, 1), nn.BatchNorm2d(32, eps=1e-3),
- nn.LeakyReLU(), nn.MaxPool2d(2, 2))
- self.conv2 = nn.Sequential(nn.Conv2d(32, 64, 3, 1, 1), nn.BatchNorm2d(64, eps=1e-3),
- nn.LeakyReLU(), nn.MaxPool2d(2, 2))
- self.conv3 = nn.Sequential(nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3),
- nn.LeakyReLU())
- self.conv4 = nn.Sequential(nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3),
- nn.LeakyReLU())
- self.conv5 = nn.Sequential(nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3),
- nn.LeakyReLU(), nn.MaxPool2d(2, 2))
- self.conv6 = nn.Sequential(nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3),
- nn.LeakyReLU())
- self.conv7 = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3),
- nn.LeakyReLU())
- self.conv8 = nn.Sequential(nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3),
- nn.LeakyReLU(), nn.MaxPool2d(2, 2))
- self.conv9 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
- nn.LeakyReLU())
- self.conv10 = nn.Sequential(nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3),
- nn.LeakyReLU())
- self.conv11 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
- nn.LeakyReLU())
- self.conv12 = nn.Sequential(nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3),
- nn.LeakyReLU())
- self.conv13 = nn.Sequential(nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3),
- nn.LeakyReLU())
-
- self.conv_b = nn.Sequential(nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3),
- nn.LeakyReLU())
-
- self.maxpool_a = nn.MaxPool2d(2, 2)
- self.conv_a1 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a2 = nn.Sequential(nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a3 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a4 = nn.Sequential(nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a5 = nn.Sequential(nn.Conv2d(512, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a6 = nn.Sequential(nn.Conv2d(1024, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
- self.conv_a7 = nn.Sequential(nn.Conv2d(1024, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
-
- self.conv_out1 = nn.Sequential(nn.Conv2d(1280, 1024, 3, 1, 1), nn.BatchNorm2d(1024, eps=1e-3),
- nn.LeakyReLU())
-
- self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0)
-
- def forward(self, input):
- output = self.conv1(input)
- output = self.conv2(output)
- output = self.conv3(output)
- output = self.conv4(output)
- output = self.conv5(output)
- output = self.conv6(output)
- output = self.conv7(output)
- output = self.conv8(output)
- output = self.conv9(output)
- output = self.conv10(output)
- output = self.conv11(output)
- output = self.conv12(output)
- output = self.conv13(output)
-
- output_a = self.maxpool_a(output)
- output_a = self.conv_a1(output_a)
- output_a = self.conv_a2(output_a)
- output_a = self.conv_a3(output_a)
- output_a = self.conv_a4(output_a)
- output_a = self.conv_a5(output_a)
- output_a = self.conv_a6(output_a)
- output_a = self.conv_a7(output_a)
-
-        # re-organization (space-to-depth): trade each 2x2 spatial block for
-        # channels, turning (b, 64, 26, 26) into (b, 256, 13, 13)
-        output_b = self.conv_b(output)
- b, c, h, w = output_b.size()
- output_b = output_b.view(b, int(c / 4), h, 2, w, 2).contiguous()
- output_b = output_b.permute(0, 3, 5, 1, 2, 4).contiguous()
- output_b = output_b.view(b, -1, int(h / 2), int(w / 2))
-
- output = torch.cat((output_a, output_b), 1)
- output = self.conv_out1(output)
- output = self.conv_out2(output)
- return output
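-
-
-# A minimal shape check (illustrative smoke test): with num_classes=4 the head
-# emits num_anchors * (5 + num_classes) = 45 channels on a 13x13 grid.
-if __name__ == "__main__":
-    model = YoloV2(num_classes=4)
-    output = model(torch.zeros((1, 3, 416, 416)))
-    assert output.shape == (1, 45, 13, 13), output.shape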
+++ /dev/null
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
-#
-# @file yolo_loss.py
-# @date 8 March 2023
-# @brief Define loss class for yolo
-#
-# @author Seungbaek Hong <sb92.hong@samsung.com>
-
-import torch
-import torch.nn as nn
-import numpy as np
-
-##
-# @brief calculate iou between two boxes list
-def calculate_iou(bbox1, bbox2):
- """
-    @param bbox1 shape(num_bbox, 4), it contains x, y, w, h
-    @param bbox2 shape(num_bbox, 4), it contains x, y, w, h
-    @return result shape(num_bbox, 1)
- """
- # bbox coordinates
- b1x1, b1y1 = (bbox1[:, :2]).split(1, 1)
- b1x2, b1y2 = (bbox1[:, :2] + (bbox1[:, 2:4])).split(1, 1)
- b2x1, b2y1 = (bbox2[:, :2]).split(1, 1)
- b2x2, b2y2 = (bbox2[:, :2] + (bbox2[:, 2:4])).split(1, 1)
-
- # box areas
- areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
- areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
-
- # intersections
- min_x_of_max_x, max_x_of_min_x = torch.min(b1x2, b2x2), torch.max(b1x1, b2x1)
- min_y_of_max_y, max_y_of_min_y = torch.min(b1y2, b2y2), torch.max(b1y1, b2y1)
- intersection_width = (min_x_of_max_x - max_x_of_min_x).clamp(min=0)
- intersection_height = (min_y_of_max_y - max_y_of_min_y).clamp(min=0)
- intersections = intersection_width * intersection_height
-
- # unions
- unions = (areas1 + areas2) - intersections
-
- result = intersections / unions
- return result
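-
-# Illustrative example: for bbox1 = [[0, 0, 2, 2]] and bbox2 = [[1, 1, 2, 2]]
-# the intersection is 1 x 1 = 1 and the union is 4 + 4 - 1 = 7, so IoU = 1/7.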
-
-##
-# @brief find best iou and its index
-def find_best_ratio(anchors, bbox):
- """
-    @param anchors shape(num_anchors, 2), it contains w, h
-    @param bbox shape(num_bbox, 2), it contains w, h
-    @return best_match index of best match, shape(num_bbox, 1)
- """
- b1 = np.divide(anchors[:, 0], anchors[:, 1])
- b2 = np.divide(bbox[:, 0], bbox[:, 1])
- similarities = np.abs(b1.reshape(-1, 1) - b2)
- best_match = np.argmin(similarities, axis=0)
- return best_match
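-
-# Illustrative example: with the anchors defined in YoloV2_LOSS below (w/h
-# ratios of roughly 0.76, 0.80, 0.62, 1.96 and 1.12), a box with w=0.3 and
-# h=0.15 (ratio 2.0) is matched to anchor index 3, whose ratio 1.96 is closest.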
-
-##
-# @brief loss class for yolo
-class YoloV2_LOSS(nn.Module):
- """Yolo v2 loss"""
- def __init__(self, num_classes, img_shape = (416, 416), outsize = (13, 13)):
- super().__init__()
- self.num_classes = num_classes
- self.img_shape = img_shape
- self.outsize = outsize
- self.hook = dict()
-
- self.anchors = torch.FloatTensor(
- [(1.3221, 1.73145),
- (3.19275, 4.00944),
- (5.05587, 8.09892),
- (9.47112, 4.84053),
- (11.2364, 10.0071)]
- )
-
- self.mse = nn.MSELoss()
- self.bbox_loss, self.iou_loss, self.cls_loss = None, None, None
-
-    ##
-    # @brief function to track gradients of non-leaf variables.
-    def hook_variable(self, name, var):
-        """ Do not use this function when training. It is for debugging. """
-        self.hook[name] = var
-        if not var.requires_grad:
-            # requires_grad can only be set on leaf tensors; non-leaf
-            # activations already record gradient history
-            var.requires_grad_()
-        var.retain_grad()
-
- ##
-    # @brief function to print gradients of non-leaf variables.
- def print_hook_variables(self):
- """ Do not use this function when training. It is for debugging. """
- for k, var in self.hook.items():
- print("gradients of variable {}:".format(k))
- batch, channel, height, width = var.grad.shape
- for b in range(batch):
- for c in range(channel):
- for h in range(height):
- for w in range(width):
- if torch.abs(var.grad[b, c, h, w]).item() >= 1e-3:
- print("(b: {}, c: {}, h: {}, w: {}) = {}"\
- .format(b, c, h, w, var.grad[b, c, h, w]))
- print("=" * 20)
-
- def forward(self, bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt):
- """
- @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
-        @param iou_pred shape(batch_size, cell_h x cell_w, num_anchors, 1)
- @param prob_pred shape(batch_size, cell_h x cell_w, num_anchors, num_classes)
- @param bbox_gt shape(batch_size, num_bbox, 4), data range(0~1)
- @param cls_gt shape(batch_size, num_bbox, 1)
- @return loss shape(1,)
- """
- self.hook_variable("bbox_pred", bbox_pred)
- bbox_pred = self.apply_anchors_to_bbox(bbox_pred)
-
- bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask =\
- self._build_target(bbox_pred, bbox_gt, cls_gt)
-
- self.bbox_loss = self.mse(bbox_pred * bbox_mask,
- bbox_built * bbox_mask)
- self.iou_loss = self.mse(iou_pred * iou_mask,
- iou_built * iou_mask)
- self.cls_loss = self.mse(prob_pred * cls_mask,
- cls_built * cls_mask)
-
- return self.bbox_loss * 5 + self.iou_loss + self.cls_loss
-
- def apply_anchors_to_bbox(self, bbox_pred):
- """
- @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
- @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
- """
- anchor_w = self.anchors[:, 0].contiguous().view(-1, 1)
- anchor_h = self.anchors[:, 1].contiguous().view(-1, 1)
- bbox_pred_tmp = bbox_pred.clone()
- bbox_pred_tmp[:, :, :, 2:3] = torch.sqrt(bbox_pred[:, :, :, 2:3] * anchor_w)
- bbox_pred_tmp[:, :, :, 3:4] = torch.sqrt(bbox_pred[:, :, :, 3:4] * anchor_h)
- return bbox_pred_tmp
-
- def _build_target(self, bbox_pred, bbox_gt, cls_gt):
- """
- @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
- @param bbox_gt shape(batch_size, num_bbox, 4)
- @param cls_gt shape(batch_size, num_bbox, 1)
- @return tuple of (bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask)
- """
- bbox_built, bbox_mask = [], []
- iou_built, iou_mask = [], []
- cls_built, cls_mask = [], []
-
- batch_size = bbox_pred.shape[0]
-
-        # targets are built from detached, per-sample copies of the
-        # predictions; no gradient flows through target construction
-        for i in range(batch_size):
- _bbox_built, _iou_built, _cls_built,\
- _bbox_mask, _iou_mask, _cls_mask =\
- self._make_target_per_sample(
- torch.FloatTensor(bbox_pred[i]),
- torch.FloatTensor(np.array(bbox_gt[i])),
- torch.LongTensor(cls_gt[i])
- )
-
- bbox_built.append(_bbox_built)
- bbox_mask.append(_bbox_mask)
- iou_built.append(_iou_built)
- iou_mask.append(_iou_mask)
- cls_built.append(_cls_built)
- cls_mask.append(_cls_mask)
-
- bbox_built = torch.stack(bbox_built)
- bbox_mask = torch.stack(bbox_mask)
- iou_built = torch.stack(iou_built)
- iou_mask = torch.stack(iou_mask)
- cls_built = torch.stack(cls_built)
- cls_mask = torch.stack(cls_mask)
-
- return bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask
-
- def _make_target_per_sample(self, _bbox_pred, _bbox_gt, _cls_gt):
- """
- @param _bbox_pred shape(cell_h x cell_w, num_anchors, 4)
- @param _bbox_gt shape(num_bbox, 4)
- @param _cls_gt shape(num_bbox,)
- @return tuple of (_bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask)
- """
- hw, num_anchors, _ = _bbox_pred.shape
-
- # set result template
- _bbox_built = torch.zeros((hw, num_anchors, 4))
- _bbox_mask = torch.zeros((hw, num_anchors, 1))
-
- _iou_built = torch.zeros((hw, num_anchors, 1))
- _iou_mask = torch.ones((hw, num_anchors, 1)) * 0.5
-
- _cls_built = torch.zeros((hw, num_anchors, self.num_classes))
- _cls_mask = torch.zeros((hw, num_anchors, 1))
-
- # find best anchors
- _bbox_gt_wh = _bbox_gt.clone()[:, 2:]
- best_anchors = find_best_ratio(self.anchors, _bbox_gt_wh)
-
-        # normalize x, y pos based on cell coordinates
- cx = _bbox_gt[:, 0] * self.outsize[0]
- cy = _bbox_gt[:, 1] * self.outsize[1]
- # calculate cell pos and normalize x, y
- cell_idx = np.floor(cy) * self.outsize[0] + np.floor(cx)
- cell_idx = np.array(cell_idx, dtype=np.int16)
- cx -= np.floor(cx)
- cy -= np.floor(cy)
-
- # set bbox of gt
- _bbox_built[cell_idx, best_anchors, 0] = cx
- _bbox_built[cell_idx, best_anchors, 1] = cy
- _bbox_built[cell_idx, best_anchors, 2] = torch.sqrt(_bbox_gt[:, 2])
- _bbox_built[cell_idx, best_anchors, 3] = torch.sqrt(_bbox_gt[:, 3])
- _bbox_mask[cell_idx, best_anchors, :] = 1
-
- # set cls of gt
- _cls_built[cell_idx, best_anchors, _cls_gt] = 1
- _cls_mask[cell_idx, best_anchors, :] = 1
-
- # set confidence score of gt
- _iou_built = calculate_iou(_bbox_pred.reshape(-1, 4), _bbox_built.view(-1, 4)).detach()
- _iou_built = _iou_built.view(hw, num_anchors, 1)
- _iou_mask[cell_idx, best_anchors, :] = 1
-
- return _bbox_built, _iou_built, _cls_built,\
- _bbox_mask, _iou_mask, _cls_mask
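-
-
-# A minimal smoke test (illustrative; random predictions and a single
-# hypothetical ground-truth box):
-if __name__ == "__main__":
-    loss_fn = YoloV2_LOSS(num_classes=4)
-    bbox_pred = torch.rand((1, 13 * 13, 5, 4))
-    iou_pred = torch.rand((1, 13 * 13, 5, 1))
-    prob_pred = torch.rand((1, 13 * 13, 5, 4))
-    bbox_gt = [[np.array([0.5, 0.5, 0.2, 0.3], dtype=np.float32)]]
-    cls_gt = [[1]]
-    print(loss_fn(bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt))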
+++ /dev/null
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-# ndk path
-ifndef ANDROID_NDK
-$(error ANDROID_NDK is not defined!)
-endif
-
-ifndef NNTRAINER_ROOT
-NNTRAINER_ROOT := $(LOCAL_PATH)/../../..
-endif
-
-ML_API_COMMON_INCLUDES := $(NNTRAINER_ROOT)/ml_api_common/include
-NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
- $(NNTRAINER_ROOT)/nntrainer/dataset \
- $(NNTRAINER_ROOT)/nntrainer/models \
- $(NNTRAINER_ROOT)/nntrainer/layers \
- $(NNTRAINER_ROOT)/nntrainer/compiler \
- $(NNTRAINER_ROOT)/nntrainer/graph \
- $(NNTRAINER_ROOT)/nntrainer/optimizers \
- $(NNTRAINER_ROOT)/nntrainer/tensor \
- $(NNTRAINER_ROOT)/nntrainer/utils \
- $(NNTRAINER_ROOT)/api \
- $(NNTRAINER_ROOT)/api/ccapi/include \
-                      $(ML_API_COMMON_INCLUDES)
-
-LOCAL_MODULE := nntrainer
-LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libnntrainer.so
-
-include $(PREBUILT_SHARED_LIBRARY)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := ccapi-nntrainer
-LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libccapi-nntrainer.so
-
-include $(PREBUILT_SHARED_LIBRARY)
-
-include $(CLEAR_VARS)
-
-LOCAL_ARM_NEON := true
-LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
-LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
-LOCAL_CXXFLAGS += -std=c++17 -frtti
-LOCAL_CFLAGS += -pthread -fexceptions -fopenmp
-LOCAL_LDFLAGS += -fexceptions
-LOCAL_MODULE_TAGS := optional
-LOCAL_ARM_MODE := arm
-LOCAL_MODULE := nntrainer_yolo
-LOCAL_LDLIBS := -llog -landroid -fopenmp
-
-LOCAL_SRC_FILES := main.cpp det_dataloader.cpp yolo_v2_loss.cpp reorg_layer.cpp
-LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
-
-LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)
-
-include $(BUILD_EXECUTABLE)
+++ /dev/null
-APP_ABI := arm64-v8a
-APP_STL := c++_shared
-APP_PLATFORM := android-29
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file det_dataloader.cpp
- * @date 22 March 2023
- * @brief dataloader for object detection dataset
- * @see https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug No known bugs except for NYI items
- */
-
-#include "det_dataloader.h"
-
-#include <algorithm>
-#include <cstring>
-#include <filesystem>
-#include <fstream>
-#include <nntrainer_error.h>
-#include <numeric>
-#include <random>
-#include <sstream>
-
-namespace nntrainer::util {
-
-// Only BMP image files are supported for now.
-DirDataLoader::DirDataLoader(const char *directory_, unsigned int max_num_label,
- unsigned int c, unsigned int w, unsigned int h,
- bool is_train_) :
- max_num_label(max_num_label),
- channel(c),
- height(h),
- width(w),
- is_train(is_train_) {
- dir_path.assign(directory_);
-
- // set data list
- std::filesystem::directory_iterator itr(dir_path + "images");
- while (itr != std::filesystem::end(itr)) {
- // get image file name
- std::string img_file = itr->path().string();
-
- // check if it is bmp image file
- if (img_file.find(".bmp") == std::string::npos) {
- itr++;
- continue;
- }
-
- // set label file name
- std::string label_file = img_file;
- label_file.replace(label_file.find(".bmp"), 4, ".txt");
- label_file.replace(label_file.find("/images"), 7, "/annotations");
-
- // check if there is paired label file
- if (!std::filesystem::exists(label_file)) {
- itr++;
- continue;
- }
-
- // set data list
- data_list.push_back(make_pair(img_file, label_file));
- itr++;
- }
-
- // set index and shuffle data
- idxes = std::vector<unsigned int>(data_list.size());
- std::iota(idxes.begin(), idxes.end(), 0);
- if (is_train)
- std::shuffle(idxes.begin(), idxes.end(), rng);
-
- data_size = data_list.size();
- count = 0;
-}
-
-void read_image(const std::string path, float *input, uint &width,
- uint &height) {
- FILE *f = fopen(path.c_str(), "rb");
-
- if (f == nullptr)
- throw std::invalid_argument("Cannot open file: " + path);
-
- unsigned char info[54];
- size_t s = fread(info, sizeof(unsigned char), 54, f);
-
- unsigned int w = *(int *)&info[18];
- unsigned int h = *(int *)&info[22];
-
-  if (s != 54 || w != width || h != height) {
-    fclose(f);
-    throw std::invalid_argument("Invalid BMP header or dimension mismatch: " +
-                                path);
-  }
-
- int row_padded = (width * 3 + 3) & (~3);
- unsigned char *data = new unsigned char[row_padded];
-
-  // BMP rows are stored bottom-up in BGR order; flip vertically and split
-  // into CHW float planes normalized to [0, 1]
-  for (uint i = 0; i < height; i++) {
-    s = fread(data, sizeof(unsigned char), row_padded, f);
-    for (uint j = 0; j < width; j++) {
-      input[width * (height - i - 1) + j] = (float)data[j * 3 + 2] / 255;
-      input[(height * width) + width * (height - i - 1) + j] =
-        (float)data[j * 3 + 1] / 255;
-      input[(height * width) * 2 + width * (height - i - 1) + j] =
-        (float)data[j * 3] / 255;
-    }
-  }
-
- delete[] data;
- fclose(f);
-}
-
-void DirDataLoader::next(float **input, float **label, bool *last) {
- auto fill_one_sample = [this](float *input_, float *label_, int index) {
- // set input data
- std::string img_file = data_list[index].first;
- read_image(img_file, input_, width, height);
-
- // set label data
- std::string label_file = data_list[index].second;
-    std::memset(label_, 0, 5 * sizeof(float) * max_num_label);
-
- std::ifstream file(label_file);
- std::string cur_line;
-
- int line_idx = 0;
- while (getline(file, cur_line)) {
- std::stringstream ss(cur_line);
- std::string cur_value;
-
- int row_idx = 0;
- while (getline(ss, cur_value, ' ')) {
- if (row_idx == 0) {
- label_[line_idx * 5 + 4] = std::stof(cur_value);
- } else {
- label_[line_idx * 5 + row_idx - 1] = std::stof(cur_value) / 416;
- }
- row_idx++;
- }
-
- line_idx++;
- }
-
- file.close();
- };
-
- fill_one_sample(*input, *label, idxes[count]);
-
- count++;
-
- if (count < data_size) {
- *last = false;
- } else {
- *last = true;
- count = 0;
- std::shuffle(idxes.begin(), idxes.end(), rng);
- }
-}
-
-} // namespace nntrainer::util
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file det_dataloader.h
- * @date 22 March 2023
- * @brief dataloader for object detection dataset
- * @see https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @author Jijoong Moon <jijoong.moon@samsung.com>
- * @bug No known bugs except for NYI items
- */
-
-#include <random>
-#include <string>
-#include <tensor_dim.h>
-#include <vector>
-
-namespace nntrainer::util {
-
-using TensorDim = ml::train::TensorDim;
-
-/**
- * @brief user data object
- */
-class DirDataLoader {
-public:
- /**
- * @brief Construct a new Dir Data Loader object
- */
- DirDataLoader(const char *directory_, unsigned int max_num_label,
- unsigned int c, unsigned int w, unsigned int h, bool is_train_);
- /**
- * @brief Destroy the Dir Data Loader object
- */
- ~DirDataLoader(){};
-
- /**
- * @copydoc void DataLoader::next(float **input, float**label, bool *last)
- */
- void next(float **input, float **label, bool *last);
-
- /**
- * @brief getter for current file name
- * @return current file name
- */
- std::string getCurFileName() { return cur_file_name; };
-
- /**
- * @brief setter for current file name
- */
- void setCurFileName(std::string s) { cur_file_name = s; };
-
-private:
- std::string dir_path;
- unsigned int data_size;
- unsigned int max_num_label;
- unsigned int channel;
- unsigned int height;
- unsigned int width;
- bool is_train;
-
- std::vector<std::pair<std::string, std::string>> data_list;
- std::vector<unsigned int> idxes;
- unsigned int count;
- std::string cur_file_name;
-
- // random number generator
- std::mt19937 rng;
-};
-
-} // namespace nntrainer::util
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file main.cpp
- * @date 03 March 2023
- * @todo replace backbone to original darknet of yolo v2
- * @brief application example for YOLO v2
- * @see https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug No known bugs except for NYI items
- */
-
-#include <array>
-#include <chrono>
-#include <ctime>
-#include <iostream>
-#include <memory>
-#include <sstream>
-#include <vector>
-
-#include <app_context.h>
-#include <det_dataloader.h>
-#include <layer.h>
-#include <model.h>
-#include <optimizer.h>
-
-#include "yolo_v2_loss.h"
-
-#include <reorg_layer.h>
-
-using LayerHandle = std::shared_ptr<ml::train::Layer>;
-using ModelHandle = std::unique_ptr<ml::train::Model>;
-using UserDataType = std::unique_ptr<nntrainer::util::DirDataLoader>;
-
-const unsigned int ANCHOR_NUMBER = 5;
-
-const unsigned int MAX_OBJECT_NUMBER = 4;
-const unsigned int CLASS_NUMBER = 4;
-const unsigned int GRID_HEIGHT_NUMBER = 13;
-const unsigned int GRID_WIDTH_NUMBER = 13;
-const unsigned int IMAGE_HEIGHT_SIZE = 416;
-const unsigned int IMAGE_WIDTH_SIZE = 416;
-const unsigned int BATCH_SIZE = 4;
-const unsigned int EPOCHS = 3;
-const char *TRAIN_DIR_PATH = "/TRAIN_DIR/";
-const char *VALIDATION_DIR_PATH = "/VALID_DIR/";
-// const std::string MODEL_INIT_BIN_PATH = "/home/user/MODEL_INIT_BIN_PATH.bin";
-
-int trainData_cb(float **input, float **label, bool *last, void *user_data) {
- auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
-
- data->next(input, label, last);
- return 0;
-}
-
-int validData_cb(float **input, float **label, bool *last, void *user_data) {
- auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
-
- data->next(input, label, last);
- return 0;
-}
-
-std::array<UserDataType, 2> createDetDataGenerator(const char *train_dir,
-                                                   const char *valid_dir,
-                                                   int max_num_label, int c,
-                                                   int h, int w) {
-  // note: DirDataLoader takes (channel, width, height) in that order
-  UserDataType train_data(new nntrainer::util::DirDataLoader(
-    train_dir, max_num_label, c, w, h, true));
-  UserDataType valid_data(new nntrainer::util::DirDataLoader(
-    valid_dir, max_num_label, c, w, h, false));
-
-  return {std::move(train_data), std::move(valid_data)};
-}
-
-/**
- * @brief make "key=value" from key and value
- *
- * @tparam T type of a value
- * @param key key
- * @param value value
- * @return std::string with "key=value"
- */
-template <typename T>
-static std::string withKey(const std::string &key, const T &value) {
- std::stringstream ss;
- ss << key << "=" << value;
- return ss.str();
-}
-
-template <typename T>
-static std::string withKey(const std::string &key,
- std::initializer_list<T> value) {
- if (std::empty(value)) {
- throw std::invalid_argument("empty data cannot be converted");
- }
-
- std::stringstream ss;
- ss << key << "=";
-
- auto iter = value.begin();
- for (; iter != value.end() - 1; ++iter) {
- ss << *iter << ',';
- }
- ss << *iter;
-
- return ss.str();
-}
-
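-// e.g. withKey("filters", 32) yields "filters=32", and the initializer-list
-// overload withKey("stride", {1, 2}) yields "stride=1,2".
-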
-/**
- * @brief yolo block
- *
- * @param block_name name of the block
- * @param input_name name of the input
- * @param filters number of filters
- * @param kernel_size size of the convolution kernel
- * @param downsample whether to halve the output size with max pooling
- * @return std::vector<LayerHandle> vectors of layers
- */
-std::vector<LayerHandle> yoloBlock(const std::string &block_name,
- const std::string &input_name, int filters,
- int kernel_size, bool downsample) {
- using ml::train::createLayer;
-
- auto scoped_name = [&block_name](const std::string &layer_name) {
- return block_name + "/" + layer_name;
- };
- auto with_name = [&scoped_name](const std::string &layer_name) {
- return withKey("name", scoped_name(layer_name));
- };
-
- auto createConv = [&with_name, filters](const std::string &name,
- int kernel_size, int stride,
- const std::string &padding,
- const std::string &input_layer) {
- std::vector<std::string> props{
- with_name(name),
- withKey("stride", {stride, stride}),
- withKey("filters", filters),
- withKey("kernel_size", {kernel_size, kernel_size}),
- withKey("padding", padding),
- withKey("input_layers", input_layer)};
-
- return createLayer("conv2d", props);
- };
-
- /** construct basic layer **/
- LayerHandle a1 = createConv("a1", kernel_size, 1, "same", input_name);
-
- if (downsample) {
- LayerHandle a2 = createLayer("batch_normalization",
- {with_name("a2"), withKey("momentum", "0.9"),
- withKey("activation", "leaky_relu")});
-
- LayerHandle a3 = createLayer(
- "pooling2d", {withKey("name", block_name), withKey("stride", {2, 2}),
- withKey("pooling", "max"), withKey("pool_size", {2, 2})});
-
- return {a1, a2, a3};
- } else {
- LayerHandle a2 =
- createLayer("batch_normalization",
- {withKey("name", block_name), withKey("momentum", "0.9"),
- withKey("activation", "leaky_relu")});
-
- return {a1, a2};
- }
-}
-
-/**
- * @brief Create yolo v2 light
- *
- * @return vector of layers that contain full graph of yolo v2 light
- */
-ModelHandle YOLO() {
- using ml::train::createLayer;
-
- ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
-
- std::vector<LayerHandle> layers;
-
- layers.push_back(createLayer(
- "input",
- {withKey("name", "input0"),
- withKey("input_shape", "3:" + std::to_string(IMAGE_HEIGHT_SIZE) + ":" +
- std::to_string(IMAGE_WIDTH_SIZE))}));
-
- std::vector<std::vector<LayerHandle>> blocks;
-
- blocks.push_back(yoloBlock("conv1", "input0", 32, 3, true));
- blocks.push_back(yoloBlock("conv2", "conv1", 64, 3, true));
- blocks.push_back(yoloBlock("conv3", "conv2", 128, 3, false));
- blocks.push_back(yoloBlock("conv4", "conv3", 64, 1, false));
- blocks.push_back(yoloBlock("conv5", "conv4", 128, 3, true));
- blocks.push_back(yoloBlock("conv6", "conv5", 256, 3, false));
- blocks.push_back(yoloBlock("conv7", "conv6", 128, 1, false));
- blocks.push_back(yoloBlock("conv8", "conv7", 256, 3, true));
- blocks.push_back(yoloBlock("conv9", "conv8", 512, 3, false));
- blocks.push_back(yoloBlock("conv10", "conv9", 256, 1, false));
- blocks.push_back(yoloBlock("conv11", "conv10", 512, 3, false));
- blocks.push_back(yoloBlock("conv12", "conv11", 256, 1, false));
- blocks.push_back(yoloBlock("conv13", "conv12", 512, 3, false));
-
- blocks.push_back({createLayer(
- "pooling2d", {withKey("name", "conv_a_pool"), withKey("stride", {2, 2}),
- withKey("pooling", "max"), withKey("pool_size", {2, 2}),
- withKey("input_layers", "conv13")})});
- blocks.push_back(yoloBlock("conv_a1", "conv_a_pool", 1024, 3, false));
- blocks.push_back(yoloBlock("conv_a2", "conv_a1", 512, 1, false));
- blocks.push_back(yoloBlock("conv_a3", "conv_a2", 1024, 3, false));
- blocks.push_back(yoloBlock("conv_a4", "conv_a3", 512, 1, false));
- blocks.push_back(yoloBlock("conv_a5", "conv_a4", 1024, 3, false));
- blocks.push_back(yoloBlock("conv_a6", "conv_a5", 1024, 3, false));
- blocks.push_back(yoloBlock("conv_a7", "conv_a6", 1024, 3, false));
-
- blocks.push_back(yoloBlock("conv_b", "conv13", 64, 1, false));
-
- blocks.push_back(
- {createLayer("reorg_layer", {withKey("name", "re_organization"),
- withKey("input_layers", "conv_b")})});
-
- blocks.push_back(
- {createLayer("concat", {withKey("name", "concat"),
- withKey("input_layers", "conv_a7, re_organization"),
- withKey("axis", 1)})});
-
- blocks.push_back(yoloBlock("conv_out1", "concat", 1024, 3, false));
-
- blocks.push_back(
- {createLayer("conv2d", {
- withKey("name", "conv_out2"),
- withKey("filters", 5 * (5 + CLASS_NUMBER)),
- withKey("kernel_size", {1, 1}),
- withKey("stride", {1, 1}),
- withKey("padding", "same"),
- withKey("input_layers", "conv_out1"),
- })});
-
- for (auto &block : blocks) {
- layers.insert(layers.end(), block.begin(), block.end());
- }
-
- layers.push_back(createLayer("permute", {
- withKey("name", "permute"),
- withKey("direction", {2, 3, 1}),
- }));
-
- layers.push_back(createLayer(
- "reshape",
- {
- withKey("name", "reshape"),
- withKey("target_shape",
- std::to_string(GRID_HEIGHT_NUMBER * GRID_WIDTH_NUMBER) + ":" +
- std::to_string(ANCHOR_NUMBER) + ":" +
- std::to_string(5 + CLASS_NUMBER)),
- }));
-
- layers.push_back(createLayer(
- "yolo_v2_loss", {
- withKey("name", "yolo_v2_loss"),
- withKey("max_object_number", MAX_OBJECT_NUMBER),
- withKey("class_number", CLASS_NUMBER),
- withKey("grid_height_number", GRID_HEIGHT_NUMBER),
- withKey("grid_width_number", GRID_WIDTH_NUMBER),
- }));
-
- for (auto &layer : layers) {
- model->addLayer(layer);
- }
-
- return model;
-}
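-
-// With the constants above, conv_out2 emits a (BATCH_SIZE, 45, 13, 13) tensor
-// (45 = ANCHOR_NUMBER * (5 + CLASS_NUMBER)); the permute and reshape layers
-// then feed (BATCH_SIZE, 13 * 13, 5, 5 + CLASS_NUMBER) into yolo_v2_loss.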
-
-int main(int argc, char *argv[]) {
- // print start time
- auto start = std::chrono::system_clock::now();
- std::time_t start_time = std::chrono::system_clock::to_time_t(start);
- std::cout << "started computation at " << std::ctime(&start_time)
- << std::endl;
-
- // set training config and print it
- std::cout << "batch_size: " << BATCH_SIZE << " epochs: " << EPOCHS
- << std::endl;
-
- try {
- // create YOLO v2 model
- ModelHandle model = YOLO();
- model->setProperty({withKey("batch_size", BATCH_SIZE),
- withKey("epochs", EPOCHS),
- withKey("save_path", "yolov2.bin")});
-
- // create optimizer
- auto optimizer = ml::train::createOptimizer(
- "adam", {"learning_rate=0.001", "epsilon=1e-8", "torch_ref=true"});
- model->setOptimizer(std::move(optimizer));
-
- // compile and initialize model
- model->compile();
- model->initialize();
- model->save("./yolov2.ini", ml::train::ModelFormat::MODEL_FORMAT_INI);
- // model->load(MODEL_INIT_BIN_PATH);
-
- // create train and validation data
- std::array<UserDataType, 2> user_datas;
- user_datas = createDetDataGenerator(TRAIN_DIR_PATH, VALIDATION_DIR_PATH,
- MAX_OBJECT_NUMBER, 3, IMAGE_HEIGHT_SIZE,
- IMAGE_WIDTH_SIZE);
- auto &[train_user_data, valid_user_data] = user_datas;
-
- auto dataset_train = ml::train::createDataset(
- ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
- auto dataset_valid = ml::train::createDataset(
- ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());
-
- model->setDataset(ml::train::DatasetModeType::MODE_TRAIN,
- std::move(dataset_train));
- model->setDataset(ml::train::DatasetModeType::MODE_VALID,
- std::move(dataset_valid));
-
- model->train();
- } catch (const std::exception &e) {
- std::cerr << "uncaught error while running! details: " << e.what()
- << std::endl;
- return EXIT_FAILURE;
- }
-
- // print end time and duration
- auto end = std::chrono::system_clock::now();
- std::chrono::duration<double> elapsed_seconds = end - start;
- std::time_t end_time = std::chrono::system_clock::to_time_t(end);
- std::cout << "finished computation at " << std::ctime(&end_time)
- << "elapsed time: " << elapsed_seconds.count() << "s\n";
-}
+++ /dev/null
-# build command for libyolov2_loss_layer.so
-yolov2_loss_src = files('yolo_v2_loss.cpp')
-yolov2_loss_layer = shared_library('yolov2_loss_layer',
- yolov2_loss_src,
- dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
- include_directories: include_directories('./'),
- install: true,
- install_dir: nntrainer_libdir/'nntrainer'/'layers',
- cpp_args: '-DPLUGGABLE'
-)
-yolov2_loss_layer_dep = declare_dependency(
- link_with: yolov2_loss_layer,
- include_directories: include_directories('./')
-)
-
-# build command for libreorg_layer.so
-layer_reorg_src = files('reorg_layer.cpp')
-reorg_layer = shared_library('reorg_layer',
- layer_reorg_src,
- dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
- include_directories: include_directories('./'),
- install: true,
- install_dir: nntrainer_libdir/'nntrainer'/'layers',
- cpp_args: '-DPLUGGABLE'
-)
-reorg_layer_dep = declare_dependency(
- link_with: reorg_layer,
- include_directories: include_directories('./')
-)
-
-yolo_sources = [
- 'main.cpp',
- 'det_dataloader.cpp',
- 'yolo_v2_loss.cpp',
- 'reorg_layer.cpp',
-]
-
-yolo_dependencies = [app_utils_dep,
- nntrainer_dep,
- nntrainer_ccapi_dep,
- yolov2_loss_layer_dep,
- reorg_layer_dep
-]
-
-e = executable('nntrainer_yolov2',
- yolo_sources,
- include_directories: [include_directories('.')],
- dependencies: yolo_dependencies,
- install: get_option('install-app'),
- install_dir: application_install_dir
-)
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file reorg_layer.cpp
- * @date 06 April 2023
- * @todo support in-place operation. We can get the channel, height and width
- * coordinates from the index of the buffer memory, then use the reorganizePos
- * and restorePos functions.
- * @brief This file contains the re-organization layer for YOLO v2
- * @see https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug No known bugs except for NYI items
- */
-
-#include <iostream>
-
-#include "reorg_layer.h"
-
-namespace custom {
-
-static constexpr size_t SINGLE_INOUT_IDX = 0;
-
-namespace ReorgOp {
-
-/**
- * @brief  compute the flat buffer index that coordinate (b, c, h, w) maps to
- *         under the re-organization (space-to-depth) permutation
- * @return flat index into the re-organized buffer
- */
-int reorg(int b, int c, int h, int w, int batch, int channel, int height,
- int width) {
- int out_c = channel / 4;
- int c2 = c % out_c;
- int offset = c / out_c;
- int w2 = w * 2 + offset % 2;
- int h2 = h * 2 + offset / 2;
- int out_index = w2 + width * 2 * (h2 + height * 2 * (c2 + out_c * b));
- return out_index;
-}
-} // namespace ReorgOp
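-
-/**
- * Illustration: interpreting the flat input buffer as a tensor of shape
- * (batch, channel / 4, 2 * height, 2 * width), output element (b, c, h, w)
- * gathers the value at source channel c % (channel / 4) and spatial position
- * (2 * h + offset / 2, 2 * w + offset % 2), where offset = c / (channel / 4).
- * This is a space-to-depth move of each 2x2 block into four channel groups.
- */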
-
-void ReorgLayer::finalize(nntrainer::InitLayerContext &context) {
- std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
-
- for (unsigned int i = 0; i < dim.size(); ++i) {
- if (dim[i].getDataLen() == 0) {
- throw std::invalid_argument("Input dimension is not set");
- } else {
- dim[i].channel(dim[i].channel() * 4);
- dim[i].height(dim[i].height() / 2);
- dim[i].width(dim[i].width() / 2);
- }
- }
-
- context.setOutputDimensions(dim);
-}
-
-void ReorgLayer::forwarding(nntrainer::RunLayerContext &context,
- bool training) {
- nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
- nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
-
- for (int b = 0; b < (int)in.batch(); b++) {
- for (int c = 0; c < (int)in.channel(); c++) {
- for (int h = 0; h < (int)in.height(); h++) {
- for (int w = 0; w < (int)in.width(); w++) {
- int out_idx =
- w + in.width() * (h + in.height() * (c + in.channel() * b));
- int in_idx = ReorgOp::reorg(b, c, h, w, in.batch(), in.channel(),
- in.height(), in.width());
- out.getData()[out_idx] = in.getValue(in_idx);
- }
- }
- }
- }
-}
-
-void ReorgLayer::calcDerivative(nntrainer::RunLayerContext &context) {
- const nntrainer::Tensor &derivative_ =
- context.getIncomingDerivative(SINGLE_INOUT_IDX);
-
- nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
-
- for (int b = 0; b < (int)derivative_.batch(); b++) {
- for (int c = 0; c < (int)derivative_.channel(); c++) {
- for (int h = 0; h < (int)derivative_.height(); h++) {
- for (int w = 0; w < (int)derivative_.width(); w++) {
- int in_idx =
- w + derivative_.width() *
- (h + derivative_.height() * (c + derivative_.channel() * b));
- int out_idx = ReorgOp::reorg(
- b, c, h, w, derivative_.batch(), derivative_.channel(),
- derivative_.height(), derivative_.width());
- dx.getData()[out_idx] = derivative_.getValue(in_idx);
- }
- }
- }
- }
-}
-
-#ifdef PLUGGABLE
-
-nntrainer::Layer *create_reorg_layer() {
- auto layer = new ReorgLayer();
- std::cout << "reorg created\n";
- return layer;
-}
-
-void destroy_reorg_layer(nntrainer::Layer *layer) {
- std::cout << "reorg deleted\n";
- delete layer;
-}
-
-extern "C" {
-nntrainer::LayerPluggable ml_train_layer_pluggable{create_reorg_layer,
- destroy_reorg_layer};
-}
-
-#endif
-
-} // namespace custom
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
- *
- * @file reorg_layer.h
- * @date 4 April 2023
- * @brief This file contains the re-organization layer for YOLO v2
- * @see https://github.com/nnstreamer/nntrainer
- * @author Seungbaek Hong <sb92.hong@samsung.com>
- * @bug No known bugs except for NYI items
- *
- */
-
-#ifndef __REORGANIZATION_LAYER_H__
-#define __REORGANIZATION_LAYER_H__
-
-#include <layer_context.h>
-#include <layer_devel.h>
-#include <node_exporter.h>
-#include <utility>
-
-namespace custom {
-
-/**
- * @brief A Re-organization layer for yolo v2.
- *
- */
-class ReorgLayer final : public nntrainer::Layer {
-public:
- /**
- * @brief Construct a new Reorg Layer object
- *
- */
- ReorgLayer() : Layer() {}
-
- /**
- * @brief Destroy the Reorg Layer object
- *
- */
- ~ReorgLayer() {}
-
- /**
- * @copydoc Layer::finalize(InitLayerContext &context)
- */
- void finalize(nntrainer::InitLayerContext &context) override;
-
- /**
- * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
- */
- void forwarding(nntrainer::RunLayerContext &context, bool training) override;
-
- /**
- * @copydoc Layer::calcDerivative(RunLayerContext &context)
- */
- void calcDerivative(nntrainer::RunLayerContext &context) override;
-
- /**
- * @copydoc bool supportBackwarding() const
- */
- bool supportBackwarding() const override { return true; };
-
- /**
- * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
- */
- void exportTo(nntrainer::Exporter &exporter,
- const ml::train::ExportMethods &method) const override{};
-
- /**
- * @copydoc Layer::getType()
- */
- const std::string getType() const override { return ReorgLayer::type; };
-
- /**
- * @copydoc Layer::setProperty(const std::vector<std::string> &values)
- */
- void setProperty(const std::vector<std::string> &values) override{};
-
- inline static const std::string type = "reorg_layer";
-};
-
-} // namespace custom
-
-#endif /* __REORGANIZATION_LAYER_H__ */
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
- *
- * @file yolo_v2_loss.cpp
- * @date 07 March 2023
- * @brief This file contains the yolo v2 loss layer
- * @see https://github.com/nnstreamer/nntrainer
- * @author Hyeonseok Lee <hs89.lee@samsung.com>
- * @bug No known bugs except for NYI items
- *
- */
-
-#include "yolo_v2_loss.h"
-#include <nntrainer_log.h>
-
-namespace custom {
-
-static constexpr size_t SINGLE_INOUT_IDX = 0;
-
-enum YoloV2LossParams {
- bbox_x_pred,
- bbox_y_pred,
- bbox_w_pred,
- bbox_h_pred,
- confidence_pred,
- class_pred,
- bbox_w_pred_anchor,
- bbox_h_pred_anchor,
- bbox_x_gt,
- bbox_y_gt,
- bbox_w_gt,
- bbox_h_gt,
- confidence_gt,
- class_gt,
- bbox_class_mask,
- iou_mask,
- bbox1_width,
- bbox1_height,
- is_xy_min_max,
- intersection_width,
- intersection_height,
- unions,
-};
-
-namespace props {
-MaxObjectNumber::MaxObjectNumber(const unsigned &value) { set(value); }
-ClassNumber::ClassNumber(const unsigned &value) { set(value); }
-GridHeightNumber::GridHeightNumber(const unsigned &value) { set(value); }
-GridWidthNumber::GridWidthNumber(const unsigned &value) { set(value); }
-} // namespace props
-
-/**
- * @brief mse
- *
- * @param pred prediction
- * @param ground_truth ground truth
- * @return float loss
- * @todo make loss behaves like acti_func
- */
-float mse(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth) {
- nntrainer::Tensor residual;
- pred.subtract(ground_truth, residual);
-
- float l2norm = residual.l2norm();
- l2norm *= l2norm / residual.size();
-
- return l2norm;
-}
-
-/**
- * @brief backwarding of mse
- *
- * @param pred prediction
- * @param ground_truth ground truth
- * @param outgoing_derivative outgoing derivative
- */
-void msePrime(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth,
- nntrainer::Tensor &outgoing_derivative) {
- pred.subtract(ground_truth, outgoing_derivative);
- float divider = ((float)pred.size()) / 2;
- if (outgoing_derivative.divide_i(divider) != ML_ERROR_NONE) {
- throw std::runtime_error(
- "[YoloV2LossLayer::calcDerivative] Error when calculating loss");
- }
-}
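-
-/**
- * Worked example (illustrative): for pred = {1, 3} and ground_truth = {0, 1},
- * mse() returns ((1-0)^2 + (3-1)^2) / 2 = 2.5, and msePrime() fills the
- * outgoing derivative with {1, 2} = 2 * (pred - ground_truth) / size.
- */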
-
-/**
- * @brief calculate iou
- *
- * @param bbox1_x1 bbox1_x1
- * @param bbox1_y1 bbox1_y1
- * @param bbox1_w bbox1_w
- * @param bbox1_h bbox1_h
- * @param bbox2_x1 bbox2_x1
- * @param bbox2_y1 bbox2_y1
- * @param bbox2_w bbox2_w
- * @param bbox2_h bbox2_h
- * @param[out] bbox1_width bbox1 width
- * @param[out] bbox1_height bbox1 height
- * @param[out] is_xy_min_max 1 where bbox1's coordinate was selected by the
- * min/max (bbox1_x1/y1 is the max, bbox1_x2/y2 is the min), else 0.
- * @param[out] intersection_width intersection width
- * @param[out] intersection_height intersection height
- * @param[out] unions unions
- * @return nntrainer::Tensor iou
- */
-nntrainer::Tensor
-calc_iou(nntrainer::Tensor &bbox1_x1, nntrainer::Tensor &bbox1_y1,
- nntrainer::Tensor &bbox1_w, nntrainer::Tensor &bbox1_h,
- nntrainer::Tensor &bbox2_x1, nntrainer::Tensor &bbox2_y1,
- nntrainer::Tensor &bbox2_w, nntrainer::Tensor &bbox2_h,
- nntrainer::Tensor &bbox1_width, nntrainer::Tensor &bbox1_height,
- nntrainer::Tensor &is_xy_min_max,
- nntrainer::Tensor &intersection_width,
- nntrainer::Tensor &intersection_height, nntrainer::Tensor &unions) {
- nntrainer::Tensor bbox1_x2 = bbox1_x1.add(bbox1_w);
- nntrainer::Tensor bbox1_y2 = bbox1_y1.add(bbox1_h);
- nntrainer::Tensor bbox2_x2 = bbox2_x1.add(bbox2_w);
- nntrainer::Tensor bbox2_y2 = bbox2_y1.add(bbox2_h);
-
- bbox1_x2.subtract(bbox1_x1, bbox1_width);
- bbox1_y2.subtract(bbox1_y1, bbox1_height);
- nntrainer::Tensor bbox1 = bbox1_width.multiply(bbox1_height);
-
- nntrainer::Tensor bbox2_width = bbox2_x2.subtract(bbox2_x1);
- nntrainer::Tensor bbox2_height = bbox2_y2.subtract(bbox2_y1);
- nntrainer::Tensor bbox2 = bbox2_width.multiply(bbox2_height);
-
- auto min_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
- nntrainer::Tensor &intersection_xy) {
- std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
- bbox2_xy.getData(), intersection_xy.getData(),
- [](float x1, float x2) { return std::min(x1, x2); });
- };
- auto max_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
- nntrainer::Tensor &intersection_xy) {
- std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
- bbox2_xy.getData(), intersection_xy.getData(),
- [](float x1, float x2) { return std::max(x1, x2); });
- };
-
- nntrainer::Tensor intersection_x1(bbox1_x1.getDim());
- nntrainer::Tensor intersection_x2(bbox1_x1.getDim());
- nntrainer::Tensor intersection_y1(bbox1_y1.getDim());
- nntrainer::Tensor intersection_y2(bbox1_y1.getDim());
- max_func(bbox1_x1, bbox2_x1, intersection_x1);
- min_func(bbox1_x2, bbox2_x2, intersection_x2);
- max_func(bbox1_y1, bbox2_y1, intersection_y1);
- min_func(bbox1_y2, bbox2_y2, intersection_y2);
-
- auto is_min_max_func = [&](nntrainer::Tensor &xy,
- nntrainer::Tensor &intersection,
- nntrainer::Tensor &is_min_max) {
- std::transform(xy.getData(), xy.getData() + xy.size(),
- intersection.getData(), is_min_max.getData(),
- [](float x, float m) {
- return nntrainer::absFloat(x - m) < 1e-4 ? 1.0 : 0.0;
- });
- };
-
- nntrainer::Tensor is_bbox1_x1_max(bbox1_x1.getDim());
- nntrainer::Tensor is_bbox1_y1_max(bbox1_x1.getDim());
- nntrainer::Tensor is_bbox1_x2_min(bbox1_x1.getDim());
- nntrainer::Tensor is_bbox1_y2_min(bbox1_x1.getDim());
- is_min_max_func(bbox1_x1, intersection_x1, is_bbox1_x1_max);
- is_min_max_func(bbox1_y1, intersection_y1, is_bbox1_y1_max);
- is_min_max_func(bbox1_x2, intersection_x2, is_bbox1_x2_min);
- is_min_max_func(bbox1_y2, intersection_y2, is_bbox1_y2_min);
-
- nntrainer::Tensor is_bbox_min_max = nntrainer::Tensor::cat(
- {is_bbox1_x1_max, is_bbox1_y1_max, is_bbox1_x2_min, is_bbox1_y2_min}, 3);
- is_xy_min_max.copyData(is_bbox_min_max);
-
- intersection_x2.subtract(intersection_x1, intersection_width);
-
- auto type_intersection_width = intersection_width.getDataType();
- if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
- intersection_width.apply_i<float>(nntrainer::ActiFunc::relu<float>);
- } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- intersection_width.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- intersection_y2.subtract(intersection_y1, intersection_height);
-
- auto type_intersection_height = intersection_height.getDataType();
- if (type_intersection_height == ml::train::TensorDim::DataType::FP32) {
- intersection_height.apply_i<float>(nntrainer::ActiFunc::relu<float>);
- } else if (type_intersection_height == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- intersection_height.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- nntrainer::Tensor intersection =
- intersection_width.multiply(intersection_height);
- bbox1.add(bbox2, unions);
- unions.subtract_i(intersection);
-
- return intersection.divide(unions);
-}
-
-/**
- * @brief calculate iou gradient
- * @details Let bbox_pred be x, the intersection f(x), the union g(x) and the
- * iou y. Then y = f(x)/g(x) and g(x) = bbox1 + bbox2 - f(x). The partial
- * derivative of y with respect to x is (f'(x)g(x) - f(x)g'(x))/(g(x)^2), and
- * the partial derivative of g(x) with respect to x is bbox1'(x) - f'(x).
- * @param confidence_gt_grad incoming derivative for iou
- * @param bbox1_width bbox1_width
- * @param bbox1_height bbox1_height
- * @param is_xy_min_max 1 where bbox1's coordinate was selected by the
- * min/max (bbox1_x1/y1 is the max, bbox1_x2/y2 is the min), else 0.
- * @param intersection_width intersection width
- * @param intersection_height intersection height
- * @param unions unions
- * @return std::vector<nntrainer::Tensor> iou_grad
- */
-std::vector<nntrainer::Tensor> calc_iou_grad(
- nntrainer::Tensor &confidence_gt_grad, nntrainer::Tensor &bbox1_width,
- nntrainer::Tensor &bbox1_height, nntrainer::Tensor &is_xy_min_max,
- nntrainer::Tensor &intersection_width, nntrainer::Tensor &intersection_height,
- nntrainer::Tensor &unions) {
- nntrainer::Tensor intersection =
- intersection_width.multiply(intersection_height);
-
- // 1. calculate intersection local gradient [f'(x)]
- nntrainer::Tensor intersection_width_relu_prime;
- nntrainer::Tensor intersection_height_relu_prime;
-  auto type_intersection_width = intersection_width.getDataType();
-  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
-    intersection_width_relu_prime =
-      intersection_width.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
-    intersection_height_relu_prime =
-      intersection_height.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
-  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    intersection_width_relu_prime =
-      intersection_width.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
-    intersection_height_relu_prime =
-      intersection_height.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
-#else
-    throw std::runtime_error("Not supported data type");
-#endif
-  }
-
- nntrainer::Tensor intersection_x2_local_grad =
- intersection_width_relu_prime.multiply(intersection_height);
- nntrainer::Tensor intersection_y2_local_grad =
- intersection_height_relu_prime.multiply(intersection_width);
- nntrainer::Tensor intersection_x1_local_grad =
- intersection_x2_local_grad.multiply(-1.0);
- nntrainer::Tensor intersection_y1_local_grad =
- intersection_y2_local_grad.multiply(-1.0);
-
- nntrainer::Tensor intersection_local_grad = nntrainer::Tensor::cat(
- {intersection_x1_local_grad, intersection_y1_local_grad,
- intersection_x2_local_grad, intersection_y2_local_grad},
- 3);
- intersection_local_grad.multiply_i(is_xy_min_max);
-
- // 2. calculate union local gradient [g'(x)]
- nntrainer::Tensor bbox1_x1_grad = bbox1_height.multiply(-1.0);
- nntrainer::Tensor bbox1_y1_grad = bbox1_width.multiply(-1.0);
- nntrainer::Tensor bbox1_x2_grad = bbox1_height;
- nntrainer::Tensor bbox1_y2_grad = bbox1_width;
- nntrainer::Tensor bbox1_grad = nntrainer::Tensor::cat(
- {bbox1_x1_grad, bbox1_y1_grad, bbox1_x2_grad, bbox1_y2_grad}, 3);
-
- nntrainer::Tensor unions_local_grad =
- bbox1_grad.subtract(intersection_local_grad);
-
- // 3. calculate iou local gradient [(f'(x)g(x) - f(x)g'(x))/(g(x)^2)]
- nntrainer::Tensor lhs = intersection_local_grad.multiply(unions);
- nntrainer::Tensor rhs = unions_local_grad.multiply(intersection);
- nntrainer::Tensor iou_grad = lhs.subtract(rhs);
- iou_grad.divide_i(unions);
- iou_grad.divide_i(unions);
-
-  // 4. multiply with incoming derivative
- iou_grad.multiply_i(confidence_gt_grad);
-
- auto splitted_iou_grad = iou_grad.split({1, 1, 1, 1}, 3);
- std::vector<nntrainer::Tensor> ret = {
- splitted_iou_grad[0].add(splitted_iou_grad[2]),
- splitted_iou_grad[1].add(splitted_iou_grad[3]), splitted_iou_grad[2],
- splitted_iou_grad[3]};
- return ret;
-}
-
-YoloV2LossLayer::YoloV2LossLayer() :
- anchors_w({1, 1, NUM_ANCHOR, 1}, anchors_w_buf),
- anchors_h({1, 1, NUM_ANCHOR, 1}, anchors_h_buf),
- sigmoid(nntrainer::ActivationType::ACT_SIGMOID, true),
- softmax(nntrainer::ActivationType::ACT_SOFTMAX, true),
- yolo_v2_loss_props(props::MaxObjectNumber(), props::ClassNumber(),
- props::GridHeightNumber(), props::GridWidthNumber()) {
- anchors_ratio = anchors_w.divide(anchors_h);
- wt_idx.fill(std::numeric_limits<unsigned>::max());
-}
-
-void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) {
- nntrainer::TensorDim input_dim =
- context.getInputDimensions()[SINGLE_INOUT_IDX];
- const unsigned int batch_size = input_dim.batch();
- const unsigned int class_number =
- std::get<props::ClassNumber>(yolo_v2_loss_props).get();
- const unsigned int grid_height_number =
- std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
- const unsigned int grid_width_number =
- std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
- const unsigned int max_object_number =
- std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
- nntrainer::TensorDim label_dim(batch_size, 1, max_object_number, 5);
- context.setOutputDimensions({label_dim});
-
- nntrainer::TensorDim bbox_x_pred_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_x_pred] = context.requestTensor(
- bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_y_pred_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_y_pred] = context.requestTensor(
- bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_w_pred_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_w_pred] = context.requestTensor(
- bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_h_pred_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_h_pred] = context.requestTensor(
- bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim confidence_pred_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::confidence_pred] =
- context.requestTensor(confidence_pred_dim, "confidence_pred",
- nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim class_pred_dim(batch_size,
- grid_height_number * grid_width_number,
- NUM_ANCHOR, class_number);
- wt_idx[YoloV2LossParams::class_pred] = context.requestTensor(
- class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_w_pred_anchor_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_w_pred_anchor] =
- context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor",
- nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_h_pred_anchor_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_h_pred_anchor] =
- context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor",
- nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_x_gt_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_x_gt] = context.requestTensor(
- bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_y_gt_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_y_gt] = context.requestTensor(
- bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_w_gt_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_w_gt] = context.requestTensor(
- bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_h_gt_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_h_gt] = context.requestTensor(
- bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim confidence_gt_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::confidence_gt] = context.requestTensor(
- confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE,
- false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim class_gt_dim(batch_size,
- grid_height_number * grid_width_number,
- NUM_ANCHOR, class_number);
- wt_idx[YoloV2LossParams::class_gt] = context.requestTensor(
- class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox_class_mask_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox_class_mask] =
- context.requestTensor(bbox_class_mask_dim, "bbox_class_mask",
- nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim iou_mask_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::iou_mask] = context.requestTensor(
- iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox1_width_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox1_width] = context.requestTensor(
- bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim bbox1_height_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::bbox1_height] = context.requestTensor(
- bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE,
- false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim is_xy_min_max_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4);
- wt_idx[YoloV2LossParams::is_xy_min_max] = context.requestTensor(
- is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE,
- false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim intersection_width_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::intersection_width] =
- context.requestTensor(intersection_width_dim, "intersection_width",
- nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim intersection_height_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::intersection_height] =
- context.requestTensor(intersection_height_dim, "intersection_height",
- nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-
- nntrainer::TensorDim unions_dim(
- batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
- wt_idx[YoloV2LossParams::unions] = context.requestTensor(
- unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false,
- nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
-}
-
-void YoloV2LossLayer::forwarding(nntrainer::RunLayerContext &context,
- bool training) {
- const unsigned int max_object_number =
- std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
-
- nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
-
- std::vector<nntrainer::Tensor> splited_input =
- input.split({1, 1, 1, 1, 1, max_object_number}, 3);
- nntrainer::Tensor bbox_x_pred_ = splited_input[0];
- nntrainer::Tensor bbox_y_pred_ = splited_input[1];
- nntrainer::Tensor bbox_w_pred_ = splited_input[2];
- nntrainer::Tensor bbox_h_pred_ = splited_input[3];
- nntrainer::Tensor confidence_pred_ = splited_input[4];
- nntrainer::Tensor class_pred_ = splited_input[5];
-
- nntrainer::Tensor &bbox_x_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
- nntrainer::Tensor &bbox_y_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
- nntrainer::Tensor &bbox_w_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
- nntrainer::Tensor &bbox_h_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
-
- nntrainer::Tensor &confidence_pred =
- context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
- nntrainer::Tensor &class_pred =
- context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
-
- nntrainer::Tensor &bbox_w_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
- nntrainer::Tensor &bbox_h_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
- bbox_x_pred.copyData(bbox_x_pred_);
- bbox_y_pred.copyData(bbox_y_pred_);
- bbox_w_pred.copyData(bbox_w_pred_);
- bbox_h_pred.copyData(bbox_h_pred_);
-
- confidence_pred.copyData(confidence_pred_);
- class_pred.copyData(class_pred_);
-
- nntrainer::Tensor &bbox_x_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
- nntrainer::Tensor &bbox_y_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
- nntrainer::Tensor &bbox_w_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
- nntrainer::Tensor &bbox_h_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
- nntrainer::Tensor &confidence_gt =
- context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
- nntrainer::Tensor &class_gt =
- context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
- nntrainer::Tensor &bbox_class_mask =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
- nntrainer::Tensor &iou_mask =
- context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
- bbox_x_gt.setValue(0);
- bbox_y_gt.setValue(0);
- bbox_w_gt.setValue(0);
- bbox_h_gt.setValue(0);
-
- confidence_gt.setValue(0);
- class_gt.setValue(0);
-
- // init mask
- bbox_class_mask.setValue(0);
- iou_mask.setValue(0.5);
-
- // activate pred
- sigmoid.run_fn(bbox_x_pred, bbox_x_pred);
- sigmoid.run_fn(bbox_y_pred, bbox_y_pred);
-
- auto type_bbox_w_pred = bbox_w_pred.getDataType();
- if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP32) {
- bbox_w_pred.apply_i<float>(nntrainer::exp_util<float>);
- } else if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- bbox_w_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- auto type_bbox_h_pred = bbox_h_pred.getDataType();
- if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP32) {
- bbox_h_pred.apply_i<float>(nntrainer::exp_util<float>);
- } else if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- bbox_h_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- sigmoid.run_fn(confidence_pred, confidence_pred);
- softmax.run_fn(class_pred, class_pred);
-
- bbox_w_pred_anchor.copyData(bbox_w_pred);
- bbox_h_pred_anchor.copyData(bbox_h_pred);
-
- // apply anchors to bounding box
- bbox_w_pred_anchor.multiply_i(anchors_w);
- auto type_bbox_w_pred_anchor = bbox_w_pred_anchor.getDataType();
- if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP32) {
- bbox_w_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
- } else if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- bbox_w_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- bbox_h_pred_anchor.multiply_i(anchors_h);
- auto type_bbox_h_pred_anchor = bbox_h_pred_anchor.getDataType();
- if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP32) {
- bbox_h_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
- } else if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- bbox_h_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
-
- generate_ground_truth(context);
-
- nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
- {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
- nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
- nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
- nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
-
- nntrainer::Tensor bbox_gt =
- nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
- nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
- nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
- nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
-
- float bbox_loss = mse(masked_bbox_pred, masked_bbox_gt);
- float confidence_loss = mse(masked_confidence_pred, masked_confidence_gt);
- float class_loss = mse(masked_class_pred, masked_class_gt);
-
- float loss = 5 * bbox_loss + confidence_loss + class_loss;
- ml_logd("Current iteration loss: %f", loss);
-}
-
-void YoloV2LossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
- nntrainer::Tensor &bbox_x_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
- nntrainer::Tensor &bbox_x_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_x_pred]);
- nntrainer::Tensor &bbox_y_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
- nntrainer::Tensor &bbox_y_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_y_pred]);
- nntrainer::Tensor &bbox_w_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
- nntrainer::Tensor &bbox_w_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_w_pred]);
- nntrainer::Tensor &bbox_h_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
- nntrainer::Tensor &bbox_h_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_h_pred]);
-
- nntrainer::Tensor &confidence_pred =
- context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
- nntrainer::Tensor &confidence_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::confidence_pred]);
- nntrainer::Tensor &class_pred =
- context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
- nntrainer::Tensor &class_pred_grad =
- context.getTensorGrad(wt_idx[YoloV2LossParams::class_pred]);
-
- nntrainer::Tensor &bbox_w_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
- nntrainer::Tensor &bbox_h_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
- nntrainer::Tensor &bbox_x_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
- nntrainer::Tensor &bbox_y_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
- nntrainer::Tensor &bbox_w_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
- nntrainer::Tensor &bbox_h_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
- nntrainer::Tensor &confidence_gt =
- context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
- nntrainer::Tensor &class_gt =
- context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
- nntrainer::Tensor &bbox_class_mask =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
- nntrainer::Tensor &iou_mask =
- context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
- nntrainer::Tensor &bbox1_width =
- context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
- nntrainer::Tensor &bbox1_height =
- context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
- nntrainer::Tensor &is_xy_min_max =
- context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
- nntrainer::Tensor &intersection_width =
- context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
- nntrainer::Tensor &intersection_height =
- context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
- nntrainer::Tensor &unions =
- context.getTensor(wt_idx[YoloV2LossParams::unions]);
-
- nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
- {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
- nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
- nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
- nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
-
- nntrainer::Tensor bbox_gt =
- nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
- nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
- nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
- nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
-
- nntrainer::Tensor masked_bbox_pred_grad;
- nntrainer::Tensor masked_confidence_pred_grad;
- nntrainer::Tensor masked_confidence_gt_grad;
- nntrainer::Tensor masked_class_pred_grad;
-
- nntrainer::Tensor confidence_gt_grad;
-
- msePrime(masked_bbox_pred, masked_bbox_gt, masked_bbox_pred_grad);
- msePrime(masked_confidence_pred, masked_confidence_gt,
- masked_confidence_pred_grad);
- msePrime(masked_confidence_gt, masked_confidence_pred,
- masked_confidence_gt_grad);
- msePrime(masked_class_pred, masked_class_gt, masked_class_pred_grad);
-
- masked_bbox_pred_grad.multiply_i(5);
-
- nntrainer::Tensor bbox_pred_grad;
-
- masked_bbox_pred_grad.multiply(bbox_class_mask, bbox_pred_grad);
- masked_confidence_pred_grad.multiply(iou_mask, confidence_pred_grad);
- masked_confidence_gt_grad.multiply(iou_mask, confidence_gt_grad);
- masked_class_pred_grad.multiply(bbox_class_mask, class_pred_grad);
-
- std::vector<nntrainer::Tensor> splitted_bbox_pred_grad =
- bbox_pred_grad.split({1, 1, 1, 1}, 3);
- bbox_x_pred_grad.copyData(splitted_bbox_pred_grad[0]);
- bbox_y_pred_grad.copyData(splitted_bbox_pred_grad[1]);
- bbox_w_pred_grad.copyData(splitted_bbox_pred_grad[2]);
- bbox_h_pred_grad.copyData(splitted_bbox_pred_grad[3]);
-
- // std::vector<nntrainer::Tensor> bbox_pred_iou_grad =
- // calc_iou_grad(confidence_gt_grad, bbox1_width, bbox1_height,
- // is_xy_min_max,
- // intersection_width, intersection_height, unions);
- // bbox_x_pred_grad.add_i(bbox_pred_iou_grad[0]);
- // bbox_y_pred_grad.add_i(bbox_pred_iou_grad[1]);
- // bbox_w_pred_grad.add_i(bbox_pred_iou_grad[2]);
- // bbox_h_pred_grad.add_i(bbox_pred_iou_grad[3]);
-
-  /**
-   * @brief calculate gradient for applying anchors to bounding box
-   * @details Let bbox_pred be x, the anchor be a constant c (constant with
-   * respect to bbox_pred), and bbox_pred_anchor be y, so that y = sqrt(cx).
-   * The partial derivative of y with respect to x is sqrt(c)/(2*sqrt(x)),
-   * which is equivalent to sqrt(cx)/(2x); replacing sqrt(cx) with y gives
-   * y/(2x).
-   * @note The division by bbox_pred(x) is skipped here because
-   * bbox_pred_grad will be multiplied by bbox_pred(x) soon after.
-   */
- bbox_w_pred_grad.multiply_i(bbox_w_pred_anchor);
- bbox_h_pred_grad.multiply_i(bbox_h_pred_anchor);
-  /** intentionally commented out; see the note above */
- // bbox_w_pred_grad.divide_i(bbox_w_pred);
- // bbox_h_pred_grad.divide_i(bbox_h_pred);
- bbox_w_pred_grad.divide_i(2);
- bbox_h_pred_grad.divide_i(2);
-
- sigmoid.run_prime_fn(bbox_x_pred, bbox_x_pred, bbox_x_pred_grad,
- bbox_x_pred_grad);
- sigmoid.run_prime_fn(bbox_y_pred, bbox_y_pred, bbox_y_pred_grad,
- bbox_y_pred_grad);
-  /** intentionally commented out; see the note above */
- // bbox_w_pred_grad.multiply_i(bbox_w_pred);
- // bbox_h_pred_grad.multiply_i(bbox_h_pred);
- sigmoid.run_prime_fn(confidence_pred, confidence_pred, confidence_pred_grad,
- confidence_pred_grad);
- softmax.run_prime_fn(class_pred, class_pred, class_pred_grad,
- class_pred_grad);
-
- nntrainer::Tensor outgoing_derivative_ = nntrainer::Tensor::cat(
- {bbox_x_pred_grad, bbox_y_pred_grad, bbox_w_pred_grad, bbox_h_pred_grad,
- confidence_pred_grad, class_pred_grad},
- 3);
- nntrainer::Tensor &outgoing_derivative =
- context.getOutgoingDerivative(SINGLE_INOUT_IDX);
- outgoing_derivative.copyData(outgoing_derivative_);
-}
-
-void YoloV2LossLayer::exportTo(nntrainer::Exporter &exporter,
- const ml::train::ExportMethods &method) const {
- exporter.saveResult(yolo_v2_loss_props, method, this);
-}
-
-void YoloV2LossLayer::setProperty(const std::vector<std::string> &values) {
- auto remain_props = loadProperties(values, yolo_v2_loss_props);
- NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
- << "[YoloV2LossLayer] Unknown Layer Properties count " +
- std::to_string(values.size());
-}
-
-void YoloV2LossLayer::setBatch(nntrainer::RunLayerContext &context,
- unsigned int batch) {
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::confidence_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::class_pred], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor], batch);
-
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::confidence_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::class_gt], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox_class_mask], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::iou_mask], batch);
-
- context.updateTensor(wt_idx[YoloV2LossParams::bbox1_width], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::bbox1_height], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::is_xy_min_max], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::intersection_width], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::intersection_height], batch);
- context.updateTensor(wt_idx[YoloV2LossParams::unions], batch);
-}
-
-unsigned int YoloV2LossLayer::find_responsible_anchors(float bbox_ratio) {
- nntrainer::Tensor similarity = anchors_ratio.subtract(bbox_ratio);
- auto data_type = similarity.getDataType();
- if (data_type == ml::train::TensorDim::DataType::FP32) {
- similarity.apply_i<float>(nntrainer::absFloat<float>);
- } else if (data_type == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- similarity.apply_i<_FP16>(nntrainer::absFloat<_FP16>);
-#else
- throw std::runtime_error("Not supported data type");
-#endif
- }
- auto data = similarity.getData();
-
- auto min_iter = std::min_element(data, data + NUM_ANCHOR);
- return std::distance(data, min_iter);
-}
-
-void YoloV2LossLayer::generate_ground_truth(
- nntrainer::RunLayerContext &context) {
- const unsigned int max_object_number =
- std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
- const unsigned int grid_height_number =
- std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
- const unsigned int grid_width_number =
- std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
-
- nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX);
-
- nntrainer::Tensor &bbox_x_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
- nntrainer::Tensor &bbox_y_pred =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
- nntrainer::Tensor &bbox_w_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
- nntrainer::Tensor &bbox_h_pred_anchor =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
-
- nntrainer::Tensor &bbox_x_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
- nntrainer::Tensor &bbox_y_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
- nntrainer::Tensor &bbox_w_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
- nntrainer::Tensor &bbox_h_gt =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
-
- nntrainer::Tensor &confidence_gt =
- context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
- nntrainer::Tensor &class_gt =
- context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
-
- nntrainer::Tensor &bbox_class_mask =
- context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
- nntrainer::Tensor &iou_mask =
- context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
-
- nntrainer::Tensor &bbox1_width =
- context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
- nntrainer::Tensor &bbox1_height =
- context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
- nntrainer::Tensor &is_xy_min_max =
- context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
- nntrainer::Tensor &intersection_width =
- context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
- nntrainer::Tensor &intersection_height =
- context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
- nntrainer::Tensor &unions =
- context.getTensor(wt_idx[YoloV2LossParams::unions]);
-
- const unsigned int batch_size = bbox_x_pred.getDim().batch();
-
- std::vector<nntrainer::Tensor> splited_label =
- label.split({1, 1, 1, 1, 1}, 3);
- nntrainer::Tensor bbox_x_label = splited_label[0];
- nntrainer::Tensor bbox_y_label = splited_label[1];
- nntrainer::Tensor bbox_w_label = splited_label[2];
- nntrainer::Tensor bbox_h_label = splited_label[3];
- nntrainer::Tensor class_label = splited_label[4];
-
- bbox_x_label.multiply_i(grid_width_number);
- bbox_y_label.multiply_i(grid_height_number);
-
- for (unsigned int batch = 0; batch < batch_size; ++batch) {
- for (unsigned int object = 0; object < max_object_number; ++object) {
- if (!bbox_w_label.getValue(batch, 0, object, 0) &&
- !bbox_h_label.getValue(batch, 0, object, 0)) {
- break;
- }
- unsigned int grid_x_index = bbox_x_label.getValue(batch, 0, object, 0);
- unsigned int grid_y_index = bbox_y_label.getValue(batch, 0, object, 0);
- unsigned int grid_index = grid_y_index * grid_width_number + grid_x_index;
- unsigned int responsible_anchor =
- find_responsible_anchors(bbox_w_label.getValue(batch, 0, object, 0) /
- bbox_h_label.getValue(batch, 0, object, 0));
-
- bbox_x_gt.setValue(batch, grid_index, responsible_anchor, 0,
- bbox_x_label.getValue(batch, 0, object, 0) -
- grid_x_index);
- bbox_y_gt.setValue(batch, grid_index, responsible_anchor, 0,
- bbox_y_label.getValue(batch, 0, object, 0) -
- grid_y_index);
- bbox_w_gt.setValue(
- batch, grid_index, responsible_anchor, 0,
- nntrainer::sqrtFloat(bbox_w_label.getValue(batch, 0, object, 0)));
- bbox_h_gt.setValue(
- batch, grid_index, responsible_anchor, 0,
- nntrainer::sqrtFloat(bbox_h_label.getValue(batch, 0, object, 0)));
-
- class_gt.setValue(batch, grid_index, responsible_anchor,
- class_label.getValue(batch, 0, object, 0), 1);
- bbox_class_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
- iou_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
- }
- }
-
- nntrainer::Tensor iou = calc_iou(
- bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor, bbox_x_gt,
- bbox_y_gt, bbox_w_gt, bbox_h_gt, bbox1_width, bbox1_height, is_xy_min_max,
- intersection_width, intersection_height, unions);
- confidence_gt.copyData(iou);
-}
-
-#ifdef PLUGGABLE
-
-nntrainer::Layer *create_yolo_v2_loss_layer() {
- auto layer = new YoloV2LossLayer();
- return layer;
-}
-
-void destroy_yolo_v2_loss_layer(nntrainer::Layer *layer) { delete layer; }
-
-/**
- * @note ml_train_layer_pluggable defines the entry point for nntrainer to
- * register a plugin layer
- */
-extern "C" {
-nntrainer::LayerPluggable ml_train_layer_pluggable{create_yolo_v2_loss_layer,
-                                                   destroy_yolo_v2_loss_layer};
-}
-
-#endif
-} // namespace custom
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
- *
- * @file yolo_v2_loss.h
- * @date 07 March 2023
- * @brief This file contains the yolo v2 loss layer
- * @see https://github.com/nnstreamer/nntrainer
- * @author Hyeonseok Lee <hs89.lee@samsung.com>
- * @bug No known bugs except for NYI items
- *
- */
-#ifndef __YOLO_V2_LOSS_LAYER_H__
-#define __YOLO_V2_LOSS_LAYER_H__
-
-#include <string>
-
-#include <acti_func.h>
-#include <base_properties.h>
-#include <layer_context.h>
-#include <layer_devel.h>
-#include <node_exporter.h>
-
-namespace custom {
-
-namespace props {
-
-/**
- * @brief maximum object number in 1 image for given dataset
- *
- */
-class MaxObjectNumber final : public nntrainer::PositiveIntegerProperty {
-public:
- MaxObjectNumber(const unsigned &value = 1);
- static constexpr const char *key = "max_object_number";
- using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief class number for given dataset
- *
- */
-class ClassNumber final : public nntrainer::PositiveIntegerProperty {
-public:
- ClassNumber(const unsigned &value = 1);
- static constexpr const char *key = "class_number";
- using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief grid height number
- *
- */
-class GridHeightNumber final : public nntrainer::PositiveIntegerProperty {
-public:
- GridHeightNumber(const unsigned &value = 1);
- static constexpr const char *key = "grid_height_number";
- using prop_tag = nntrainer::uint_prop_tag;
-};
-
-/**
- * @brief grid width number
- *
- */
-class GridWidthNumber final : public nntrainer::PositiveIntegerProperty {
-public:
- GridWidthNumber(const unsigned &value = 1);
- static constexpr const char *key = "grid_width_number";
- using prop_tag = nntrainer::uint_prop_tag;
-};
-
-} // namespace props
-
-/**
- * @brief Yolo V2 loss layer
- *
- */
-class YoloV2LossLayer final : public nntrainer::Layer {
-public:
- /**
- * @brief Construct a new YoloV2Loss Layer object
- *
- */
- YoloV2LossLayer();
-
- /**
- * @brief Destroy the YoloV2Loss Layer object
- *
- */
- ~YoloV2LossLayer() {}
-
- /**
- * @copydoc Layer::finalize(InitLayerContext &context)
- */
- void finalize(nntrainer::InitLayerContext &context) override;
-
- /**
- * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
- */
- void forwarding(nntrainer::RunLayerContext &context, bool training) override;
-
- /**
- * @copydoc Layer::calcDerivative(RunLayerContext &context)
- */
- void calcDerivative(nntrainer::RunLayerContext &context) override;
-
- /**
- * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
- */
- void exportTo(nntrainer::Exporter &exporter,
- const ml::train::ExportMethods &method) const override;
-
- /**
- * @copydoc Layer::setProperty(const std::vector<std::string> &values)
- */
- void setProperty(const std::vector<std::string> &values) override;
-
- /**
- * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
- */
- void setBatch(nntrainer::RunLayerContext &context,
- unsigned int batch) override;
-
- /**
- * @copydoc bool supportBackwarding() const
- */
- bool supportBackwarding() const override { return true; };
-
- /**
- * @copydoc Layer::requireLabel()
- */
- bool requireLabel() const { return true; }
-
- /**
- * @copydoc Layer::getType()
- */
- const std::string getType() const override { return YoloV2LossLayer::type; };
-
- inline static const std::string type = "yolo_v2_loss";
-
-private:
- static constexpr unsigned int NUM_ANCHOR = 5;
- const float anchors_w_buf[NUM_ANCHOR] = {1.3221, 3.19275, 5.05587, 9.47112,
- 11.2364};
- const float anchors_h_buf[NUM_ANCHOR] = {1.73145, 4.00944, 8.09892, 4.84053,
- 10.0071};
- const nntrainer::Tensor anchors_w;
- const nntrainer::Tensor anchors_h;
- nntrainer::Tensor anchors_ratio;
-
- nntrainer::ActiFunc sigmoid; /** sigmoid activation operation */
- nntrainer::ActiFunc softmax; /** softmax activation operation */
-
- std::tuple<props::MaxObjectNumber, props::ClassNumber,
- props::GridHeightNumber, props::GridWidthNumber>
- yolo_v2_loss_props;
- std::array<unsigned int, 22> wt_idx; /**< indices of the weights */
-
- /**
- * @brief find responsible anchors per object
- */
- unsigned int find_responsible_anchors(float bbox_ratio);
-
- /**
- * @brief generate ground truth, mask from labels
- */
- void generate_ground_truth(nntrainer::RunLayerContext &context);
-};
-
-} // namespace custom
-
-#endif /* __YOLO_V2_LOSS_LAYER_H__ */
--- /dev/null
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file dataset.py
+# @date 8 March 2023
+# @brief Define dataset class for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import glob
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data.dataloader import default_collate
+from PIL import Image
+
+
+##
+# @brief dataset class for yolo
+# @note Need annotation text files corresponding to the name of the images.
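+# @note Judging from the parser below, each annotation line is assumed to be
+#       "<class> <x> <y> <w> <h>", with values in pixels of a 416x416 image.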
+class YOLODataset(Dataset):
+ def __init__(self, img_dir, ann_dir):
+ super().__init__()
+ img_list = glob.glob(img_dir)
+ ann_list = glob.glob(ann_dir)
+ img_list.sort()
+ ann_list.sort()
+
+ self.length = len(img_list)
+ self.input_images = []
+ self.bbox_gt = []
+ self.cls_gt = []
+
+ for i in range(len(img_list)):
+ img = np.array(Image.open(img_list[i]).resize((416, 416))) / 255
+ label_bbox = []
+ label_cls = []
+ with open(ann_list[i], "rt", encoding="utf-8") as f:
+ for line in f.readlines():
+ line = [float(i) for i in line.split()]
+ label_bbox.append(np.array(line[1:], dtype=np.float32) / 416)
+ label_cls.append(int(line[0]))
+
+ self.input_images.append(img)
+ self.bbox_gt.append(label_bbox)
+ self.cls_gt.append(label_cls)
+
+ self.input_images = np.array(self.input_images)
+ self.input_images = torch.FloatTensor(self.input_images).permute((0, 3, 1, 2))
+
+ def __len__(self):
+ return self.length
+
+ def __getitem__(self, idx):
+ return self.input_images[idx], self.bbox_gt[idx], self.cls_gt[idx]
+
+
+##
+# @brief collate db function for yolo
+def collate_db(batch):
+ """
+ @param batch list of batch, (img, bbox, cls)
+ @return collated list of batch, (img, bbox, cls)
+ """
+ items = list(zip(*batch))
+ items[0] = default_collate(items[0])
+ items[1] = list(items[1])
+ items[2] = list(items[2])
+ return items
--- /dev/null
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file main.py
+# @date 8 March 2023
+# @brief Implement training for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import sys
+import os
+
+import torch
+from torch import optim
+from torch.utils.data import DataLoader
+
+from yolo import YoloV2
+from yolo_loss import YoloV2_LOSS
+from dataset import YOLODataset, collate_db
+
+
+# get pyutils path using relative path
+def get_util_path():
+ current_path = os.path.abspath(os.path.dirname(__file__))
+ parent_path = os.path.abspath(os.path.dirname(current_path))
+ target_path = os.path.abspath(os.path.dirname(parent_path))
+ return os.path.dirname(target_path) + "/tools/pyutils/"
+
+
+# add pyutils path to sys.path before importing from it
+sys.path.append(get_util_path())
+from torchconverter import save_bin  # pylint: disable=wrong-import-position
+
+# set config
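+# out_size of 13 corresponds to 416x416 inputs downsampled by 32
+# (five 2x2 max-pools in YoloV2)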
+out_size = 13
+num_classes = 4
+num_anchors = 5
+
+epochs = 3
+batch_size = 4
+
+train_img_dir = "/home/user/TRAIN_DIR/images/*"
+train_ann_dir = "/home/user/TRAIN_DIR/annotations/*"
+valid_img_dir = "/home/user/VALID_DIR/images/*"
+valid_ann_dir = "/home/user/VALID_DIR/annotations/*"
+
+# load data
+train_dataset = YOLODataset(train_img_dir, train_ann_dir)
+train_loader = DataLoader(
+ train_dataset,
+ batch_size=batch_size,
+ collate_fn=collate_db,
+ shuffle=True,
+ drop_last=True,
+)
+valid_dataset = YOLODataset(valid_img_dir, valid_ann_dir)
+valid_loader = DataLoader(
+ valid_dataset,
+ batch_size=batch_size,
+ collate_fn=collate_db,
+ shuffle=False,
+ drop_last=True,
+)
+
+# set model, loss and optimizer
+model = YoloV2(num_classes=num_classes)
+criterion = YoloV2_LOSS(num_classes=num_classes)
+optimizer = optim.Adam(model.parameters(), lr=1e-3)
+# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
+
+# save init model
+save_bin(model, "init_model")
+torch.save(model.state_dict(), "./init_model.pt")
+
+# train model
+best_loss = 1e10
+for epoch in range(epochs):
+ epoch_train_loss = 0
+ epoch_valid_loss = 0
+ for idx, (img, bbox, cls) in enumerate(train_loader):
+ model.train()
+ optimizer.zero_grad()
+ # model prediction
+ hypothesis = model(img).permute((0, 2, 3, 1))
+ hypothesis = hypothesis.reshape(
+ (batch_size, out_size**2, num_anchors, 5 + num_classes)
+ )
+        # split each prediction (bbox, iou, class prob)
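+        # YOLOv2 box parameterization: x, y are sigmoid-squashed offsets within
+        # a grid cell; w, h are exponentiated and scaled by anchors in the loss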
+ bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+ bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+ bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+ iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+ score_pred = hypothesis[..., 5:].contiguous()
+ prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+ score_pred.shape
+ )
+ # calc loss
+ loss = criterion(
+ torch.FloatTensor(bbox_pred),
+ torch.FloatTensor(iou_pred),
+ torch.FloatTensor(prob_pred),
+ bbox,
+ cls,
+ )
+ # back prop
+ loss.backward()
+ optimizer.step()
+ # scheduler.step()
+ epoch_train_loss += loss.item()
+
+ for idx, (img, bbox, cls) in enumerate(valid_loader):
+ model.eval()
+ with torch.no_grad():
+ # model prediction
+ hypothesis = model(img).permute((0, 2, 3, 1))
+ hypothesis = hypothesis.reshape(
+ (hypothesis.shape[0], out_size**2, num_anchors, 5 + num_classes)
+ )
+            # split each prediction (bbox, iou, class prob)
+ bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+ bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+ bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+ iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+ score_pred = hypothesis[..., 5:].contiguous()
+ prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+ score_pred.shape
+ )
+ # calc loss
+ loss = criterion(
+ torch.FloatTensor(bbox_pred),
+ torch.FloatTensor(iou_pred),
+ torch.FloatTensor(prob_pred),
+ bbox,
+ cls,
+ )
+ epoch_valid_loss += loss.item()
+
+ if epoch_valid_loss < best_loss:
+ best_loss = epoch_valid_loss
+ torch.save(model.state_dict(), "./best_model.pt")
+ save_bin(model, "best_model")
+
+    print(
+        f"epoch {epoch}: train loss: {epoch_train_loss / len(train_loader):.4f}, "
+        f"valid loss: {epoch_valid_loss / len(valid_loader):.4f}"
+    )
+
+
+##
+# @brief bbox post process function for inference
+def post_process_for_bbox(bbox_p):
+ """
+ @param bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ @return bbox_p shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ """
+ anchors = torch.FloatTensor(
+ [
+ (1.3221, 1.73145),
+ (3.19275, 4.00944),
+ (5.05587, 8.09892),
+ (9.47112, 4.84053),
+ (11.2364, 10.0071),
+ ]
+ )
+
+ outsize = (13, 13)
+ width, height = outsize
+
+ # restore cell pos to x, y
+ for w in range(width):
+ for h in range(height):
+            bbox_p[:, width * h + w, :, 0] += w
+            bbox_p[:, width * h + w, :, 1] += h
+    bbox_p[:, :, :, 0:1] /= width
+    bbox_p[:, :, :, 1:2] /= height
+
+ # apply anchors to w, h
+ anchor_w = anchors[:, 0].contiguous().view(-1, 1)
+ anchor_h = anchors[:, 1].contiguous().view(-1, 1)
+ bbox_p[:, :, :, 2:3] *= anchor_w
+ bbox_p[:, :, :, 3:4] *= anchor_h
+
+ return bbox_p
+
+
+# inference example using the trained model
+# (reuses `img` and eval mode from the last validation batch above)
+hypothesis = model(img).permute((0, 2, 3, 1))
+hypothesis = hypothesis[0].reshape((1, out_size**2, num_anchors, 5 + num_classes))
+
+# transform output
+bbox_pred_xy = torch.sigmoid(hypothesis[..., :2])
+bbox_pred_wh = torch.exp(hypothesis[..., 2:4])
+bbox_pred = torch.cat((bbox_pred_xy, bbox_pred_wh), 3)
+bbox_pred = post_process_for_bbox(bbox_pred)
+iou_pred = torch.sigmoid(hypothesis[..., 4:5])
+score_pred = hypothesis[..., 5:].contiguous()
+prob_pred = torch.softmax(score_pred.view(-1, num_classes), dim=1).view(
+ score_pred.shape
+)
+
+# result of inference (data range 0~1)
+iou_mask = iou_pred > 0.5
+print(bbox_pred * iou_mask, iou_pred * iou_mask, prob_pred * iou_mask)
--- /dev/null
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file yolo.py
+# @date 8 March 2023
+# @brief Define a simple yolo model (not the original darknet backbone).
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import torch
+from torch import nn
+
+
+##
+# @brief define yolo model (except for re-organization module)
+class YoloV2(nn.Module):
+    def __init__(self, num_classes, num_anchors=5):
+        super().__init__()
+ self.num_classes = num_classes
+ self.num_anchors = num_anchors
+ self.conv1 = nn.Sequential(
+ nn.Conv2d(3, 32, 3, 1, 1),
+ nn.BatchNorm2d(32, eps=1e-3),
+ nn.LeakyReLU(),
+ nn.MaxPool2d(2, 2),
+ )
+ self.conv2 = nn.Sequential(
+ nn.Conv2d(32, 64, 3, 1, 1),
+ nn.BatchNorm2d(64, eps=1e-3),
+ nn.LeakyReLU(),
+ nn.MaxPool2d(2, 2),
+ )
+ self.conv3 = nn.Sequential(
+ nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv4 = nn.Sequential(
+ nn.Conv2d(128, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv5 = nn.Sequential(
+ nn.Conv2d(64, 128, 3, 1, 1),
+ nn.BatchNorm2d(128, eps=1e-3),
+ nn.LeakyReLU(),
+ nn.MaxPool2d(2, 2),
+ )
+ self.conv6 = nn.Sequential(
+ nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv7 = nn.Sequential(
+ nn.Conv2d(256, 128, 1, 1, 0), nn.BatchNorm2d(128, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv8 = nn.Sequential(
+ nn.Conv2d(128, 256, 3, 1, 1),
+ nn.BatchNorm2d(256, eps=1e-3),
+ nn.LeakyReLU(),
+ nn.MaxPool2d(2, 2),
+ )
+ self.conv9 = nn.Sequential(
+ nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv10 = nn.Sequential(
+ nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv11 = nn.Sequential(
+ nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv12 = nn.Sequential(
+ nn.Conv2d(512, 256, 1, 1, 0), nn.BatchNorm2d(256, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv13 = nn.Sequential(
+ nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+ )
+
+ self.conv_b = nn.Sequential(
+ nn.Conv2d(512, 64, 1, 1, 0), nn.BatchNorm2d(64, eps=1e-3), nn.LeakyReLU()
+ )
+
+ self.maxpool_a = nn.MaxPool2d(2, 2)
+ self.conv_a1 = nn.Sequential(
+ nn.Conv2d(512, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+ self.conv_a2 = nn.Sequential(
+ nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv_a3 = nn.Sequential(
+ nn.Conv2d(512, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+ self.conv_a4 = nn.Sequential(
+ nn.Conv2d(1024, 512, 1, 1, 0), nn.BatchNorm2d(512, eps=1e-3), nn.LeakyReLU()
+ )
+ self.conv_a5 = nn.Sequential(
+ nn.Conv2d(512, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+ self.conv_a6 = nn.Sequential(
+ nn.Conv2d(1024, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+ self.conv_a7 = nn.Sequential(
+ nn.Conv2d(1024, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+
+ self.conv_out1 = nn.Sequential(
+ nn.Conv2d(1280, 1024, 3, 1, 1),
+ nn.BatchNorm2d(1024, eps=1e-3),
+ nn.LeakyReLU(),
+ )
+
+ self.conv_out2 = nn.Conv2d(1024, self.num_anchors * (5 + num_classes), 1, 1, 0)
+
+ def forward(self, x):
+ output = self.conv1(x)
+ output = self.conv2(output)
+ output = self.conv3(output)
+ output = self.conv4(output)
+ output = self.conv5(output)
+ output = self.conv6(output)
+ output = self.conv7(output)
+ output = self.conv8(output)
+ output = self.conv9(output)
+ output = self.conv10(output)
+ output = self.conv11(output)
+ output = self.conv12(output)
+ output = self.conv13(output)
+
+ output_a = self.maxpool_a(output)
+ output_a = self.conv_a1(output_a)
+ output_a = self.conv_a2(output_a)
+ output_a = self.conv_a3(output_a)
+ output_a = self.conv_a4(output_a)
+ output_a = self.conv_a5(output_a)
+ output_a = self.conv_a6(output_a)
+ output_a = self.conv_a7(output_a)
+
+ output_b = self.conv_b(output)
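+        # passthrough (reorg) branch: space-to-depth by a factor of 2,
+        # e.g. (b, 64, 26, 26) -> (b, 256, 13, 13) for 416x416 inputs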
+ b, c, h, w = output_b.size()
+ output_b = output_b.view(b, int(c / 4), h, 2, w, 2).contiguous()
+ output_b = output_b.permute(0, 3, 5, 1, 2, 4).contiguous()
+ output_b = output_b.view(b, -1, int(h / 2), int(w / 2))
+
+ output = torch.cat((output_a, output_b), 1)
+ output = self.conv_out1(output)
+ output = self.conv_out2(output)
+ return output
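+
+
+# A quick, illustrative shape sanity check; num_classes=4 is an assumption
+# matching the training script of this application.
+if __name__ == "__main__":
+    net = YoloV2(num_classes=4)
+    dummy_input = torch.zeros((1, 3, 416, 416))
+    # expect (1, num_anchors * (5 + num_classes), 13, 13) == (1, 45, 13, 13)
+    print(net(dummy_input).shape)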
--- /dev/null
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+#
+# @file yolo_loss.py
+# @date 8 March 2023
+# @brief Define loss class for yolo
+#
+# @author Seungbaek Hong <sb92.hong@samsung.com>
+
+import torch
+from torch import nn
+import numpy as np
+
+
+##
+# @brief calculate iou between two lists of boxes
+def calculate_iou(bbox1, bbox2):
+ """
+ @param bbox1 shape(numb_of_bbox, 4), it contains x, y, w, h
+ @param bbox2 shape(numb_of_bbox, 4), it contains x, y, w, h
+ @return result shape(numb_of_bbox, 1)
+ """
+ # bbox coordinates
+ b1x1, b1y1 = (bbox1[:, :2]).split(1, 1)
+ b1x2, b1y2 = (bbox1[:, :2] + (bbox1[:, 2:4])).split(1, 1)
+ b2x1, b2y1 = (bbox2[:, :2]).split(1, 1)
+ b2x2, b2y2 = (bbox2[:, :2] + (bbox2[:, 2:4])).split(1, 1)
+
+ # box areas
+ areas1 = (b1x2 - b1x1) * (b1y2 - b1y1)
+ areas2 = (b2x2 - b2x1) * (b2y2 - b2y1)
+
+ # intersections
+ min_x_of_max_x, max_x_of_min_x = torch.min(b1x2, b2x2), torch.max(b1x1, b2x1)
+ min_y_of_max_y, max_y_of_min_y = torch.min(b1y2, b2y2), torch.max(b1y1, b2y1)
+ intersection_width = (min_x_of_max_x - max_x_of_min_x).clamp(min=0)
+ intersection_height = (min_y_of_max_y - max_y_of_min_y).clamp(min=0)
+ intersections = intersection_width * intersection_height
+
+ # unions
+ unions = (areas1 + areas2) - intersections
+
+ result = intersections / unions
+ return result
+
+
+##
+# @brief find the best-matching anchor ratio and its index
+def find_best_ratio(anchors, bbox):
+ """
+ @param anchors shape(numb_of_anchors, 2), it contains w, h
+ @param bbox shape(numb_of_bbox, 2), it contains w, h
+ @return best_match index of best match, shape(numb_of_bbox, 1)
+ """
+ b1 = np.divide(anchors[:, 0], anchors[:, 1])
+ b2 = np.divide(bbox[:, 0], bbox[:, 1])
+ similarities = np.abs(b1.reshape(-1, 1) - b2)
+ best_match = np.argmin(similarities, axis=0)
+ return best_match
+
+
+##
+# @brief loss class for yolo
+class YoloV2_LOSS(nn.Module):
+ """Yolo v2 loss"""
+
+ def __init__(self, num_classes, img_shape=(416, 416), outsize=(13, 13)):
+ super().__init__()
+ self.num_classes = num_classes
+ self.img_shape = img_shape
+ self.outsize = outsize
+ self.hook = {}
+
+ self.anchors = torch.FloatTensor(
+ [
+ (1.3221, 1.73145),
+ (3.19275, 4.00944),
+ (5.05587, 8.09892),
+ (9.47112, 4.84053),
+ (11.2364, 10.0071),
+ ]
+ )
+
+ self.mse = nn.MSELoss()
+ self.bbox_loss, self.iou_loss, self.cls_loss = None, None, None
+
+ ##
+    # @brief function to track gradients of non-leaf variables.
+ def hook_variable(self, name, var):
+ """Do not use this function when training. It is for debugging."""
+ self.hook[name] = var
+ self.hook[name].requires_grad_().retain_grad()
+
+ ##
+    # @brief function to print gradients of non-leaf variables.
+ def print_hook_variables(self):
+ """Do not use this function when training. It is for debugging."""
+ for k, var in self.hook.items():
+ print(f"gradients of variable {k}:")
+ batch, channel, height, width = var.grad.shape
+ for b in range(batch):
+ for c in range(channel):
+ for h in range(height):
+ for w in range(width):
+ if torch.abs(var.grad[b, c, h, w]).item() >= 1e-3:
+                            print(
+                                f"(b: {b}, c: {c}, h: {h}, w: {w}) = "
+                                f"{var.grad[b, c, h, w]}"
+                            )
+ print("=" * 20)
+
+ def forward(self, bbox_pred, iou_pred, prob_pred, bbox_gt, cls_gt):
+ """
+ @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ @param iou_pred shape(batch_size, cell_h x cell_w, 1)
+ @param prob_pred shape(batch_size, cell_h x cell_w, num_anchors, num_classes)
+ @param bbox_gt shape(batch_size, num_bbox, 4), data range(0~1)
+ @param cls_gt shape(batch_size, num_bbox, 1)
+ @return loss shape(1,)
+ """
+ self.hook_variable("bbox_pred", bbox_pred)
+ bbox_pred = self.apply_anchors_to_bbox(bbox_pred)
+
+ bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask = (
+ self._build_target(bbox_pred, bbox_gt, cls_gt)
+ )
+
+ self.bbox_loss = self.mse(bbox_pred * bbox_mask, bbox_built * bbox_mask)
+ self.iou_loss = self.mse(iou_pred * iou_mask, iou_built * iou_mask)
+ self.cls_loss = self.mse(prob_pred * cls_mask, cls_built * cls_mask)
+
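+        # the bbox term is weighted by 5, following YOLO's lambda_coord convention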
+ return self.bbox_loss * 5 + self.iou_loss + self.cls_loss
+
+ def apply_anchors_to_bbox(self, bbox_pred):
+ """
+ @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ @return bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ """
+ anchor_w = self.anchors[:, 0].contiguous().view(-1, 1)
+ anchor_h = self.anchors[:, 1].contiguous().view(-1, 1)
+ bbox_pred_tmp = bbox_pred.clone()
+ bbox_pred_tmp[:, :, :, 2:3] = torch.sqrt(bbox_pred[:, :, :, 2:3] * anchor_w)
+ bbox_pred_tmp[:, :, :, 3:4] = torch.sqrt(bbox_pred[:, :, :, 3:4] * anchor_h)
+ return bbox_pred_tmp
+
+ def _build_target(self, bbox_pred, bbox_gt, cls_gt):
+ """
+ @param bbox_pred shape(batch_size, cell_h x cell_w, num_anchors, 4)
+ @param bbox_gt shape(batch_size, num_bbox, 4)
+ @param cls_gt shape(batch_size, num_bbox, 1)
+ @return tuple of (bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask)
+ """
+ bbox_built, bbox_mask = [], []
+ iou_built, iou_mask = [], []
+ cls_built, cls_mask = [], []
+
+ batch_size = bbox_pred.shape[0]
+
+ for i in range(batch_size):
+ _bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask = (
+ self._make_target_per_sample(
+ torch.FloatTensor(bbox_pred[i]),
+ torch.FloatTensor(np.array(bbox_gt[i])),
+ torch.LongTensor(cls_gt[i]),
+ )
+ )
+
+ bbox_built.append(_bbox_built)
+ bbox_mask.append(_bbox_mask)
+ iou_built.append(_iou_built)
+ iou_mask.append(_iou_mask)
+ cls_built.append(_cls_built)
+ cls_mask.append(_cls_mask)
+
+ bbox_built = torch.stack(bbox_built)
+ bbox_mask = torch.stack(bbox_mask)
+ iou_built = torch.stack(iou_built)
+ iou_mask = torch.stack(iou_mask)
+ cls_built = torch.stack(cls_built)
+ cls_mask = torch.stack(cls_mask)
+
+ return bbox_built, iou_built, cls_built, bbox_mask, iou_mask, cls_mask
+
+ def _make_target_per_sample(self, _bbox_pred, _bbox_gt, _cls_gt):
+ """
+ @param _bbox_pred shape(cell_h x cell_w, num_anchors, 4)
+ @param _bbox_gt shape(num_bbox, 4)
+ @param _cls_gt shape(num_bbox,)
+ @return tuple of (_bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask)
+ """
+ hw, num_anchors, _ = _bbox_pred.shape
+
+ # set result template
+ _bbox_built = torch.zeros((hw, num_anchors, 4))
+ _bbox_mask = torch.zeros((hw, num_anchors, 1))
+
+ _iou_built = torch.zeros((hw, num_anchors, 1))
+ _iou_mask = torch.ones((hw, num_anchors, 1)) * 0.5
+
+ _cls_built = torch.zeros((hw, num_anchors, self.num_classes))
+ _cls_mask = torch.zeros((hw, num_anchors, 1))
+
+ # find best anchors
+ _bbox_gt_wh = _bbox_gt.clone()[:, 2:]
+ best_anchors = find_best_ratio(self.anchors, _bbox_gt_wh)
+
+        # normalize x, y pos based on cell coordinates
+ cx = _bbox_gt[:, 0] * self.outsize[0]
+ cy = _bbox_gt[:, 1] * self.outsize[1]
+ # calculate cell pos and normalize x, y
+ cell_idx = np.floor(cy) * self.outsize[0] + np.floor(cx)
+ cell_idx = np.array(cell_idx, dtype=np.int16)
+ cx -= np.floor(cx)
+ cy -= np.floor(cy)
+
+ # set bbox of gt
+ _bbox_built[cell_idx, best_anchors, 0] = cx
+ _bbox_built[cell_idx, best_anchors, 1] = cy
+ _bbox_built[cell_idx, best_anchors, 2] = torch.sqrt(_bbox_gt[:, 2])
+ _bbox_built[cell_idx, best_anchors, 3] = torch.sqrt(_bbox_gt[:, 3])
+ _bbox_mask[cell_idx, best_anchors, :] = 1
+
+ # set cls of gt
+ _cls_built[cell_idx, best_anchors, _cls_gt] = 1
+ _cls_mask[cell_idx, best_anchors, :] = 1
+
+ # set confidence score of gt
+ _iou_built = calculate_iou(
+ _bbox_pred.reshape(-1, 4), _bbox_built.view(-1, 4)
+ ).detach()
+ _iou_built = _iou_built.view(hw, num_anchors, 1)
+ _iou_mask[cell_idx, best_anchors, :] = 1
+
+ return _bbox_built, _iou_built, _cls_built, _bbox_mask, _iou_mask, _cls_mask
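+
+
+# A minimal, illustrative sanity check for the helper functions above; the box
+# and anchor values below are made up, not taken from any dataset.
+if __name__ == "__main__":
+    box_a = torch.FloatTensor([[0.0, 0.0, 2.0, 2.0]])  # x, y, w, h
+    box_b = torch.FloatTensor([[1.0, 1.0, 2.0, 2.0]])
+    # overlap is 1x1 = 1 and union is 4 + 4 - 1 = 7, so IoU should be ~0.1429
+    print(calculate_iou(box_a, box_b))
+    # the box's w/h ratio (~1.9) is closest to the second anchor's ratio (2.0)
+    print(find_best_ratio(np.array([[1.0, 2.0], [2.0, 1.0]]), np.array([[4.0, 2.1]])))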
--- /dev/null
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+# ndk path
+ifndef ANDROID_NDK
+$(error ANDROID_NDK is not defined!)
+endif
+
+ifndef NNTRAINER_ROOT
+NNTRAINER_ROOT := $(LOCAL_PATH)/../../..
+endif
+
+ML_API_COMMON_INCLUDES := ${NNTRAINER_ROOT}/ml_api_common/include
+NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
+ $(NNTRAINER_ROOT)/nntrainer/dataset \
+ $(NNTRAINER_ROOT)/nntrainer/models \
+ $(NNTRAINER_ROOT)/nntrainer/layers \
+ $(NNTRAINER_ROOT)/nntrainer/compiler \
+ $(NNTRAINER_ROOT)/nntrainer/graph \
+ $(NNTRAINER_ROOT)/nntrainer/optimizers \
+ $(NNTRAINER_ROOT)/nntrainer/tensor \
+ $(NNTRAINER_ROOT)/nntrainer/utils \
+ $(NNTRAINER_ROOT)/api \
+ $(NNTRAINER_ROOT)/api/ccapi/include \
+ ${ML_API_COMMON_INCLUDES}
+
+LOCAL_MODULE := nntrainer
+LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libnntrainer.so
+
+include $(PREBUILT_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := ccapi-nntrainer
+LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/libs/$(TARGET_ARCH_ABI)/libccapi-nntrainer.so
+
+include $(PREBUILT_SHARED_LIBRARY)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_NEON := true
+LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
+LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
+LOCAL_CXXFLAGS += -std=c++17 -frtti
+LOCAL_CFLAGS += -pthread -fexceptions -fopenmp
+LOCAL_LDFLAGS += -fexceptions
+LOCAL_MODULE_TAGS := optional
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE := nntrainer_yolo
+LOCAL_LDLIBS := -llog -landroid -fopenmp
+
+LOCAL_SRC_FILES := main.cpp det_dataloader.cpp yolo_v2_loss.cpp reorg_layer.cpp
+LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
+
+LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)
+
+include $(BUILD_EXECUTABLE)
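+
+# A typical build invocation might look like the following (run from this
+# directory; the layout is an assumption, adjust paths to your checkout):
+#   ndk-build NDK_PROJECT_PATH=. APP_BUILD_SCRIPT=./Android.mk NDK_APPLICATION_MK=./Application.mk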
--- /dev/null
+APP_ABI := arm64-v8a
+APP_STL := c++_shared
+APP_PLATFORM := android-29
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file det_dataloader.cpp
+ * @date 22 March 2023
+ * @brief dataloader for object detection dataset
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include "det_dataloader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <numeric>
+#include <random>
+#include <sstream>
+#include <string>
+
+#include <nntrainer_error.h>
+
+namespace nntrainer::util {
+
+// Only BMP image files are supported for now.
+DirDataLoader::DirDataLoader(const char *directory_, unsigned int max_num_label,
+ unsigned int c, unsigned int w, unsigned int h,
+ bool is_train_) :
+ max_num_label(max_num_label),
+ channel(c),
+ height(h),
+ width(w),
+ is_train(is_train_) {
+ dir_path.assign(directory_);
+
+ // set data list
+ std::filesystem::directory_iterator itr(dir_path + "images");
+ while (itr != std::filesystem::end(itr)) {
+ // get image file name
+ std::string img_file = itr->path().string();
+
+ // check if it is bmp image file
+ if (img_file.find(".bmp") == std::string::npos) {
+ itr++;
+ continue;
+ }
+
+ // set label file name
+ std::string label_file = img_file;
+ label_file.replace(label_file.find(".bmp"), 4, ".txt");
+ label_file.replace(label_file.find("/images"), 7, "/annotations");
+
+ // check if there is paired label file
+ if (!std::filesystem::exists(label_file)) {
+ itr++;
+ continue;
+ }
+
+ // set data list
+ data_list.push_back(make_pair(img_file, label_file));
+ itr++;
+ }
+
+ // set index and shuffle data
+ idxes = std::vector<unsigned int>(data_list.size());
+ std::iota(idxes.begin(), idxes.end(), 0);
+ if (is_train)
+ std::shuffle(idxes.begin(), idxes.end(), rng);
+
+ data_size = data_list.size();
+ count = 0;
+}
+
+void read_image(const std::string path, float *input, uint &width,
+ uint &height) {
+ FILE *f = fopen(path.c_str(), "rb");
+
+ if (f == nullptr)
+ throw std::invalid_argument("Cannot open file: " + path);
+
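+  // BMP layout note: the header is 54 bytes, with the image width and height
+  // stored as 32-bit integers at byte offsets 18 and 22 respectively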
+ unsigned char info[54];
+ size_t s = fread(info, sizeof(unsigned char), 54, f);
+
+ unsigned int w = *(int *)&info[18];
+ unsigned int h = *(int *)&info[22];
+
+ if (w != width or h != height) {
+ fclose(f);
+ throw std::invalid_argument("the dimension of image file does not match" +
+ std::to_string(s));
+ }
+
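+  // each BMP pixel row is padded to a 4-byte boundary, and pixel data is
+  // stored bottom-up in BGR order (hence the row flip and channel swap below)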
+ int row_padded = (width * 3 + 3) & (~3);
+ unsigned char *data = new unsigned char[row_padded];
+
+ for (uint i = 0; i < height; i++) {
+ s = fread(data, sizeof(unsigned char), row_padded, f);
+ for (uint j = 0; j < width; j++) {
+      input[width * (height - i - 1) + j] = (float)data[j * 3 + 2] / 255;
+      input[(height * width) + width * (height - i - 1) + j] =
+        (float)data[j * 3 + 1] / 255;
+      input[(height * width) * 2 + width * (height - i - 1) + j] =
+        (float)data[j * 3] / 255;
+ }
+ }
+
+ delete[] data;
+ fclose(f);
+}
+
+void DirDataLoader::next(float **input, float **label, bool *last) {
+ auto fill_one_sample = [this](float *input_, float *label_, int index) {
+ // set input data
+ std::string img_file = data_list[index].first;
+ read_image(img_file, input_, width, height);
+
+ // set label data
+ std::string label_file = data_list[index].second;
+    std::memset(label_, 0, 5 * sizeof(float) * max_num_label);
+
+ std::ifstream file(label_file);
+ std::string cur_line;
+
+ int line_idx = 0;
+ while (getline(file, cur_line)) {
+ std::stringstream ss(cur_line);
+ std::string cur_value;
+
+ int row_idx = 0;
+      while (getline(ss, cur_value, ' ')) {
+        if (row_idx == 0) {
+          // the first value is the class id, stored last in the label row
+          label_[line_idx * 5 + 4] = std::stof(cur_value);
+        } else {
+          // the remaining values are pixel coordinates, normalized by the
+          // 416 x 416 input resolution
+          label_[line_idx * 5 + row_idx - 1] = std::stof(cur_value) / 416;
+        }
+        row_idx++;
+      }
+
+ line_idx++;
+ }
+
+ file.close();
+ };
+
+ fill_one_sample(*input, *label, idxes[count]);
+
+ count++;
+
+  if (count < data_size) {
+    *last = false;
+  } else {
+    *last = true;
+    count = 0;
+    // reshuffle for the next epoch; keep validation data in a fixed order
+    if (is_train)
+      std::shuffle(idxes.begin(), idxes.end(), rng);
+  }
+}
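+
+// Label parsing note: each annotation line is "<class> <x> <y> <w> <h>" with
+// the box values given in pixels of the 416 x 416 input. For example, a
+// (hypothetical) line "1 104 208 52 52" is stored as the label row
+// [0.25, 0.5, 0.125, 0.125, 1], i.e. the class id goes last and the four
+// box values are divided by 416.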
+
+} // namespace nntrainer::util
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file det_dataloader.h
+ * @date 22 March 2023
+ * @brief dataloader for object detection dataset
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <random>
+#include <string>
+#include <tensor_dim.h>
+#include <vector>
+
+namespace nntrainer::util {
+
+using TensorDim = ml::train::TensorDim;
+
+/**
+ * @brief user data object
+ */
+class DirDataLoader {
+public:
+ /**
+ * @brief Construct a new Dir Data Loader object
+ */
+ DirDataLoader(const char *directory_, unsigned int max_num_label,
+ unsigned int c, unsigned int w, unsigned int h, bool is_train_);
+ /**
+ * @brief Destroy the Dir Data Loader object
+ */
+  ~DirDataLoader() = default;
+
+ /**
+ * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+ */
+ void next(float **input, float **label, bool *last);
+
+ /**
+ * @brief getter for current file name
+ * @return current file name
+ */
+ std::string getCurFileName() { return cur_file_name; };
+
+ /**
+ * @brief setter for current file name
+ */
+ void setCurFileName(std::string s) { cur_file_name = s; };
+
+private:
+ std::string dir_path;
+ unsigned int data_size;
+ unsigned int max_num_label;
+ unsigned int channel;
+ unsigned int height;
+ unsigned int width;
+ bool is_train;
+
+ std::vector<std::pair<std::string, std::string>> data_list;
+ std::vector<unsigned int> idxes;
+ unsigned int count;
+ std::string cur_file_name;
+
+ // random number generator
+ std::mt19937 rng;
+};
+
+} // namespace nntrainer::util
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file main.cpp
+ * @date 03 March 2023
+ * @todo replace backbone to original darknet of yolo v2
+ * @brief application example for YOLO v2
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <array>
+#include <chrono>
+#include <ctime>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include <app_context.h>
+#include <det_dataloader.h>
+#include <layer.h>
+#include <model.h>
+#include <optimizer.h>
+
+#include "yolo_v2_loss.h"
+
+#include <reorg_layer.h>
+
+using LayerHandle = std::shared_ptr<ml::train::Layer>;
+using ModelHandle = std::unique_ptr<ml::train::Model>;
+using UserDataType = std::unique_ptr<nntrainer::util::DirDataLoader>;
+
+const unsigned int ANCHOR_NUMBER = 5;
+
+const unsigned int MAX_OBJECT_NUMBER = 4;
+const unsigned int CLASS_NUMBER = 4;
+const unsigned int GRID_HEIGHT_NUMBER = 13;
+const unsigned int GRID_WIDTH_NUMBER = 13;
+const unsigned int IMAGE_HEIGHT_SIZE = 416;
+const unsigned int IMAGE_WIDTH_SIZE = 416;
+const unsigned int BATCH_SIZE = 4;
+const unsigned int EPOCHS = 3;
+const char *TRAIN_DIR_PATH = "/TRAIN_DIR/";
+const char *VALIDATION_DIR_PATH = "/VALID_DIR/";
+// const std::string MODEL_INIT_BIN_PATH = "/home/user/MODEL_INIT_BIN_PATH.bin";
+
+int trainData_cb(float **input, float **label, bool *last, void *user_data) {
+ auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
+
+ data->next(input, label, last);
+ return 0;
+}
+
+int validData_cb(float **input, float **label, bool *last, void *user_data) {
+ auto data = reinterpret_cast<nntrainer::util::DirDataLoader *>(user_data);
+
+ data->next(input, label, last);
+ return 0;
+}
+
+std::array<UserDataType, 2> createDetDataGenerator(const char *train_dir,
+                                                   const char *valid_dir,
+                                                   int max_num_label, int c,
+                                                   int h, int w) {
+  // DirDataLoader takes (..., channel, width, height, is_train)
+  UserDataType train_data(new nntrainer::util::DirDataLoader(
+    train_dir, max_num_label, c, w, h, true));
+  UserDataType valid_data(new nntrainer::util::DirDataLoader(
+    valid_dir, max_num_label, c, w, h, false));
+
+  return {std::move(train_data), std::move(valid_data)};
+}
+
+/**
+ * @brief make "key=value" from key and value
+ *
+ * @tparam T type of a value
+ * @param key key
+ * @param value value
+ * @return std::string with "key=value"
+ */
+template <typename T>
+static std::string withKey(const std::string &key, const T &value) {
+ std::stringstream ss;
+ ss << key << "=" << value;
+ return ss.str();
+}
+
+template <typename T>
+static std::string withKey(const std::string &key,
+ std::initializer_list<T> value) {
+ if (std::empty(value)) {
+ throw std::invalid_argument("empty data cannot be converted");
+ }
+
+ std::stringstream ss;
+ ss << key << "=";
+
+ auto iter = value.begin();
+ for (; iter != value.end() - 1; ++iter) {
+ ss << *iter << ',';
+ }
+ ss << *iter;
+
+ return ss.str();
+}
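+
+// For example (values taken from the calls below), withKey("filters", 32)
+// yields "filters=32" and withKey("stride", {2, 2}) yields "stride=2,2",
+// which is the "key=value" property format the nntrainer layer factory
+// consumes.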
+
+/**
+ * @brief yolo block
+ *
+ * @param block_name name of the block
+ * @param input_name name of the input
+ * @param filters number of filters
+ * @param kernel_size number of kernel_size
+ * @param downsample whether to halve the spatial output size with max pooling
+ * @return std::vector<LayerHandle> vectors of layers
+ */
+std::vector<LayerHandle> yoloBlock(const std::string &block_name,
+ const std::string &input_name, int filters,
+ int kernel_size, bool downsample) {
+ using ml::train::createLayer;
+
+ auto scoped_name = [&block_name](const std::string &layer_name) {
+ return block_name + "/" + layer_name;
+ };
+ auto with_name = [&scoped_name](const std::string &layer_name) {
+ return withKey("name", scoped_name(layer_name));
+ };
+
+ auto createConv = [&with_name, filters](const std::string &name,
+ int kernel_size, int stride,
+ const std::string &padding,
+ const std::string &input_layer) {
+ std::vector<std::string> props{
+ with_name(name),
+ withKey("stride", {stride, stride}),
+ withKey("filters", filters),
+ withKey("kernel_size", {kernel_size, kernel_size}),
+ withKey("padding", padding),
+ withKey("input_layers", input_layer)};
+
+ return createLayer("conv2d", props);
+ };
+
+ /** construct basic layer **/
+ LayerHandle a1 = createConv("a1", kernel_size, 1, "same", input_name);
+
+ if (downsample) {
+ LayerHandle a2 = createLayer("batch_normalization",
+ {with_name("a2"), withKey("momentum", "0.9"),
+ withKey("activation", "leaky_relu")});
+
+ LayerHandle a3 = createLayer(
+ "pooling2d", {withKey("name", block_name), withKey("stride", {2, 2}),
+ withKey("pooling", "max"), withKey("pool_size", {2, 2})});
+
+ return {a1, a2, a3};
+ } else {
+ LayerHandle a2 =
+ createLayer("batch_normalization",
+ {withKey("name", block_name), withKey("momentum", "0.9"),
+ withKey("activation", "leaky_relu")});
+
+ return {a1, a2};
+ }
+}
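+
+// As a concrete reading of the block above: yoloBlock("conv1", "input0", 32,
+// 3, true) expands to a 3x3/stride-1 conv named "conv1/a1", a batch norm
+// "conv1/a2" with leaky_relu activation, and a 2x2 max pooling that takes
+// the block name "conv1", so the next block can reference the whole unit by
+// "conv1". Without downsampling the batch norm itself takes the block name.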
+
+/**
+ * @brief Create yolo v2 light
+ *
+ * @return vector of layers that contain full graph of yolo v2 light
+ */
+ModelHandle YOLO() {
+ using ml::train::createLayer;
+
+ ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
+
+ std::vector<LayerHandle> layers;
+
+ layers.push_back(createLayer(
+ "input",
+ {withKey("name", "input0"),
+ withKey("input_shape", "3:" + std::to_string(IMAGE_HEIGHT_SIZE) + ":" +
+ std::to_string(IMAGE_WIDTH_SIZE))}));
+
+ std::vector<std::vector<LayerHandle>> blocks;
+
+ blocks.push_back(yoloBlock("conv1", "input0", 32, 3, true));
+ blocks.push_back(yoloBlock("conv2", "conv1", 64, 3, true));
+ blocks.push_back(yoloBlock("conv3", "conv2", 128, 3, false));
+ blocks.push_back(yoloBlock("conv4", "conv3", 64, 1, false));
+ blocks.push_back(yoloBlock("conv5", "conv4", 128, 3, true));
+ blocks.push_back(yoloBlock("conv6", "conv5", 256, 3, false));
+ blocks.push_back(yoloBlock("conv7", "conv6", 128, 1, false));
+ blocks.push_back(yoloBlock("conv8", "conv7", 256, 3, true));
+ blocks.push_back(yoloBlock("conv9", "conv8", 512, 3, false));
+ blocks.push_back(yoloBlock("conv10", "conv9", 256, 1, false));
+ blocks.push_back(yoloBlock("conv11", "conv10", 512, 3, false));
+ blocks.push_back(yoloBlock("conv12", "conv11", 256, 1, false));
+ blocks.push_back(yoloBlock("conv13", "conv12", 512, 3, false));
+
+ blocks.push_back({createLayer(
+ "pooling2d", {withKey("name", "conv_a_pool"), withKey("stride", {2, 2}),
+ withKey("pooling", "max"), withKey("pool_size", {2, 2}),
+ withKey("input_layers", "conv13")})});
+ blocks.push_back(yoloBlock("conv_a1", "conv_a_pool", 1024, 3, false));
+ blocks.push_back(yoloBlock("conv_a2", "conv_a1", 512, 1, false));
+ blocks.push_back(yoloBlock("conv_a3", "conv_a2", 1024, 3, false));
+ blocks.push_back(yoloBlock("conv_a4", "conv_a3", 512, 1, false));
+ blocks.push_back(yoloBlock("conv_a5", "conv_a4", 1024, 3, false));
+ blocks.push_back(yoloBlock("conv_a6", "conv_a5", 1024, 3, false));
+ blocks.push_back(yoloBlock("conv_a7", "conv_a6", 1024, 3, false));
+
+ blocks.push_back(yoloBlock("conv_b", "conv13", 64, 1, false));
+
+ blocks.push_back(
+ {createLayer("reorg_layer", {withKey("name", "re_organization"),
+ withKey("input_layers", "conv_b")})});
+
+ blocks.push_back(
+ {createLayer("concat", {withKey("name", "concat"),
+ withKey("input_layers", "conv_a7, re_organization"),
+ withKey("axis", 1)})});
+
+ blocks.push_back(yoloBlock("conv_out1", "concat", 1024, 3, false));
+
+ blocks.push_back(
+ {createLayer("conv2d", {
+ withKey("name", "conv_out2"),
+ withKey("filters", 5 * (5 + CLASS_NUMBER)),
+ withKey("kernel_size", {1, 1}),
+ withKey("stride", {1, 1}),
+ withKey("padding", "same"),
+ withKey("input_layers", "conv_out1"),
+ })});
+
+ for (auto &block : blocks) {
+ layers.insert(layers.end(), block.begin(), block.end());
+ }
+
+ layers.push_back(createLayer("permute", {
+ withKey("name", "permute"),
+ withKey("direction", {2, 3, 1}),
+ }));
+
+ layers.push_back(createLayer(
+ "reshape",
+ {
+ withKey("name", "reshape"),
+ withKey("target_shape",
+ std::to_string(GRID_HEIGHT_NUMBER * GRID_WIDTH_NUMBER) + ":" +
+ std::to_string(ANCHOR_NUMBER) + ":" +
+ std::to_string(5 + CLASS_NUMBER)),
+ }));
+
+ layers.push_back(createLayer(
+ "yolo_v2_loss", {
+ withKey("name", "yolo_v2_loss"),
+ withKey("max_object_number", MAX_OBJECT_NUMBER),
+ withKey("class_number", CLASS_NUMBER),
+ withKey("grid_height_number", GRID_HEIGHT_NUMBER),
+ withKey("grid_width_number", GRID_WIDTH_NUMBER),
+ }));
+
+ for (auto &layer : layers) {
+ model->addLayer(layer);
+ }
+
+ return model;
+}
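+
+// Shape check for the head (derived from the constants above): conv_out2
+// emits 5 * (5 + CLASS_NUMBER) = 45 channels on the 13 x 13 grid; permute
+// and reshape then arrange it as (13 * 13):5:(5 + 4) = 169:5:9, i.e. one
+// (x, y, w, h, confidence, 4 class scores) vector per grid cell and anchor,
+// which is exactly what yolo_v2_loss consumes.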
+
+int main(int argc, char *argv[]) {
+ // print start time
+ auto start = std::chrono::system_clock::now();
+ std::time_t start_time = std::chrono::system_clock::to_time_t(start);
+ std::cout << "started computation at " << std::ctime(&start_time)
+ << std::endl;
+
+ // set training config and print it
+ std::cout << "batch_size: " << BATCH_SIZE << " epochs: " << EPOCHS
+ << std::endl;
+
+ try {
+ // create YOLO v2 model
+ ModelHandle model = YOLO();
+ model->setProperty({withKey("batch_size", BATCH_SIZE),
+ withKey("epochs", EPOCHS),
+ withKey("save_path", "yolov2.bin")});
+
+ // create optimizer
+ auto optimizer = ml::train::createOptimizer(
+ "adam", {"learning_rate=0.001", "epsilon=1e-8", "torch_ref=true"});
+ model->setOptimizer(std::move(optimizer));
+
+ // compile and initialize model
+ model->compile();
+ model->initialize();
+ model->save("./yolov2.ini", ml::train::ModelFormat::MODEL_FORMAT_INI);
+ // model->load(MODEL_INIT_BIN_PATH);
+
+ // create train and validation data
+ std::array<UserDataType, 2> user_datas;
+ user_datas = createDetDataGenerator(TRAIN_DIR_PATH, VALIDATION_DIR_PATH,
+ MAX_OBJECT_NUMBER, 3, IMAGE_HEIGHT_SIZE,
+ IMAGE_WIDTH_SIZE);
+ auto &[train_user_data, valid_user_data] = user_datas;
+
+ auto dataset_train = ml::train::createDataset(
+ ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
+ auto dataset_valid = ml::train::createDataset(
+ ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());
+
+ model->setDataset(ml::train::DatasetModeType::MODE_TRAIN,
+ std::move(dataset_train));
+ model->setDataset(ml::train::DatasetModeType::MODE_VALID,
+ std::move(dataset_valid));
+
+ model->train();
+ } catch (const std::exception &e) {
+ std::cerr << "uncaught error while running! details: " << e.what()
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // print end time and duration
+ auto end = std::chrono::system_clock::now();
+ std::chrono::duration<double> elapsed_seconds = end - start;
+ std::time_t end_time = std::chrono::system_clock::to_time_t(end);
+ std::cout << "finished computation at " << std::ctime(&end_time)
+ << "elapsed time: " << elapsed_seconds.count() << "s\n";
+}
--- /dev/null
+# build command for libyolov2_loss_layer.so
+yolov2_loss_src = files('yolo_v2_loss.cpp')
+yolov2_loss_layer = shared_library('yolov2_loss_layer',
+ yolov2_loss_src,
+ dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
+ include_directories: include_directories('./'),
+ install: true,
+ install_dir: nntrainer_libdir/'nntrainer'/'layers',
+ cpp_args: '-DPLUGGABLE'
+)
+yolov2_loss_layer_dep = declare_dependency(
+ link_with: yolov2_loss_layer,
+ include_directories: include_directories('./')
+)
+
+# build command for libreorg_layer.so
+layer_reorg_src = files('reorg_layer.cpp')
+reorg_layer = shared_library('reorg_layer',
+ layer_reorg_src,
+ dependencies: [nntrainer_dep, nntrainer_ccapi_dep],
+ include_directories: include_directories('./'),
+ install: true,
+ install_dir: nntrainer_libdir/'nntrainer'/'layers',
+ cpp_args: '-DPLUGGABLE'
+)
+reorg_layer_dep = declare_dependency(
+ link_with: reorg_layer,
+ include_directories: include_directories('./')
+)
+
+yolo_sources = [
+ 'main.cpp',
+ 'det_dataloader.cpp',
+ 'yolo_v2_loss.cpp',
+ 'reorg_layer.cpp',
+]
+
+yolo_dependencies = [app_utils_dep,
+ nntrainer_dep,
+ nntrainer_ccapi_dep,
+ yolov2_loss_layer_dep,
+ reorg_layer_dep
+]
+
+e = executable('nntrainer_yolov2',
+ yolo_sources,
+ include_directories: [include_directories('.')],
+ dependencies: yolo_dependencies,
+ install: get_option('install-app'),
+ install_dir: application_install_dir
+)
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file reorg_layer.cpp
+ * @date 06 April 2023
+ * @todo support in-place operation. we can get channel, height, width
+ * coordinate from index of buffer memory. then we can use reorganizePos and
+ * restorePos func
+ * @brief This file contains the re-organization layer for yolo v2
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <iostream>
+
+#include "reorg_layer.h"
+
+namespace custom {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+namespace ReorgOp {
+
+/**
+ * @brief re-organize tensor
+ * @return output coordinate of reorganized tensor
+ */
+int reorg(int b, int c, int h, int w, int batch, int channel, int height,
+ int width) {
+ int out_c = channel / 4;
+ int c2 = c % out_c;
+ int offset = c / out_c;
+ int w2 = w * 2 + offset % 2;
+ int h2 = h * 2 + offset / 2;
+ int out_index = w2 + width * 2 * (h2 + height * 2 * (c2 + out_c * b));
+ return out_index;
+}
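+
+// Worked example of the index math above, with batch 0, channel = 4 and
+// height = width = 2 (so out_c = 4 / 4 = 1, i.e. a single 4x4 source plane):
+// the coordinate (c = 1, h = 0, w = 0) gives c2 = 0, offset = 1, w2 = 1,
+// h2 = 0 and thus flat index 1. Every group of out_c channels picks one of
+// the four 2x2-strided phases of the twice-as-large spatial plane.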
+} // namespace ReorgOp
+
+void ReorgLayer::finalize(nntrainer::InitLayerContext &context) {
+ std::vector<nntrainer::TensorDim> dim = context.getInputDimensions();
+
+ for (unsigned int i = 0; i < dim.size(); ++i) {
+ if (dim[i].getDataLen() == 0) {
+ throw std::invalid_argument("Input dimension is not set");
+ } else {
+ dim[i].channel(dim[i].channel() * 4);
+ dim[i].height(dim[i].height() / 2);
+ dim[i].width(dim[i].width() / 2);
+ }
+ }
+
+ context.setOutputDimensions(dim);
+}
+
+void ReorgLayer::forwarding(nntrainer::RunLayerContext &context,
+ bool training) {
+ nntrainer::Tensor &in = context.getInput(SINGLE_INOUT_IDX);
+ nntrainer::Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
+
+ for (int b = 0; b < (int)in.batch(); b++) {
+ for (int c = 0; c < (int)in.channel(); c++) {
+ for (int h = 0; h < (int)in.height(); h++) {
+ for (int w = 0; w < (int)in.width(); w++) {
+ int out_idx =
+ w + in.width() * (h + in.height() * (c + in.channel() * b));
+ int in_idx = ReorgOp::reorg(b, c, h, w, in.batch(), in.channel(),
+ in.height(), in.width());
+ out.getData()[out_idx] = in.getValue(in_idx);
+ }
+ }
+ }
+ }
+}
+
+void ReorgLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+ const nntrainer::Tensor &derivative_ =
+ context.getIncomingDerivative(SINGLE_INOUT_IDX);
+
+ nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+
+ for (int b = 0; b < (int)derivative_.batch(); b++) {
+ for (int c = 0; c < (int)derivative_.channel(); c++) {
+ for (int h = 0; h < (int)derivative_.height(); h++) {
+ for (int w = 0; w < (int)derivative_.width(); w++) {
+ int in_idx =
+ w + derivative_.width() *
+ (h + derivative_.height() * (c + derivative_.channel() * b));
+ int out_idx = ReorgOp::reorg(
+ b, c, h, w, derivative_.batch(), derivative_.channel(),
+ derivative_.height(), derivative_.width());
+ dx.getData()[out_idx] = derivative_.getValue(in_idx);
+ }
+ }
+ }
+ }
+}
+
+#ifdef PLUGGABLE
+
+nntrainer::Layer *create_reorg_layer() {
+ auto layer = new ReorgLayer();
+ std::cout << "reorg created\n";
+ return layer;
+}
+
+void destroy_reorg_layer(nntrainer::Layer *layer) {
+ std::cout << "reorg deleted\n";
+ delete layer;
+}
+
+extern "C" {
+nntrainer::LayerPluggable ml_train_layer_pluggable{create_reorg_layer,
+ destroy_reorg_layer};
+}
+
+#endif
+
+} // namespace custom
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Seungbaek Hong <sb92.hong@samsung.com>
+ *
+ * @file reorg_layer.h
+ * @date 4 April 2023
+ * @brief This file contains the re-organization layer for yolo v2
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Seungbaek Hong <sb92.hong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#ifndef __REORGANIZATION_LAYER_H__
+#define __REORGANIZATION_LAYER_H__
+
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+#include <utility>
+
+namespace custom {
+
+/**
+ * @brief A re-organization layer for yolo v2.
+ *
+ */
+class ReorgLayer final : public nntrainer::Layer {
+public:
+ /**
+ * @brief Construct a new Reorg Layer object
+ *
+ */
+ ReorgLayer() : Layer() {}
+
+ /**
+ * @brief Destroy the Reorg Layer object
+ *
+ */
+ ~ReorgLayer() {}
+
+ /**
+ * @copydoc Layer::finalize(InitLayerContext &context)
+ */
+ void finalize(nntrainer::InitLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+ */
+ void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+ /**
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
+ */
+ void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+ /**
+ * @copydoc bool supportBackwarding() const
+ */
+ bool supportBackwarding() const override { return true; };
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+ */
+ void exportTo(nntrainer::Exporter &exporter,
+ const ml::train::ExportMethods &method) const override{};
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override { return ReorgLayer::type; };
+
+ /**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+ void setProperty(const std::vector<std::string> &values) override{};
+
+ inline static const std::string type = "reorg_layer";
+};
+
+} // namespace custom
+
+#endif /* __REORGANIZATION_LAYER_H__ */
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file yolo_v2_loss.cpp
+ * @date 07 March 2023
+ * @brief This file contains the yolo v2 loss layer
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include "yolo_v2_loss.h"
+#include <nntrainer_log.h>
+
+namespace custom {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+enum YoloV2LossParams {
+ bbox_x_pred,
+ bbox_y_pred,
+ bbox_w_pred,
+ bbox_h_pred,
+ confidence_pred,
+ class_pred,
+ bbox_w_pred_anchor,
+ bbox_h_pred_anchor,
+ bbox_x_gt,
+ bbox_y_gt,
+ bbox_w_gt,
+ bbox_h_gt,
+ confidence_gt,
+ class_gt,
+ bbox_class_mask,
+ iou_mask,
+ bbox1_width,
+ bbox1_height,
+ is_xy_min_max,
+ intersection_width,
+ intersection_height,
+ unions,
+};
+
+namespace props {
+MaxObjectNumber::MaxObjectNumber(const unsigned &value) { set(value); }
+ClassNumber::ClassNumber(const unsigned &value) { set(value); }
+GridHeightNumber::GridHeightNumber(const unsigned &value) { set(value); }
+GridWidthNumber::GridWidthNumber(const unsigned &value) { set(value); }
+} // namespace props
+
+/**
+ * @brief mse
+ *
+ * @param pred prediction
+ * @param ground_truth ground truth
+ * @return float loss
+ * @todo make loss behaves like acti_func
+ */
+float mse(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth) {
+ nntrainer::Tensor residual;
+ pred.subtract(ground_truth, residual);
+
+ float l2norm = residual.l2norm();
+ l2norm *= l2norm / residual.size();
+
+ return l2norm;
+}
+
+/**
+ * @brief backwarding of mse
+ *
+ * @param pred prediction
+ * @param ground_truth ground truth
+ * @param outgoing_derivative outgoing derivative
+ */
+void msePrime(nntrainer::Tensor &pred, nntrainer::Tensor &ground_truth,
+ nntrainer::Tensor &outgoing_derivative) {
+ pred.subtract(ground_truth, outgoing_derivative);
+ float divider = ((float)pred.size()) / 2;
+ if (outgoing_derivative.divide_i(divider) != ML_ERROR_NONE) {
+ throw std::runtime_error(
+ "[YoloV2LossLayer::calcDerivative] Error when calculating loss");
+ }
+}
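+
+// Sanity check for the divider above: with L = (1/N) * sum((pred - gt)^2),
+// dL/dpred = 2 * (pred - gt) / N, and dividing the residual by N / 2 yields
+// exactly that factor.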
+
+/**
+ * @brief calculate iou
+ *
+ * @param bbox1_x1 bbox1_x1
+ * @param bbox1_y1 bbox1_y1
+ * @param bbox1_w bbox1_w
+ * @param bbox1_h bbox1_h
+ * @param bbox2_x1 bbox2_x1
+ * @param bbox2_y1 bbox2_y1
+ * @param bbox2_w bbox2_w
+ * @param bbox2_h bbox2_h
+ * @param[out] bbox1_width bbox1 width
+ * @param[out] bbox1_height bbox1 height
+ * @param[out] is_xy_min_max 1 where the bbox1 coordinate was selected for the
+ * intersection (bbox1's x1/y1 is the max of the two boxes, bbox1's x2/y2 the
+ * min), else 0
+ * @param[out] intersection_width intersection width
+ * @param[out] intersection_height intersection height
+ * @param[out] unions unions
+ * @return nntrainer::Tensor iou
+ */
+nntrainer::Tensor
+calc_iou(nntrainer::Tensor &bbox1_x1, nntrainer::Tensor &bbox1_y1,
+ nntrainer::Tensor &bbox1_w, nntrainer::Tensor &bbox1_h,
+ nntrainer::Tensor &bbox2_x1, nntrainer::Tensor &bbox2_y1,
+ nntrainer::Tensor &bbox2_w, nntrainer::Tensor &bbox2_h,
+ nntrainer::Tensor &bbox1_width, nntrainer::Tensor &bbox1_height,
+ nntrainer::Tensor &is_xy_min_max,
+ nntrainer::Tensor &intersection_width,
+ nntrainer::Tensor &intersection_height, nntrainer::Tensor &unions) {
+ nntrainer::Tensor bbox1_x2 = bbox1_x1.add(bbox1_w);
+ nntrainer::Tensor bbox1_y2 = bbox1_y1.add(bbox1_h);
+ nntrainer::Tensor bbox2_x2 = bbox2_x1.add(bbox2_w);
+ nntrainer::Tensor bbox2_y2 = bbox2_y1.add(bbox2_h);
+
+ bbox1_x2.subtract(bbox1_x1, bbox1_width);
+ bbox1_y2.subtract(bbox1_y1, bbox1_height);
+ nntrainer::Tensor bbox1 = bbox1_width.multiply(bbox1_height);
+
+ nntrainer::Tensor bbox2_width = bbox2_x2.subtract(bbox2_x1);
+ nntrainer::Tensor bbox2_height = bbox2_y2.subtract(bbox2_y1);
+ nntrainer::Tensor bbox2 = bbox2_width.multiply(bbox2_height);
+
+ auto min_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
+ nntrainer::Tensor &intersection_xy) {
+ std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
+ bbox2_xy.getData(), intersection_xy.getData(),
+ [](float x1, float x2) { return std::min(x1, x2); });
+ };
+ auto max_func = [&](nntrainer::Tensor &bbox1_xy, nntrainer::Tensor &bbox2_xy,
+ nntrainer::Tensor &intersection_xy) {
+ std::transform(bbox1_xy.getData(), bbox1_xy.getData() + bbox1_xy.size(),
+ bbox2_xy.getData(), intersection_xy.getData(),
+ [](float x1, float x2) { return std::max(x1, x2); });
+ };
+
+ nntrainer::Tensor intersection_x1(bbox1_x1.getDim());
+ nntrainer::Tensor intersection_x2(bbox1_x1.getDim());
+ nntrainer::Tensor intersection_y1(bbox1_y1.getDim());
+ nntrainer::Tensor intersection_y2(bbox1_y1.getDim());
+ max_func(bbox1_x1, bbox2_x1, intersection_x1);
+ min_func(bbox1_x2, bbox2_x2, intersection_x2);
+ max_func(bbox1_y1, bbox2_y1, intersection_y1);
+ min_func(bbox1_y2, bbox2_y2, intersection_y2);
+
+ auto is_min_max_func = [&](nntrainer::Tensor &xy,
+ nntrainer::Tensor &intersection,
+ nntrainer::Tensor &is_min_max) {
+ std::transform(xy.getData(), xy.getData() + xy.size(),
+ intersection.getData(), is_min_max.getData(),
+ [](float x, float m) {
+ return nntrainer::absFloat(x - m) < 1e-4 ? 1.0 : 0.0;
+ });
+ };
+
+ nntrainer::Tensor is_bbox1_x1_max(bbox1_x1.getDim());
+ nntrainer::Tensor is_bbox1_y1_max(bbox1_x1.getDim());
+ nntrainer::Tensor is_bbox1_x2_min(bbox1_x1.getDim());
+ nntrainer::Tensor is_bbox1_y2_min(bbox1_x1.getDim());
+ is_min_max_func(bbox1_x1, intersection_x1, is_bbox1_x1_max);
+ is_min_max_func(bbox1_y1, intersection_y1, is_bbox1_y1_max);
+ is_min_max_func(bbox1_x2, intersection_x2, is_bbox1_x2_min);
+ is_min_max_func(bbox1_y2, intersection_y2, is_bbox1_y2_min);
+
+ nntrainer::Tensor is_bbox_min_max = nntrainer::Tensor::cat(
+ {is_bbox1_x1_max, is_bbox1_y1_max, is_bbox1_x2_min, is_bbox1_y2_min}, 3);
+ is_xy_min_max.copyData(is_bbox_min_max);
+
+ intersection_x2.subtract(intersection_x1, intersection_width);
+
+ auto type_intersection_width = intersection_width.getDataType();
+ if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
+ intersection_width.apply_i<float>(nntrainer::ActiFunc::relu<float>);
+ } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ intersection_width.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ intersection_y2.subtract(intersection_y1, intersection_height);
+
+ auto type_intersection_height = intersection_height.getDataType();
+ if (type_intersection_height == ml::train::TensorDim::DataType::FP32) {
+ intersection_height.apply_i<float>(nntrainer::ActiFunc::relu<float>);
+ } else if (type_intersection_height == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ intersection_height.apply_i<_FP16>(nntrainer::ActiFunc::relu<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ nntrainer::Tensor intersection =
+ intersection_width.multiply(intersection_height);
+ bbox1.add(bbox2, unions);
+ unions.subtract_i(intersection);
+
+ return intersection.divide(unions);
+}
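+
+// Numeric check for calc_iou: two 1 x 1 boxes whose x1 differ by 0.5 overlap
+// in a 0.5 x 1 strip, so intersection = 0.5, union = 1 + 1 - 0.5 = 1.5 and
+// IoU = 1/3.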
+
+/**
+ * @brief calculate iou gradient
+ * @details Let bbox_pred be x, the intersection f(x), the union g(x) and the
+ * iou y. Then y = f(x)/g(x), and g(x) = bbox1 + bbox2 - f(x). The partial
+ * derivative of y with respect to x is (f'(x)g(x) - f(x)g'(x))/(g(x)^2), and
+ * the partial derivative of g(x) with respect to x is bbox1'(x) - f'(x).
+ * @param confidence_gt_grad incoming derivative for iou
+ * @param bbox1_width bbox1_width
+ * @param bbox1_height bbox1_height
+ * @param is_xy_min_max 1 where the bbox1 coordinate was selected for the
+ * intersection (bbox1's x1/y1 is the max of the two boxes, bbox1's x2/y2 the
+ * min), else 0
+ * @param intersection_width intersection width
+ * @param intersection_height intersection height
+ * @param unions unions
+ * @return std::vector<nntrainer::Tensor> iou_grad
+ */
+std::vector<nntrainer::Tensor> calc_iou_grad(
+ nntrainer::Tensor &confidence_gt_grad, nntrainer::Tensor &bbox1_width,
+ nntrainer::Tensor &bbox1_height, nntrainer::Tensor &is_xy_min_max,
+ nntrainer::Tensor &intersection_width, nntrainer::Tensor &intersection_height,
+ nntrainer::Tensor &unions) {
+ nntrainer::Tensor intersection =
+ intersection_width.multiply(intersection_height);
+
+ // 1. calculate intersection local gradient [f'(x)]
+  nntrainer::Tensor intersection_width_relu_prime;
+  nntrainer::Tensor intersection_height_relu_prime;
+  auto type_intersection_width = intersection_width.getDataType();
+  if (type_intersection_width == ml::train::TensorDim::DataType::FP32) {
+    intersection_width_relu_prime =
+      intersection_width.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
+  } else if (type_intersection_width == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    intersection_width_relu_prime =
+      intersection_width.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+  auto type_intersection_height = intersection_height.getDataType();
+  if (type_intersection_height == ml::train::TensorDim::DataType::FP32) {
+    intersection_height_relu_prime =
+      intersection_height.apply<float>(nntrainer::ActiFunc::reluPrime<float>);
+  } else if (type_intersection_height ==
+             ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    intersection_height_relu_prime =
+      intersection_height.apply<_FP16>(nntrainer::ActiFunc::reluPrime<_FP16>);
+#else
+    throw std::runtime_error("Not supported data type");
+#endif
+  }
+
+ nntrainer::Tensor intersection_x2_local_grad =
+ intersection_width_relu_prime.multiply(intersection_height);
+ nntrainer::Tensor intersection_y2_local_grad =
+ intersection_height_relu_prime.multiply(intersection_width);
+ nntrainer::Tensor intersection_x1_local_grad =
+ intersection_x2_local_grad.multiply(-1.0);
+ nntrainer::Tensor intersection_y1_local_grad =
+ intersection_y2_local_grad.multiply(-1.0);
+
+ nntrainer::Tensor intersection_local_grad = nntrainer::Tensor::cat(
+ {intersection_x1_local_grad, intersection_y1_local_grad,
+ intersection_x2_local_grad, intersection_y2_local_grad},
+ 3);
+ intersection_local_grad.multiply_i(is_xy_min_max);
+
+ // 2. calculate union local gradient [g'(x)]
+ nntrainer::Tensor bbox1_x1_grad = bbox1_height.multiply(-1.0);
+ nntrainer::Tensor bbox1_y1_grad = bbox1_width.multiply(-1.0);
+ nntrainer::Tensor bbox1_x2_grad = bbox1_height;
+ nntrainer::Tensor bbox1_y2_grad = bbox1_width;
+ nntrainer::Tensor bbox1_grad = nntrainer::Tensor::cat(
+ {bbox1_x1_grad, bbox1_y1_grad, bbox1_x2_grad, bbox1_y2_grad}, 3);
+
+ nntrainer::Tensor unions_local_grad =
+ bbox1_grad.subtract(intersection_local_grad);
+
+ // 3. calculate iou local gradient [(f'(x)g(x) - f(x)g'(x))/(g(x)^2)]
+ nntrainer::Tensor lhs = intersection_local_grad.multiply(unions);
+ nntrainer::Tensor rhs = unions_local_grad.multiply(intersection);
+ nntrainer::Tensor iou_grad = lhs.subtract(rhs);
+ iou_grad.divide_i(unions);
+ iou_grad.divide_i(unions);
+
+  // 4. multiply with incoming derivative
+ iou_grad.multiply_i(confidence_gt_grad);
+
+ auto splitted_iou_grad = iou_grad.split({1, 1, 1, 1}, 3);
+ std::vector<nntrainer::Tensor> ret = {
+ splitted_iou_grad[0].add(splitted_iou_grad[2]),
+ splitted_iou_grad[1].add(splitted_iou_grad[3]), splitted_iou_grad[2],
+ splitted_iou_grad[3]};
+ return ret;
+}
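+
+// The recombination in `ret` above follows from x2 = x1 + w and y2 = y1 + h:
+// the box position x enters both x1 and x2, so its gradient is the sum of
+// the x1 and x2 components, while the width w only enters x2 (and likewise
+// for y and h).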
+
+YoloV2LossLayer::YoloV2LossLayer() :
+ anchors_w({1, 1, NUM_ANCHOR, 1}, anchors_w_buf),
+ anchors_h({1, 1, NUM_ANCHOR, 1}, anchors_h_buf),
+ sigmoid(nntrainer::ActivationType::ACT_SIGMOID, true),
+ softmax(nntrainer::ActivationType::ACT_SOFTMAX, true),
+ yolo_v2_loss_props(props::MaxObjectNumber(), props::ClassNumber(),
+ props::GridHeightNumber(), props::GridWidthNumber()) {
+ anchors_ratio = anchors_w.divide(anchors_h);
+ wt_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) {
+ nntrainer::TensorDim input_dim =
+ context.getInputDimensions()[SINGLE_INOUT_IDX];
+ const unsigned int batch_size = input_dim.batch();
+ const unsigned int class_number =
+ std::get<props::ClassNumber>(yolo_v2_loss_props).get();
+ const unsigned int grid_height_number =
+ std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
+ const unsigned int grid_width_number =
+ std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
+ const unsigned int max_object_number =
+ std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
+ nntrainer::TensorDim label_dim(batch_size, 1, max_object_number, 5);
+ context.setOutputDimensions({label_dim});
+
+ nntrainer::TensorDim bbox_x_pred_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_x_pred] = context.requestTensor(
+ bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_y_pred_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_y_pred] = context.requestTensor(
+ bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_w_pred_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_w_pred] = context.requestTensor(
+ bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_h_pred_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_h_pred] = context.requestTensor(
+ bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim confidence_pred_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::confidence_pred] =
+ context.requestTensor(confidence_pred_dim, "confidence_pred",
+ nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim class_pred_dim(batch_size,
+ grid_height_number * grid_width_number,
+ NUM_ANCHOR, class_number);
+ wt_idx[YoloV2LossParams::class_pred] = context.requestTensor(
+ class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_w_pred_anchor_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_w_pred_anchor] =
+ context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_h_pred_anchor_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_h_pred_anchor] =
+ context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_x_gt_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_x_gt] = context.requestTensor(
+ bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_y_gt_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_y_gt] = context.requestTensor(
+ bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_w_gt_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_w_gt] = context.requestTensor(
+ bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_h_gt_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_h_gt] = context.requestTensor(
+ bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim confidence_gt_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::confidence_gt] = context.requestTensor(
+ confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE,
+ false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim class_gt_dim(batch_size,
+ grid_height_number * grid_width_number,
+ NUM_ANCHOR, class_number);
+ wt_idx[YoloV2LossParams::class_gt] = context.requestTensor(
+ class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox_class_mask_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox_class_mask] =
+ context.requestTensor(bbox_class_mask_dim, "bbox_class_mask",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim iou_mask_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::iou_mask] = context.requestTensor(
+ iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox1_width_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox1_width] = context.requestTensor(
+ bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim bbox1_height_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::bbox1_height] = context.requestTensor(
+ bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE,
+ false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim is_xy_min_max_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4);
+ wt_idx[YoloV2LossParams::is_xy_min_max] = context.requestTensor(
+ is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE,
+ false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim intersection_width_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::intersection_width] =
+ context.requestTensor(intersection_width_dim, "intersection_width",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim intersection_height_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::intersection_height] =
+ context.requestTensor(intersection_height_dim, "intersection_height",
+ nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+
+ nntrainer::TensorDim unions_dim(
+ batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1);
+ wt_idx[YoloV2LossParams::unions] = context.requestTensor(
+ unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false,
+ nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN);
+}
+
+void YoloV2LossLayer::forwarding(nntrainer::RunLayerContext &context,
+ bool training) {
+ const unsigned int max_object_number =
+ std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
+
+ nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+
+  std::vector<nntrainer::Tensor> splitted_input =
+    input.split({1, 1, 1, 1, 1, max_object_number}, 3);
+  nntrainer::Tensor bbox_x_pred_ = splitted_input[0];
+  nntrainer::Tensor bbox_y_pred_ = splitted_input[1];
+  nntrainer::Tensor bbox_w_pred_ = splitted_input[2];
+  nntrainer::Tensor bbox_h_pred_ = splitted_input[3];
+  nntrainer::Tensor confidence_pred_ = splitted_input[4];
+  nntrainer::Tensor class_pred_ = splitted_input[5];
+
+ nntrainer::Tensor &bbox_x_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+ nntrainer::Tensor &bbox_y_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+ nntrainer::Tensor &bbox_w_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
+ nntrainer::Tensor &bbox_h_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
+
+ nntrainer::Tensor &confidence_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
+ nntrainer::Tensor &class_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
+
+ nntrainer::Tensor &bbox_w_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+ nntrainer::Tensor &bbox_h_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+ bbox_x_pred.copyData(bbox_x_pred_);
+ bbox_y_pred.copyData(bbox_y_pred_);
+ bbox_w_pred.copyData(bbox_w_pred_);
+ bbox_h_pred.copyData(bbox_h_pred_);
+
+ confidence_pred.copyData(confidence_pred_);
+ class_pred.copyData(class_pred_);
+
+ nntrainer::Tensor &bbox_x_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+ nntrainer::Tensor &bbox_y_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+ nntrainer::Tensor &bbox_w_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+ nntrainer::Tensor &bbox_h_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+ nntrainer::Tensor &confidence_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+ nntrainer::Tensor &class_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+ nntrainer::Tensor &bbox_class_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+ nntrainer::Tensor &iou_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+ bbox_x_gt.setValue(0);
+ bbox_y_gt.setValue(0);
+ bbox_w_gt.setValue(0);
+ bbox_h_gt.setValue(0);
+
+ confidence_gt.setValue(0);
+ class_gt.setValue(0);
+
+ // init mask
+ bbox_class_mask.setValue(0);
+ iou_mask.setValue(0.5);
+
+ // activate pred
+ sigmoid.run_fn(bbox_x_pred, bbox_x_pred);
+ sigmoid.run_fn(bbox_y_pred, bbox_y_pred);
+
+ auto type_bbox_w_pred = bbox_w_pred.getDataType();
+ if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP32) {
+ bbox_w_pred.apply_i<float>(nntrainer::exp_util<float>);
+ } else if (type_bbox_w_pred == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ bbox_w_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ auto type_bbox_h_pred = bbox_h_pred.getDataType();
+ if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP32) {
+ bbox_h_pred.apply_i<float>(nntrainer::exp_util<float>);
+ } else if (type_bbox_h_pred == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ bbox_h_pred.apply_i<_FP16>(nntrainer::exp_util<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ sigmoid.run_fn(confidence_pred, confidence_pred);
+ softmax.run_fn(class_pred, class_pred);
+
+ bbox_w_pred_anchor.copyData(bbox_w_pred);
+ bbox_h_pred_anchor.copyData(bbox_h_pred);
+
+ // apply anchors to bounding box
+ bbox_w_pred_anchor.multiply_i(anchors_w);
+ auto type_bbox_w_pred_anchor = bbox_w_pred_anchor.getDataType();
+ if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP32) {
+ bbox_w_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
+ } else if (type_bbox_w_pred_anchor == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ bbox_w_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ bbox_h_pred_anchor.multiply_i(anchors_h);
+ auto type_bbox_h_pred_anchor = bbox_h_pred_anchor.getDataType();
+ if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP32) {
+ bbox_h_pred_anchor.apply_i<float>(nntrainer::sqrtFloat<float>);
+ } else if (type_bbox_h_pred_anchor == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ bbox_h_pred_anchor.apply_i<_FP16>(nntrainer::sqrtFloat<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+
+ generate_ground_truth(context);
+
+ nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
+ {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
+ nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
+ nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
+ nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
+
+ nntrainer::Tensor bbox_gt =
+ nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
+ nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
+ nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
+ nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
+
+ float bbox_loss = mse(masked_bbox_pred, masked_bbox_gt);
+ float confidence_loss = mse(masked_confidence_pred, masked_confidence_gt);
+ float class_loss = mse(masked_class_pred, masked_class_gt);
+
+ float loss = 5 * bbox_loss + confidence_loss + class_loss;
+ ml_logd("Current iteration loss: %f", loss);
+}
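+
+// The factor 5 on the box loss mirrors the lambda_coord = 5 coordinate
+// weighting used in the original YOLO loss formulation.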
+
+void YoloV2LossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+ nntrainer::Tensor &bbox_x_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+ nntrainer::Tensor &bbox_x_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_x_pred]);
+ nntrainer::Tensor &bbox_y_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+ nntrainer::Tensor &bbox_y_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_y_pred]);
+ nntrainer::Tensor &bbox_w_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred]);
+ nntrainer::Tensor &bbox_w_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_w_pred]);
+ nntrainer::Tensor &bbox_h_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred]);
+ nntrainer::Tensor &bbox_h_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::bbox_h_pred]);
+
+ nntrainer::Tensor &confidence_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::confidence_pred]);
+ nntrainer::Tensor &confidence_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::confidence_pred]);
+ nntrainer::Tensor &class_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::class_pred]);
+ nntrainer::Tensor &class_pred_grad =
+ context.getTensorGrad(wt_idx[YoloV2LossParams::class_pred]);
+
+ nntrainer::Tensor &bbox_w_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+ nntrainer::Tensor &bbox_h_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+ nntrainer::Tensor &bbox_x_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+ nntrainer::Tensor &bbox_y_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+ nntrainer::Tensor &bbox_w_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+ nntrainer::Tensor &bbox_h_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+ nntrainer::Tensor &confidence_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+ nntrainer::Tensor &class_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+ nntrainer::Tensor &bbox_class_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+ nntrainer::Tensor &iou_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+ nntrainer::Tensor &bbox1_width =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
+ nntrainer::Tensor &bbox1_height =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
+ nntrainer::Tensor &is_xy_min_max =
+ context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
+ nntrainer::Tensor &intersection_width =
+ context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
+ nntrainer::Tensor &intersection_height =
+ context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
+ nntrainer::Tensor &unions =
+ context.getTensor(wt_idx[YoloV2LossParams::unions]);
+
+ nntrainer::Tensor bbox_pred = nntrainer::Tensor::cat(
+ {bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor}, 3);
+ nntrainer::Tensor masked_bbox_pred = bbox_pred.multiply(bbox_class_mask);
+ nntrainer::Tensor masked_confidence_pred = confidence_pred.multiply(iou_mask);
+ nntrainer::Tensor masked_class_pred = class_pred.multiply(bbox_class_mask);
+
+ nntrainer::Tensor bbox_gt =
+ nntrainer::Tensor::cat({bbox_x_gt, bbox_y_gt, bbox_w_gt, bbox_h_gt}, 3);
+ nntrainer::Tensor masked_bbox_gt = bbox_gt.multiply(bbox_class_mask);
+ nntrainer::Tensor masked_confidence_gt = confidence_gt.multiply(iou_mask);
+ nntrainer::Tensor masked_class_gt = class_gt.multiply(bbox_class_mask);
+
+ nntrainer::Tensor masked_bbox_pred_grad;
+ nntrainer::Tensor masked_confidence_pred_grad;
+ nntrainer::Tensor masked_confidence_gt_grad;
+ nntrainer::Tensor masked_class_pred_grad;
+
+ nntrainer::Tensor confidence_gt_grad;
+
+ msePrime(masked_bbox_pred, masked_bbox_gt, masked_bbox_pred_grad);
+ msePrime(masked_confidence_pred, masked_confidence_gt,
+ masked_confidence_pred_grad);
+ msePrime(masked_confidence_gt, masked_confidence_pred,
+ masked_confidence_gt_grad);
+ msePrime(masked_class_pred, masked_class_gt, masked_class_pred_grad);
+
+ masked_bbox_pred_grad.multiply_i(5);
+
+ nntrainer::Tensor bbox_pred_grad;
+
+ masked_bbox_pred_grad.multiply(bbox_class_mask, bbox_pred_grad);
+ masked_confidence_pred_grad.multiply(iou_mask, confidence_pred_grad);
+ masked_confidence_gt_grad.multiply(iou_mask, confidence_gt_grad);
+ masked_class_pred_grad.multiply(bbox_class_mask, class_pred_grad);
+
+ std::vector<nntrainer::Tensor> splitted_bbox_pred_grad =
+ bbox_pred_grad.split({1, 1, 1, 1}, 3);
+ bbox_x_pred_grad.copyData(splitted_bbox_pred_grad[0]);
+ bbox_y_pred_grad.copyData(splitted_bbox_pred_grad[1]);
+ bbox_w_pred_grad.copyData(splitted_bbox_pred_grad[2]);
+ bbox_h_pred_grad.copyData(splitted_bbox_pred_grad[3]);
+
+ // std::vector<nntrainer::Tensor> bbox_pred_iou_grad =
+ // calc_iou_grad(confidence_gt_grad, bbox1_width, bbox1_height,
+ // is_xy_min_max,
+ // intersection_width, intersection_height, unions);
+ // bbox_x_pred_grad.add_i(bbox_pred_iou_grad[0]);
+ // bbox_y_pred_grad.add_i(bbox_pred_iou_grad[1]);
+ // bbox_w_pred_grad.add_i(bbox_pred_iou_grad[2]);
+ // bbox_h_pred_grad.add_i(bbox_pred_iou_grad[3]);
+
+  /**
+   * @brief calculate gradient for applying anchors to bounding box
+   * @details Let bbox_pred be x, the anchor c (a constant with respect to
+   * bbox_pred) and bbox_pred_anchor y, so that y = sqrt(cx). The partial
+   * derivative of y with respect to x is sqrt(c)/(2*sqrt(x)), which is
+   * equivalent to sqrt(cx)/(2x), so sqrt(cx) can be replaced with y.
+   * @note the division by bbox_pred (x) is not executed here because
+   * bbox_pred_grad would be multiplied by bbox_pred (x) right after (the
+   * derivative of exp); the two disabled lines below cancel each other.
+   */
+ bbox_w_pred_grad.multiply_i(bbox_w_pred_anchor);
+ bbox_h_pred_grad.multiply_i(bbox_h_pred_anchor);
+  /** intentionally disabled: cancels with the exp derivative below */
+ // bbox_w_pred_grad.divide_i(bbox_w_pred);
+ // bbox_h_pred_grad.divide_i(bbox_h_pred);
+ bbox_w_pred_grad.divide_i(2);
+ bbox_h_pred_grad.divide_i(2);
+
+ sigmoid.run_prime_fn(bbox_x_pred, bbox_x_pred, bbox_x_pred_grad,
+ bbox_x_pred_grad);
+ sigmoid.run_prime_fn(bbox_y_pred, bbox_y_pred, bbox_y_pred_grad,
+ bbox_y_pred_grad);
+  /** intentionally disabled: cancels with the division above */
+ // bbox_w_pred_grad.multiply_i(bbox_w_pred);
+ // bbox_h_pred_grad.multiply_i(bbox_h_pred);
+ sigmoid.run_prime_fn(confidence_pred, confidence_pred, confidence_pred_grad,
+ confidence_pred_grad);
+ softmax.run_prime_fn(class_pred, class_pred, class_pred_grad,
+ class_pred_grad);
+
+ nntrainer::Tensor outgoing_derivative_ = nntrainer::Tensor::cat(
+ {bbox_x_pred_grad, bbox_y_pred_grad, bbox_w_pred_grad, bbox_h_pred_grad,
+ confidence_pred_grad, class_pred_grad},
+ 3);
+ nntrainer::Tensor &outgoing_derivative =
+ context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+ outgoing_derivative.copyData(outgoing_derivative_);
+}
+
+void YoloV2LossLayer::exportTo(nntrainer::Exporter &exporter,
+ const ml::train::ExportMethods &method) const {
+ exporter.saveResult(yolo_v2_loss_props, method, this);
+}
+
+void YoloV2LossLayer::setProperty(const std::vector<std::string> &values) {
+ auto remain_props = loadProperties(values, yolo_v2_loss_props);
+ NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
+ << "[YoloV2LossLayer] Unknown Layer Properties count " +
+ std::to_string(values.size());
+}
+
+void YoloV2LossLayer::setBatch(nntrainer::RunLayerContext &context,
+ unsigned int batch) {
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::confidence_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::class_pred], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor], batch);
+
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_x_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_y_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_w_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_h_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::confidence_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::class_gt], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox_class_mask], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::iou_mask], batch);
+
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox1_width], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::bbox1_height], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::is_xy_min_max], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::intersection_width], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::intersection_height], batch);
+ context.updateTensor(wt_idx[YoloV2LossParams::unions], batch);
+}
+
+unsigned int YoloV2LossLayer::find_responsible_anchors(float bbox_ratio) {
+ nntrainer::Tensor similarity = anchors_ratio.subtract(bbox_ratio);
+ auto data_type = similarity.getDataType();
+ if (data_type == ml::train::TensorDim::DataType::FP32) {
+ similarity.apply_i<float>(nntrainer::absFloat<float>);
+ } else if (data_type == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+ similarity.apply_i<_FP16>(nntrainer::absFloat<_FP16>);
+#else
+ throw std::runtime_error("Not supported data type");
+#endif
+ }
+ auto data = similarity.getData();
+
+ auto min_iter = std::min_element(data, data + NUM_ANCHOR);
+ return std::distance(data, min_iter);
+}
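+
+// Example of the matching above: a ground-truth box with w/h = 1.2 picks the
+// anchor whose own w/h ratio (anchors_ratio) is closest to 1.2 in absolute
+// difference; only that anchor is made responsible for the object.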
+
+void YoloV2LossLayer::generate_ground_truth(
+ nntrainer::RunLayerContext &context) {
+ const unsigned int max_object_number =
+ std::get<props::MaxObjectNumber>(yolo_v2_loss_props).get();
+ const unsigned int grid_height_number =
+ std::get<props::GridHeightNumber>(yolo_v2_loss_props).get();
+ const unsigned int grid_width_number =
+ std::get<props::GridWidthNumber>(yolo_v2_loss_props).get();
+
+ nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX);
+
+ nntrainer::Tensor &bbox_x_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_pred]);
+ nntrainer::Tensor &bbox_y_pred =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_pred]);
+ nntrainer::Tensor &bbox_w_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_pred_anchor]);
+ nntrainer::Tensor &bbox_h_pred_anchor =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_pred_anchor]);
+
+ nntrainer::Tensor &bbox_x_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_x_gt]);
+ nntrainer::Tensor &bbox_y_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_y_gt]);
+ nntrainer::Tensor &bbox_w_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_w_gt]);
+ nntrainer::Tensor &bbox_h_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_h_gt]);
+
+ nntrainer::Tensor &confidence_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::confidence_gt]);
+ nntrainer::Tensor &class_gt =
+ context.getTensor(wt_idx[YoloV2LossParams::class_gt]);
+
+ nntrainer::Tensor &bbox_class_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox_class_mask]);
+ nntrainer::Tensor &iou_mask =
+ context.getTensor(wt_idx[YoloV2LossParams::iou_mask]);
+
+ nntrainer::Tensor &bbox1_width =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox1_width]);
+ nntrainer::Tensor &bbox1_height =
+ context.getTensor(wt_idx[YoloV2LossParams::bbox1_height]);
+ nntrainer::Tensor &is_xy_min_max =
+ context.getTensor(wt_idx[YoloV2LossParams::is_xy_min_max]);
+ nntrainer::Tensor &intersection_width =
+ context.getTensor(wt_idx[YoloV2LossParams::intersection_width]);
+ nntrainer::Tensor &intersection_height =
+ context.getTensor(wt_idx[YoloV2LossParams::intersection_height]);
+ nntrainer::Tensor &unions =
+ context.getTensor(wt_idx[YoloV2LossParams::unions]);
+
+ const unsigned int batch_size = bbox_x_pred.getDim().batch();
+
+  std::vector<nntrainer::Tensor> splitted_label =
+    label.split({1, 1, 1, 1, 1}, 3);
+  nntrainer::Tensor bbox_x_label = splitted_label[0];
+  nntrainer::Tensor bbox_y_label = splitted_label[1];
+  nntrainer::Tensor bbox_w_label = splitted_label[2];
+  nntrainer::Tensor bbox_h_label = splitted_label[3];
+  nntrainer::Tensor class_label = splitted_label[4];
+
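+  // labels are normalized to [0, 1]; scaling x and y by the grid size makes
+  // the integer part the grid-cell index and the fractional part the offset
+  // within that cell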
+ bbox_x_label.multiply_i(grid_width_number);
+ bbox_y_label.multiply_i(grid_height_number);
+
+ for (unsigned int batch = 0; batch < batch_size; ++batch) {
+ for (unsigned int object = 0; object < max_object_number; ++object) {
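+      // a box with zero width and zero height marks the end of the valid
+      // (non-padded) objects for this image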
+ if (!bbox_w_label.getValue(batch, 0, object, 0) &&
+ !bbox_h_label.getValue(batch, 0, object, 0)) {
+ break;
+ }
+ unsigned int grid_x_index = bbox_x_label.getValue(batch, 0, object, 0);
+ unsigned int grid_y_index = bbox_y_label.getValue(batch, 0, object, 0);
+ unsigned int grid_index = grid_y_index * grid_width_number + grid_x_index;
+ unsigned int responsible_anchor =
+ find_responsible_anchors(bbox_w_label.getValue(batch, 0, object, 0) /
+ bbox_h_label.getValue(batch, 0, object, 0));
+
+ bbox_x_gt.setValue(batch, grid_index, responsible_anchor, 0,
+ bbox_x_label.getValue(batch, 0, object, 0) -
+ grid_x_index);
+ bbox_y_gt.setValue(batch, grid_index, responsible_anchor, 0,
+ bbox_y_label.getValue(batch, 0, object, 0) -
+ grid_y_index);
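+      // as in the YOLO loss formulation, width and height are stored in
+      // square-root space so that errors on large boxes are weighted less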
+ bbox_w_gt.setValue(
+ batch, grid_index, responsible_anchor, 0,
+ nntrainer::sqrtFloat(bbox_w_label.getValue(batch, 0, object, 0)));
+ bbox_h_gt.setValue(
+ batch, grid_index, responsible_anchor, 0,
+ nntrainer::sqrtFloat(bbox_h_label.getValue(batch, 0, object, 0)));
+
+ class_gt.setValue(batch, grid_index, responsible_anchor,
+ class_label.getValue(batch, 0, object, 0), 1);
+ bbox_class_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
+ iou_mask.setValue(batch, grid_index, responsible_anchor, 0, 1);
+ }
+ }
+
+ nntrainer::Tensor iou = calc_iou(
+ bbox_x_pred, bbox_y_pred, bbox_w_pred_anchor, bbox_h_pred_anchor, bbox_x_gt,
+ bbox_y_gt, bbox_w_gt, bbox_h_gt, bbox1_width, bbox1_height, is_xy_min_max,
+ intersection_width, intersection_height, unions);
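+  // the confidence target is the IoU between the predicted boxes and the
+  // generated ground truth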
+ confidence_gt.copyData(iou);
+}
+
+#ifdef PLUGGABLE
+
+nntrainer::Layer *create_yolo_v2_loss_layer() {
+ auto layer = new YoloV2LossLayer();
+ return layer;
+}
+
+void destroy_yolo_v2_loss_layer(nntrainer::Layer *layer) { delete layer; }
+
+/**
+ * @note ml_train_layer_pluggable defines the entry point for nntrainer to
+ * register a plugin layer
+ */
+extern "C" {
+nntrainer::LayerPluggable ml_train_layer_pluggable{create_yolo_v2_loss_layer,
+                                                   destroy_yolo_v2_loss_layer};
+}
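+
+// When built as a plugin, nntrainer resolves ml_train_layer_pluggable when the
+// shared object is loaded. Built in-tree, the layer can instead be registered
+// directly; a sketch, assuming the usual custom-layer registration path:
+//   app_context.registerFactory(nntrainer::createLayer<custom::YoloV2LossLayer>);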
+
+#endif
+} // namespace custom
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file yolo_v2_loss.h
+ * @date 07 March 2023
+ * @brief This file contains the yolo v2 loss layer
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+#ifndef __YOLO_V2_LOSS_LAYER_H__
+#define __YOLO_V2_LOSS_LAYER_H__
+
+#include <string>
+
+#include <acti_func.h>
+#include <base_properties.h>
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+
+namespace custom {
+
+namespace props {
+
+/**
+ * @brief maximum number of objects in one image for the given dataset
+ *
+ */
+class MaxObjectNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+ MaxObjectNumber(const unsigned &value = 1);
+ static constexpr const char *key = "max_object_number";
+ using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of classes in the given dataset
+ *
+ */
+class ClassNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+ ClassNumber(const unsigned &value = 1);
+ static constexpr const char *key = "class_number";
+ using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of grid cells along the height
+ *
+ */
+class GridHeightNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+ GridHeightNumber(const unsigned &value = 1);
+ static constexpr const char *key = "grid_height_number";
+ using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief number of grid cells along the width
+ *
+ */
+class GridWidthNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+ GridWidthNumber(const unsigned &value = 1);
+ static constexpr const char *key = "grid_width_number";
+ using prop_tag = nntrainer::uint_prop_tag;
+};
+
+} // namespace props
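+
+// A minimal property-setting sketch (hypothetical values): the keys above are
+// passed as strings through Layer::setProperty, e.g.
+//   layer->setProperty({"max_object_number=10", "class_number=4",
+//                       "grid_height_number=13", "grid_width_number=13"});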
+
+/**
+ * @brief Yolo V2 loss layer
+ *
+ */
+class YoloV2LossLayer final : public nntrainer::Layer {
+public:
+ /**
+ * @brief Construct a new YoloV2Loss Layer object
+ *
+ */
+ YoloV2LossLayer();
+
+ /**
+ * @brief Destroy the YoloV2Loss Layer object
+ *
+ */
+ ~YoloV2LossLayer() {}
+
+ /**
+ * @copydoc Layer::finalize(InitLayerContext &context)
+ */
+ void finalize(nntrainer::InitLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+ */
+ void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+ /**
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
+ */
+ void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+ */
+ void exportTo(nntrainer::Exporter &exporter,
+ const ml::train::ExportMethods &method) const override;
+
+ /**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+ void setProperty(const std::vector<std::string> &values) override;
+
+ /**
+ * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
+ */
+ void setBatch(nntrainer::RunLayerContext &context,
+ unsigned int batch) override;
+
+ /**
+ * @copydoc bool supportBackwarding() const
+ */
+ bool supportBackwarding() const override { return true; };
+
+ /**
+ * @copydoc Layer::requireLabel()
+ */
+  bool requireLabel() const override { return true; }
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override { return YoloV2LossLayer::type; };
+
+ inline static const std::string type = "yolo_v2_loss";
+
+private:
+ static constexpr unsigned int NUM_ANCHOR = 5;
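+  // (width, height) anchor priors in grid-cell units; these values match the
+  // standard YOLOv2 anchors obtained by k-means clustering on Pascal VOC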
+ const float anchors_w_buf[NUM_ANCHOR] = {1.3221, 3.19275, 5.05587, 9.47112,
+ 11.2364};
+ const float anchors_h_buf[NUM_ANCHOR] = {1.73145, 4.00944, 8.09892, 4.84053,
+ 10.0071};
+ const nntrainer::Tensor anchors_w;
+ const nntrainer::Tensor anchors_h;
+ nntrainer::Tensor anchors_ratio;
+
+  nntrainer::ActiFunc sigmoid; /**< sigmoid activation operation */
+  nntrainer::ActiFunc softmax; /**< softmax activation operation */
+
+ std::tuple<props::MaxObjectNumber, props::ClassNumber,
+ props::GridHeightNumber, props::GridWidthNumber>
+ yolo_v2_loss_props;
+  std::array<unsigned int, 22> wt_idx; /**< indices of the requested tensors */
+
+ /**
+ * @brief find responsible anchors per object
+ */
+ unsigned int find_responsible_anchors(float bbox_ratio);
+
+ /**
+ * @brief generate ground truth, mask from labels
+ */
+ void generate_ground_truth(nntrainer::RunLayerContext &context);
+};
+
+} // namespace custom
+
+#endif /* __YOLO_V2_LOSS_LAYER_H__ */
endif
subdir('VGG/jni')
subdir('Resnet/jni')
-subdir('YOLO/jni')
+subdir('YOLOv2/jni')
subdir('YOLOv3/jni')
subdir('LLaMA/jni')
subdir('Multi_input/jni')