[TTVD] Add support for AI zoom during video capture 40/315640/5
authorJakub Gajownik <j.gajownik2@samsung.com>
Wed, 22 May 2024 16:27:35 +0000 (18:27 +0200)
committerBot Blink <blinkbot@samsung.com>
Mon, 5 Aug 2024 18:29:49 +0000 (18:29 +0000)
This patch implements an on-device face detection feature.
Captured frames are cropped to the region of the detected
face.

The whole feature is contained within the video capture
component, without any changes to other components or the
pipeline. The video capture device returns already-cropped
video frames, so it integrates seamlessly into the existing
upstream Chromium pipeline.

The feature is supported only on selected boards. Other
boards may request it, but the request will be ignored, as
the implementation relies heavily on NPU capabilities.
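
As a rough illustration (not part of the patch), the sketch below shows
how a normalized crop rectangle reported by the AI zoom helper could be
mapped to even-aligned pixel coordinates, mirroring the RoundDownToEven()
and CalculateCrop() helpers added in video_capture_device_tizen_tv.cc.
The struct names here are hypothetical stand-ins for Chromium's
gfx::RectF and gfx::Rect:

// Illustrative sketch only. The real implementation uses gfx types and
// base::bits::AlignDown instead of the plain structs and bit mask here.
#include <cmath>
#include <cstdio>

struct NormalizedRect {  // Crop region in [0, 1] frame coordinates.
  float x, y, width, height;
};

struct PixelRect {  // Crop region in pixels.
  int x, y, width, height;
};

// Round down to the nearest even value so the crop never splits a 2x2
// chroma block of an NV12/I420 frame.
int RoundDownToEven(float value) {
  return static_cast<int>(std::floor(value)) & ~1;
}

PixelRect CalculateCrop(const NormalizedRect& crop, int frame_width,
                        int frame_height) {
  return {RoundDownToEven(crop.x * frame_width),
          RoundDownToEven(crop.y * frame_height),
          RoundDownToEven(crop.width * frame_width),
          RoundDownToEven(crop.height * frame_height)};
}

int main() {
  // Example: a face region in the right half of a 1280x720 frame.
  const NormalizedRect crop{0.51f, 0.13f, 0.5f, 0.5f};
  const PixelRect pixels = CalculateCrop(crop, 1280, 720);
  std::printf("crop: %d,%d %dx%d\n", pixels.x, pixels.y, pixels.width,
              pixels.height);  // Prints "crop: 652,92 640x360".
  return 0;
}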

Bug: https://jira-eu.sec.samsung.net/browse/VDGAME-505
Change-Id: I9485cb3265deb081ab3c68f94aaa2b9184386518
Signed-off-by: Jakub Gajownik <j.gajownik2@samsung.com>
14 files changed:
media/capture/BUILD.gn
media/capture/video/tizen/ai_zoom_helper.cc [new file with mode: 0644]
media/capture/video/tizen/ai_zoom_helper.h [new file with mode: 0644]
media/capture/video/tizen/create_face_detection_library.cc [new file with mode: 0644]
media/capture/video/tizen/create_face_detection_library.h [new file with mode: 0644]
media/capture/video/tizen/face_detection.h [new file with mode: 0644]
media/capture/video/tizen/face_detection_library.h [new file with mode: 0644]
media/capture/video/tizen/face_detection_library_impl.cc [new file with mode: 0644]
media/capture/video/tizen/face_detection_library_impl.h [new file with mode: 0644]
media/capture/video/tizen/face_detection_library_object.cc [new file with mode: 0644]
media/capture/video/tizen/face_detection_library_object.h [new file with mode: 0644]
media/capture/video/tizen/video_capture_device_tizen_tv.cc
third_party/blink/renderer/modules/mediastream/media_stream_constraints_util_video_device.cc
tizen_src/build/BUILD.gn

index 6718b9acaf3105af32a4813486af135c4b31b441..c2183ffacc8bf17328f3f3bc19235aa2a018c2ba 100644 (file)
@@ -186,7 +186,10 @@ component("capture_lib") {
   }
 
   if (tizen_tv_upstream_multimedia) {
-    configs += [ ":capture_logger_config" ]
+    configs += [
+      ":capture_logger_config",
+    ]
+
     sources += [
       "video/tizen/gpu_memory_buffer_tracker_tizen.cc",
       "video/tizen/gpu_memory_buffer_tracker_tizen.h",
@@ -194,16 +197,43 @@ component("capture_lib") {
 
     if (tizen_version > 60) {
       sources += [
-        "video/tizen/camera.cc",
-        "video/tizen/camera.h",
+        "video/tizen/ai_zoom_helper.cc",
+        "video/tizen/ai_zoom_helper.h",
         "video/tizen/camera_utils.cc",
         "video/tizen/camera_utils.h",
+        "video/tizen/camera.cc",
+        "video/tizen/camera.h",
+        "video/tizen/create_face_detection_library.cc",
+        "video/tizen/create_face_detection_library.h",
+        "video/tizen/face_detection_library.h",
+        "video/tizen/face_detection.h",
         "video/tizen/video_capture_device_factory_tizen_tv.cc",
         "video/tizen/video_capture_device_factory_tizen_tv.h",
         "video/tizen/video_capture_device_tizen_tv.cc",
         "video/tizen/video_capture_device_tizen_tv.h",
       ]
     }
+
+    if (tizen_version >= 70) {
+      sources += [
+        "video/tizen/face_detection_library_object.cc",
+        "video/tizen/face_detection_library_object.h",
+      ]
+      if (!defined(public_configs)) {
+        public_configs = []
+      }
+      public_configs += [
+        # For AIFW
+        "//tizen_src/build:libauto_zoom",
+        # For missing shared library from AIFW
+        "//tizen_src/build:aifw_object_detection",
+      ]
+    } else {
+      sources += [
+        "video/tizen/face_detection_library_impl.cc",
+        "video/tizen/face_detection_library_impl.h",
+      ]
+    }
   }
 
   if (is_android) {
diff --git a/media/capture/video/tizen/ai_zoom_helper.cc b/media/capture/video/tizen/ai_zoom_helper.cc
new file mode 100644 (file)
index 0000000..fd16578
--- /dev/null
@@ -0,0 +1,324 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/capture/video/tizen/ai_zoom_helper.h"
+
+#include "media/base/tizen/logger/media_logger.h"
+#include "media/base/video_frame.h"
+#include "media/capture/video/tizen/create_face_detection_library.h"
+#include "third_party/libyuv/include/libyuv.h"
+
+namespace media {
+
+namespace {
+constexpr static const float kDefaultMoveSpeed = 0.05f;
+constexpr static const float kDefaultSizeSpeed = 0.05f;
+
+gfx::RectF EnlargeToSize(const gfx::RectF& rect, const gfx::SizeF& size) {
+  float new_width = std::max(rect.width(), size.width());
+  float new_height = std::max(rect.height(), size.height());
+  float new_x = rect.x() + (rect.width() - new_width) / 2;
+  float new_y = rect.y() + (rect.height() - new_height) / 2;
+  return gfx::RectF(new_x, new_y, new_width, new_height);
+}
+
+gfx::RectF RectFromBorders(float left, float top, float right, float bottom) {
+  return gfx::RectF{left, top, right - left, bottom - top};
+}
+
+float Interpolate(float start_value, float end_value, double progress) {
+  return start_value + (end_value - start_value) * progress;
+}
+}  // namespace
+
+std::unique_ptr<AiZoomHelper> AiZoomHelper::Create(
+    ResultCb result_cb,
+    base::RepeatingClosure error_cb) {
+  auto result = std::unique_ptr<AiZoomHelper>(
+      new AiZoomHelper(std::move(result_cb), std::move(error_cb)));
+  if (!result->Initialize()) {
+    TIZEN_MEDIA_LOG_NO_INSTANCE(ERROR) << "Error initializing AI zoom helper";
+    return nullptr;
+  }
+  return result;
+}
+
+AiZoomHelper::AiZoomHelper(ResultCb result_cb, base::RepeatingClosure error_cb)
+    : interpolator_(kDefaultMoveSpeed, kDefaultSizeSpeed),
+      result_cb_(std::move(result_cb)),
+      error_cb_(std::move(error_cb)) {}
+
+bool AiZoomHelper::Initialize() {
+  face_detection_ = CreateFaceDetectionLibrary();
+  if (!face_detection_) {
+    TIZEN_MEDIA_LOG(ERROR) << "Error when creating face detector";
+    return false;
+  }
+
+  const gfx::Size image_size = face_detection_->InputSize();
+
+  scaled_y_.resize(
+      VideoFrame::PlaneSize(PIXEL_FORMAT_NV12, VideoFrame::kYPlane, image_size)
+          .GetArea());
+  scaled_uv_.resize(
+      VideoFrame::PlaneSize(PIXEL_FORMAT_NV12, VideoFrame::kUVPlane, image_size)
+          .GetArea());
+  bgr_.resize(VideoFrame::AllocationSize(PIXEL_FORMAT_RGB24, image_size));
+  return true;
+}
+
+void AiZoomHelper::AnalyzeNV12(const gfx::Size& image_size,
+                               gfx::GpuMemoryBuffer* gpu_memory_buffer) {
+  const gfx::Size target_size = face_detection_->InputSize();
+  libyuv::NV12Scale(reinterpret_cast<const uint8_t*>(
+                        gpu_memory_buffer->memory(VideoFrame::kYPlane)),
+                    gpu_memory_buffer->stride(0),
+                    reinterpret_cast<const uint8_t*>(
+                        gpu_memory_buffer->memory(VideoFrame::kUVPlane)),
+                    gpu_memory_buffer->stride(1), image_size.width(),
+                    image_size.height(), scaled_y_.data(), target_size.width(),
+                    scaled_uv_.data(), target_size.width(), target_size.width(),
+                    target_size.height(), libyuv::kFilterLinear);
+
+  libyuv::NV12ToRAW(
+      scaled_y_.data(),
+      VideoFrame::RowBytes(VideoFrame::kYPlane, PIXEL_FORMAT_NV12,
+                           target_size.width()),
+      scaled_uv_.data(),
+      VideoFrame::RowBytes(VideoFrame::kUVPlane, PIXEL_FORMAT_NV12,
+                           target_size.width()),
+      bgr_.data(),
+      VideoFrame::RowBytes(VideoFrame::kARGBPlane, PIXEL_FORMAT_RGB24,
+                           target_size.width()),
+      target_size.width(), target_size.height());
+
+  Analyze();
+}
+
+void AiZoomHelper::AnalyzeI420(const gfx::Size& image_size,
+                               const uint8_t* y_data,
+                               size_t y_stride,
+                               const uint8_t* u_data,
+                               size_t u_stride,
+                               const uint8_t* v_data,
+                               size_t v_stride) {
+  const gfx::Size scaled_size = face_detection_->InputSize();
+  uint8_t* dst_y = scaled_y_.data();
+  uint8_t* dst_u = scaled_uv_.data();
+  uint8_t* dst_v =
+      scaled_uv_.data() +
+      VideoFrame::PlaneSize(PIXEL_FORMAT_I420, VideoFrame::kUPlane, scaled_size)
+          .GetArea();
+
+  libyuv::I420Scale(y_data, y_stride, u_data, u_stride, v_data, v_stride,
+                    image_size.width(), image_size.height(), dst_y,
+                    VideoFrame::RowBytes(VideoFrame::kYPlane, PIXEL_FORMAT_I420,
+                                         scaled_size.width()),
+                    dst_u,
+                    VideoFrame::RowBytes(VideoFrame::kUPlane, PIXEL_FORMAT_I420,
+                                         scaled_size.width()),
+                    dst_v,
+                    VideoFrame::RowBytes(VideoFrame::kVPlane, PIXEL_FORMAT_I420,
+                                         scaled_size.width()),
+                    scaled_size.width(), scaled_size.height(),
+                    libyuv::kFilterLinear);
+
+  libyuv::I420ToRAW(
+      dst_y,
+      VideoFrame::RowBytes(VideoFrame::kYPlane, PIXEL_FORMAT_I420,
+                           scaled_size.width()),
+      dst_u,
+      VideoFrame::RowBytes(VideoFrame::kUPlane, PIXEL_FORMAT_I420,
+                           scaled_size.width()),
+      dst_v,
+      VideoFrame::RowBytes(VideoFrame::kVPlane, PIXEL_FORMAT_I420,
+                           scaled_size.width()),
+      bgr_.data(),
+      VideoFrame::RowBytes(VideoFrame::kARGBPlane, PIXEL_FORMAT_RGB24,
+                           scaled_size.width()),
+      scaled_size.width(), scaled_size.height());
+
+  Analyze();
+}
+
+void Interpolator::SetNewDestination(const gfx::RectF& destination) {
+  auto now = base::TimeTicks::Now();
+  auto current_rect = GetCurrent();
+
+  // Calculate duration.
+  auto duration_size =
+      std::abs(current_rect.width() - destination.width()) / size_speed_;
+  auto duration_x =
+      std::abs(current_rect.CenterPoint().x() - destination.CenterPoint().x()) /
+      origin_speed_;
+  auto duration_y =
+      std::abs(current_rect.CenterPoint().y() - destination.CenterPoint().y()) /
+      origin_speed_;
+  auto duration_move =
+      std::sqrt(duration_x * duration_x + duration_y * duration_y);
+  auto duration = base::Seconds(0.7f * duration_move + 0.3f * duration_size);
+
+  start_.time = now;
+  start_.rect = current_rect;
+  end_.time = now + duration;
+  end_.rect = destination;
+}
+
+gfx::RectF Interpolator::GetCurrent() {
+  auto now = base::TimeTicks::Now();
+  if (now >= end_.time) {
+    return end_.rect;
+  }
+
+  auto progress = (now - start_.time) / (end_.time - start_.time);
+  return RectFromBorders(
+      Interpolate(start_.rect.x(), end_.rect.x(), progress),
+      Interpolate(start_.rect.y(), end_.rect.y(), progress),
+      Interpolate(start_.rect.right(), end_.rect.right(), progress),
+      Interpolate(start_.rect.bottom(), end_.rect.bottom(), progress));
+}
+
+void AiZoomHelper::Analyze() {
+  std::vector<gfx::RectF> faces =
+      face_detection_->DetectFace(base::make_span(bgr_.data(), bgr_.size()));
+
+  auto maybe_new_crop = UpdateCrop(faces);
+  if (maybe_new_crop) {
+    TIZEN_MEDIA_LOG(VERBOSE)
+        << "New target destination: " << maybe_new_crop->ToString();
+    interpolator_.SetNewDestination(*maybe_new_crop);
+  }
+
+  auto interpolated = interpolator_.GetCurrent();
+  result_cb_.Run(interpolated);
+}
+
+namespace {
+float CalculateIntersectionOverUnion(const gfx::RectF& first,
+                                     const gfx::RectF& second) {
+  auto intersection_rect = first;
+  intersection_rect.Intersect(second);
+  auto union_rect = first;
+  union_rect.Union(second);
+  return intersection_rect.size().GetArea() / union_rect.size().GetArea();
+}
+
+gfx::RectF UnionRectsWithThreshold(const std::vector<gfx::RectF>& rects,
+                                   float threshold) {
+  gfx::RectF result;
+  for (const auto rect : rects) {
+    if (rect.size().GetArea() <= threshold) {
+      continue;
+    }
+
+    result.Union(rect);
+  }
+  return result;
+}
+
+gfx::RectF ApplyCropUsingRegion(gfx::RectF region) {
+  constexpr const float kMinCropRatio = 0.75f;
+  constexpr const float kVerticalPaddingRatio = 0.75f;
+
+  // Step 1: Outset with margin.
+  float margin = kVerticalPaddingRatio * region.height();
+  region.Outset(gfx::OutsetsF::VH(margin, 0.0f));
+  region.AdjustToFit(gfx::RectF(1.0f, 1.0f));
+
+  // Step 2: Apply minimal size.
+  const auto desired_size =
+      std::max(kMinCropRatio, std::max(region.width(), region.height()));
+  region = EnlargeToSize(region, gfx::SizeF(desired_size, desired_size));
+
+  // Step 3: Move region so it fits in (0.0f, 0.0f, 1.0f, 1.0f).
+  region.set_x(std::clamp(region.x(), 0.0f, 1.0f - region.width()));
+  region.set_y(std::clamp(region.y(), 0.0f, 1.0f - region.height()));
+
+  // Step 4: Apply minimal size on border
+  const auto border_desired_size =
+      std::max(std::max(region.width(), region.height()), kMinCropRatio);
+  if (region.width() != border_desired_size) {
+    if (region.x() == 0.0f) {
+      region.set_width(border_desired_size);
+    } else if (region.right() == 1.0f) {
+      region.set_x(1.0f - border_desired_size);
+      region.set_width(border_desired_size);
+    }
+  }
+  if (region.height() != border_desired_size) {
+    if (region.y() == 0.0f) {
+      region.set_height(border_desired_size);
+    } else if (region.bottom() == 1.0f) {
+      region.set_y(1.0f - border_desired_size);
+      region.set_height(border_desired_size);
+    }
+  }
+
+  return region;
+}
+}  // namespace
+
+absl::optional<gfx::RectF> AiZoomHelper::UpdateCrop(
+    const std::vector<gfx::RectF>& faces) {
+  // Step 1: Merge faces into single.
+  gfx::RectF super_face = UnionRectsWithThreshold(faces, 0.0225f);
+  if (super_face.IsEmpty()) {
+    missing_frames_++;
+  } else {
+    missing_frames_ = 0;
+  }
+
+  constexpr const size_t kFramesToDetectInactivity = 32;
+  if (missing_frames_ == kFramesToDetectInactivity) {
+    // Reset crop after inactivity period.
+    destination_face_ = gfx::RectF(1.0f, 1.0f);
+    return gfx::RectF(1.0f, 1.0f);
+  } else if (missing_frames_ != 0) {
+    // No new information, skip updating.
+    return absl::nullopt;
+  }
+
+  // Step 2: Prevent from updating crop immediately when there was only
+  //         small change.
+  const auto percent_intersection =
+      CalculateIntersectionOverUnion(destination_face_, super_face);
+  if (percent_intersection >= 0.5f) {
+    TIZEN_MEDIA_LOG(VERBOSE) << "Intersection too big, ROI has not changed";
+    return absl::nullopt;
+  }
+
+  const bool moved_x = std::abs(super_face.CenterPoint().x() -
+                                last_super_face_.CenterPoint().x()) >= 0.2f;
+  const bool moved_y = std::abs(super_face.CenterPoint().y() -
+                                last_super_face_.CenterPoint().y()) >= 0.01f;
+  if (!moved_x && !moved_y) {
+    TIZEN_MEDIA_LOG(VERBOSE) << "Movement not enough";
+    return absl::nullopt;
+  }
+
+  const auto percent_intersection_to_last =
+      CalculateIntersectionOverUnion(last_super_face_, super_face);
+  if (similar_frames_ != 0 || percent_intersection_to_last < 0.5f) {
+    similar_frames_++;
+  } else {
+    last_super_face_ = super_face;
+    similar_frames_ = 1;
+  }
+
+  if (similar_frames_ <= 8) {
+    TIZEN_MEDIA_LOG(VERBOSE) << "Similar frame";
+    return absl::nullopt;
+  }
+
+  similar_frames_ = 0;
+  destination_face_ = super_face;
+
+  // Step 3: Enlarge region.
+  return ApplyCropUsingRegion(super_face);
+}
+
+}  // namespace media
diff --git a/media/capture/video/tizen/ai_zoom_helper.h b/media/capture/video/tizen/ai_zoom_helper.h
new file mode 100644 (file)
index 0000000..fef02d2
--- /dev/null
@@ -0,0 +1,93 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_AI_ZOOM_HELPER_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_AI_ZOOM_HELPER_H_
+
+#include <memory>
+#include <vector>
+
+#include "base/functional/callback.h"
+#include "base/time/time.h"
+#include "media/capture/video/tizen/face_detection_library.h"
+#include "ui/gfx/geometry/rect_f.h"
+#include "ui/gfx/gpu_memory_buffer.h"
+
+namespace media {
+
+class Interpolator {
+ public:
+  Interpolator(float origin_speed, float size_speed)
+      : origin_speed_(origin_speed), size_speed_(size_speed) {}
+
+  void SetNewDestination(const gfx::RectF& destination);
+  gfx::RectF GetCurrent();
+
+ private:
+  const float origin_speed_;
+  const float size_speed_;
+
+  struct Keyframe {
+    base::TimeTicks time;
+    gfx::RectF rect{1.0f, 1.0f};
+  };
+
+  Keyframe start_;
+  Keyframe end_;
+};
+
+class AiZoomHelper {
+ public:
+  ~AiZoomHelper() = default;
+
+  AiZoomHelper(const AiZoomHelper&) = delete;
+  AiZoomHelper& operator=(const AiZoomHelper&) = delete;
+
+  AiZoomHelper(AiZoomHelper&&) = delete;
+  AiZoomHelper& operator=(AiZoomHelper&&) = delete;
+
+  using ResultCb = base::RepeatingCallback<void(gfx::RectF)>;
+  static std::unique_ptr<AiZoomHelper> Create(ResultCb result_cb,
+                                              base::RepeatingClosure error_cb);
+
+  void AnalyzeNV12(const gfx::Size& image_size,
+                   gfx::GpuMemoryBuffer* gpu_memory_buffer);
+  void AnalyzeI420(const gfx::Size& image_size,
+                   const uint8_t* y_data,
+                   size_t y_stride,
+                   const uint8_t* u_data,
+                   size_t u_stride,
+                   const uint8_t* v_data,
+                   size_t v_stride);
+
+ private:
+  AiZoomHelper(ResultCb result_cb, base::RepeatingClosure error_cb);
+
+  bool Initialize();
+
+  void Analyze();
+
+  absl::optional<gfx::RectF> UpdateCrop(const std::vector<gfx::RectF>& faces);
+
+  std::unique_ptr<FaceDetectionLibrary> face_detection_;
+
+  Interpolator interpolator_;
+
+  std::vector<uint8_t> scaled_y_;
+  std::vector<uint8_t> scaled_uv_;
+  std::vector<uint8_t> bgr_;
+
+  size_t missing_frames_ = 0;
+  size_t similar_frames_ = 0;
+  gfx::RectF last_super_face_{1.0f, 1.0f};
+  gfx::RectF destination_face_{1.0f, 1.0f};
+
+  ResultCb result_cb_;
+  base::RepeatingClosure error_cb_;
+};
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_AI_ZOOM_HELPER_H_
diff --git a/media/capture/video/tizen/create_face_detection_library.cc b/media/capture/video/tizen/create_face_detection_library.cc
new file mode 100644 (file)
index 0000000..aa07d32
--- /dev/null
@@ -0,0 +1,24 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/capture/video/tizen/create_face_detection_library.h"
+#include "build/tizen_version.h"
+
+#if TIZEN_VERSION_AT_LEAST(7, 0, 0)
+#include "media/capture/video/tizen/face_detection_library_object.h"
+#else
+#include "media/capture/video/tizen/face_detection_library_impl.h"
+#endif
+
+namespace media {
+
+std::unique_ptr<FaceDetectionLibrary> CreateFaceDetectionLibrary() {
+#if TIZEN_VERSION_AT_LEAST(7, 0, 0)
+  return FaceDetectionLibraryObject::Create();
+#else
+  return FaceDetectionLibraryImpl::Create();
+#endif
+}
+
+}  // namespace media
diff --git a/media/capture/video/tizen/create_face_detection_library.h b/media/capture/video/tizen/create_face_detection_library.h
new file mode 100644 (file)
index 0000000..e97077c
--- /dev/null
@@ -0,0 +1,18 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_CREATE_FACE_DETECTION_LIBRARY_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_CREATE_FACE_DETECTION_LIBRARY_H_
+
+#include <memory>
+
+#include "media/capture/video/tizen/face_detection_library.h"
+
+namespace media {
+
+std::unique_ptr<FaceDetectionLibrary> CreateFaceDetectionLibrary();
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_CREATE_FACE_DETECTION_LIBRARY_H_
diff --git a/media/capture/video/tizen/face_detection.h b/media/capture/video/tizen/face_detection.h
new file mode 100644 (file)
index 0000000..832340b
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_H_
+
+// Note: this file mirrors the interface of a platform library that is
+// loaded as a shared library, so no changes should be made here!
+
+namespace media {
+
+#define MAX_NUM_FACE 1000
+#define NUM_LANDMARK 5
+
+struct FaceLandmark {
+  float x;
+  float y;
+};
+
+struct FaceRect {
+  float score;                          // confidence score
+  float x;                              // coordinate x
+  float y;                              // coordinate y
+  float width;                          // width of face box
+  float height;                         // height of face box
+  FaceLandmark landmark[NUM_LANDMARK];  // landmark points of the face
+};
+
+struct FaceDetected {
+  FaceRect face[MAX_NUM_FACE];  // detailed info of the detected faces
+  int num_face;                 // number of detected faces
+};
+
+struct ModelInput {
+  const unsigned char* data;  // input image data
+};
+
+void DetectFace(ModelInput* in, bool with_landmark, FaceDetected* res);
+bool LoadFaceModel(const char* model_path,
+                   int width,
+                   int height,
+                   double confidence_threshold);
+void ReleaseFaceModel();
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_H_
\ No newline at end of file
diff --git a/media/capture/video/tizen/face_detection_library.h b/media/capture/video/tizen/face_detection_library.h
new file mode 100644 (file)
index 0000000..09bcb7a
--- /dev/null
@@ -0,0 +1,27 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_H_
+
+#include <vector>
+
+#include "base/containers/span.h"
+#include "ui/gfx/geometry/rect_f.h"
+#include "ui/gfx/geometry/size.h"
+
+namespace media {
+
+class FaceDetectionLibrary {
+ public:
+  FaceDetectionLibrary() = default;
+  virtual ~FaceDetectionLibrary() = default;
+
+  virtual gfx::Size InputSize() const = 0;
+  virtual std::vector<gfx::RectF> DetectFace(base::span<uint8_t> data) = 0;
+};
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_H_
diff --git a/media/capture/video/tizen/face_detection_library_impl.cc b/media/capture/video/tizen/face_detection_library_impl.cc
new file mode 100644 (file)
index 0000000..585e208
--- /dev/null
@@ -0,0 +1,160 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/capture/video/tizen/face_detection_library_impl.h"
+
+#include "base/files/file_util.h"
+#include "base/json/json_reader.h"
+#include "base/logging.h"
+#include "media/base/video_frame.h"
+#include "third_party/libyuv/include/libyuv/planar_functions.h"
+
+namespace media {
+
+namespace {
+constexpr static const char* kConfigFileLocation =
+    "/opt/usr/apps/com.samsung.tv.nvision-analysis-dbmgr/shared/res/"
+    "data_nvision_face_detector/config.dat";
+}
+
+std::unique_ptr<FaceDetectionLibraryImpl> FaceDetectionLibraryImpl::Create() {
+  constexpr const char* kLibFaceDetectionPath = "/usr/lib/libface-detection.so";
+  auto native_library =
+      base::ScopedNativeLibrary(base::FilePath(kLibFaceDetectionPath));
+  if (!native_library.is_valid()) {
+    LOG(INFO) << "Error when loading shared library: " << kLibFaceDetectionPath;
+    return nullptr;
+  }
+
+  auto result = std::unique_ptr<FaceDetectionLibraryImpl>(
+      new FaceDetectionLibraryImpl(std::move(native_library)));
+  if (!result->LoadSymbols()) {
+    LOG(ERROR) << "Error when loading symbols";
+    return nullptr;
+  }
+
+  if (!result->LoadConfig()) {
+    LOG(ERROR) << "Error when loading config";
+    return nullptr;
+  }
+
+  if (!result->load_face_model_func_(
+          result->model_path_.c_str(), result->input_size_.width(),
+          result->input_size_.height(), result->confidence_threshold_)) {
+    LOG(INFO) << "Error when loading face model";
+    return nullptr;
+  }
+
+  return result;
+}
+
+FaceDetectionLibraryImpl::FaceDetectionLibraryImpl(
+    base::ScopedNativeLibrary native_library)
+    : native_library_(std::move(native_library)) {}
+
+FaceDetectionLibraryImpl::~FaceDetectionLibraryImpl() {
+  if (release_face_model_func_) {
+    release_face_model_func_();
+  }
+}
+
+gfx::Size FaceDetectionLibraryImpl::InputSize() const {
+  return input_size_;
+}
+
+std::vector<gfx::RectF> FaceDetectionLibraryImpl::DetectFace(
+    base::span<uint8_t> data) {
+  ModelInput input;
+
+  std::vector<uint8_t> temp_buffer;
+  temp_buffer.resize(
+      VideoFrame::AllocationSize(PIXEL_FORMAT_RGB24, input_size_));
+
+  uint8_t* dst_r = temp_buffer.data();
+  uint8_t* dst_g = dst_r + input_size_.GetArea();
+  uint8_t* dst_b = dst_g + input_size_.GetArea();
+  libyuv::SplitRGBPlane(
+      data.data(),
+      VideoFrame::RowBytes(VideoFrame::kARGBPlane, PIXEL_FORMAT_RGB24,
+                           input_size_.width()),
+      dst_r, input_size_.width(), dst_g, input_size_.width(), dst_b,
+      input_size_.width(), input_size_.width(), input_size_.height());
+
+  input.data = temp_buffer.data();
+  FaceDetected result;
+  detect_face_func_(&input, false, &result);
+  LOG(INFO) << "Detected faces: " << result.num_face;
+
+  std::vector<gfx::RectF> faces;
+  faces.reserve(result.num_face);
+  for (auto i = 0; i < result.num_face; ++i) {
+    faces.emplace_back(result.face[i].x, result.face[i].y, result.face[i].width,
+                       result.face[i].height);
+  }
+  return faces;
+}
+
+bool FaceDetectionLibraryImpl::LoadConfig() {
+  std::string config_json;
+  if (!base::ReadFileToString(base::FilePath{kConfigFileLocation},
+                              &config_json)) {
+    return false;
+  }
+
+  auto config_value = base::JSONReader::Read(config_json);
+  if (!config_value || !config_value->is_dict()) {
+    return false;
+  }
+
+  auto config_dict = std::move(*config_value).TakeDict();
+
+  auto maybe_model_path = config_dict.Find("modelPath");
+  if (!maybe_model_path || !maybe_model_path->is_string()) {
+    return false;
+  }
+
+  auto maybe_width = config_dict.Find("inputWidth");
+  if (!maybe_width || !maybe_width->is_int()) {
+    return false;
+  }
+
+  auto maybe_height = config_dict.Find("inputHeight");
+  if (!maybe_height || !maybe_height->is_int()) {
+    return false;
+  }
+
+  auto maybe_confidence_threshold = config_dict.Find("confidenceThreshold");
+  if (!maybe_confidence_threshold || !maybe_confidence_threshold->is_double()) {
+    return false;
+  }
+
+  model_path_ = maybe_model_path->GetString();
+  input_size_ = gfx::Size(maybe_width->GetInt(), maybe_height->GetInt());
+  confidence_threshold_ = maybe_confidence_threshold->GetDouble();
+  return true;
+}
+
+bool FaceDetectionLibraryImpl::LoadSymbols() {
+  detect_face_func_ = reinterpret_cast<DetectFaceFunction>(
+      native_library_.GetFunctionPointer("detectFace"));
+  if (!detect_face_func_) {
+    LOG(ERROR) << "Error loading function: detectFace";
+    return false;
+  }
+  load_face_model_func_ = reinterpret_cast<LoadFaceModelFunction>(
+      native_library_.GetFunctionPointer("LoadFaceModel"));
+  if (!load_face_model_func_) {
+    LOG(ERROR) << "Error loading function: LoadFaceModel";
+    return false;
+  }
+  release_face_model_func_ = reinterpret_cast<ReleaseFaceModelFunction>(
+      native_library_.GetFunctionPointer("ReleaseFaceModel"));
+  if (!release_face_model_func_) {
+    LOG(ERROR) << "Error loading function: ReleaseFaceModel";
+    return false;
+  }
+  return true;
+}
+
+}  // namespace media
diff --git a/media/capture/video/tizen/face_detection_library_impl.h b/media/capture/video/tizen/face_detection_library_impl.h
new file mode 100644 (file)
index 0000000..a20a286
--- /dev/null
@@ -0,0 +1,49 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_IMPL_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_IMPL_H_
+
+#include <memory>
+
+#include "base/scoped_native_library.h"
+#include "media/capture/video/tizen/face_detection.h"
+#include "media/capture/video/tizen/face_detection_library.h"
+
+namespace media {
+
+class FaceDetectionLibraryImpl : public FaceDetectionLibrary {
+ public:
+  ~FaceDetectionLibraryImpl() override;
+
+  // FaceDetectionLibrary implementation.
+  gfx::Size InputSize() const override;
+  std::vector<gfx::RectF> DetectFace(base::span<uint8_t> data) override;
+
+  static std::unique_ptr<FaceDetectionLibraryImpl> Create();
+
+ private:
+  explicit FaceDetectionLibraryImpl(base::ScopedNativeLibrary native_library);
+
+  bool LoadConfig();
+  bool LoadSymbols();
+
+  std::string model_path_;
+  gfx::Size input_size_;
+  double confidence_threshold_ = 0.0;
+
+  base::ScopedNativeLibrary native_library_;
+
+  using DetectFaceFunction = decltype(&media::DetectFace);
+  using LoadFaceModelFunction = decltype(&LoadFaceModel);
+  using ReleaseFaceModelFunction = decltype(&ReleaseFaceModel);
+
+  DetectFaceFunction detect_face_func_ = nullptr;
+  LoadFaceModelFunction load_face_model_func_ = nullptr;
+  ReleaseFaceModelFunction release_face_model_func_ = nullptr;
+};
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_IMPL_H_
diff --git a/media/capture/video/tizen/face_detection_library_object.cc b/media/capture/video/tizen/face_detection_library_object.cc
new file mode 100644 (file)
index 0000000..7ffa47e
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/capture/video/tizen/face_detection_library_object.h"
+
+namespace media {
+
+namespace {
+constexpr static const size_t kDefaultWidth = 320;
+constexpr static const size_t kDefaultHeight = 320;
+}  // namespace
+
+std::unique_ptr<FaceDetectionLibraryObject>
+FaceDetectionLibraryObject::Create() {
+  auto result = std::unique_ptr<FaceDetectionLibraryObject>(
+      new FaceDetectionLibraryObject());
+  if (object_detection_create(&result->processor_) != AIFW_RESULT_SUCCESS) {
+    return nullptr;
+  }
+
+  if (object_detection_init(result->processor_, OBJECT_DETECTION_TYPE_FACE,
+                            OBJECT_DETECTION_TRACKING_MODE_NONE) !=
+      AIFW_RESULT_SUCCESS) {
+    return nullptr;
+  }
+  return result;
+}
+
+FaceDetectionLibraryObject::~FaceDetectionLibraryObject() {
+  object_detection_deinit(processor_);
+  object_detection_destroy(processor_);
+}
+
+gfx::Size FaceDetectionLibraryObject::InputSize() const {
+  return {kDefaultWidth, kDefaultHeight};
+}
+
+std::vector<gfx::RectF> FaceDetectionLibraryObject::DetectFace(
+    base::span<uint8_t> data) {
+  aifw_raw_image_s frame;
+  frame.pixel_format = IMAGE_FORMAT_BGR;
+  frame.width = kDefaultWidth;
+  frame.height = kDefaultHeight;
+  frame.buffer = data.data();
+
+  object_detection_info_s* outputs;
+  int outputs_num = 0;
+  if (object_detection_run(processor_, &frame, &outputs, &outputs_num) !=
+      AIFW_RESULT_SUCCESS) {
+    return {};
+  }
+
+  std::vector<gfx::RectF> faces;
+  faces.reserve(outputs_num);
+  for (auto i = 0; i < outputs_num; ++i) {
+    faces.emplace_back(outputs[i].bound.x, outputs[i].bound.y,
+                       outputs[i].bound.width, outputs[i].bound.height);
+  }
+  return faces;
+}
+
+}  // namespace media
diff --git a/media/capture/video/tizen/face_detection_library_object.h b/media/capture/video/tizen/face_detection_library_object.h
new file mode 100644 (file)
index 0000000..4dd007d
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright 2024 Samsung Electronics Inc. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_OBJECT_H_
+#define MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_OBJECT_H_
+
+#include <memory>
+
+#include <aifw_core/vision/object_detection.h>
+
+#include "media/capture/video/tizen/face_detection_library.h"
+
+namespace media {
+
+class FaceDetectionLibraryObject : public FaceDetectionLibrary {
+ public:
+  ~FaceDetectionLibraryObject() override;
+
+  // FaceDetectionLibrary implementation.
+  gfx::Size InputSize() const override;
+  std::vector<gfx::RectF> DetectFace(base::span<uint8_t> data) override;
+
+  static std::unique_ptr<FaceDetectionLibraryObject> Create();
+
+ private:
+  FaceDetectionLibraryObject() = default;
+
+  object_detection_h processor_;
+};
+
+}  // namespace media
+
+#endif  // MEDIA_CAPTURE_VIDEO_TIZEN_FACE_DETECTION_LIBRARY_OBJECT_H_
index 69e4e85ffbbb742d83863e8106370625ff3061b0..1c36fd21a946705b4d656dd8428571f4d03b74f8 100644 (file)
@@ -7,6 +7,7 @@
 #include <array>
 #include <memory>
 
+#include "base/bits.h"
 #include "base/functional/bind.h"
 #include "base/functional/callback_forward.h"
 #include "base/functional/callback_helpers.h"
@@ -28,6 +29,7 @@
 #include "media/base/video_transformation.h"
 #include "media/base/video_types.h"
 #include "media/capture/capture_switches.h"
+#include "media/capture/video/tizen/ai_zoom_helper.h"
 #include "media/capture/video/tizen/camera.h"
 #include "media/capture/video/tizen/camera_utils.h"
 #include "media/capture/video/video_capture_device.h"
@@ -179,6 +181,10 @@ camera_pixel_format_e SelectPreviewFormat(
   return formats.front();
 }
 
+int RoundDownToEven(float value) {
+  return base::bits::AlignDown(static_cast<int>(std::floor(value)), 2);
+}
+
 }  // namespace
 
 class VideoCaptureDeviceTizenTv::Impl {
@@ -212,6 +218,9 @@ class VideoCaptureDeviceTizenTv::Impl {
 
   bool PrepareVideoDecoder();
 
+  void OnAiZoomRect(gfx::RectF rect);
+  gfx::Rect CalculateCrop(const gfx::Size image_size);
+
   bool suspended_ = false;
 
   Camera* const camera_;
@@ -223,6 +232,9 @@ class VideoCaptureDeviceTizenTv::Impl {
   std::unique_ptr<TTvdVideoDecoderBase> decoder_;
   media::VideoRotation last_preview_rotation_{VIDEO_ROTATION_0};
 
+  gfx::RectF crop_;
+  std::unique_ptr<AiZoomHelper> ai_zoom_helper_;
+
   // The timestamp of the first frame received from platform API.
   absl::optional<base::TimeDelta> first_frame_timestamp_;
 
@@ -231,7 +243,7 @@ class VideoCaptureDeviceTizenTv::Impl {
 };
 
 VideoCaptureDeviceTizenTv::Impl::Impl(Camera* camera)
-    : camera_(camera), client_(nullptr) {
+    : camera_(camera), client_(nullptr), crop_(1.0f, 1.0f) {
   TIZEN_MEDIA_LOG(INFO);
   LOG_ASSERT(camera);
 
@@ -254,7 +266,8 @@ void VideoCaptureDeviceTizenTv::Impl::AllocateAndStart(
                         << ", buffer type: "
                         << static_cast<int>(params.buffer_type)
                         << ", GPU memory buffer: "
-                        << switches::IsVideoCaptureUseGpuMemoryBufferEnabled();
+                        << switches::IsVideoCaptureUseGpuMemoryBufferEnabled()
+                        << ", face detection: " << params.enable_face_detection;
 
   LOG_ASSERT(client);
   LOG_ASSERT(!client_);
@@ -276,6 +289,20 @@ void VideoCaptureDeviceTizenTv::Impl::AllocateAndStart(
   // For now mark it's started early. There's some problems with
   // initialization and camera device is being deallocated.
   client_->OnStarted();
+
+  if (params_.enable_face_detection) {
+    TIZEN_MEDIA_LOG(INFO) << "Enable AI Zoom";
+    ai_zoom_helper_ = AiZoomHelper::Create(
+        BindToCurrentLoop(
+            base::BindRepeating(&VideoCaptureDeviceTizenTv::Impl::OnAiZoomRect,
+                                weak_factory_.GetWeakPtr())),
+        base::BindRepeating(
+            []() { TIZEN_MEDIA_LOG_NO_INSTANCE(ERROR) << "AI zoom error!"; }));
+    if (!ai_zoom_helper_) {
+      TIZEN_MEDIA_LOG(ERROR) << "Cannot create AI zoom";
+      return;
+    }
+  }
 }
 
 DeviceCaptureError VideoCaptureDeviceTizenTv::Impl::ConfigureCamera() {
@@ -511,6 +538,9 @@ void VideoCaptureDeviceTizenTv::Impl::ProcessRawCameraCapture(
 
   const gfx::Size image_size{frame->width, frame->height};
 
+  const gfx::Rect cropped = CalculateCrop(image_size);
+  TIZEN_MEDIA_LOG(VERBOSE) << "Cropped: " << cropped.ToString();
+
   const uint8_t* data = frame->data.single_plane.yuv;
   const size_t size = frame->data.single_plane.size;
   auto current_frame_time = base::Milliseconds(frame->timestamp);
@@ -561,6 +591,13 @@ void VideoCaptureDeviceTizenTv::Impl::ProcessRawCameraCapture(
     return;
   }
 
+  if (ai_zoom_helper_) {
+    ai_zoom_helper_->AnalyzeI420(
+        image_size, i420_access.y_plane_data, i420_access.y_plane_stride,
+        i420_access.u_plane_data, i420_access.uv_plane_stride,
+        i420_access.v_plane_data, i420_access.uv_plane_stride);
+  }
+
   const VideoCaptureFormat output_format = VideoCaptureFormat(
       dimensions, capture_format.frame_rate, PIXEL_FORMAT_I420);
 
@@ -575,8 +612,8 @@ void VideoCaptureDeviceTizenTv::Impl::ProcessRawCameraCapture(
   client_->OnIncomingCapturedBufferExt(
       std::move(buffer), output_format, gfx::ColorSpace(),
       base::TimeTicks::Now(),
-      current_frame_time - first_frame_timestamp_.value(),
-      gfx::Rect(image_size), std::move(metadata));
+      current_frame_time - first_frame_timestamp_.value(), cropped,
+      std::move(metadata));
 }
 
 void VideoCaptureDeviceTizenTv::Impl::OnDecoderInitialized(
@@ -715,6 +752,9 @@ void VideoCaptureDeviceTizenTv::Impl::OnDecodedFrame(RawFrame frame) {
     }
   }
 
+  const gfx::Rect cropped = CalculateCrop(frame.image_size);
+  TIZEN_MEDIA_LOG(VERBOSE) << "Cropped: " << cropped.ToString();
+
   params_.requested_format.pixel_format = pixel_format;
   if (params_.requested_format.frame_size != frame.image_size) {
     TIZEN_MEDIA_LOG(INFO) << "Captured different frame size: "
@@ -723,6 +763,29 @@ void VideoCaptureDeviceTizenTv::Impl::OnDecodedFrame(RawFrame frame) {
     params_.requested_format.frame_size = frame.image_size;
   }
 
+  if (ai_zoom_helper_) {
+    auto gpu_memory_buffer =
+        gpu_memory_buffer_support_.CreateGpuMemoryBufferImplFromHandle(
+            buffer.handle_provider->GetGpuMemoryBufferHandle(),
+            frame.image_size, gfx::BufferFormat::YUV_420_BIPLANAR,
+            gfx::BufferUsage::SCANOUT_VEA_CPU_READ, base::DoNothing());
+    if (!gpu_memory_buffer) {
+      TIZEN_MEDIA_LOG(ERROR) << "Invalid gpu memory buffer, drop";
+      client_->OnFrameDropped(
+          VideoCaptureFrameDropReason::kBufferPoolBufferAllocationFailed);
+      return;
+    }
+
+    if (!gpu_memory_buffer->Map()) {
+      TIZEN_MEDIA_LOG(ERROR) << "Cannot map gpu memory buffer, drop";
+      client_->OnFrameDropped(
+          VideoCaptureFrameDropReason::kBufferPoolBufferAllocationFailed);
+      return;
+    }
+    ai_zoom_helper_->AnalyzeNV12(frame.image_size, gpu_memory_buffer.get());
+    gpu_memory_buffer->Unmap();
+  }
+
   VideoFrameMetadata metadata;
   metadata.transformation = GetVideoRotation();
 
@@ -734,7 +797,23 @@ void VideoCaptureDeviceTizenTv::Impl::OnDecodedFrame(RawFrame frame) {
   client_->OnIncomingCapturedBufferExt(
       std::move(buffer), params_.requested_format, gfx::ColorSpace(),
       base::TimeTicks::Now(), frame.timestamp - first_frame_timestamp_.value(),
-      gfx::Rect(frame.image_size), std::move(metadata));
+      cropped, std::move(metadata));
+}
+
+void VideoCaptureDeviceTizenTv::Impl::OnAiZoomRect(gfx::RectF rect) {
+  crop_ = rect;
+}
+
+gfx::Rect VideoCaptureDeviceTizenTv::Impl::CalculateCrop(
+    const gfx::Size image_size) {
+  // Chromium struggles to display 2x2 subsampled formats when the picture
+  // is cropped "in the middle" of a subsampling block. Because of that,
+  // round the crop coordinates down so all of them are multiples of 2.
+  int x = RoundDownToEven(crop_.x() * image_size.width());
+  int y = RoundDownToEven(crop_.y() * image_size.height());
+  int width = RoundDownToEven(crop_.width() * image_size.width());
+  int height = RoundDownToEven(crop_.height() * image_size.height());
+  return gfx::Rect(x, y, width, height);
 }
 
 VideoCaptureDeviceTizenTv::VideoCaptureDeviceTizenTv(
index 5e5a6d6d2db223b8e2a5f974fcda2376e3088488..abb2dceb0c974893fdf144035deb02a3c769545c 100644 (file)
@@ -875,6 +875,10 @@ VideoCaptureSettings SelectSettingsVideoDeviceCapture(
           capture_params.requested_format = candidate_format.format();
           capture_params.ai_zoom_settings =
               SelectAiZoomSettings(constraints.Basic());
+          if (capture_params.ai_zoom_settings.target ==
+              media::TizenAiZoomSettings::Target::kFace) {
+            capture_params.enable_face_detection = true;
+          }
           result = VideoCaptureSettings(
               device.device_id.Utf8(), capture_params, noise_reduction,
               track_settings, candidate_format.constrained_frame_rate().Min(),
index f967b5f1d4d56f8f46786a10151cbd03adfff1bd..79ff40c87454c1cf7c9394c050148d2e3c18d882 100644 (file)
@@ -748,6 +748,10 @@ if (tizen_auto_zoom_enable) {
       "aifw-core",
     ]
   }
+
+  config("aifw_object_detection") {
+    libs = [ "aifw_core_vision_object_detection" ]
+  }
 }
 
 if (tizen_multimedia) {