Merge pull request #15290 from cudawarped:ffmpeg_raw_retrieve
author    cudawarped <bowleyj33@gmail.com>    Mon, 18 Nov 2019 14:07:06 +0000 (14:07 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>    Mon, 18 Nov 2019 14:07:06 +0000 (17:07 +0300)
Add retrieve encoded frame to VideoCapture

* Add the capability to retrieve the encoded frame from a VideoCapture object.

* Correct raw codec and pixel format output from ffmpeg capture.

* Remove warnings from build.

* Added VideoCaptureRaw subclass.

* Include an abstract base class VideoCaptureBase and rename the new subclass to VideoContainer, as suggested by mshabunin.

* Remove using.

* Change the base class name for compatibility with the Java bindings generator.

* Move grab and retrieve and add the override specifier.

* Add setRaw and readRaw to the IVideoCapture interface:
- setRaw disables video decoding and enables the mp4-to-Annex B bitstream filters for h264 and h265.
- readRaw returns the raw undecoded/filtered bitstream.
Add createRawCapture to initialize a backend with setRaw enabled.
Remove inheritance and use an independent VideoContainer subclass with an IVideoCapture member.

* Address unused parameter warnings.
Remove VideoContainer from the Python bindings as it no longer returns a Mat.
Use the OpenCV type uchar instead of unsigned char.
Add the missing destructor to the VideoContainer class.

* Address build warnings and include all parameters in the documentation.

* Include deprecated bitstream filtering API.

* Update the codec_id query to work with older ffmpeg APIs.
Change the API version defines to be consistent - most recent API version first.

* Fix typo.

* Update the test to work with the naming of the new files in the extra repo.

* Investigate test failure

* Check bytes read by ffmpeg

* Removed mp4 video container test

* Applied suggested changes.

* videoio: rework API for extraction of RAW video streams

- FFmpeg only

* Address review comments
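
A minimal usage sketch of the reworked API (not part of the patch), assuming an FFmpeg-backed build of videoio; the file names "input.mp4" and "stream.h264" are placeholders. Setting CAP_PROP_FORMAT to -1 switches the capture into RAW mode, after which each read() returns one undecoded (and, for H.264/HEVC in MP4/MKV, Annex B-filtered) packet as a continuous 1xN CV_8UC1 Mat:

#include <fstream>
#include <opencv2/videoio.hpp>

// Sketch: dump the undecoded video bitstream of a file to disk.
int main()
{
    cv::VideoCapture cap("input.mp4", cv::CAP_FFMPEG);
    if (!cap.isOpened())
        return 1;

    // Disable decoding; grab()/retrieve() now deliver raw packets.
    if (!cap.set(cv::CAP_PROP_FORMAT, -1))
        return 1;  // backend does not support RAW stream extraction

    std::ofstream out("stream.h264", std::ios::binary);
    cv::Mat packet;
    while (cap.read(packet))  // each Mat is a continuous 1xN CV_8UC1 buffer
        out.write(reinterpret_cast<const char*>(packet.data),
                  static_cast<std::streamsize>(packet.total()));
    return 0;
}

The same pattern is exercised by the new videoio_container test added below.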

modules/videoio/include/opencv2/videoio.hpp
modules/videoio/src/cap_ffmpeg_impl.hpp
modules/videoio/test/test_ffmpeg.cpp

diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp
index 890e854..999643d 100644
@@ -137,7 +137,8 @@ enum VideoCaptureProperties {
        CAP_PROP_FPS            =5, //!< Frame rate.
        CAP_PROP_FOURCC         =6, //!< 4-character code of codec. see VideoWriter::fourcc .
        CAP_PROP_FRAME_COUNT    =7, //!< Number of frames in the video file.
-       CAP_PROP_FORMAT         =8, //!< Format of the %Mat objects returned by VideoCapture::retrieve().
+       CAP_PROP_FORMAT         =8, //!< Format of the %Mat objects (see Mat::type()) returned by VideoCapture::retrieve().
+                                   //!< Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1).
        CAP_PROP_MODE           =9, //!< Backend-specific value indicating the current capture mode.
        CAP_PROP_BRIGHTNESS    =10, //!< Brightness of the image (only for those cameras that support).
        CAP_PROP_CONTRAST      =11, //!< Contrast of the image (only for cameras).
@@ -174,6 +175,7 @@ enum VideoCaptureProperties {
        CAP_PROP_CHANNEL       =43, //!< Video input or Channel Number (only for those cameras that support)
        CAP_PROP_AUTO_WB       =44, //!< enable/ disable auto white-balance
        CAP_PROP_WB_TEMPERATURE=45, //!< white-balance color temperature
+       CAP_PROP_CODEC_PIXEL_FORMAT =46,    //!< (read-only) codec's pixel format. 4-character code - see VideoWriter::fourcc . Subset of [AV_PIX_FMT_*](https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/raw.c) or -1 if unknown
 #ifndef CV_DOXYGEN
        CV__CAP_PROP_LATEST
 #endif
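
For illustration only (not part of the patch): the new read-only CAP_PROP_CODEC_PIXEL_FORMAT property packs the codec's pixel format into a FourCC (or returns -1 when unknown), and CAP_PROP_FORMAT reports -1 once RAW mode is active. A hedged sketch of unpacking that FourCC; fourccToStr and printCodecInfo are hypothetical local helpers, not OpenCV APIs:

#include <iostream>
#include <string>
#include <opencv2/videoio.hpp>

// Hypothetical helper: unpack a FourCC stored in a double into 4 characters.
static std::string fourccToStr(double fourcc)
{
    unsigned v = static_cast<unsigned>(fourcc);
    char c[5] = { char(v & 255), char((v >> 8) & 255),
                  char((v >> 16) & 255), char((v >> 24) & 255), 0 };
    return std::string(c);
}

// Query the codec tag and codec pixel format of an already opened FFmpeg capture.
static void printCodecInfo(cv::VideoCapture& cap)
{
    std::string codec = fourccToStr(cap.get(cv::CAP_PROP_FOURCC));
    double pf = cap.get(cv::CAP_PROP_CODEC_PIXEL_FORMAT);
    std::string pixelFormat = (pf < 0) ? "unknown" : fourccToStr(pf);
    // For the big_buck_bunny.h264 test clip the expected values are
    // codec == "h264" and pixelFormat == "I420" (see the test below).
    std::cout << "codec=" << codec << " pixel_format=" << pixelFormat << std::endl;
}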
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index c6d2efd..1101c72 100644
@@ -531,6 +531,17 @@ struct CvCapture_FFMPEG
 #if USE_AV_INTERRUPT_CALLBACK
     AVInterruptCallbackMetadata interrupt_metadata;
 #endif
+
+    bool setRaw();
+    bool processRawPacket();
+    bool rawMode;
+    bool rawModeInitialized;
+    AVPacket packet_filtered;
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+    AVBSFContext* bsfc;
+#else
+    AVBitStreamFilterContext* bsfc;
+#endif
 };
 
 void CvCapture_FFMPEG::init()
@@ -555,6 +566,12 @@ void CvCapture_FFMPEG::init()
 #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
     dict = NULL;
 #endif
+
+    rawMode = false;
+    rawModeInitialized = false;
+    memset(&packet_filtered, 0, sizeof(packet_filtered));
+    av_init_packet(&packet_filtered);
+    bsfc = NULL;
 }
 
 
@@ -623,6 +640,21 @@ void CvCapture_FFMPEG::close()
        av_dict_free(&dict);
 #endif
 
+    if (packet_filtered.data)
+    {
+        _opencv_ffmpeg_av_packet_unref(&packet_filtered);
+        packet_filtered.data = NULL;
+    }
+
+    if (bsfc)
+    {
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        av_bsf_free(&bsfc);
+#else
+        av_bitstream_filter_close(bsfc);
+#endif
+    }
+
     init();
 }
 
@@ -997,6 +1029,116 @@ exit_func:
     return valid;
 }
 
+bool CvCapture_FFMPEG::setRaw()
+{
+    if (!rawMode)
+    {
+        if (frame_number != 0)
+        {
+            CV_WARN("Incorrect usage: do not grab frames before .set(CAP_PROP_FORMAT, -1)");
+        }
+        // binary stream filter creation is moved into processRawPacket()
+        rawMode = true;
+    }
+    return true;
+}
+
+bool CvCapture_FFMPEG::processRawPacket()
+{
+    if (packet.data == NULL)  // EOF
+        return false;
+    if (!rawModeInitialized)
+    {
+        rawModeInitialized = true;
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        AVCodecID eVideoCodec = ic->streams[video_stream]->codecpar->codec_id;
+#elif LIBAVFORMAT_BUILD > 4628
+        AVCodecID eVideoCodec = video_st->codec->codec_id;
+#else
+        AVCodecID eVideoCodec = video_st->codec.codec_id;
+#endif
+        const char* filterName = NULL;
+        if (eVideoCodec == CV_CODEC(CODEC_ID_H264) || eVideoCodec == CV_CODEC(CODEC_ID_H265))
+        {
+            // check start code prefixed mode (as defined in the Annex B H.264 / H.265 specification)
+            if (packet.size >= 5
+                 && !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 0 && packet.data[3] == 1)
+                 && !(packet.data[0] == 0 && packet.data[1] == 0 && packet.data[2] == 1)
+            )
+            {
+                filterName = eVideoCodec == CV_CODEC(CODEC_ID_H264) ? "h264_mp4toannexb" : "hevc_mp4toannexb";
+            }
+        }
+        if (filterName)
+        {
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+            const AVBitStreamFilter * bsf = av_bsf_get_by_name(filterName);
+            if (!bsf)
+            {
+                CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
+                return false;
+            }
+            int err = av_bsf_alloc(bsf, &bsfc);
+            if (err < 0)
+            {
+                CV_WARN("Error allocating context for bitstream buffer");
+                return false;
+            }
+            avcodec_parameters_copy(bsfc->par_in, ic->streams[video_stream]->codecpar);
+            err = av_bsf_init(bsfc);
+            if (err < 0)
+            {
+                CV_WARN("Error initializing bitstream buffer");
+                return false;
+            }
+#else
+            bsfc = av_bitstream_filter_init(filterName);
+            if (!bsfc)
+            {
+                CV_WARN(cv::format("Bitstream filter is not available: %s", filterName).c_str());
+                return false;
+            }
+#endif
+        }
+    }
+    if (bsfc)
+    {
+        if (packet_filtered.data)
+        {
+            av_packet_unref(&packet_filtered);
+        }
+
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100)
+        int err = av_bsf_send_packet(bsfc, &packet);
+        if (err < 0)
+        {
+            CV_WARN("Packet submission for filtering failed");
+            return false;
+        }
+        err = av_bsf_receive_packet(bsfc, &packet_filtered);
+        if (err < 0)
+        {
+            CV_WARN("Filtered packet retrieve failed");
+            return false;
+        }
+#else
+#if LIBAVFORMAT_BUILD > 4628
+        AVCodecContext* ctx = ic->streams[video_stream]->codec;
+#else
+        AVCodecContext* ctx = &ic->streams[video_stream]->codec;
+#endif
+        int err = av_bitstream_filter_filter(bsfc, ctx, NULL, &packet_filtered.data,
+            &packet_filtered.size, packet.data, packet.size, packet_filtered.flags & AV_PKT_FLAG_KEY);
+        if (err < 0)
+        {
+            CV_WARN("Packet filtering failed");
+            return false;
+        }
+#endif
+        return packet_filtered.data != NULL;
+    }
+    return packet.data != NULL;
+}
 
 bool CvCapture_FFMPEG::grabFrame()
 {
@@ -1048,6 +1190,12 @@ bool CvCapture_FFMPEG::grabFrame()
             continue;
         }
 
+        if (rawMode)
+        {
+            valid = processRawPacket();
+            break;
+        }
+
         // Decode video frame
         #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0)
             avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet);
@@ -1068,7 +1216,6 @@ bool CvCapture_FFMPEG::grabFrame()
             if( picture_pts == AV_NOPTS_VALUE_ )
                 picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts;
 
-            frame_number++;
             valid = true;
         }
         else
@@ -1079,7 +1226,10 @@ bool CvCapture_FFMPEG::grabFrame()
         }
     }
 
-    if( valid && first_frame_number < 0 )
+    if (valid)
+        frame_number++;
+
+    if (!rawMode && valid && first_frame_number < 0)
         first_frame_number = dts_to_frame_number(picture_pts);
 
 #if USE_AV_INTERRUPT_CALLBACK
@@ -1087,14 +1237,28 @@ bool CvCapture_FFMPEG::grabFrame()
     interrupt_metadata.timeout_after_ms = 0;
 #endif
 
-    // return if we have a new picture or not
+    // return if we have a new frame or not
     return valid;
 }
 
 
 bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int* width, int* height, int* cn)
 {
-    if( !video_st || !picture->data[0] )
+    if (!video_st)
+        return false;
+
+    if (rawMode)
+    {
+        AVPacket& p = bsfc ? packet_filtered : packet;
+        *data = p.data;
+        *step = p.size;
+        *width = p.size;
+        *height = 1;
+        *cn = 1;
+        return p.data != NULL;
+    }
+
+    if (!picture->data[0])
         return false;
 
     if( img_convert_ctx == NULL ||
@@ -1163,7 +1327,6 @@ bool CvCapture_FFMPEG::retrieveFrame(int, unsigned char** data, int* step, int*
     return true;
 }
 
-
 double CvCapture_FFMPEG::getProperty( int property_id ) const
 {
     if( !video_st ) return 0;
@@ -1217,6 +1380,20 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
         return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).num;
     case CAP_PROP_SAR_DEN:
         return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den;
+    case CAP_PROP_CODEC_PIXEL_FORMAT:
+    {
+#if LIBAVFORMAT_BUILD > 4628
+        AVPixelFormat pix_fmt = video_st->codec->pix_fmt;
+#else
+        AVPixelFormat pix_fmt = video_st->codec.pix_fmt;
+#endif
+        unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt);
+        return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag;
+    }
+    case CAP_PROP_FORMAT:
+        if (rawMode)
+            return -1;
+        break;
     default:
         break;
     }
@@ -1386,6 +1563,10 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value )
             picture_pts=(int64_t)value;
         }
         break;
+    case CAP_PROP_FORMAT:
+        if (value == -1)
+            return setRaw();
+        return false;
     default:
         return false;
     }
diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp
index e581cb5..bdff1b7 100644
@@ -95,6 +95,90 @@ TEST(videoio_ffmpeg, image)
 
 //==========================================================================
 
+typedef tuple<VideoCaptureAPIs, string, string, string, string, string> videoio_container_params_t;
+typedef testing::TestWithParam< videoio_container_params_t > videoio_container;
+
+TEST_P(videoio_container, read)
+{
+    const VideoCaptureAPIs api = get<0>(GetParam());
+
+    if (!videoio_registry::hasBackend(api))
+        throw SkipTestException("Backend was not found");
+
+    const string path = get<1>(GetParam());
+    const string ext = get<2>(GetParam());
+    const string ext_raw = get<3>(GetParam());
+    const string codec = get<4>(GetParam());
+    const string pixelFormat = get<5>(GetParam());
+    const string fileName = path + "." + ext;
+    const string fileNameOut = tempfile(cv::format("test_container_stream.%s", ext_raw.c_str()).c_str());
+
+    // Write encoded video read using VideoContainer to tmp file
+    {
+        VideoCapture container(findDataFile(fileName), api);
+        ASSERT_TRUE(container.isOpened());
+        if (!container.set(CAP_PROP_FORMAT, -1))  // turn off video decoder (extract stream)
+            throw SkipTestException("Fetching of RAW video streams is not supported");
+        ASSERT_EQ(-1.f, container.get(CAP_PROP_FORMAT));  // check
+        EXPECT_EQ(codec, fourccToString((int)container.get(CAP_PROP_FOURCC)));
+        EXPECT_EQ(pixelFormat, fourccToString((int)container.get(CAP_PROP_CODEC_PIXEL_FORMAT)));
+
+        std::ofstream file(fileNameOut, ios::out | ios::trunc | std::ios::binary);
+        size_t totalBytes = 0;
+        Mat raw_data;
+        while (true)
+        {
+            container >> raw_data;
+            size_t size = raw_data.total();
+            if (raw_data.empty())
+                break;
+            ASSERT_EQ(CV_8UC1, raw_data.type());
+            ASSERT_LE(raw_data.dims, 2);
+            ASSERT_EQ(raw_data.rows, 1);
+            ASSERT_EQ((size_t)raw_data.cols, raw_data.total());
+            ASSERT_TRUE(raw_data.isContinuous());
+            totalBytes += size;
+            file.write(reinterpret_cast<char*>(raw_data.data), size);
+            ASSERT_FALSE(file.fail());
+        }
+        ASSERT_GE(totalBytes, (size_t)65536) << "Encoded stream is too small";
+    }
+
+    std::cout << "Checking extracted video stream: " << fileNameOut << std::endl;
+
+    // Check decoded frames read from original media are equal to frames decoded from tmp file
+    {
+        VideoCapture capReference(findDataFile(fileName), api);
+        ASSERT_TRUE(capReference.isOpened());
+        VideoCapture capActual(fileNameOut.c_str(), api);
+        ASSERT_TRUE(capActual.isOpened());
+        Mat reference, actual;
+        int nframes = 0, n_err = 0;
+        while (capReference.read(reference) && n_err < 3)
+        {
+            nframes++;
+            ASSERT_TRUE(capActual.read(actual)) << nframes;
+            EXPECT_EQ(0, cvtest::norm(actual, reference, NORM_INF)) << nframes << " err=" << ++n_err;
+        }
+        ASSERT_GT(nframes, 0);
+    }
+
+    ASSERT_EQ(0, remove(fileNameOut.c_str()));
+}
+
+const videoio_container_params_t videoio_container_params[] =
+{
+    make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264", "h264", "h264", "I420"),
+    make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265", "h265", "hevc", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mkv", "mkv.h264", "h264", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mkv", "mkv.h265", "hevc", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h264.mp4", "mp4.avc1", "avc1", "I420"),
+    //make_tuple(CAP_FFMPEG, "video/big_buck_bunny", "h265.mp4", "mp4.hev1", "hev1", "I420"),
+};
+
+INSTANTIATE_TEST_CASE_P(/**/, videoio_container, testing::ValuesIn(videoio_container_params));
+
+//==========================================================================
 
 static void generateFrame(Mat &frame, unsigned int i, const Point &center, const Scalar &color)
 {