#include <vector>
#include <string>
#include <algorithm>
+#include <deque>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#endif
#include <mferror.h>
-
#include <comdef.h>
#include <shlwapi.h> // QISearch
{
}
+ void swap(_In_ ComPtr<T>& lp)
+ {
+ // exchange the raw pointers directly; reference counts stay unchanged
+ T* tmp = p;
+ p = lp.p;
+ lp.p = tmp;
+ }
T** operator&()
{
CV_Assert(p == NULL);
// Structure for collecting info about types of video which are supported by current video device
struct MediaType
{
+ // video parameters
UINT32 width;
UINT32 height;
INT32 stride; // stride is negative if image is bottom-up
UINT32 aspectRatioDenom;
UINT32 sampleSize;
UINT32 interlaceMode;
+ // audio parameters
+ UINT32 bit_per_sample;
+ UINT32 nChannels;
+ UINT32 nAvgBytesPerSec;
+ UINT32 nSamplesPerSec;
+
GUID majorType; // video or audio
GUID subType; // fourCC
+ _ComPtr<IMFMediaType> Type;
MediaType(IMFMediaType *pType = 0) :
+ Type(pType),
width(0), height(0),
stride(0),
isFixedSize(true),
aspectRatioNum(1), aspectRatioDenom(1),
sampleSize(0),
interlaceMode(0),
- majorType(MFMediaType_Video),
+ bit_per_sample(0),
+ nChannels(0),
+ nAvgBytesPerSec(0),
+ nSamplesPerSec(0),
+ majorType({ 0 }), // MFMediaType_Video
subType({ 0 })
{
if (pType)
{
- MFGetAttributeSize(pType, MF_MT_FRAME_SIZE, &width, &height);
- pType->GetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32*)&stride); // value is stored as UINT32 but should be casted to INT3)
- pType->GetUINT32(MF_MT_FIXED_SIZE_SAMPLES, &isFixedSize);
- MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &frameRateNum, &frameRateDenom);
- MFGetAttributeRatio(pType, MF_MT_PIXEL_ASPECT_RATIO, &aspectRatioNum, &aspectRatioDenom);
- pType->GetUINT32(MF_MT_SAMPLE_SIZE, &sampleSize);
- pType->GetUINT32(MF_MT_INTERLACE_MODE, &interlaceMode);
pType->GetGUID(MF_MT_MAJOR_TYPE, &majorType);
pType->GetGUID(MF_MT_SUBTYPE, &subType);
+ if (majorType == MFMediaType_Audio)
+ {
+ pType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &bit_per_sample);
+ pType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &nChannels);
+ pType->GetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &nAvgBytesPerSec);
+ pType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &nSamplesPerSec); // UINT32 attribute; MF_MT_AUDIO_FLOAT_SAMPLES_PER_SECOND is double-typed
+ }
+ else if (majorType == MFMediaType_Video)
+ {
+ MFGetAttributeSize(pType, MF_MT_FRAME_SIZE, &width, &height);
+ pType->GetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32*)&stride); // value is stored as UINT32 but should be cast to INT32
+ pType->GetUINT32(MF_MT_FIXED_SIZE_SAMPLES, &isFixedSize);
+ MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &frameRateNum, &frameRateDenom);
+ MFGetAttributeRatio(pType, MF_MT_PIXEL_ASPECT_RATIO, &aspectRatioNum, &aspectRatioDenom);
+ pType->GetUINT32(MF_MT_SAMPLE_SIZE, &sampleSize);
+ pType->GetUINT32(MF_MT_INTERLACE_MODE, &interlaceMode);
+ }
}
}
- static MediaType createDefault()
+ static MediaType createDefault_Video()
{
MediaType res;
res.width = 640;
res.setFramerate(30.0);
return res;
}
- inline bool isEmpty() const
+ static MediaType createDefault_Audio()
+ {
+ MediaType res;
+ res.majorType = MFMediaType_Audio;
+ res.subType = MFAudioFormat_PCM;
+ res.bit_per_sample = 16;
+ res.nChannels = 1;
+ res.nSamplesPerSec = 44100;
+ return res;
+ }
+ inline bool isEmpty(bool checkAudio = false) const
{
- return width == 0 && height == 0;
+ if (!checkAudio)
+ return width == 0 && height == 0;
+ else
+ return nChannels == 0;
}
- _ComPtr<IMFMediaType> createMediaType() const
+ _ComPtr<IMFMediaType> createMediaType_Video() const
{
_ComPtr<IMFMediaType> res;
MFCreateMediaType(&res);
res->SetGUID(MF_MT_SUBTYPE, subType);
return res;
}
+ _ComPtr<IMFMediaType> createMediaType_Audio() const
+ {
+ _ComPtr<IMFMediaType> res;
+ MFCreateMediaType(&res);
+ if (majorType != GUID())
+ res->SetGUID(MF_MT_MAJOR_TYPE, majorType);
+ if (subType != GUID())
+ res->SetGUID(MF_MT_SUBTYPE, subType);
+ if (bit_per_sample != 0)
+ res->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bit_per_sample);
+ if (nChannels != 0)
+ res->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, nChannels);
+ if (nSamplesPerSec != 0)
+ res->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, nSamplesPerSec);
+ return res;
+ }
void setFramerate(double fps)
{
frameRateNum = (UINT32)cvRound(fps * 1000.0);
return wdiff + hdiff;
}
// check if 'this' is better than 'other' comparing to reference
- bool isBetterThan(const MediaType& other, const MediaType& ref) const
+ bool VideoIsBetterThan(const MediaType& other, const MediaType& ref) const
{
const unsigned long thisDiff = resolutionDiff(ref);
const unsigned long otherDiff = other.resolutionDiff(ref);
}
return false;
}
+ bool AudioIsBetterThan(const MediaType& other, const MediaType& ref) const
+ {
+ // 'this' is better when it is closer to the requested (ref) parameters:
+ // compare the channel count first, then bits per sample, then sample rate
+ double thisDiff = absDiff(nChannels, ref.nChannels);
+ double otherDiff = absDiff(other.nChannels, ref.nChannels);
+ if (thisDiff != otherDiff)
+ return thisDiff < otherDiff;
+ thisDiff = absDiff(bit_per_sample, ref.bit_per_sample);
+ otherDiff = absDiff(other.bit_per_sample, ref.bit_per_sample);
+ if (thisDiff != otherDiff)
+ return thisDiff < otherDiff;
+ thisDiff = absDiff(nSamplesPerSec, ref.nSamplesPerSec);
+ otherDiff = absDiff(other.nSamplesPerSec, ref.nSamplesPerSec);
+ return thisDiff < otherDiff;
+ }
};
void printFormat(std::ostream& out, const GUID& fmt)
return S_OK;
}
- HRESULT Wait(DWORD dwMilliseconds, _ComPtr<IMFSample>& videoSample, BOOL& pbEOS)
+ HRESULT Wait(DWORD dwMilliseconds, _ComPtr<IMFSample>& mediaSample, BOOL& pbEOS)
{
pbEOS = FALSE;
if (!pbEOS)
{
cv::AutoLock lock(m_mutex);
- videoSample = m_lastSample;
- CV_Assert(videoSample);
+ mediaSample = m_lastSample;
+ CV_Assert(mediaSample);
m_lastSample.Release();
ResetEvent(m_hEvent); // event is auto-reset, but we need this forced reset due to the time gap between wait() and mutex hold.
}
-
return m_hrStatus;
}
+
private:
// Destructor is private. Caller should call Release.
virtual ~SourceReaderCB()
}
}
}
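+ // Count the distinct audio streams among the media types enumerated from the source reader.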
+ void countNumberOfAudioStreams(DWORD &numberOfAudioStreams)
+ {
+ std::pair<MediaID, MediaType> best;
+ std::map<MediaID, MediaType>::const_iterator i = formats.begin();
+ for (; i != formats.end(); ++i)
+ {
+ if (i->second.majorType == MFMediaType_Audio)
+ {
+ if (best.second.isEmpty(true) || i->first.stream != best.first.stream)
+ {
+ numberOfAudioStreams++;
+ best = *i;
+ }
+ }
+ }
+ }
std::pair<MediaID, MediaType> findBestVideoFormat(const MediaType& newType)
{
std::pair<MediaID, MediaType> best;
std::map<MediaID, MediaType>::const_iterator i = formats.begin();
for (; i != formats.end(); ++i)
{
- if (i->second.majorType != MFMediaType_Video)
- continue;
- if (newType.isEmpty()) // file input - choose first returned media type
+ if (i->second.majorType == MFMediaType_Video)
{
- best = *i;
- break;
+ if (best.second.isEmpty() || i->second.VideoIsBetterThan(best.second, newType))
+ {
+ best = *i;
+ }
}
- if (best.second.isEmpty() || i->second.isBetterThan(best.second, newType))
+ }
+ return best;
+ }
+ std::pair<MediaID, MediaType> findBestAudioFormat(const MediaType& newType)
+ {
+ std::pair<MediaID, MediaType> best;
+ std::map<MediaID, MediaType>::const_iterator i = formats.begin();
+ for (; i != formats.end(); ++i)
+ {
+ if (i->second.majorType == MFMediaType_Audio)
+ {
+ if (best.second.isEmpty(true) || i->second.AudioIsBetterThan(best.second, newType))
+ {
+ best = *i;
+ }
+ }
+ }
+ return best;
+ }
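+ // Return the format of the audio stream with the given index (used for file input,
+ // where CAP_PROP_AUDIO_STREAM selects the stream explicitly).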
+ std::pair<MediaID, MediaType> findAudioFormatByStream(const DWORD StreamIndex)
+ {
+ std::pair<MediaID, MediaType> best;
+ std::map<MediaID, MediaType>::const_iterator i = formats.begin();
+ for (; i != formats.end(); ++i)
+ {
+ if (i->second.majorType == MFMediaType_Audio)
{
- best = *i;
+ if ((*i).first.stream == StreamIndex)
+ {
+ best = *i;
+ }
}
}
return best;
virtual void close();
virtual double getProperty(int) const CV_OVERRIDE;
virtual bool setProperty(int, double) CV_OVERRIDE;
+ bool grabAudioFrame();
+ bool grabVideoFrame();
virtual bool grabFrame() CV_OVERRIDE;
+ bool retrieveAudioFrame(int, OutputArray);
+ bool retrieveVideoFrame(OutputArray);
virtual bool retrieveFrame(int, cv::OutputArray) CV_OVERRIDE;
virtual bool isOpened() const CV_OVERRIDE { return isOpen; }
virtual int getCaptureDomain() CV_OVERRIDE { return CV_CAP_MSMF; }
protected:
- bool configureOutput(MediaType newType, cv::uint32_t outFormat);
+ bool configureOutput();
+ bool configureAudioOutput(MediaType newType);
+ bool configureVideoOutput(MediaType newType, cv::uint32_t outFormat);
bool setTime(double time, bool rough);
+ bool setTime(int numberFrame);
bool configureHW(bool enable);
+ bool configureStreams(const cv::VideoCaptureParameters&);
+ bool setAudioProperties(const cv::VideoCaptureParameters&);
template <typename CtrlT>
bool readComplexPropery(long prop, long& val) const;
template <typename CtrlT>
bool writeComplexProperty(long prop, double val, long flags);
_ComPtr<IMFAttributes> getDefaultSourceConfig(UINT32 num = 10);
- bool initStream(DWORD streamID, const MediaType& mt);
+ bool initStream(DWORD streamID, const MediaType mt);
bool openFinalize_(const VideoCaptureParameters* params);
_ComPtr<IMFDXGIDeviceManager> D3DMgr;
#endif
_ComPtr<IMFSourceReader> videoFileSource;
- _ComPtr<IMFSample> videoSample;
_ComPtr<IMFSourceReaderCallback> readCallback; // non-NULL for "live" streams (camera capture)
- DWORD dwStreamIndex;
+ std::vector<DWORD> dwStreamIndices;
+ std::vector<_ComPtr<IMFSample>> audioSamples;
+ _ComPtr<IMFSample> impendingVideoSample;
+ _ComPtr<IMFSample> usedVideoSample;
+ DWORD dwVideoStreamIndex;
+ DWORD dwAudioStreamIndex;
MediaType nativeFormat;
- MediaType captureFormat;
- int outputFormat;
+ MediaType captureVideoFormat;
+ MediaType captureAudioFormat;
+ bool device_status; // true for camera/microphone (device) capture, false for file input
+ int videoStream; // see CAP_PROP_VIDEO_STREAM
+ int audioStream; // see CAP_PROP_AUDIO_STREAM
+ bool vEOS;
+ bool aEOS;
+ unsigned int audioBaseIndex;
+ int outputVideoFormat;
+ int outputAudioFormat;
bool convertFormat;
MFTIME duration;
LONGLONG frameStep;
- LONGLONG sampleTime;
+ LONGLONG nFrame;
+ LONGLONG impendingVideoSampleTime;
+ LONGLONG usedVideoSampleTime;
+ LONGLONG videoStartOffset;
+ LONGLONG videoSampleDuration;
+ LONGLONG requiredAudioTime;
+ LONGLONG audioSampleTime;
+ LONGLONG audioStartOffset;
+ LONGLONG audioSampleDuration;
+ LONGLONG audioTime;
+ LONGLONG chunkLengthOfBytes;
+ LONGLONG givenAudioTime;
+ LONGLONG numberOfAdditionalAudioBytes; // the number of additional bytes required to align the audio chunk
+ double bufferedAudioDuration;
+ LONGLONG audioSamplePos;
+ DWORD numberOfAudioStreams;
+ Mat audioFrame;
+ std::deque<BYTE> bufferAudioData;
bool isOpen;
+ bool grabIsDone;
+ bool syncLastFrame;
+ bool lastFrame;
};
CvCapture_MSMF::CvCapture_MSMF():
D3DMgr(NULL),
#endif
videoFileSource(NULL),
- videoSample(NULL),
readCallback(NULL),
- dwStreamIndex(0),
- outputFormat(CV_CAP_MODE_BGR),
+ impendingVideoSample(NULL),
+ usedVideoSample(NULL),
+ dwVideoStreamIndex(0),
+ dwAudioStreamIndex(0),
+ device_status(false),
+ videoStream(0),
+ audioStream(-1),
+ vEOS(false),
+ aEOS(false),
+ audioBaseIndex(1),
+ outputVideoFormat(CV_CAP_MODE_BGR),
+ outputAudioFormat(CV_16S),
convertFormat(true),
duration(0),
frameStep(0),
- sampleTime(0),
- isOpen(false)
+ nFrame(0),
+ impendingVideoSampleTime(0),
+ usedVideoSampleTime(0),
+ videoStartOffset(-1),
+ videoSampleDuration(0),
+ requiredAudioTime(0),
+ audioSampleTime(0),
+ audioStartOffset(-1),
+ audioSampleDuration(0),
+ audioTime(0),
+ chunkLengthOfBytes(0),
+ givenAudioTime(0),
+ numberOfAdditionalAudioBytes(0),
+ bufferedAudioDuration(0),
+ audioSamplePos(0),
+ numberOfAudioStreams(0),
+ isOpen(false),
+ grabIsDone(false),
+ syncLastFrame(true),
+ lastFrame(false)
{
}
if (isOpen)
{
isOpen = false;
- videoSample.Release();
+ usedVideoSample.Release();
+ for (auto& item : audioSamples)
+ item.Release();
videoFileSource.Release();
+ device_status = false;
camid = -1;
filename.clear();
}
readCallback.Release();
}
-bool CvCapture_MSMF::initStream(DWORD streamID, const MediaType& mt)
+bool CvCapture_MSMF::initStream(DWORD streamID, const MediaType mt)
{
CV_LOG_DEBUG(NULL, "Init stream " << streamID << " with MediaType " << mt);
- _ComPtr<IMFMediaType> mediaTypeOut = mt.createMediaType();
- if (FAILED(videoFileSource->SetStreamSelection((DWORD)MF_SOURCE_READER_ALL_STREAMS, false)))
+ _ComPtr<IMFMediaType> mediaTypesOut;
+ if (mt.majorType == MFMediaType_Audio)
{
- CV_LOG_WARNING(NULL, "Failed to reset streams");
- return false;
+ captureAudioFormat = mt;
+ mediaTypesOut = mt.createMediaType_Audio();
+ }
+ if (mt.majorType == MFMediaType_Video)
+ {
+ captureVideoFormat = mt;
+ mediaTypesOut = mt.createMediaType_Video();
}
if (FAILED(videoFileSource->SetStreamSelection(streamID, true)))
{
CV_LOG_WARNING(NULL, "Failed to select stream " << streamID);
return false;
}
- HRESULT hr = videoFileSource->SetCurrentMediaType(streamID, NULL, mediaTypeOut.Get());
+ HRESULT hr = videoFileSource->SetCurrentMediaType(streamID, NULL, mediaTypesOut.Get());
if (hr == MF_E_TOPO_CODEC_NOT_FOUND)
{
CV_LOG_WARNING(NULL, "Failed to set mediaType (stream " << streamID << ", " << mt << "(codec not found)");
CV_LOG_WARNING(NULL, "Failed to set mediaType (stream " << streamID << ", " << mt << "(HRESULT " << hr << ")");
return false;
}
- captureFormat = mt;
+
return true;
}
return configureHW(va_type == VIDEO_ACCELERATION_D3D11 || va_type == VIDEO_ACCELERATION_ANY);
}
-bool CvCapture_MSMF::configureOutput(MediaType newType, cv::uint32_t outFormat)
+bool CvCapture_MSMF::configureAudioOutput(MediaType newType)
+{
+ FormatStorage formats;
+ formats.read(videoFileSource.Get());
+ std::pair<FormatStorage::MediaID, MediaType> bestMatch;
+ formats.countNumberOfAudioStreams(numberOfAudioStreams);
+ if (device_status)
+ bestMatch = formats.findBestAudioFormat(newType);
+ else
+ bestMatch = formats.findAudioFormatByStream(audioStream);
+ if (bestMatch.second.isEmpty(true))
+ {
+ CV_LOG_DEBUG(NULL, "Can not find audio stream with requested parameters");
+ return false;
+ }
+ dwAudioStreamIndex = bestMatch.first.stream;
+ dwStreamIndices.push_back(dwAudioStreamIndex);
+ MediaType newFormat = bestMatch.second;
+
+ newFormat.majorType = MFMediaType_Audio;
+ newFormat.nSamplesPerSec = 44100;
+ switch (outputAudioFormat)
+ {
+ case CV_8S:
+ newFormat.subType = MFAudioFormat_PCM;
+ newFormat.bit_per_sample = 8;
+ break;
+ case CV_16S:
+ newFormat.subType = MFAudioFormat_PCM;
+ newFormat.bit_per_sample = 16;
+ break;
+ case CV_32S:
+ newFormat.subType = MFAudioFormat_PCM;
+ newFormat.bit_per_sample = 32;
+ break;
+ case CV_32F:
+ newFormat.subType = MFAudioFormat_Float;
+ newFormat.bit_per_sample = 32;
+ break;
+ default:
+ break;
+ }
+
+ return initStream(dwAudioStreamIndex, newFormat);
+}
+
+bool CvCapture_MSMF::configureVideoOutput(MediaType newType, cv::uint32_t outFormat)
{
FormatStorage formats;
formats.read(videoFileSource.Get());
CV_LOG_DEBUG(NULL, "Can not find video stream with requested parameters");
return false;
}
- dwStreamIndex = bestMatch.first.stream;
+ dwVideoStreamIndex = bestMatch.first.stream;
+ dwStreamIndices.push_back(dwVideoStreamIndex);
nativeFormat = bestMatch.second;
MediaType newFormat = nativeFormat;
+
if (convertFormat)
{
switch (outFormat)
}
// we select native format first and then our requested format (related issue #12822)
if (!newType.isEmpty()) // camera input
- initStream(dwStreamIndex, nativeFormat);
- return initStream(dwStreamIndex, newFormat);
+ {
+ initStream(dwVideoStreamIndex, nativeFormat);
+ }
+ return initStream(dwVideoStreamIndex, newFormat);
+}
+
+bool CvCapture_MSMF::configureOutput()
+{
+ if (FAILED(videoFileSource->SetStreamSelection((DWORD)MF_SOURCE_READER_ALL_STREAMS, false)))
+ {
+ CV_LOG_WARNING(NULL, "Failed to reset streams");
+ return false;
+ }
+ bool tmp = true;
+ if (videoStream != -1)
+ tmp = (!device_status)? configureVideoOutput(MediaType(), outputVideoFormat) : configureVideoOutput(MediaType::createDefault_Video(), outputVideoFormat);
+ if (audioStream != -1)
+ tmp &= (!device_status)? configureAudioOutput(MediaType()) : configureAudioOutput(MediaType::createDefault_Audio());
+ return tmp;
}
bool CvCapture_MSMF::open(int index, const cv::VideoCaptureParameters* params)
if (params)
{
configureHW(*params);
+ configureStreams(*params);
+ }
+ if ((videoStream != -1 && audioStream != -1) || (videoStream == -1 && audioStream == -1))
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): for device capture exactly one of CAP_PROP_VIDEO_STREAM (" << videoStream << ") and CAP_PROP_AUDIO_STREAM (" << audioStream << ") must be different from -1");
+ return false;
}
-
DeviceList devices;
- UINT32 count = devices.read();
+ UINT32 count = 0;
+ if (audioStream != -1)
+ count = devices.read(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID);
+ if (videoStream != -1)
+ count = devices.read(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID);
if (count == 0 || static_cast<UINT32>(index) > count)
{
CV_LOG_DEBUG(NULL, "Device " << index << " not found (total " << count << " devices)");
}
isOpen = true;
+ device_status = true;
camid = index;
readCallback = cb;
duration = 0;
- if (configureOutput(MediaType::createDefault(), outputFormat))
+ if (configureOutput())
{
- frameStep = captureFormat.getFrameStep();
+ frameStep = captureVideoFormat.getFrameStep();
}
-
if (isOpen && !openFinalize_(params))
{
close();
if (params)
{
configureHW(*params);
+ configureStreams(*params);
+ setAudioProperties(*params);
}
-
// Set source reader parameters
_ComPtr<IMFAttributes> attr = getDefaultSourceConfig();
cv::AutoBuffer<wchar_t> unicodeFileName(_filename.length() + 1);
if (SUCCEEDED(MFCreateSourceReaderFromURL(unicodeFileName.data(), attr.Get(), &videoFileSource)))
{
isOpen = true;
- sampleTime = 0;
- if (configureOutput(MediaType(), outputFormat))
+ usedVideoSampleTime = 0;
+ if (configureOutput())
{
- frameStep = captureFormat.getFrameStep();
filename = _filename;
+ frameStep = captureVideoFormat.getFrameStep();
PROPVARIANT var;
HRESULT hr;
if (SUCCEEDED(hr = videoFileSource->GetPresentationAttribute((DWORD)MF_SOURCE_READER_MEDIASOURCE, MF_PD_DURATION, &var)) &&
duration = 0;
}
}
-
if (isOpen && !openFinalize_(params))
{
close();
return false;
}
-
+ if (isOpen && audioStream != -1 && videoStream != -1)
+ {
+ isOpen = grabFrame();
+ if (isOpen)
+ grabIsDone = true;
+ }
return isOpen;
}
return true;
}
-bool CvCapture_MSMF::grabFrame()
+bool CvCapture_MSMF::configureStreams(const cv::VideoCaptureParameters& params)
{
- CV_TRACE_FUNCTION();
- if (readCallback) // async "live" capture mode
+ if (params.has(CAP_PROP_VIDEO_STREAM))
{
- HRESULT hr = 0;
- SourceReaderCB* reader = ((SourceReaderCB*)readCallback.Get());
- if (!reader->m_reader)
+ double value = params.get<double>(CAP_PROP_VIDEO_STREAM);
+ if (value == -1 || value == 0)
+ videoStream = static_cast<int>(value);
+ else
{
- // Initiate capturing with async callback
- reader->m_reader = videoFileSource.Get();
- reader->m_dwStreamIndex = dwStreamIndex;
- if (FAILED(hr = videoFileSource->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL)))
- {
- CV_LOG_ERROR(NULL, "videoio(MSMF): can't grab frame - initial async ReadSample() call failed: " << hr);
- reader->m_reader = NULL;
- return false;
- }
+ CV_LOG_ERROR(NULL, "VIDEOIO/MSMF: CAP_PROP_VIDEO_STREAM parameter value is invalid/unsupported: " << value);
+ return false;
}
- BOOL bEOS = false;
- if (FAILED(hr = reader->Wait(10000, videoSample, bEOS))) // 10 sec
+ }
+ if (params.has(CAP_PROP_AUDIO_STREAM))
+ {
+ double value = params.get<double>(CAP_PROP_AUDIO_STREAM);
+ if (value >= -1)
+ audioStream = static_cast<int>(value);
+ else
{
- CV_LOG_WARNING(NULL, "videoio(MSMF): can't grab frame. Error: " << hr);
+ CV_LOG_ERROR(NULL, "VIDEOIO/MSMF: CAP_PROP_AUDIO_STREAM parameter value is invalid/unsupported: " << value);
return false;
}
- if (bEOS)
+ }
+ return true;
+}
+bool CvCapture_MSMF::setAudioProperties(const cv::VideoCaptureParameters& params)
+{
+ if (params.has(CAP_PROP_AUDIO_DATA_DEPTH))
+ {
+ int value = static_cast<int>(params.get<double>(CAP_PROP_AUDIO_DATA_DEPTH));
+ if (value != CV_8S && value != CV_16S && value != CV_32S && value != CV_32F)
{
- CV_LOG_WARNING(NULL, "videoio(MSMF): EOS signal. Capture stream is lost");
+ CV_LOG_ERROR(NULL, "VIDEOIO/MSMF: CAP_PROP_AUDIO_DATA_DEPTH parameter value is invalid/unsupported: " << value);
return false;
}
- sampleTime = reader->m_lastSampleTimestamp;
- return true;
+ else
+ {
+ outputAudioFormat = value;
+ }
}
- else if (isOpen)
+ if (params.has(CAP_PROP_AUDIO_SYNCHRONIZE))
+ {
+ int value = static_cast<int>(params.get<double>(CAP_PROP_AUDIO_SYNCHRONIZE));
+ syncLastFrame = (value != 0);
+ }
+ return true;
+}
+
+bool CvCapture_MSMF::grabVideoFrame()
+{
+ DWORD streamIndex, flags;
+ HRESULT hr;
+ usedVideoSample.Release();
+
+ bool returnFlag = false;
+ bool stopFlag = false;
+ if (audioStream != -1)
{
- DWORD streamIndex, flags;
- videoSample.Release();
- HRESULT hr;
- for(;;)
+ usedVideoSample.swap(impendingVideoSample);
+ std::swap(usedVideoSampleTime, impendingVideoSampleTime);
+ }
+ while (!stopFlag)
+ {
+ for (;;)
{
CV_TRACE_REGION("ReadSample");
if (!SUCCEEDED(hr = videoFileSource->ReadSample(
- dwStreamIndex, // Stream index.
+ dwVideoStreamIndex, // Stream index.
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
- &sampleTime, // Receives the time stamp.
- &videoSample // Receives the sample or NULL.
+ &impendingVideoSampleTime, // Receives the time stamp.
+ &impendingVideoSample // Receives the sample or NULL.
)))
break;
- if (streamIndex != dwStreamIndex)
+ if (streamIndex != dwVideoStreamIndex)
break;
if (flags & (MF_SOURCE_READERF_ERROR | MF_SOURCE_READERF_ALLEFFECTSREMOVED | MF_SOURCE_READERF_ENDOFSTREAM))
break;
- if (videoSample)
+ if (impendingVideoSample)
break;
if (flags & MF_SOURCE_READERF_STREAMTICK)
{
CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream tick detected. Retrying to grab the frame");
}
}
+ if (SUCCEEDED(hr))
+ {
+ if (streamIndex != dwVideoStreamIndex)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Wrong stream read. Abort capturing");
+ close();
+ stopFlag = true;
+ }
+ else if (flags & MF_SOURCE_READERF_ERROR)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream reading error. Abort capturing");
+ close();
+ stopFlag = true;
+ }
+ else if (flags & MF_SOURCE_READERF_ALLEFFECTSREMOVED)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream decoding error. Abort capturing");
+ close();
+ stopFlag = true;
+ }
+ else if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
+ {
+ vEOS = true;
+ lastFrame = true;
+ stopFlag = true;
+ if (audioStream == -1)
+ returnFlag = false;
+ else if (usedVideoSample)
+ returnFlag = true;
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): End of video stream detected");
+ }
+ else
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): got video frame with timestamp=" << impendingVideoSampleTime);
+ if (audioStream != -1)
+ {
+ if (!usedVideoSample)
+ {
+ usedVideoSample.swap(impendingVideoSample);
+ std::swap(usedVideoSampleTime, impendingVideoSampleTime);
+ videoStartOffset = usedVideoSampleTime;
+ }
+ else
+ {
+ stopFlag = true;
+ }
+ if (impendingVideoSample)
+ {
+ nFrame++;
+ videoSampleDuration = impendingVideoSampleTime - usedVideoSampleTime;
+ requiredAudioTime = impendingVideoSampleTime - givenAudioTime;
+ givenAudioTime += requiredAudioTime;
+ }
+ }
+ else
+ {
+ usedVideoSample.swap(impendingVideoSample);
+ std::swap(usedVideoSampleTime, impendingVideoSampleTime);
+ stopFlag = true;
+ nFrame++;
+ }
+ if (flags & MF_SOURCE_READERF_NEWSTREAM)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): New stream detected");
+ }
+ if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream native media type changed");
+ }
+ if (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream current media type changed");
+ }
+ returnFlag = true;
+ }
+ }
+ else
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): ReadSample() call failed");
+ return false;
+ }
+ }
+ return returnFlag;
+}
+bool CvCapture_MSMF::grabAudioFrame()
+{
+ DWORD streamIndex, flags;
+ HRESULT hr;
+ _ComPtr<IMFSample> audioSample = NULL;
+ audioSamples.clear();
+
+ bool returnFlag = false;
+ audioTime = 0;
+ int numberOfSamples = -1;
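+ // Read audio samples until the buffered data covers the presentation span of the current
+ // video frame (requiredAudioTime); once the video stream has ended (vEOS), drain the audio
+ // stream to its end instead.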
+ if (bufferedAudioDuration*1e7 > requiredAudioTime)
+ return true;
+ while ((!vEOS) ? audioTime <= requiredAudioTime : !aEOS)
+ {
+ if (audioStartOffset - usedVideoSampleTime > videoSampleDuration)
+ return true;
+ for (;;)
+ {
+ CV_TRACE_REGION("ReadSample");
+ if (!SUCCEEDED(hr = videoFileSource->ReadSample(
+ dwAudioStreamIndex, // Stream index.
+ 0, // Flags.
+ &streamIndex, // Receives the actual stream index.
+ &flags, // Receives status flags.
+ &audioSampleTime, // Receives the time stamp.
+ &audioSample // Receives the sample or NULL.
+ )))
+ break;
+ if (streamIndex != dwAudioStreamIndex)
+ break;
+ if (flags & (MF_SOURCE_READERF_ERROR | MF_SOURCE_READERF_ALLEFFECTSREMOVED | MF_SOURCE_READERF_ENDOFSTREAM))
+ break;
+ if (audioSample)
+ break;
+ if (flags & MF_SOURCE_READERF_STREAMTICK)
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream tick detected. Retrying to grab the frame");
+ }
+ }
if (SUCCEEDED(hr))
{
- if (streamIndex != dwStreamIndex)
+ if (streamIndex != dwAudioStreamIndex)
{
CV_LOG_DEBUG(NULL, "videoio(MSMF): Wrong stream read. Abort capturing");
close();
}
else if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
{
- sampleTime += frameStep;
- CV_LOG_DEBUG(NULL, "videoio(MSMF): End of stream detected");
+ aEOS = true;
+ if (videoStream != -1 && !vEOS)
+ returnFlag = true;
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): End of audio stream detected");
+ break;
}
else
{
- sampleTime += frameStep;
+ audioSamples.push_back(audioSample);
+ audioSample = NULL;
+ numberOfSamples++;
+ audioSamples[numberOfSamples]->GetSampleDuration(&audioSampleDuration);
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): got audio frame with timestamp=" << audioSampleTime << " duration=" << audioSampleDuration);
+ audioTime += (LONGLONG)(audioSampleDuration + bufferedAudioDuration*1e7);
+ if (nFrame == 1 && audioStartOffset == -1)
+ {
+ audioStartOffset = audioSampleTime - audioSampleDuration;
+ requiredAudioTime -= audioStartOffset;
+ }
if (flags & MF_SOURCE_READERF_NEWSTREAM)
{
CV_LOG_DEBUG(NULL, "videoio(MSMF): New stream detected");
{
CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream current media type changed");
}
- return true;
+ returnFlag = true;
+ }
+ }
+ else
+ {
+ CV_LOG_DEBUG(NULL, "videoio(MSMF): ReadSample() method is not succeeded");
+ return false;
+ }
+ }
+
+ if (!audioSamples.empty() || (!bufferAudioData.empty() && aEOS))
+ {
+ _ComPtr<IMFMediaBuffer> buf = NULL;
+ std::vector<BYTE> audioDataInUse;
+ BYTE* ptr = NULL;
+ DWORD maxsize = 0, cursize = 0;
+ CV_TRACE_REGION("get_contiguous_buffer");
+ for (auto item : audioSamples)
+ {
+ if (!SUCCEEDED(item->ConvertToContiguousBuffer(&buf)))
+ {
+ CV_TRACE_REGION("get_buffer");
+ DWORD bcnt = 0;
+ if (!SUCCEEDED(item->GetBufferCount(&bcnt)))
+ break;
+ if (bcnt == 0)
+ break;
+ if (!SUCCEEDED(item->GetBufferByIndex(0, &buf)))
+ break;
+ }
+ if (!SUCCEEDED(buf->Lock(&ptr, &maxsize, &cursize)))
+ break;
+ size_t lastSize = bufferAudioData.size();
+ bufferAudioData.resize(lastSize+cursize);
+ for (unsigned int i = 0; i < cursize; i++)
+ {
+ bufferAudioData[lastSize+i]=*(ptr+i);
+ }
+ CV_TRACE_REGION_NEXT("unlock");
+ buf->Unlock();
+ buf = NULL;
+ }
+ audioSamples.clear();
+
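+ // Cut a chunk of bytes matching the duration of the current video frame: advance the audio
+ // sample position by the previous chunk, derive the new chunk length from requiredAudioTime,
+ // and pad it to a whole number of (bytes per sample * channels) so the interleaved channel
+ // data stays aligned.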
+ audioSamplePos += chunkLengthOfBytes/((captureAudioFormat.bit_per_sample/8)*captureAudioFormat.nChannels);
+ chunkLengthOfBytes = (videoStream != -1) ? (LONGLONG)((requiredAudioTime*captureAudioFormat.nSamplesPerSec*captureAudioFormat.nChannels*(captureAudioFormat.bit_per_sample)/8)/1e7) : cursize;
+ if ((videoStream != -1) && (chunkLengthOfBytes % ((int)(captureAudioFormat.bit_per_sample)/8* (int)captureAudioFormat.nChannels) != 0))
+ {
+ if ( (double)audioSamplePos/captureAudioFormat.nSamplesPerSec + audioStartOffset * 1e-7 - usedVideoSampleTime * 1e-7 >= 0 )
+ chunkLengthOfBytes -= numberOfAdditionalAudioBytes;
+ numberOfAdditionalAudioBytes = ((int)(captureAudioFormat.bit_per_sample)/8* (int)captureAudioFormat.nChannels)
+ - chunkLengthOfBytes % ((int)(captureAudioFormat.bit_per_sample)/8* (int)captureAudioFormat.nChannels);
+ chunkLengthOfBytes += numberOfAdditionalAudioBytes;
+ }
+ if ((lastFrame && !syncLastFrame) || (aEOS && !vEOS))
+ {
+ chunkLengthOfBytes = bufferAudioData.size();
+ }
+ CV_Check((double)chunkLengthOfBytes, chunkLengthOfBytes >= INT_MIN && chunkLengthOfBytes <= INT_MAX, "MSMF: The chunkLengthOfBytes is out of the allowed range");
+ copy(bufferAudioData.begin(), bufferAudioData.begin() + (int)chunkLengthOfBytes, std::back_inserter(audioDataInUse));
+ bufferAudioData.erase(bufferAudioData.begin(), bufferAudioData.begin() + (int)chunkLengthOfBytes);
+ if (audioFrame.empty())
+ {
+ switch (outputAudioFormat)
+ {
+ case CV_8S:
+ cv::Mat((int)chunkLengthOfBytes/(captureAudioFormat.nChannels), captureAudioFormat.nChannels, CV_8S, audioDataInUse.data()).copyTo(audioFrame);
+ break;
+ case CV_16S:
+ cv::Mat((int)chunkLengthOfBytes/(2*captureAudioFormat.nChannels), captureAudioFormat.nChannels, CV_16S, audioDataInUse.data()).copyTo(audioFrame);
+ break;
+ case CV_32S:
+ cv::Mat((int)chunkLengthOfBytes/(4*captureAudioFormat.nChannels), captureAudioFormat.nChannels, CV_32S, audioDataInUse.data()).copyTo(audioFrame);
+ break;
+ case CV_32F:
+ cv::Mat((int)chunkLengthOfBytes/(4*captureAudioFormat.nChannels), captureAudioFormat.nChannels, CV_32F, audioDataInUse.data()).copyTo(audioFrame);
+ break;
+ default:
+ break;
+ }
+ }
+ audioDataInUse.clear();
+ audioDataInUse.shrink_to_fit();
+ }
+
+ return returnFlag;
+}
+
+bool CvCapture_MSMF::grabFrame()
+{
+ CV_TRACE_FUNCTION();
+
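+ // open() pre-grabs the first frame when both video and audio streams are requested (making
+ // CAP_PROP_AUDIO_SHIFT_NSEC available right after opening), so the first user call to
+ // grabFrame() must not advance the streams again.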
+ if (grabIsDone)
+ {
+ grabIsDone = false;
+ return true;
+ }
+
+ audioFrame = Mat();
+ if (readCallback) // async "live" capture mode
+ {
+ audioSamples.push_back(NULL);
+ HRESULT hr = 0;
+ SourceReaderCB* reader = ((SourceReaderCB*)readCallback.Get());
+ DWORD dwStreamIndex = 0;
+ if (videoStream != -1)
+ dwStreamIndex = dwVideoStreamIndex;
+ if (audioStream != -1)
+ dwStreamIndex = dwAudioStreamIndex;
+ if (!reader->m_reader)
+ {
+ // Initiate capturing with async callback
+ reader->m_reader = videoFileSource.Get();
+ reader->m_dwStreamIndex = dwStreamIndex;
+ if (FAILED(hr = videoFileSource->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL)))
+ {
+ CV_LOG_ERROR(NULL, "videoio(MSMF): can't grab frame - initial async ReadSample() call failed: " << hr);
+ reader->m_reader = NULL;
+ return false;
}
}
+ BOOL bEOS = false;
+ if (FAILED(hr = reader->Wait( videoStream == -1 ? INFINITE : 10000, (videoStream != -1) ? usedVideoSample : audioSamples[0], bEOS))) // 10 sec
+ {
+ CV_LOG_WARNING(NULL, "videoio(MSMF): can't grab frame. Error: " << hr);
+ return false;
+ }
+ if (bEOS)
+ {
+ CV_LOG_WARNING(NULL, "videoio(MSMF): EOS signal. Capture stream is lost");
+ return false;
+ }
+ if (videoStream != -1)
+ usedVideoSampleTime = reader->m_lastSampleTimestamp;
+ return true;
+ }
+ else if (isOpen)
+ {
+ if (vEOS)
+ return false;
+
+ bool returnFlag = true;
+
+ if (videoStream != -1)
+ {
+ if (!vEOS)
+ returnFlag &= grabVideoFrame();
+ if (!returnFlag)
+ return false;
+ }
+
+ if (audioStream != -1)
+ {
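+ // audio (in seconds) left over in the byte buffer from the previous grab:
+ // bytes / (bytes per sample * channels) / samples per second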
+ bufferedAudioDuration = (double)(bufferAudioData.size()/((captureAudioFormat.bit_per_sample/8)*captureAudioFormat.nChannels))/captureAudioFormat.nSamplesPerSec;
+ audioFrame.release();
+ if (!aEOS)
+ returnFlag &= grabAudioFrame();
+ }
+
+ return returnFlag;
}
return false;
}
-bool CvCapture_MSMF::retrieveFrame(int, cv::OutputArray frame)
+bool CvCapture_MSMF::retrieveVideoFrame(cv::OutputArray frame)
{
CV_TRACE_FUNCTION();
do
{
- if (!videoSample)
+ if (!usedVideoSample)
break;
_ComPtr<IMFMediaBuffer> buf = NULL;
-
CV_TRACE_REGION("get_contiguous_buffer");
- if (!SUCCEEDED(videoSample->ConvertToContiguousBuffer(&buf)))
+ if (!SUCCEEDED(usedVideoSample->ConvertToContiguousBuffer(&buf)))
{
CV_TRACE_REGION("get_buffer");
DWORD bcnt = 0;
- if (!SUCCEEDED(videoSample->GetBufferCount(&bcnt)))
+ if (!SUCCEEDED(usedVideoSample->GetBufferCount(&bcnt)))
break;
if (bcnt == 0)
break;
- if (!SUCCEEDED(videoSample->GetBufferByIndex(0, &buf)))
+ if (!SUCCEEDED(usedVideoSample->GetBufferByIndex(0, &buf)))
break;
}
break;
if (convertFormat)
{
- if (lock2d || (unsigned int)cursize == captureFormat.sampleSize)
+ if (lock2d || (unsigned int)cursize == captureVideoFormat.sampleSize)
{
- switch (outputFormat)
+ switch (outputVideoFormat)
{
case CV_CAP_MODE_YUYV:
- cv::Mat(captureFormat.height, captureFormat.width, CV_8UC2, ptr, pitch).copyTo(frame);
+ cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC2, ptr, pitch).copyTo(frame);
break;
case CV_CAP_MODE_BGR:
if (captureMode == MODE_HW)
- cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR);
+ cv::cvtColor(cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR);
else
- cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr, pitch).copyTo(frame);
+ cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC3, ptr, pitch).copyTo(frame);
break;
case CV_CAP_MODE_RGB:
if (captureMode == MODE_HW)
- cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR);
+ cv::cvtColor(cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR);
else
- cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr, pitch), frame, cv::COLOR_BGR2RGB);
+ cv::cvtColor(cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC3, ptr, pitch), frame, cv::COLOR_BGR2RGB);
break;
case CV_CAP_MODE_GRAY:
- cv::Mat(captureFormat.height, captureFormat.width, CV_8UC1, ptr, pitch).copyTo(frame);
+ cv::Mat(captureVideoFormat.height, captureVideoFormat.width, CV_8UC1, ptr, pitch).copyTo(frame);
break;
default:
frame.release();
return false;
}
+bool CvCapture_MSMF::retrieveAudioFrame(int index, cv::OutputArray frame)
+{
+ CV_TRACE_FUNCTION();
+ if (audioStartOffset - usedVideoSampleTime > videoSampleDuration)
+ {
+ frame.release();
+ return true;
+ }
+ do
+ {
+ if (audioFrame.empty())
+ {
+ frame.release();
+ if (aEOS)
+ return true;
+ }
+ cv::Mat data;
+ switch (outputAudioFormat)
+ {
+ case CV_8S:
+ data = cv::Mat(1, audioFrame.rows, CV_8S);
+ for (int i = 0; i < audioFrame.rows; i++)
+ data.at<char>(0,i) = audioFrame.at<char>(i,index-audioBaseIndex);
+ break;
+ case CV_16S:
+ data = cv::Mat(1, audioFrame.rows, CV_16S);
+ for (int i = 0; i < audioFrame.rows; i++)
+ data.at<short>(0,i) = audioFrame.at<short>(i,index-audioBaseIndex);
+ break;
+ case CV_32S:
+ data = cv::Mat(1, audioFrame.rows, CV_32S);
+ for (int i = 0; i < audioFrame.rows; i++)
+ data.at<int>(0,i) = audioFrame.at<int>(i,index-audioBaseIndex);
+ break;
+ case CV_32F:
+ data = cv::Mat(1, audioFrame.rows, CV_32F);
+ for (int i = 0; i < audioFrame.rows; i++)
+ data.at<float>(0,i) = audioFrame.at<float>(i,index-audioBaseIndex);
+ break;
+ default:
+ frame.release();
+ break;
+ }
+ if (!data.empty())
+ data.copyTo(frame);
+
+ return !frame.empty();
+ } while (0);
+
+ return false;
+}
+
+bool CvCapture_MSMF::retrieveFrame(int index, cv::OutputArray frame)
+{
+ CV_TRACE_FUNCTION();
+ if (index < 0)
+ return false;
+ if ((unsigned int)index < audioBaseIndex)
+ {
+ if (videoStream == -1)
+ {
+ frame.release();
+ return false;
+ }
+ else
+ return retrieveVideoFrame(frame);
+ }
+ else
+ {
+ if (audioStream == -1)
+ {
+ frame.release();
+ return false;
+ }
+ else
+ return retrieveAudioFrame(index, frame);
+ }
+}
+
bool CvCapture_MSMF::setTime(double time, bool rough)
{
+ if (videoStream == -1)
+ return false;
+ if (videoStream != -1 && audioStream != -1 && time != 0)
+ return false; // with audio enabled, seeking is supported only to the start of the stream
PROPVARIANT var;
if (SUCCEEDED(videoFileSource->GetPresentationAttribute((DWORD)MF_SOURCE_READER_MEDIASOURCE, MF_SOURCE_READER_MEDIASOURCE_CHARACTERISTICS, &var)) &&
var.vt == VT_UI4 && var.ulVal & MFMEDIASOURCE_CAN_SEEK)
{
- videoSample.Release();
+ usedVideoSample.Release();
bool useGrabbing = time > 0 && !rough && !(var.ulVal & MFMEDIASOURCE_HAS_SLOW_SEEK);
PropVariantClear(&var);
- sampleTime = (useGrabbing && time >= frameStep) ? (LONGLONG)floor(time + 0.5) - frameStep : (LONGLONG)floor(time + 0.5);
+ usedVideoSampleTime = (useGrabbing) ? 0 : (LONGLONG)floor(time + 0.5);
+ nFrame = (useGrabbing) ? 0 : usedVideoSampleTime/frameStep;
+ givenAudioTime = (useGrabbing) ? 0 : nFrame*frameStep;
var.vt = VT_I8;
- var.hVal.QuadPart = sampleTime;
+ var.hVal.QuadPart = usedVideoSampleTime;
bool resOK = SUCCEEDED(videoFileSource->SetCurrentPosition(GUID_NULL, var));
PropVariantClear(&var);
if (resOK && useGrabbing)
{
LONGLONG timeborder = (LONGLONG)floor(time + 0.5) - frameStep / 2;
- do { resOK = grabFrame(); videoSample.Release(); } while (resOK && sampleTime < timeborder);
+ do { resOK = grabFrame(); usedVideoSample.Release(); } while (resOK && usedVideoSampleTime < timeborder);
}
return resOK;
}
return false;
}
+bool CvCapture_MSMF::setTime(int numberFrame)
+{
+ if (videoStream == -1)
+ return false;
+ if (videoStream != -1 && audioStream != -1 && numberFrame != 0)
+ return false; // with audio enabled, seeking is supported only to frame 0
+ PROPVARIANT var;
+ if (SUCCEEDED(videoFileSource->GetPresentationAttribute((DWORD)MF_SOURCE_READER_MEDIASOURCE, MF_SOURCE_READER_MEDIASOURCE_CHARACTERISTICS, &var)) &&
+ var.vt == VT_UI4 && var.ulVal & MFMEDIASOURCE_CAN_SEEK)
+ {
+ usedVideoSample.Release();
+ PropVariantClear(&var);
+ usedVideoSampleTime = 0;
+ nFrame = 0;
+ givenAudioTime = 0;
+ var.vt = VT_I8;
+ var.hVal.QuadPart = usedVideoSampleTime;
+ bool resOK = SUCCEEDED(videoFileSource->SetCurrentPosition(GUID_NULL, var));
+ PropVariantClear(&var);
+ while (resOK && nFrame < numberFrame) { resOK = grabFrame(); usedVideoSample.Release(); };
+ return resOK;
+ }
+ return false;
+}
+
template <typename CtrlT>
bool CvCapture_MSMF::readComplexPropery(long prop, long & val) const
{
case CV_CAP_PROP_CONVERT_RGB:
return convertFormat ? 1 : 0;
case CV_CAP_PROP_SAR_NUM:
- return captureFormat.aspectRatioNum;
+ return captureVideoFormat.aspectRatioNum;
case CV_CAP_PROP_SAR_DEN:
- return captureFormat.aspectRatioDenom;
+ return captureVideoFormat.aspectRatioDenom;
case CV_CAP_PROP_FRAME_WIDTH:
- return captureFormat.width;
+ return captureVideoFormat.width;
case CV_CAP_PROP_FRAME_HEIGHT:
- return captureFormat.height;
+ return captureVideoFormat.height;
case CV_CAP_PROP_FOURCC:
- return captureFormat.subType.Data1;
+ return captureVideoFormat.subType.Data1;
case CV_CAP_PROP_FPS:
- return captureFormat.getFramerate();
+ return captureVideoFormat.getFramerate();
case CV_CAP_PROP_FRAME_COUNT:
if (duration != 0)
- return floor(((double)duration / 1e7)* captureFormat.getFramerate() + 0.5);
+ return floor(((double)duration / 1e7)* captureVideoFormat.getFramerate() + 0.5);
else
break;
case CV_CAP_PROP_POS_FRAMES:
- return floor(((double)sampleTime / 1e7)* captureFormat.getFramerate() + 0.5);
+ return (double)nFrame;
case CV_CAP_PROP_POS_MSEC:
- return (double)sampleTime / 1e4;
+ return (double)usedVideoSampleTime / 1e4;
+ case CAP_PROP_AUDIO_POS:
+ return (double)audioSamplePos;
case CV_CAP_PROP_POS_AVI_RATIO:
if (duration != 0)
- return (double)sampleTime / duration;
+ return (double)usedVideoSampleTime / duration;
else
break;
case CV_CAP_PROP_BRIGHTNESS:
case CV_CAP_PROP_ISO_SPEED:
case CV_CAP_PROP_SETTINGS:
case CV_CAP_PROP_BUFFERSIZE:
+ case CAP_PROP_AUDIO_BASE_INDEX:
+ return audioBaseIndex;
+ case CAP_PROP_AUDIO_TOTAL_STREAMS:
+ return numberOfAudioStreams;
+ case CAP_PROP_AUDIO_TOTAL_CHANNELS:
+ return captureAudioFormat.nChannels;
+ case CAP_PROP_AUDIO_SAMPLES_PER_SECOND:
+ return captureAudioFormat.nSamplesPerSec;
+ case CAP_PROP_AUDIO_DATA_DEPTH:
+ return outputAudioFormat;
+ case CAP_PROP_AUDIO_SHIFT_NSEC:
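+ // sample timestamps are in 100-ns MFTIME units, hence the factor of 100 to report nanoseconds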
+ return (double)(audioStartOffset - videoStartOffset)*1e2;
default:
break;
}
bool CvCapture_MSMF::setProperty( int property_id, double value )
{
- MediaType newFormat = captureFormat;
+ MediaType newFormat = captureVideoFormat;
if (isOpen)
switch (property_id)
{
return false;
}
case CV_CAP_PROP_FOURCC:
- return configureOutput(newFormat, (int)cvRound(value));
+ return configureVideoOutput(newFormat, (int)cvRound(value));
case CV_CAP_PROP_FORMAT:
- return configureOutput(newFormat, (int)cvRound(value));
+ return configureVideoOutput(newFormat, (int)cvRound(value));
case CV_CAP_PROP_CONVERT_RGB:
convertFormat = (value != 0);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
case CV_CAP_PROP_SAR_NUM:
if (value > 0)
{
newFormat.aspectRatioNum = (UINT32)cvRound(value);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
}
break;
case CV_CAP_PROP_SAR_DEN:
if (value > 0)
{
newFormat.aspectRatioDenom = (UINT32)cvRound(value);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
}
break;
case CV_CAP_PROP_FRAME_WIDTH:
if (value >= 0)
{
newFormat.width = (UINT32)cvRound(value);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
}
break;
case CV_CAP_PROP_FRAME_HEIGHT:
if (value >= 0)
{
newFormat.height = (UINT32)cvRound(value);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
}
break;
case CV_CAP_PROP_FPS:
if (value >= 0)
{
newFormat.setFramerate(value);
- return configureOutput(newFormat, outputFormat);
+ return configureVideoOutput(newFormat, outputVideoFormat);
}
break;
case CV_CAP_PROP_FRAME_COUNT:
return setTime(duration * value, true);
break;
case CV_CAP_PROP_POS_FRAMES:
- if (std::fabs(captureFormat.getFramerate()) > 0)
- return setTime(value * 1e7 / captureFormat.getFramerate(), false);
+ if (std::fabs(captureVideoFormat.getFramerate()) > 0)
+ return setTime((int)value);
break;
case CV_CAP_PROP_POS_MSEC:
return setTime(value * 1e4, false);
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+// file name, number of audio channels, epsilon, video type, height, width, number of frames, number of audio samples, fps, PSNR threshold, backend
+typedef std::tuple<std::string, int, double, int, int, int, int, int, int, double, VideoCaptureAPIs> paramCombination;
+// file name, number of audio channels, number of audio samples, epsilon, backend
+typedef std::tuple<std::string, int, int, double, VideoCaptureAPIs> param;
+
+class AudioBaseTest
+{
+protected:
+ AudioBaseTest(){};
+ void getValidAudioData()
+ {
+ const double step = 3.14/22050;
+ double value = 0;
+ validAudioData.resize(expectedNumAudioCh);
+ for (int nCh = 0; nCh < expectedNumAudioCh; nCh++)
+ {
+ value = 0;
+ for(unsigned int i = 0; i < numberOfSamples; i++)
+ {
+ if (i != 0 && i % 44100 == 0)
+ value = 0;
+ validAudioData[nCh].push_back(sin(value));
+ value += step;
+ }
+ }
+ }
+ void checkAudio()
+ {
+ getValidAudioData();
+
+ ASSERT_EQ(expectedNumAudioCh, (int)audioData.size());
+ for (unsigned int nCh = 0; nCh < audioData.size(); nCh++)
+ {
+ ASSERT_EQ(numberOfSamples, audioData[nCh].size()) << "nCh=" << nCh;
+ for (unsigned int i = 0; i < numberOfSamples; i++)
+ {
+ EXPECT_NEAR(validAudioData[nCh][i], audioData[nCh][i], epsilon) << "sample index=" << i << " nCh=" << nCh;
+ }
+ }
+ }
+protected:
+ int expectedNumAudioCh;
+ unsigned int numberOfSamples;
+ double epsilon;
+ VideoCaptureAPIs backend;
+ std::string root;
+ std::string fileName;
+
+ std::vector<std::vector<double>> validAudioData;
+ std::vector<std::vector<double>> audioData;
+ std::vector<int> params;
+
+ Mat audioFrame;
+ VideoCapture cap;
+};
+
+class AudioTestFixture : public AudioBaseTest, public testing::TestWithParam <param>
+{
+public:
+ AudioTestFixture()
+ {
+ fileName = get<0>(GetParam());
+ expectedNumAudioCh = get<1>(GetParam());
+ numberOfSamples = get<2>(GetParam());
+ epsilon = get<3>(GetParam());
+ backend = get<4>(GetParam());
+ root = "audio/";
+ params = { CAP_PROP_AUDIO_STREAM, 0,
+ CAP_PROP_VIDEO_STREAM, -1,
+ CAP_PROP_AUDIO_DATA_DEPTH, CV_16S };
+ }
+
+ void doTest()
+ {
+ ASSERT_TRUE(cap.open(findDataFile(root + fileName), backend, params));
+ const int audioBaseIndex = static_cast<int>(cap.get(cv::CAP_PROP_AUDIO_BASE_INDEX));
+ const int numberOfChannels = (int)cap.get(CAP_PROP_AUDIO_TOTAL_CHANNELS);
+ ASSERT_EQ(expectedNumAudioCh, numberOfChannels);
+ double f = 0;
+ audioData.resize(numberOfChannels);
+ for (;;)
+ {
+ if (cap.grab())
+ {
+ for (int nCh = 0; nCh < numberOfChannels; nCh++)
+ {
+ ASSERT_TRUE(cap.retrieve(audioFrame, audioBaseIndex + nCh));
+ ASSERT_EQ(CV_16SC1, audioFrame.type()) << audioData[nCh].size();
+ for (int i = 0; i < audioFrame.cols; i++)
+ {
+ f = ((double) audioFrame.at<signed short>(0,i)) / (double) 32768;
+ audioData[nCh].push_back(f);
+ }
+ }
+ }
+ else { break; }
+ }
+ ASSERT_FALSE(audioData.empty());
+
+ checkAudio();
+ }
+};
+
+const param audioParams[] =
+{
+ param("test_audio.wav", 1, 132300, 0.0001, cv::CAP_MSMF),
+ param("test_mono_audio.mp3", 1, 133104, 0.12, cv::CAP_MSMF),
+ param("test_stereo_audio.mp3", 2, 133104, 0.12, cv::CAP_MSMF),
+ param("test_audio.mp4", 1, 133104, 0.15, cv::CAP_MSMF)
+};
+
+class Audio : public AudioTestFixture{};
+
+TEST_P(Audio, audio)
+{
+ if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(backend)))
+ throw SkipTestException(cv::videoio_registry::getBackendName(backend) + " backend was not found");
+
+ doTest();
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Audio, testing::ValuesIn(audioParams));
+
+class MediaTestFixture : public AudioBaseTest, public testing::TestWithParam <paramCombination>
+{
+public:
+ MediaTestFixture():
+ videoType(get<3>(GetParam())),
+ height(get<4>(GetParam())),
+ width(get<5>(GetParam())),
+ numberOfFrames(get<6>(GetParam())),
+ fps(get<8>(GetParam())),
+ psnrThreshold(get<9>(GetParam()))
+ {
+ fileName = get<0>(GetParam());
+ expectedNumAudioCh = get<1>(GetParam());
+ numberOfSamples = get<7>(GetParam());
+ epsilon = get<2>(GetParam());
+ backend = get<10>(GetParam());
+ root = "audio/";
+ params = { CAP_PROP_AUDIO_STREAM, 0,
+ CAP_PROP_VIDEO_STREAM, 0,
+ CAP_PROP_AUDIO_DATA_DEPTH, CV_16S };
+ };
+
+ void doTest()
+ {
+ ASSERT_TRUE(cap.open(findDataFile(root + fileName), backend, params));
+
+ const int audioBaseIndex = static_cast<int>(cap.get(cv::CAP_PROP_AUDIO_BASE_INDEX));
+ const int numberOfChannels = (int)cap.get(CAP_PROP_AUDIO_TOTAL_CHANNELS);
+ ASSERT_EQ(expectedNumAudioCh, numberOfChannels);
+
+ const int samplePerSecond = (int)cap.get(CAP_PROP_AUDIO_SAMPLES_PER_SECOND);
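+ // the MSMF backend requests 44100 Hz output audio in configureAudioOutput(), so every test file is expected to report this rate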
+ ASSERT_EQ(44100, samplePerSecond);
+ int samplesPerFrame = (int)(1./fps*samplePerSecond);
+ int audioSamplesTolerance = samplesPerFrame / 2;
+
+ double audio0_timestamp = 0;
+
+ Mat videoFrame;
+ Mat img(height, width, videoType);
+ audioData.resize(numberOfChannels);
+ for (int frame = 0; frame < numberOfFrames; frame++)
+ {
+ SCOPED_TRACE(cv::format("frame=%d", frame));
+
+ ASSERT_TRUE(cap.grab());
+
+ if (frame == 0)
+ {
+ double audio_shift = cap.get(CAP_PROP_AUDIO_SHIFT_NSEC);
+ double video0_timestamp = cap.get(CAP_PROP_POS_MSEC) * 1e-3;
+ audio0_timestamp = video0_timestamp + audio_shift * 1e-9;
+ std::cout << "video0 timestamp: " << video0_timestamp << " audio0 timestamp: " << audio0_timestamp << " (audio shift nanoseconds: " << audio_shift << " , seconds: " << audio_shift * 1e-9 << ")" << std::endl;
+ }
+
+ ASSERT_TRUE(cap.retrieve(videoFrame));
+ if (epsilon >= 0)
+ {
+ generateFrame(frame, numberOfFrames, img);
+ ASSERT_EQ(img.size, videoFrame.size);
+ double psnr = cvtest::PSNR(img, videoFrame);
+ EXPECT_GE(psnr, psnrThreshold);
+ }
+
+ int audioFrameCols = 0;
+ for (int nCh = 0; nCh < numberOfChannels; nCh++)
+ {
+ ASSERT_TRUE(cap.retrieve(audioFrame, audioBaseIndex+nCh));
+ if (audioFrame.empty())
+ continue;
+ ASSERT_EQ(CV_16SC1, audioFrame.type());
+ if (nCh == 0)
+ audioFrameCols = audioFrame.cols;
+ else
+ ASSERT_EQ(audioFrameCols, audioFrame.cols) << "channel "<< nCh;
+ for (int i = 0; i < audioFrame.cols; i++)
+ {
+ double f = audioFrame.at<signed short>(0,i) / 32768.0;
+ audioData[nCh].push_back(f);
+ }
+ }
+
+ if (frame < 5 || frame >= numberOfFrames-5)
+ std::cout << "frame=" << frame << ": audioFrameSize=" << audioFrameCols << " videoTimestamp=" << cap.get(CAP_PROP_POS_MSEC) << " ms" << std::endl;
+ else if (frame == 6)
+ std::cout << "frame..." << std::endl;
+
+ if (audioFrameCols == 0)
+ continue;
+ if (frame != 0 && frame != numberOfFrames-1)
+ {
+ // validate audio position
+ EXPECT_NEAR(
+ cap.get(CAP_PROP_AUDIO_POS) / samplePerSecond + audio0_timestamp,
+ cap.get(CAP_PROP_POS_MSEC) * 1e-3,
+ (1.0 / fps) * 0.3)
+ << "CAP_PROP_AUDIO_POS=" << cap.get(CAP_PROP_AUDIO_POS) << " CAP_PROP_POS_MSEC=" << cap.get(CAP_PROP_POS_MSEC);
+ }
+ if (frame != 0 && frame != numberOfFrames-1 && audioData[0].size() != (size_t)numberOfSamples)
+ {
+ // validate audio frame size
+ EXPECT_NEAR(audioFrame.cols, samplesPerFrame, audioSamplesTolerance);
+ }
+ }
+ ASSERT_FALSE(cap.grab());
+ ASSERT_FALSE(audioData.empty());
+
+ std::cout << "Total audio samples=" << audioData[0].size() << std::endl;
+
+ if (epsilon >= 0)
+ checkAudio();
+ }
+protected:
+ const int videoType;
+ const int height;
+ const int width;
+ const int numberOfFrames;
+ const int fps;
+ const double psnrThreshold;
+};
+
+const paramCombination mediaParams[] =
+{
+ paramCombination("test_audio.mp4", 1, 0.15, CV_8UC3, 240, 320, 90, 131819, 30, 30., cv::CAP_MSMF)
+#if 0
+ // https://filesamples.com/samples/video/mp4/sample_960x400_ocean_with_audio.mp4
+ , paramCombination("sample_960x400_ocean_with_audio.mp4", 2, -1/*eplsilon*/, CV_8UC3, 400, 960, 1116, 2056588, 30, 30., cv::CAP_MSMF)
+#endif
+};
+
+class Media : public MediaTestFixture{};
+
+TEST_P(Media, audio)
+{
+ if (!videoio_registry::hasBackend(cv::VideoCaptureAPIs(backend)))
+ throw SkipTestException(cv::videoio_registry::getBackendName(backend) + " backend was not found");
+
+ doTest();
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, Media, testing::ValuesIn(mediaParams));
+
+}} //namespace