diff --git a/include/ccap_def.h b/include/ccap_def.h index 107cad3..8f0288d 100644 --- a/include/ccap_def.h +++ b/include/ccap_def.h @@ -35,7 +35,7 @@ namespace ccap { enum PixelFormatConstants : uint32_t { /// `kPixelFormatRGBBit` indicates that the pixel format is RGB or RGBA. kPixelFormatRGBBit = 1 << 3, - /// `kPixelFormatRGBBit` indicates that the pixel format is BGR or BGRA. + /// `kPixelFormatBGRBit` indicates that the pixel format is BGR or BGRA. kPixelFormatBGRBit = 1 << 4, /// Color Bit Mask @@ -82,7 +82,6 @@ enum class PixelFormat : uint32_t { * In software design, you can implement a toggle option to allow users to choose whether * the received Frame is FullRange or VideoRange based on what they observe. * @note This format is also known by other names, such as YUV420P or IYUV. - * @refitem #NV12 */ I420 = 1 << 2 | kPixelFormatYUVColorBit, @@ -191,10 +190,14 @@ enum class PropertyName { /** * @brief The output pixel format of ccap. Can be different from PixelFormatInternal. - * @note If PixelFormatInternal is RGB(A), PixelFormatOutput cannot be set to a YUV format. + * @note If PixelFormatInternal is RGB(A), PixelFormatOutput cannot be set to a YUV format (RGB->YUV conversion is not supported). + * If PixelFormatInternal is YUV and PixelFormatOutput is a different YUV subtype, conversion requires libyuv; + * without it the frame will keep the camera format and no conversion is performed. * If PixelFormatInternal is YUV and PixelFormatOutput is RGB(A), BT.601 will be used for conversion. - * For other cases, there are no issues. - * If PixelFormatInternal and PixelFormatOutput are the same format, data conversion will be skipped and the original data will be used directly. + * If PixelFormatOutput is set to PixelFormat::Unknown (or not set), the camera's native format is used as-is + * and no conversion is performed. + * If PixelFormatInternal and PixelFormatOutput are the same format AND the camera natively supports + * PixelFormatInternal, data conversion will be skipped and the original data will be used directly. * In general, setting both PixelFormatInternal and PixelFormatOutput to YUV formats can achieve better performance. */ PixelFormatOutput = 0x30002, diff --git a/src/ccap_convert_frame.cpp b/src/ccap_convert_frame.cpp index c9d9f78..73c8a33 100644 --- a/src/ccap_convert_frame.cpp +++ b/src/ccap_convert_frame.cpp @@ -10,6 +10,7 @@ #include "ccap_convert.h" #include "ccap_imp.h" +#include "ccap_utils.h" #include #include @@ -229,8 +230,23 @@ inline bool inplaceConvertFrameImp(VideoFrame* frame, PixelFormat toFormat, bool return inplaceConvertFrameYUV2YUV(frame, toFormat, verticalFlip); #endif + if (isInputYUV && isOutputYUV) { + static bool sLoggedYuv2YuvUnsupported = false; + if (!sLoggedYuv2YuvUnsupported) { + CCAP_LOG_W("ccap: YUV to different YUV subtype conversion is not supported without libyuv, skipping conversion\n"); + sLoggedYuv2YuvUnsupported = true; + } + return false; + } + if (isInputYUV) // yuv -> BGR return inplaceConvertFrameYUV2RGBColor(frame, toFormat, verticalFlip); + + static bool sLoggedRgbToYuvUnsupported = false; + if (!sLoggedRgbToYuvUnsupported) { + CCAP_LOG_W("ccap: RGB to YUV conversion is not supported, skipping conversion\n"); + sLoggedRgbToYuvUnsupported = true; + } return false; // no rgb -> yuv } diff --git a/src/ccap_file_reader_apple.mm b/src/ccap_file_reader_apple.mm index 6e03f34..4266492 100644 --- a/src/ccap_file_reader_apple.mm +++ b/src/ccap_file_reader_apple.mm @@ -382,10 +382,11 @@ - (void)processFrame:(CMSampleBufferRef)sampleBuffer { // Check if conversion or flip is needed auto& prop = _provider->getFrameProperty(); - bool isOutputYUV = (newFrame->pixelFormat & kPixelFormatYUVColorBit) != 0; + PixelFormat effectiveOutputFormat = (prop.outputPixelFormat == PixelFormat::Unknown) ? newFrame->pixelFormat : prop.outputPixelFormat; + bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0; FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : _provider->frameOrientation(); bool shouldFlip = !isOutputYUV && (inputOrientation != targetOrientation); - bool shouldConvert = newFrame->pixelFormat != prop.outputPixelFormat; + bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat; newFrame->orientation = targetOrientation; @@ -397,8 +398,11 @@ - (void)processFrame:(CMSampleBufferRef)sampleBuffer { newFrame->allocator = f ? f() : std::make_shared(); } - zeroCopy = !inplaceConvertFrame(newFrame.get(), prop.outputPixelFormat, shouldFlip); - CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly); + zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip); + if (!zeroCopy) { + CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly); + newFrame->nativeHandle = nullptr; + } } if (zeroCopy) { diff --git a/src/ccap_file_reader_windows.cpp b/src/ccap_file_reader_windows.cpp index 7424d82..78c28a5 100644 --- a/src/ccap_file_reader_windows.cpp +++ b/src/ccap_file_reader_windows.cpp @@ -448,10 +448,11 @@ void FileReaderWindows::readLoop() { // Check if conversion or flip is needed auto& prop = m_provider->getFrameProperty(); - bool isOutputYUV = (prop.outputPixelFormat & kPixelFormatYUVColorBit) != 0; + PixelFormat effectiveOutputFormat = (prop.outputPixelFormat == PixelFormat::Unknown) ? newFrame->pixelFormat : prop.outputPixelFormat; + bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0; FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : m_provider->frameOrientation(); bool shouldFlip = !isOutputYUV && (inputOrientation != targetOrientation); - bool shouldConvert = newFrame->pixelFormat != prop.outputPixelFormat; + bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat; newFrame->orientation = targetOrientation; @@ -462,7 +463,7 @@ void FileReaderWindows::readLoop() { auto&& f = m_provider->getAllocatorFactory(); newFrame->allocator = f ? f() : std::make_shared(); } - inplaceConvertFrame(newFrame.get(), prop.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip); } newFrame->frameIndex = m_currentFrameIndex; diff --git a/src/ccap_imp_apple.mm b/src/ccap_imp_apple.mm index a77dc8d..e0efdaa 100644 --- a/src/ccap_imp_apple.mm +++ b/src/ccap_imp_apple.mm @@ -873,6 +873,9 @@ - (void)captureOutput:(AVCaptureOutput*)output CMTime timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer); auto internalFormat = _provider->getFrameProperty().cameraPixelFormat; auto outputFormat = _provider->getFrameProperty().outputPixelFormat; + if (outputFormat == PixelFormat::Unknown) { + outputFormat = internalFormat; + } newFrame->timestamp = (uint64_t)(CMTimeGetSeconds(timestamp) * 1e9); newFrame->width = (uint32_t)CVPixelBufferGetWidth(imageBuffer); @@ -905,6 +908,8 @@ - (void)captureOutput:(AVCaptureOutput*)output } /// iOS/macOS does not support i420, and we do not intend to support nv12 to i420 conversion here. + /// When both internal and output formats are YUV, zeroCopy is used regardless of subtype differences + /// (e.g., NV12 vs I420). The frame will carry the actual camera format, not the requested output format. bool zeroCopy = ((internalFormat & kPixelFormatYUVColorBit) && (outputFormat & kPixelFormatYUVColorBit)) || (internalFormat == outputFormat && _provider->frameOrientation() == kDefaultFrameOrientation); @@ -924,7 +929,10 @@ - (void)captureOutput:(AVCaptureOutput*)output zeroCopy = !inplaceConvertFrame(newFrame.get(), outputFormat, (int)(newFrame->orientation != kDefaultFrameOrientation)); - CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly); + if (!zeroCopy) { + CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly); + newFrame->nativeHandle = nullptr; + } if (verboseLogEnabled()) { #ifdef DEBUG diff --git a/src/ccap_imp_linux.cpp b/src/ccap_imp_linux.cpp index 96713b8..4727b81 100644 --- a/src/ccap_imp_linux.cpp +++ b/src/ccap_imp_linux.cpp @@ -549,7 +549,8 @@ bool ProviderV4L2::readFrame() { // Check input/output format types and orientations bool isInputYUV = (frame->pixelFormat & kPixelFormatYUVColorBit) != 0; - bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit) != 0; + PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? frame->pixelFormat : m_frameProp.outputPixelFormat; + bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0; auto inputOrientation = FrameOrientation::TopToBottom; // V4L2 always provides TopToBottom // Set output orientation based on format type @@ -557,8 +558,7 @@ bool ProviderV4L2::readFrame() { // Check if we need conversion or flipping bool shouldFlip = frame->orientation != inputOrientation && !isOutputYUV; - bool shouldConvert = (m_frameProp.outputPixelFormat != PixelFormat::Unknown && - m_frameProp.outputPixelFormat != frame->pixelFormat); + bool shouldConvert = (effectiveOutputFormat != frame->pixelFormat); bool zeroCopy = !shouldConvert && !shouldFlip; uint8_t* bufferData = static_cast(m_buffers[buf.index].start); @@ -614,7 +614,7 @@ bool ProviderV4L2::readFrame() { std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); - zeroCopy = !inplaceConvertFrame(frame.get(), m_frameProp.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(frame.get(), effectiveOutputFormat, shouldFlip); double durInMs = (std::chrono::steady_clock::now() - startTime).count() / 1.e6; static double s_allCostTime = 0; @@ -630,10 +630,10 @@ bool ProviderV4L2::readFrame() { CCAP_LOG_V( "ccap: inplaceConvertFrame requested pixel format: %s, actual pixel format: %s, flip: %s, cost time %s: (cur %g ms, avg %g ms)\n", - pixelFormatToString(m_frameProp.outputPixelFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(), + pixelFormatToString(effectiveOutputFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(), shouldFlip ? "YES" : "NO", mode, durInMs, s_allCostTime / s_frames); } else { - zeroCopy = !inplaceConvertFrame(frame.get(), m_frameProp.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(frame.get(), effectiveOutputFormat, shouldFlip); } } diff --git a/src/ccap_imp_linux.h b/src/ccap_imp_linux.h index ec548de..8b4cb9f 100644 --- a/src/ccap_imp_linux.h +++ b/src/ccap_imp_linux.h @@ -101,12 +101,12 @@ class ProviderV4L2 : public ProviderImp { bool m_isStreaming = false; // V4L2 device capabilities - struct v4l2_capability m_caps{}; + struct v4l2_capability m_caps {}; std::vector m_supportedFormats; std::vector m_supportedResolutions; // Current format - struct v4l2_format m_currentFormat{}; + struct v4l2_format m_currentFormat {}; // Buffer management std::vector m_buffers; diff --git a/src/ccap_imp_windows.cpp b/src/ccap_imp_windows.cpp index 07e23b1..ab4efe7 100644 --- a/src/ccap_imp_windows.cpp +++ b/src/ccap_imp_windows.cpp @@ -843,16 +843,17 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia uint32_t bufferLen = mediaSample->GetActualDataLength(); bool isInputYUV = (m_frameProp.cameraPixelFormat & kPixelFormatYUVColorBit); - bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit); + PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? m_frameProp.cameraPixelFormat : m_frameProp.outputPixelFormat; + bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit); newFrame->pixelFormat = m_frameProp.cameraPixelFormat; newFrame->width = m_frameProp.width; newFrame->height = m_frameProp.height; newFrame->orientation = isOutputYUV ? FrameOrientation::TopToBottom : m_frameOrientation; - newFrame->nativeHandle = mediaSample; + newFrame->nativeHandle = nullptr; bool shouldFlip = newFrame->orientation != m_inputOrientation && !isOutputYUV; - bool shouldConvert = m_frameProp.cameraPixelFormat != m_frameProp.outputPixelFormat; + bool shouldConvert = m_frameProp.cameraPixelFormat != effectiveOutputFormat; bool zeroCopy = !shouldConvert && !shouldFlip; if (isInputYUV) { @@ -920,7 +921,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); - zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip); double durInMs = (std::chrono::steady_clock::now() - startTime).count() / 1.e6; static double s_allCostTime = 0; @@ -936,10 +937,10 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia CCAP_LOG_V( "ccap: inplaceConvertFrame requested pixel format: %s, actual pixel format: %s, flip: %s, cost time %s: (cur %g ms, avg %g ms)\n", - pixelFormatToString(m_frameProp.outputPixelFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(), + pixelFormatToString(effectiveOutputFormat).data(), pixelFormatToString(m_frameProp.cameraPixelFormat).data(), shouldFlip ? "YES" : "NO", mode, durInMs, s_allCostTime / s_frames); } else { - zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip); } newFrame->sizeInBytes = newFrame->stride[0] * newFrame->height + (newFrame->stride[1] + newFrame->stride[2]) * newFrame->height / 2; @@ -949,6 +950,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::SampleCB(double sampleTime, IMedia // Conversion may fail. If conversion fails, fall back to zero-copy mode. // In this case, the returned format is the original camera input format. newFrame->sizeInBytes = bufferLen; + newFrame->nativeHandle = mediaSample; mediaSample->AddRef(); // Ensure data lifecycle auto manager = std::make_shared([newFrame, mediaSample]() mutable { @@ -1001,7 +1003,7 @@ HRESULT STDMETHODCALLTYPE ProviderDirectShow::BufferCB(double SampleTime, BYTE* return S_OK; } -HRESULT STDMETHODCALLTYPE ProviderDirectShow::QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR * __RPC_FAR * ppvObject) { +HRESULT STDMETHODCALLTYPE ProviderDirectShow::QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) { static constexpr const IID IID_ISampleGrabberCB = { 0x0579154A, 0x2B53, 0x4994, { 0xB0, 0xD0, 0xE7, 0x73, 0x14, 0x8E, 0xFF, 0x85 } }; if (riid == IID_IUnknown) { @@ -1166,7 +1168,7 @@ void ProviderDirectShow::close() { bool ProviderDirectShow::start() { if (!m_isOpened) return false; - // File mode + // File mode #ifdef CCAP_ENABLE_FILE_PLAYBACK if (m_isFileMode && m_fileReader) { return m_fileReader->start(); diff --git a/src/ccap_imp_windows.h b/src/ccap_imp_windows.h index fe45ab6..6e4dd02 100644 --- a/src/ccap_imp_windows.h +++ b/src/ccap_imp_windows.h @@ -93,7 +93,7 @@ class ProviderDirectShow : public ProviderImp, public ISampleGrabberCB { inline FrameOrientation frameOrientation() const { return m_frameOrientation; } private: - HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR * __RPC_FAR * ppvObject) override; + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) override; ULONG STDMETHODCALLTYPE AddRef(void) override; ULONG STDMETHODCALLTYPE Release(void) override; diff --git a/src/ccap_imp_windows_msmf.cpp b/src/ccap_imp_windows_msmf.cpp index cec5ab8..e20abdf 100644 --- a/src/ccap_imp_windows_msmf.cpp +++ b/src/ccap_imp_windows_msmf.cpp @@ -747,7 +747,8 @@ void ProviderMSMF::readLoop() { newFrame->height = m_activeHeight; newFrame->nativeHandle = nullptr; - bool isOutputYUV = (m_frameProp.outputPixelFormat & kPixelFormatYUVColorBit) != 0; + PixelFormat effectiveOutputFormat = (m_frameProp.outputPixelFormat == PixelFormat::Unknown) ? m_activePixelFormat : m_frameProp.outputPixelFormat; + bool isOutputYUV = (effectiveOutputFormat & kPixelFormatYUVColorBit) != 0; FrameOrientation targetOrientation = isOutputYUV ? FrameOrientation::TopToBottom : m_frameOrientation; newFrame->orientation = targetOrientation; @@ -801,7 +802,7 @@ void ProviderMSMF::readLoop() { } bool shouldFlip = !isOutputYUV && targetOrientation != m_inputOrientation; - bool shouldConvert = newFrame->pixelFormat != m_frameProp.outputPixelFormat; + bool shouldConvert = newFrame->pixelFormat != effectiveOutputFormat; bool zeroCopy = !shouldConvert && !shouldFlip; if (!zeroCopy) { @@ -809,7 +810,7 @@ void ProviderMSMF::readLoop() { newFrame->allocator = m_allocatorFactory ? m_allocatorFactory() : std::make_shared(); } - zeroCopy = !inplaceConvertFrame(newFrame.get(), m_frameProp.outputPixelFormat, shouldFlip); + zeroCopy = !inplaceConvertFrame(newFrame.get(), effectiveOutputFormat, shouldFlip); newFrame->sizeInBytes = newFrame->stride[0] * newFrame->height + (newFrame->stride[1] + newFrame->stride[2]) * newFrame->height / 2; }