NVIDIA
NVIDIA 硬解码学习2
做项目学习笔记
引言
在《NVIDIA 硬解码学习1》中大概了解了硬解码的几个组成部分。
下载了最新的【Video_Codec_SDK_8.2.16】进行了简单的学习。
SDK Samples
最简单的硬解码器实现 AppDec
int main(int argc, char **argv)
{
char szInFilePath[256] = "", szOutFilePath[256] = "";
bool bOutPlanar = false;
int iGpu = 0;
Rect cropRect = {};
Dim resizeDim = {};
try
{
// 按命令行参数读取输入文件等,例如 test.h265
ParsecommandLine(argc, argv, szInFilePath, szOutFilePath, bOutPlanar, iGpu, cropRect, resizeDim);
CheckInputFile(szInFilePath);
if (!*szOutFilePath) {
sprintf(szOutFilePath, bOutPlanar ? "out.planar" : "out.native");
}
// 初始化cuda环境
ck(cuInit(0));
int nGpu = 0;
ck(cuDeviceGetCount(&nGpu));
if (iGpu < 0 || iGpu >= nGpu) {
std::cout << "GPU ordinal out of range. Should be within [" << 0 << ", " << nGpu - 1 << "]" << std::endl;
return 1;
}
CUdevice cuDevice = 0;
ck(cuDeviceGet(&cuDevice, iGpu));
char szDeviceName[80];
ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice));
std::cout << "GPU in use: " << szDeviceName << std::endl;
CUcontext cuContext = NULL;
// 设置CUDA上下文!!!
ck(cuCtxCreate(&cuContext, 0, cuDevice));
std::cout << "Decode with demuxing." << std::endl;
/// 进行解码(下面讲解)
DecodeMediaFile(cuContext, szInFilePath, szOutFilePath, bOutPlanar, cropRect, resizeDim);
}
catch (const std::exception& ex)
{
std::cout << ex.what();
exit(1);
}
return 0;
}
- 具体硬解码流程
DecodeMediaFile
函数
/**
 * Demuxes szInFilePath, decodes it with NvDecoder and writes every returned
 * frame to szOutFilePath as raw bytes.
 *
 * @param cuContext     CUDA context handed to the decoder.
 * @param szInFilePath  Path of the media file to demux and decode.
 * @param szOutFilePath Path of the raw output file (opened in binary mode).
 * @param bOutPlanar    When true, each frame is passed through ConvertToPlanar
 *                      before it is written.
 * @param cropRect      Crop rectangle forwarded to the decoder.
 * @param resizeDim     Resize dimensions forwarded to the decoder.
 * @throws std::invalid_argument if the output file cannot be opened.
 */
void DecodeMediaFile(CUcontext cuContext, const char *szInFilePath, const char *szOutFilePath, bool bOutPlanar,const Rect &cropRect, const Dim &resizeDim)
{
    // Output file.
    std::ofstream fpOut(szOutFilePath, std::ios::out | std::ios::binary);
    if (!fpOut)
    {
        std::ostringstream err;
        err << "Unable to open output file: " << szOutFilePath << std::endl;
        throw std::invalid_argument(err.str());
    }

    // Parse the input file; FFmpegDemuxer is a class wrapping FFmpeg's demuxing.
    FFmpegDemuxer demuxer(szInFilePath);
    // Create the hardware decoder (its constructor registers three parser callbacks).
    NvDecoder dec(cuContext /* CUDA context */, demuxer.GetWidth(), demuxer.GetHeight(), false,
                  FFmpeg2NvCodecId(demuxer.GetVideoCodec()) /* matching NVDEC codec id */,
                  NULL, false, false, &cropRect, &resizeDim);

    int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
    uint8_t *pVideo = NULL, **ppFrame = NULL;
    do {
        // Demux: pVideo receives the bitstream data of the next packet and
        // nVideoBytes its size in bytes. A size of 0 ends the loop after one
        // last Decode call with that empty packet.
        demuxer.Demux(&pVideo, &nVideoBytes);
        // Feed the packet to the decoder.
        dec.Decode(pVideo, nVideoBytes, &ppFrame, &nFrameReturned);
        if (!nFrame && nFrameReturned)
            LOG(INFO) << dec.GetVideoInfo();

        // Hardware decoding is asynchronous (per the original notes):
        // nFrameReturned is the number of frames this call produced.
        for (int i = 0; i < nFrameReturned; i++) {
            if (bOutPlanar) {
                // Convert the frame layout in place.
                ConvertToPlanar(ppFrame[i], dec.GetWidth(), dec.GetHeight(), dec.GetBitDepth());
            }
            // Append the frame to the output file.
            fpOut.write(reinterpret_cast<char*>(ppFrame[i]), dec.GetFrameSize());
        }
        nFrame += nFrameReturned;
    } while (nVideoBytes);

    std::cout << "Total frame decoded: " << nFrame << std::endl
              << "Saved in file " << szOutFilePath << " in "
              << (dec.GetBitDepth() == 8 ? (bOutPlanar ? "iyuv" : "nv12") : (bOutPlanar ? "yuv420p16" : "p016"))
              << " format" << std::endl;
    fpOut.close();
}
- 看一下初始化解码器的代码!!!
/**
 * Constructs the decoder wrapper: stores the configuration, creates the
 * context lock and creates the video parser with this object's callbacks.
 *
 * @param cuContext           CUDA context used for the context lock and stored in m_cuContext.
 * @param nWidth              Not used in this constructor body.
 * @param nHeight             Not used in this constructor body.
 * @param bUseDeviceFrame     Stored in m_bUseDeviceFrame.
 * @param eCodec              Codec passed to the parser as CodecType.
 * @param pMutex              Optional mutex held while the parser is created; may be null.
 * @param bLowLatency         Selects a max display delay of 0 (true) or 1 (false).
 * @param bDeviceFramePitched Stored in m_bDeviceFramePitched.
 * @param pCropRect           Optional crop rectangle, copied when non-null.
 * @param pResizeDim          Optional resize dimensions, copied when non-null.
 * @param maxWidth            Stored in m_nMaxWidth.
 * @param maxHeight           Stored in m_nMaxHeight.
 */
NvDecoder::NvDecoder(CUcontext cuContext, int nWidth, int nHeight, bool bUseDeviceFrame, cudaVideoCodec eCodec, std::mutex *pMutex,
    bool bLowLatency, bool bDeviceFramePitched, const Rect *pCropRect, const Dim *pResizeDim, int maxWidth, int maxHeight) :
    m_cuContext(cuContext), m_bUseDeviceFrame(bUseDeviceFrame), m_eCodec(eCodec), m_pMutex(pMutex), m_bDeviceFramePitched(bDeviceFramePitched),
    m_nMaxWidth (maxWidth), m_nMaxHeight(maxHeight)
{
    if (pCropRect) m_cropRect = *pCropRect;
    if (pResizeDim) m_resizeDim = *pResizeDim;

    NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));

    // CUVIDPARSERPARAMS describes the video parser to create. Its main job
    // here is to register the three callbacks that process the parsed data.
    CUVIDPARSERPARAMS videoParserParameters = {};
    videoParserParameters.CodecType = eCodec;
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
    videoParserParameters.pUserData = this;
    // The three callbacks.
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc; // Called when decoding of a sequence starts
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;    // Called when a picture is ready to be decoded
    videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;  // Called when decoded data is available

    {
        // Hold the caller-supplied mutex (if any) only while the parser is
        // created. The scoped lock releases it even if NVDEC_API_CALL throws;
        // manual lock()/unlock() would leave the mutex locked in that case.
        std::unique_lock<std::mutex> parserLock;
        if (m_pMutex) parserLock = std::unique_lock<std::mutex>(*m_pMutex);
        NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
    }
}
- 看一下实际进行解码的函数
Decode
/**
 * Feeds one packet of bitstream data to the video parser and reports the
 * frames that became available during this call.
 *
 * @param pData           Packet payload; null (or nSize == 0) marks end of stream.
 * @param nSize           Payload size in bytes.
 * @param pppFrame        Optional out: set to the array of returned frame pointers
 *                        when at least one frame was produced.
 * @param pnFrameReturned Optional out: number of frames produced by this call.
 * @param flags           Extra packet flags; CUVID_PKT_TIMESTAMP is always added.
 * @param ppTimestamp     Optional out: set to the timestamp array when at least
 *                        one frame was produced.
 * @param timestamp       Timestamp attached to the packet.
 * @param stream          Stream stored in m_cuvidStream for the duration of the parse call.
 * @return true once the packet has been handed to the parser; false only if
 *         control returns from NVDEC_THROW_ERROR when the parser is missing.
 */
bool NvDecoder::Decode(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags, int64_t **ppTimestamp, int64_t timestamp, CUstream stream)
{
    if (!m_hParser)
    {
        NVDEC_THROW_ERROR("Parser not initialized.", CUDA_ERROR_NOT_INITIALIZED);
        return false;
    }

    m_nDecodedFrame = 0;

    // Wrap the demuxed data in a CUVIDSOURCEDATAPACKET and hand it to
    // cuvidParseVideoData.
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = pData;
    packet.payload_size = nSize;
    packet.flags = flags | CUVID_PKT_TIMESTAMP;
    packet.timestamp = timestamp;
    if (!pData || nSize == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }

    m_cuvidStream = stream;
    {
        // Parsing runs under the caller-supplied mutex (if any). The scoped
        // lock releases it even when NVDEC_API_CALL throws; manual
        // lock()/unlock() would leave the mutex locked in that case.
        std::unique_lock<std::mutex> parseLock;
        if (m_pMutex) parseLock = std::unique_lock<std::mutex>(*m_pMutex);
        printf("------> cuvidParseVideoData\n");
        // cuvidParseVideoData keeps pushing the data into the decoder.
        NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
        printf("------> cuvidParseVideoData done!\n");
    }
    m_cuvidStream = 0;

    // Did this call produce any frames?
    if (m_nDecodedFrame > 0)
    {
        printf("m_nDecodedFrame:%d\n", m_nDecodedFrame);
        if (pppFrame)
        {
            m_vpFrameRet.clear(); // drop the previously returned frame list
            std::lock_guard<std::mutex> lock(m_mtxVPFrame);
            // Copy the first m_nDecodedFrame entries of m_vpFrame into m_vpFrameRet.
            m_vpFrameRet.insert(m_vpFrameRet.begin(), m_vpFrame.begin(), m_vpFrame.begin() + m_nDecodedFrame);
            *pppFrame = &m_vpFrameRet[0];
        }
        if (ppTimestamp)
        {
            *ppTimestamp = &m_vTimestamp[0];
        }
    }
    if (pnFrameReturned)
    {
        *pnFrameReturned = m_nDecodedFrame;
    }
    return true;
}
- 因为我的目的是获得解码后的显存数据,所以重点看了第三个回调函数。
HandlePictureDisplay
/**
 * Display-callback body: maps the decoded picture, copies it into a frame
 * buffer owned by this decoder (allocating one if needed), records its
 * timestamp and unmaps the picture.
 *
 * @param pDispInfo Display info for the picture to output (picture index,
 *                  field flags and timestamp are read from it).
 * @return Always 1.
 */
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = m_cuvidStream;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    // Map the video frame: dpSrcFrame receives the device-memory pointer of
    // the decoded data and nSrcPitch its row pitch.
    NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame,
        &nSrcPitch, &videoProcessingParameters));

    // Query the decode status only to log pictures that decoded with errors.
    CUVIDGETDECODESTATUS DecodeStatus;
    memset(&DecodeStatus, 0, sizeof(DecodeStatus));
    CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
    if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed))
    {
        printf("Decode error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
    }
    printf("HandlePictureDisplay::m_nDecodedFrame:%d\n", m_nDecodedFrame);

    uint8_t *pDecodedFrame = nullptr;
    {
        // lock_guard releases m_mtxVPFrame automatically at the end of this scope.
        std::lock_guard<std::mutex> lock(m_mtxVPFrame);
        // One more frame decoded: bump m_nDecodedFrame and allocate a new
        // buffer when the pool does not have enough of them yet.
        if ((unsigned)++m_nDecodedFrame > m_vpFrame.size())
        {
            printf("HandlePictureDisplay::m_nDecodedFrame:%d\n", m_nDecodedFrame);
            // Not enough frames in stock
            m_nFrameAlloc++;
            uint8_t *pFrame = NULL;
            if (m_bUseDeviceFrame) // set at construction: keep decoded frames in device memory instead of copying them to host memory
            {
                CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
                if (m_bDeviceFramePitched)
                {
                    CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, m_nWidth * (m_nBitDepthMinus8 ? 2 : 1), m_nHeight * 3 / 2, 16));
                }
                else
                {
                    CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
                }
                CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
            }
            else // host (CPU) memory
            {
                pFrame = new uint8_t[GetFrameSize()]; // allocate the frame buffer
            }
            m_vpFrame.push_back(pFrame);
        }
        pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1]; // buffer for the frame just counted
    }

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); // make the context current for the copies
    printf("cuCtxPushCurrent!\n");
    CUDA_MEMCPY2D m = { 0 };
    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    m.srcDevice = dpSrcFrame;
    m.srcPitch = nSrcPitch;
    m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
    // The same buffer pointer is stored in both destination fields;
    // dstMemoryType decides which one the copy uses.
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
    m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : m_nWidth * (m_nBitDepthMinus8 ? 2 : 1);
    m.WidthInBytes = m_nWidth * (m_nBitDepthMinus8 ? 2 : 1);
    m.Height = m_nHeight;
    // First plane: m_nHeight rows.
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));

    // Second plane: starts m_nSurfaceHeight rows into the mapped surface, is
    // half as tall, and is packed right after the first plane in the output.
    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight);
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nHeight);
    m.Height = m_nHeight / 2;
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
    CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
    // Copy finished: pDecodedFrame now holds the frame (NV12 per the original
    // notes). An NV12-to-RGBA conversion could be inserted here.
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); // copies done, pop the context

    // Keep one timestamp slot per pooled frame buffer.
    if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
        m_vTimestamp.resize(m_vpFrame.size());
    }
    m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;

    NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
    return 1;
}
后记
- 这个硬解码还是比较简单.主要是封装得比较好了.看起来比较容易.
- 后续还要再看一下其他几个解码器的使用示例.
相关阅读
C#高级特性:动态绑定 动态绑定 动态绑定将类型绑定(类型解析、成员和操作过程)从编译时推迟到了运行时。在编译时,如果程序员知
搭建minecraft服务器,用于python学习 参考链接: https://www.instructables.com/id/Python-coding-for-Minecraft/ https://blog.c
根据Andrew Ng的Lecture notes, 我重新整理了梯度下降(including LMS/BGDSGD)的相关知识。 首先,引入一个例子, 假设我们现在有一个数
十七个常见介词 to 到 of 关于 in 在里面 for 为了 on 在上面 with 和 at 在 by 经过 from 从 up 向上的 about 关于 into 进入
一、基本概念决策树的定义:首先,决策树是一种有监督的分类算法——即给定X,Y值,构建X,Y的映射关系。不同于线性回归等是多项式,决策树是