MythTV master
mythnvdecinterop.cpp
Go to the documentation of this file.
1// MythTV
2#include "libmythbase/mythconfig.h"
7
8// Std
9#include <chrono>
10#include <thread>
11
12extern "C" {
13#include "libavutil/log.h"
14#define FFNV_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_ERROR, msg, __VA_ARGS__)
15#define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_DEBUG, msg, __VA_ARGS__)
16#include <ffnvcodec/dynlink_loader.h>
17}
18
19#define LOC QString("NVDECInterop: ")
20
/*! \brief Call a CUDA driver function through a function table and log any failure.
 *
 * \param CUDA_FUNCS Pointer to the loaded function table; the call is made as
 *                   (CUDA_FUNCS)->CUDA_CALL.
 * \param CUDA_CALL  The call expression, including its argument list.
 *
 * The macro expands to a braced block rather than a single statement, so call
 * sites are written without a trailing semicolon. The result is only logged;
 * it is not returned to the caller.
 *
 * The description pointer is initialised to nullptr so that it is never read
 * uninitialised should cuGetErrorString fail without writing to it.
*/
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL) \
{ \
    CUresult res = (CUDA_FUNCS)->CUDA_CALL; \
    if (res != CUDA_SUCCESS) { \
        const char * desc = nullptr; \
        (CUDA_FUNCS)->cuGetErrorString(res, &desc); \
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error %1 (%2)").arg(res).arg(desc)); \
    } \
}
31
33 : MythOpenGLInterop(Context, GL_NVDEC, Player)
34{
36}
37
39{
40 m_referenceFrames.clear();
43}
44
46{
47 if (!(m_cudaContext && m_cudaFuncs))
48 return;
49
51 CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext))
52
53 if (!m_openglTextures.isEmpty())
54 {
55 LOG(VB_PLAYBACK, LOG_INFO, LOC + "Deleting CUDA resources");
56 for (auto it = m_openglTextures.constBegin(); it != m_openglTextures.constEnd(); ++it)
57 {
58 std::vector<MythVideoTextureOpenGL*> textures = it.value();
59 for (auto & texture : textures)
60 {
61 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
62 if (data && data->second)
63 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second))
64 delete data;
65 texture->m_data = nullptr;
66 }
67 }
68 }
69
70 CUcontext dummy = nullptr;
71 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
72
74}
75
77{
78 return m_cudaFuncs && m_cudaContext;
79}
80
82{
83 return m_cudaContext;
84}
85
87{
88 if (!(Context && Player))
89 return nullptr;
90
92 GetNVDECTypes(Context, types);
93 if (auto nvdec = types.find(FMT_NVDEC); nvdec != types.end())
94 {
95 auto matchType = [](auto type){ return (type == GL_NVDEC); };
96 if (std::any_of(nvdec->second.cbegin(), nvdec->second.cend(), matchType))
97 return new MythNVDECInterop(Player, Context);
98 }
99 return nullptr;
100}
101
103{
104 if (Render)
105 Types[FMT_NVDEC] = { GL_NVDEC };
106}
107
115std::vector<MythVideoTextureOpenGL*>
117 MythVideoColourSpace* ColourSpace,
119 FrameScanType Scan)
120{
121 std::vector<MythVideoTextureOpenGL*> result;
122 if (!Frame || !m_cudaContext || !m_cudaFuncs)
123 return result;
124
125 if (Context && (Context != m_openglContext))
126 LOG(VB_GENERAL, LOG_WARNING, LOC + "Mismatched OpenGL contexts");
127
128 // Check size
129 QSize surfacesize(Frame->m_width, Frame->m_height);
130 if (m_textureSize != surfacesize)
131 {
132 if (!m_textureSize.isEmpty())
133 {
134 LOG(VB_GENERAL, LOG_WARNING, LOC + QString("Video texture size changed! %1x%2->%3x%4")
135 .arg(m_textureSize.width()).arg(m_textureSize.height())
136 .arg(Frame->m_width).arg(Frame->m_height));
137 }
139 m_textureSize = surfacesize;
140 }
141
142 // Lock
144
145 // Update colourspace and initialise on first frame
146 if (ColourSpace)
147 {
148 if (m_openglTextures.isEmpty())
150 ColourSpace->UpdateColourSpace(Frame);
151 }
152
153 // Retrieve hardware frames context and AVCUDADeviceContext
154 if ((Frame->m_pixFmt != AV_PIX_FMT_CUDA) || (Frame->m_type != FMT_NVDEC) ||
155 !Frame->m_buffer || !Frame->m_priv[0] || !Frame->m_priv[1])
156 {
157 return result;
158 }
159
160 auto cudabuffer = reinterpret_cast<CUdeviceptr>(Frame->m_buffer);
161 if (!cudabuffer)
162 return result;
163
164 // make the CUDA context current
165 CUcontext dummy = nullptr;
166 CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext))
167
168 // create and map textures for a new buffer
169 VideoFrameType type = (Frame->m_swPixFmt == AV_PIX_FMT_NONE) ? FMT_NV12 :
170 MythAVUtil::PixelFormatToFrameType(static_cast<AVPixelFormat>(Frame->m_swPixFmt));
171 bool p010 = MythVideoFrame::ColorDepth(type) > 8;
172 if (!m_openglTextures.contains(cudabuffer))
173 {
174 std::vector<QSize> sizes;
175 sizes.emplace_back(Frame->m_width, Frame->m_height);
176 sizes.emplace_back(Frame->m_width, Frame->m_height >> 1);
177 std::vector<MythVideoTextureOpenGL*> textures =
179 if (textures.empty())
180 {
181 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
182 return result;
183 }
184
185 bool success = true;
186 for (uint plane = 0; plane < textures.size(); ++plane)
187 {
188 // N.B. I think the texture formats for P010 are not strictly compliant
189 // with OpenGL ES 3.X but the Nvidia driver does not complain.
190 MythVideoTextureOpenGL *tex = textures[plane];
191 tex->m_allowGLSLDeint = true;
192 m_openglContext->glBindTexture(tex->m_target, tex->m_textureId);
193 QOpenGLTexture::PixelFormat format = QOpenGLTexture::Red;
194 QOpenGLTexture::PixelType pixtype = p010 ? QOpenGLTexture::UInt16 : QOpenGLTexture::UInt8;
195 QOpenGLTexture::TextureFormat internal = p010 ? QOpenGLTexture::R16_UNorm : QOpenGLTexture::R8_UNorm;
196 int width = tex->m_size.width();
197
198 if (plane)
199 {
200 internal = p010 ? QOpenGLTexture::RG16_UNorm : QOpenGLTexture::RG8_UNorm;
201 format = QOpenGLTexture::RG;
202 width /= 2;
203 }
204
205 m_openglContext->glTexImage2D(tex->m_target, 0, internal, width, tex->m_size.height(),
206 0, format, pixtype, nullptr);
207
208 CUarray array = nullptr;
209 CUgraphicsResource graphicsResource = nullptr;
210 CUDA_CHECK(m_cudaFuncs, cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId,
211 QOpenGLTexture::Target2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD))
212 if (graphicsResource)
213 {
214 CUDA_CHECK(m_cudaFuncs, cuGraphicsMapResources(1, &graphicsResource, nullptr))
215 CUDA_CHECK(m_cudaFuncs, cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0))
216 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnmapResources(1, &graphicsResource, nullptr))
217 tex->m_data = reinterpret_cast<unsigned char*>(new QPair<CUarray,CUgraphicsResource>(array, graphicsResource));
218 }
219 else
220 {
221 success = false;
222 break;
223 }
224 }
225
226 if (success)
227 {
228 m_openglTextures.insert(cudabuffer, textures);
229 }
230 else
231 {
232 for (auto & texture : textures)
233 {
234 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
235 if (data && data->second)
236 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second))
237 delete data;
238 texture->m_data = nullptr;
239 if (texture->m_textureId)
240 m_openglContext->glDeleteTextures(1, &texture->m_textureId);
242 }
243 }
244 }
245
246 if (!m_openglTextures.contains(cudabuffer))
247 {
248 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
249 return result;
250 }
251
252 // Copy device data to array data (i.e. texture) - surely this can be avoided?
253 // In theory, asynchronous copies should not be required but we use async
254 // followed by stream synchronisation to ensure CUDA and OpenGL are in sync
255 // which avoids presenting old/stale frames when the GPU is under load.
256 result = m_openglTextures[cudabuffer];
257 for (uint i = 0; i < result.size(); ++i)
258 {
259 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(result[i]->m_data);
260 CUDA_MEMCPY2D cpy;
261 memset(&cpy, 0, sizeof(cpy));
262 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
263 cpy.srcDevice = cudabuffer + static_cast<CUdeviceptr>(Frame->m_offsets[i]);
264 cpy.srcPitch = static_cast<size_t>(Frame->m_pitches[i]);
265 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
266 cpy.dstArray = data->first;
267 cpy.WidthInBytes = static_cast<size_t>(result[i]->m_size.width()) * (p010 ? 2 : 1);
268 cpy.Height = static_cast<size_t>(result[i]->m_size.height());
269 CUDA_CHECK(m_cudaFuncs, cuMemcpy2DAsync(&cpy, nullptr))
270 }
271
272 CUDA_CHECK(m_cudaFuncs, cuStreamSynchronize(nullptr))
273 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
274
275 // GLSL deinterlacing. The decoder will pick up any CPU or driver preference
276 // and return a stream of deinterlaced frames. Just check for GLSL here.
277 bool needreferences = false;
278 if (is_interlaced(Scan) && !Frame->m_alreadyDeinterlaced)
279 {
280 MythDeintType shader = Frame->GetDoubleRateOption(DEINT_SHADER);
281 if (shader)
282 needreferences = shader == DEINT_HIGH;
283 else
284 needreferences = Frame->GetSingleRateOption(DEINT_SHADER) == DEINT_HIGH;
285 }
286
287 if (needreferences)
288 {
289 if (qAbs(Frame->m_frameCounter - m_discontinuityCounter) > 1)
290 m_referenceFrames.clear();
291
292 RotateReferenceFrames(cudabuffer);
293 int size = m_referenceFrames.size();
294
295 CUdeviceptr next = m_referenceFrames[0];
296 CUdeviceptr current = m_referenceFrames[size > 1 ? 1 : 0];
297 CUdeviceptr last = m_referenceFrames[size > 2 ? 2 : 0];
298
299 if (!m_openglTextures.contains(next) || !m_openglTextures.contains(current) ||
300 !m_openglTextures.contains(last))
301 {
302 LOG(VB_GENERAL, LOG_ERR, LOC + "Reference frame error");
303 return result;
304 }
305
306 result = m_openglTextures[last];
307 std::copy(m_openglTextures[current].cbegin(), m_openglTextures[current].cend(), std::back_inserter(result));
308 std::copy(m_openglTextures[next].cbegin(), m_openglTextures[next].cend(), std::back_inserter(result));
309 return result;
310 }
311 m_referenceFrames.clear();
312 m_discontinuityCounter = Frame->m_frameCounter;
313
314 return result;
315}
316
322{
324}
325
326bool MythNVDECInterop::CreateCUDAPriv(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
327 CUcontext& CudaContext, bool& Retry)
328{
329 Retry = false;
330 if (!GLContext)
331 return false;
332
333 // Make OpenGL context current
334 OpenGLLocker locker(GLContext);
335
336 // retrieve CUDA entry points
337 if (cuda_load_functions(&CudaFuncs, nullptr) != 0)
338 {
339 LOG(VB_PLAYBACK, LOG_ERR, LOC + "Failed to load functions");
340 return false;
341 }
342
343 // create a CUDA context for the current device
344 CUdevice cudevice = 0;
345 CUcontext dummy = nullptr;
346 CUresult res = CudaFuncs->cuInit(0);
347 if (res != CUDA_SUCCESS)
348 {
349 LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to initialise CUDA API");
350 return false;
351 }
352
353 unsigned int devicecount = 0;
354 res = CudaFuncs->cuGLGetDevices(&devicecount, &cudevice, 1, CU_GL_DEVICE_LIST_ALL);
355 if (res != CUDA_SUCCESS)
356 {
357 LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to get CUDA device");
358 return false;
359 }
360
361 if (devicecount < 1)
362 {
363 LOG(VB_GENERAL, LOG_ERR, LOC + "No CUDA devices");
364 return false;
365 }
366
367 res = CudaFuncs->cuCtxCreate(&CudaContext, CU_CTX_SCHED_BLOCKING_SYNC, cudevice);
368 if (res != CUDA_SUCCESS)
369 {
370 LOG(VB_GENERAL, LOG_ERR, LOC + QString("Failed to create CUDA context (Err: %1)")
371 .arg(res));
372 Retry = true;
373 return false;
374 }
375
376 CudaFuncs->cuCtxPopCurrent(&dummy);
377 LOG(VB_PLAYBACK, LOG_INFO, LOC + "Created CUDA context");
378 return true;
379}
380
381bool MythNVDECInterop::CreateCUDAContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
382 CUcontext& CudaContext)
383{
384 if (!gCoreContext->IsUIThread())
385 {
386 LOG(VB_GENERAL, LOG_ERR, LOC + "Must create CUDA context from main thread");
387 return false;
388 }
389
390 int retries = 0;
391 bool retry = false;
392 while (retries++ < 5)
393 {
394 if (CreateCUDAPriv(GLContext, CudaFuncs, CudaContext, retry))
395 return true;
396 CleanupContext(GLContext, CudaFuncs, CudaContext);
397 if (!retry)
398 break;
399 LOG(VB_GENERAL, LOG_WARNING, LOC + "Will retry in 50ms");
400 std::this_thread::sleep_for(50ms);
401 }
402 return false;
403}
404
405void MythNVDECInterop::CleanupContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
406 CUcontext& CudaContext)
407{
408 if (!GLContext)
409 return;
410
411 OpenGLLocker locker(GLContext);
412 if (CudaFuncs)
413 {
414 if (CudaContext)
415 CUDA_CHECK(CudaFuncs, cuCtxDestroy(CudaContext))
416 cuda_free_functions(&CudaFuncs);
417 }
418}
419
421{
422 if (!Buffer)
423 return;
424
425 // don't retain twice for double rate
426 if (!m_referenceFrames.empty() && (m_referenceFrames[0] == Buffer))
427 return;
428
429 m_referenceFrames.push_front(Buffer);
430
431 // release old frames
432 while (m_referenceFrames.size() > 3)
433 m_referenceFrames.pop_back();
434}
static VideoFrameType PixelFormatToFrameType(AVPixelFormat Fmt)
Definition: mythavutil.cpp:72
unsigned char * m_data
uint64_t m_discontinuityCounter
std::map< VideoFrameType, InteropTypes > InteropMap
std::vector< MythVideoTextureOpenGL * > Acquire(MythRenderOpenGL *Context, MythVideoColourSpace *ColourSpace, MythVideoFrame *Frame, FrameScanType Scan) override
Map CUDA video memory to OpenGL textures.
CUcontext GetCUDAContext()
static bool CreateCUDAPriv(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext, bool &Retry)
bool InitialiseCuda()
Initialise a CUDA context.
CudaFunctions * m_cudaFuncs
MythNVDECInterop(MythPlayerUI *Player, MythRenderOpenGL *Context)
static bool CreateCUDAContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
~MythNVDECInterop() override
void RotateReferenceFrames(CUdeviceptr Buffer)
static MythNVDECInterop * CreateNVDEC(MythPlayerUI *Player, MythRenderOpenGL *Context)
static void GetNVDECTypes(MythRenderOpenGL *Render, MythInteropGPU::InteropMap &Types)
void DeleteTextures() override
QVector< CUdeviceptr > m_referenceFrames
CUcontext m_cudaContext
static void CleanupContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
virtual void DeleteTextures()
MythRenderOpenGL * m_openglContext
QHash< unsigned long long, std::vector< MythVideoTextureOpenGL * > > m_openglTextures
MythVideoColourSpace contains a QMatrix4x4 that can convert YCbCr data to RGB.
void SetSupportedAttributes(PictureAttributeSupported Supported)
Enable the given set of picture attributes.
bool UpdateColourSpace(const MythVideoFrame *Frame)
Set the current colourspace to use.
static int ColorDepth(int Format)
Definition: mythframe.h:398
static std::vector< MythVideoTextureOpenGL * > CreateTextures(MythRenderOpenGL *Context, VideoFrameType Type, VideoFrameType Format, std::vector< QSize > Sizes, GLenum Target=QOpenGLTexture::Target2D)
Create a set of textures suitable for the given Type and Format.
static void DeleteTexture(MythRenderOpenGL *Context, MythVideoTextureOpenGL *Texture)
unsigned int uint
Definition: freesurround.h:24
static const struct wl_interface * types[]
MythCoreContext * gCoreContext
This global variable contains the MythCoreContext instance for the app.
MythDeintType
Definition: mythframe.h:67
@ DEINT_HIGH
Definition: mythframe.h:71
@ DEINT_SHADER
Definition: mythframe.h:73
VideoFrameType
Definition: mythframe.h:20
@ FMT_NVDEC
Definition: mythframe.h:62
@ FMT_NV12
Definition: mythframe.h:52
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:39
#define LOC
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL)
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:15
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.
FrameScanType
Definition: videoouttypes.h:95
bool is_interlaced(FrameScanType Scan)
#define ALL_PICTURE_ATTRIBUTES