MythTV master
mythnvdecinterop.cpp
Go to the documentation of this file.
1// MythTV
6
7// Std
8#include <chrono>
9#include <thread>
10
11extern "C" {
12#include "libavutil/log.h"
13#define FFNV_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_ERROR, msg, __VA_ARGS__)
14#define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...) av_log(logctx, AV_LOG_DEBUG, msg, __VA_ARGS__)
15#include <ffnvcodec/dynlink_loader.h>
16}
17
18#define LOC QString("NVDECInterop: ")
19
/// Invoke CUDA_CALL through the CUDA_FUNCS function table and log any failure.
/// The macro expands to a braced block, so call sites are written without a
/// trailing semicolon. A failing call is only logged; execution continues
/// with the statement that follows the macro.
/// The description pointer starts out null and a fixed fallback string is
/// logged if cuGetErrorString does not supply a description.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL) \
{ \
    CUresult res = (CUDA_FUNCS)->CUDA_CALL; \
    if (res != CUDA_SUCCESS) { \
        const char * desc = nullptr; \
        (CUDA_FUNCS)->cuGetErrorString(res, &desc); \
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error %1 (%2)") \
            .arg(res).arg(desc ? desc : "Unknown error")); \
    } \
}
30
32 : MythOpenGLInterop(Context, GL_NVDEC, Player)
33{
35}
36
38{
39 m_referenceFrames.clear();
42}
43
45{
// Release the CUDA side of every cached texture set: each texture's m_data
// holds a heap allocated QPair<CUarray,CUgraphicsResource> whose graphics
// resource is unregistered before the pair is deleted.
// Without a CUDA context and function table there is nothing to release.
46 if (!(m_cudaContext && m_cudaFuncs))
47 return;
48
// The CUDA context is pushed here and popped again once the loop is done
50 CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext))
51
52 if (!m_openglTextures.isEmpty())
53 {
54 LOG(VB_PLAYBACK, LOG_INFO, LOC + "Deleting CUDA resources");
55 for (auto it = m_openglTextures.constBegin(); it != m_openglTextures.constEnd(); ++it)
56 {
// NOTE(review): this copies the vector of pointers for each entry; the
// pointed-to textures are shared, so clearing m_data below still affects
// the textures held in m_openglTextures.
57 std::vector<MythVideoTextureOpenGL*> textures = it.value();
58 for (auto & texture : textures)
59 {
60 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
// data->second is the CUgraphicsResource; it may be null, in which case
// only the pair itself is freed
61 if (data && data->second)
62 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second))
63 delete data;
// Cleared so the pair is not freed a second time through this texture
64 texture->m_data = nullptr;
65 }
66 }
67 }
68
// Restore whatever CUDA context was current before the push above
69 CUcontext dummy = nullptr;
70 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
71
73}
74
76{
77 return m_cudaFuncs && m_cudaContext;
78}
79
81{
82 return m_cudaContext;
83}
84
86{
// Factory: returns a new MythNVDECInterop when GL_NVDEC is listed for
// FMT_NVDEC, otherwise nullptr. Both Context and Player are required.
// NOTE(review): the object is created with a raw new - presumably the caller
// takes ownership; confirm against the call site.
87 if (!(Context && Player))
88 return nullptr;
89
// Query the interop types available for this render context
91 GetNVDECTypes(Context, types);
92 if (auto nvdec = types.find(FMT_NVDEC); nvdec != types.end())
93 {
// Only construct the interop if GL_NVDEC is among the types for FMT_NVDEC
94 auto matchType = [](auto type){ return (type == GL_NVDEC); };
95 if (std::any_of(nvdec->second.cbegin(), nvdec->second.cend(), matchType))
96 return new MythNVDECInterop(Player, Context);
97 }
98 return nullptr;
99}
100
102{
103 if (Render)
104 Types[FMT_NVDEC] = { GL_NVDEC };
105}
106
// Map the CUDA video memory referenced by Frame to OpenGL textures.
//
// Frame->m_buffer is treated as a CUdeviceptr and used as the key into
// m_openglTextures. The first time a buffer is seen, a luma and a chroma
// texture are created, registered with CUDA and cached. On every call the
// device data is copied into the CUDA arrays backing those textures.
//
// Returns an empty vector on any failure. Otherwise returns the textures for
// the current frame, or - when the high quality shader deinterlacer is
// selected - the textures for the last, current and next reference frames
// appended in that order.
114std::vector<MythVideoTextureOpenGL*>
116 MythVideoColourSpace* ColourSpace,
118 FrameScanType Scan)
119{
120 std::vector<MythVideoTextureOpenGL*> result;
// Nothing can be mapped without a frame and an initialised CUDA context
121 if (!Frame || !m_cudaContext || !m_cudaFuncs)
122 return result;
123
// A differing context is only reported; processing continues regardless
124 if (Context && (Context != m_openglContext))
125 LOG(VB_GENERAL, LOG_WARNING, LOC + "Mismatched OpenGL contexts");
126
127 // Check size
128 QSize surfacesize(Frame->m_width, Frame->m_height);
129 if (m_textureSize != surfacesize)
130 {
// An empty m_textureSize means this is the first frame, so nothing is logged
131 if (!m_textureSize.isEmpty())
132 {
133 LOG(VB_GENERAL, LOG_WARNING, LOC + QString("Video texture size changed! %1x%2->%3x%4")
134 .arg(m_textureSize.width()).arg(m_textureSize.height())
135 .arg(Frame->m_width).arg(Frame->m_height));
136 }
138 m_textureSize = surfacesize;
139 }
140
141 // Lock
143
144 // Update colourspace and initialise on first frame
145 if (ColourSpace)
146 {
147 if (m_openglTextures.isEmpty())
149 ColourSpace->UpdateColourSpace(Frame);
150 }
151
152 // Retrieve hardware frames context and AVCUDADeviceContext
// Frames that are not CUDA/NVDEC or that lack a buffer or either private
// pointer are rejected
153 if ((Frame->m_pixFmt != AV_PIX_FMT_CUDA) || (Frame->m_type != FMT_NVDEC) ||
154 !Frame->m_buffer || !Frame->m_priv[0] || !Frame->m_priv[1])
155 {
156 return result;
157 }
158
// The frame buffer pointer is reinterpreted as the CUDA device address and
// doubles as the cache key for the texture set
159 auto cudabuffer = reinterpret_cast<CUdeviceptr>(Frame->m_buffer);
160 if (!cudabuffer)
161 return result;
162
163 // make the CUDA context current
// Every return between this push and the pop after the copy loop pops the
// context again first
164 CUcontext dummy = nullptr;
165 CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext))
166
167 // create and map textures for a new buffer
// With no software pixel format set the frame is treated as NV12; a colour
// depth above 8 bits selects the 16 bit texture formats and a 2 byte sample
// width for the copy
168 VideoFrameType type = (Frame->m_swPixFmt == AV_PIX_FMT_NONE) ? FMT_NV12 :
169 MythAVUtil::PixelFormatToFrameType(static_cast<AVPixelFormat>(Frame->m_swPixFmt));
170 bool p010 = MythVideoFrame::ColorDepth(type) > 8;
171 if (!m_openglTextures.contains(cudabuffer))
172 {
// Plane 0 is full size, plane 1 is full width and half height
173 std::vector<QSize> sizes;
174 sizes.emplace_back(Frame->m_width, Frame->m_height);
175 sizes.emplace_back(Frame->m_width, Frame->m_height >> 1);
176 std::vector<MythVideoTextureOpenGL*> textures =
178 if (textures.empty())
179 {
180 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
181 return result;
182 }
183
184 bool success = true;
185 for (uint plane = 0; plane < textures.size(); ++plane)
186 {
187 // N.B. I think the texture formats for P010 are not strictly compliant
188 // with OpenGL ES 3.X but the Nvidia driver does not complain.
189 MythVideoTextureOpenGL *tex = textures[plane];
190 tex->m_allowGLSLDeint = true;
191 m_openglContext->glBindTexture(tex->m_target, tex->m_textureId);
192 QOpenGLTexture::PixelFormat format = QOpenGLTexture::Red;
193 QOpenGLTexture::PixelType pixtype = p010 ? QOpenGLTexture::UInt16 : QOpenGLTexture::UInt8;
194 QOpenGLTexture::TextureFormat internal = p010 ? QOpenGLTexture::R16_UNorm : QOpenGLTexture::R8_UNorm;
195 int width = tex->m_size.width();
196
// Any plane after the first is allocated as a two channel texture at half
// the texel width
197 if (plane)
198 {
199 internal = p010 ? QOpenGLTexture::RG16_UNorm : QOpenGLTexture::RG8_UNorm;
200 format = QOpenGLTexture::RG;
201 width /= 2;
202 }
203
// Allocate storage only (null data pointer); CUDA fills it later
204 m_openglContext->glTexImage2D(tex->m_target, 0, internal, width, tex->m_size.height(),
205 0, format, pixtype, nullptr);
206
207 CUarray array = nullptr;
208 CUgraphicsResource graphicsResource = nullptr;
209 CUDA_CHECK(m_cudaFuncs, cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId,
210 QOpenGLTexture::Target2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD))
211 if (graphicsResource)
212 {
// The resource is mapped just long enough to fetch its CUarray; the array
// and resource are then stored on the texture as a heap allocated QPair
213 CUDA_CHECK(m_cudaFuncs, cuGraphicsMapResources(1, &graphicsResource, nullptr))
214 CUDA_CHECK(m_cudaFuncs, cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0))
215 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnmapResources(1, &graphicsResource, nullptr))
216 tex->m_data = reinterpret_cast<unsigned char*>(new QPair<CUarray,CUgraphicsResource>(array, graphicsResource));
217 }
218 else
219 {
// Registration failed: abandon the whole texture set
220 success = false;
221 break;
222 }
223 }
224
225 if (success)
226 {
227 m_openglTextures.insert(cudabuffer, textures);
228 }
229 else
230 {
// Undo whatever was registered for the planes that did succeed
231 for (auto & texture : textures)
232 {
233 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
234 if (data && data->second)
235 CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second))
236 delete data;
237 texture->m_data = nullptr;
238 if (texture->m_textureId)
239 m_openglContext->glDeleteTextures(1, &texture->m_textureId);
241 }
242 }
243 }
244
// Still no entry for this buffer means texture creation failed above
245 if (!m_openglTextures.contains(cudabuffer))
246 {
247 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
248 return result;
249 }
250
251 // Copy device data to array data (i.e. texture) - surely this can be avoided?
252 // In theory, asynchronous copies should not be required but we use async
253 // followed by stream synchronisation to ensure CUDA and OpenGL are in sync
254 // which avoids presenting old/stale frames when the GPU is under load.
255 result = m_openglTextures[cudabuffer];
256 for (uint i = 0; i < result.size(); ++i)
257 {
// data->first is the CUarray stored when the texture was registered
258 auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(result[i]->m_data);
259 CUDA_MEMCPY2D cpy;
260 memset(&cpy, 0, sizeof(cpy));
261 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
262 cpy.srcDevice = cudabuffer + static_cast<CUdeviceptr>(Frame->m_offsets[i]);
263 cpy.srcPitch = static_cast<size_t>(Frame->m_pitches[i]);
264 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
265 cpy.dstArray = data->first;
266 cpy.WidthInBytes = static_cast<size_t>(result[i]->m_size.width()) * (p010 ? 2 : 1);
267 cpy.Height = static_cast<size_t>(result[i]->m_size.height());
268 CUDA_CHECK(m_cudaFuncs, cuMemcpy2DAsync(&cpy, nullptr))
269 }
270
// Wait for the copies issued above, then release the CUDA context
271 CUDA_CHECK(m_cudaFuncs, cuStreamSynchronize(nullptr))
272 CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy))
273
274 // GLSL deinterlacing. The decoder will pick up any CPU or driver preference
275 // and return a stream of deinterlaced frames. Just check for GLSL here.
276 bool needreferences = false;
277 if (is_interlaced(Scan) && !Frame->m_alreadyDeinterlaced)
278 {
// The double rate shader option takes precedence; the single rate option is
// only consulted when no double rate shader is set. Reference frames are
// needed only for DEINT_HIGH.
279 MythDeintType shader = Frame->GetDoubleRateOption(DEINT_SHADER);
280 if (shader)
281 needreferences = shader == DEINT_HIGH;
282 else
283 needreferences = Frame->GetSingleRateOption(DEINT_SHADER) == DEINT_HIGH;
284 }
285
286 if (needreferences)
287 {
// NOTE(review): m_discontinuityCounter is only assigned on the
// non-reference path at the end of this function. If m_frameCounter advances
// by one per frame (TODO confirm), the difference exceeds 1 from the second
// consecutive reference frame onwards and the list is cleared on every call,
// so next/current/last would always be the same buffer. Confirm whether the
// counter should also be updated on this path.
// NOTE(review): if both counters are unsigned, qAbs has no effect and a
// smaller frame counter wraps to a large value - verify the operand types.
288 if (qAbs(Frame->m_frameCounter - m_discontinuityCounter) > 1)
289 m_referenceFrames.clear();
290
291 RotateReferenceFrames(cudabuffer);
292 int size = m_referenceFrames.size();
293
// Index 0 is the newest buffer; missing older entries fall back to index 0
294 CUdeviceptr next = m_referenceFrames[0];
295 CUdeviceptr current = m_referenceFrames[size > 1 ? 1 : 0];
296 CUdeviceptr last = m_referenceFrames[size > 2 ? 2 : 0];
297
298 if (!m_openglTextures.contains(next) || !m_openglTextures.contains(current) ||
299 !m_openglTextures.contains(last))
300 {
301 LOG(VB_GENERAL, LOG_ERR, LOC + "Reference frame error");
302 return result;
303 }
304
// Returned order is last, current, next
305 result = m_openglTextures[last];
306 std::copy(m_openglTextures[current].cbegin(), m_openglTextures[current].cend(), std::back_inserter(result));
307 std::copy(m_openglTextures[next].cbegin(), m_openglTextures[next].cend(), std::back_inserter(result));
308 return result;
309 }
// No references needed: drop any held frames and remember this frame counter
310 m_referenceFrames.clear();
311 m_discontinuityCounter = Frame->m_frameCounter;
312
313 return result;
314}
315
321{
323}
324
325bool MythNVDECInterop::CreateCUDAPriv(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
326 CUcontext& CudaContext, bool& Retry)
327{
328 Retry = false;
329 if (!GLContext)
330 return false;
331
332 // Make OpenGL context current
333 OpenGLLocker locker(GLContext);
334
335 // retrieve CUDA entry points
336 if (cuda_load_functions(&CudaFuncs, nullptr) != 0)
337 {
338 LOG(VB_PLAYBACK, LOG_ERR, LOC + "Failed to load functions");
339 return false;
340 }
341
342 // create a CUDA context for the current device
343 CUdevice cudevice = 0;
344 CUcontext dummy = nullptr;
345 CUresult res = CudaFuncs->cuInit(0);
346 if (res != CUDA_SUCCESS)
347 {
348 LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to initialise CUDA API");
349 return false;
350 }
351
352 unsigned int devicecount = 0;
353 res = CudaFuncs->cuGLGetDevices(&devicecount, &cudevice, 1, CU_GL_DEVICE_LIST_ALL);
354 if (res != CUDA_SUCCESS)
355 {
356 LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to get CUDA device");
357 return false;
358 }
359
360 if (devicecount < 1)
361 {
362 LOG(VB_GENERAL, LOG_ERR, LOC + "No CUDA devices");
363 return false;
364 }
365
366 res = CudaFuncs->cuCtxCreate(&CudaContext, CU_CTX_SCHED_BLOCKING_SYNC, cudevice);
367 if (res != CUDA_SUCCESS)
368 {
369 LOG(VB_GENERAL, LOG_ERR, LOC + QString("Failed to create CUDA context (Err: %1)")
370 .arg(res));
371 Retry = true;
372 return false;
373 }
374
375 CudaFuncs->cuCtxPopCurrent(&dummy);
376 LOG(VB_PLAYBACK, LOG_INFO, LOC + "Created CUDA context");
377 return true;
378}
379
380bool MythNVDECInterop::CreateCUDAContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
381 CUcontext& CudaContext)
382{
383 if (!gCoreContext->IsUIThread())
384 {
385 LOG(VB_GENERAL, LOG_ERR, LOC + "Must create CUDA context from main thread");
386 return false;
387 }
388
389 int retries = 0;
390 bool retry = false;
391 while (retries++ < 5)
392 {
393 if (CreateCUDAPriv(GLContext, CudaFuncs, CudaContext, retry))
394 return true;
395 CleanupContext(GLContext, CudaFuncs, CudaContext);
396 if (!retry)
397 break;
398 LOG(VB_GENERAL, LOG_WARNING, LOC + "Will retry in 50ms");
399 std::this_thread::sleep_for(50ms);
400 }
401 return false;
402}
403
404void MythNVDECInterop::CleanupContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
405 CUcontext& CudaContext)
406{
407 if (!GLContext)
408 return;
409
410 OpenGLLocker locker(GLContext);
411 if (CudaFuncs)
412 {
413 if (CudaContext)
414 CUDA_CHECK(CudaFuncs, cuCtxDestroy(CudaContext))
415 cuda_free_functions(&CudaFuncs);
416 }
417}
418
420{
// Push Buffer onto the front of m_referenceFrames, keeping at most the three
// most recent entries. A zero Buffer is ignored.
421 if (!Buffer)
422 return;
423
424 // don't retain twice for double rate
// i.e. the same buffer presented again is not added a second time
425 if (!m_referenceFrames.empty() && (m_referenceFrames[0] == Buffer))
426 return;
427
// Newest entry lives at index 0
428 m_referenceFrames.push_front(Buffer);
429
430 // release old frames
// Only the stored device address is dropped here; nothing is freed
431 while (m_referenceFrames.size() > 3)
432 m_referenceFrames.pop_back();
433}
static VideoFrameType PixelFormatToFrameType(AVPixelFormat Fmt)
Definition: mythavutil.cpp:72
unsigned char * m_data
uint64_t m_discontinuityCounter
std::map< VideoFrameType, InteropTypes > InteropMap
std::vector< MythVideoTextureOpenGL * > Acquire(MythRenderOpenGL *Context, MythVideoColourSpace *ColourSpace, MythVideoFrame *Frame, FrameScanType Scan) override
Map CUDA video memory to OpenGL textures.
CUcontext GetCUDAContext()
static bool CreateCUDAPriv(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext, bool &Retry)
bool InitialiseCuda()
Initialise a CUDA context.
CudaFunctions * m_cudaFuncs
MythNVDECInterop(MythPlayerUI *Player, MythRenderOpenGL *Context)
static bool CreateCUDAContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
~MythNVDECInterop() override
void RotateReferenceFrames(CUdeviceptr Buffer)
static MythNVDECInterop * CreateNVDEC(MythPlayerUI *Player, MythRenderOpenGL *Context)
static void GetNVDECTypes(MythRenderOpenGL *Render, MythInteropGPU::InteropMap &Types)
void DeleteTextures() override
QVector< CUdeviceptr > m_referenceFrames
CUcontext m_cudaContext
static void CleanupContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
virtual void DeleteTextures()
MythRenderOpenGL * m_openglContext
QHash< unsigned long long, std::vector< MythVideoTextureOpenGL * > > m_openglTextures
MythVideoColourSpace contains a QMatrix4x4 that can convert YCbCr data to RGB.
void SetSupportedAttributes(PictureAttributeSupported Supported)
Enable the given set of picture attributes.
bool UpdateColourSpace(const MythVideoFrame *Frame)
Set the current colourspace to use.
static int ColorDepth(int Format)
Definition: mythframe.h:398
static std::vector< MythVideoTextureOpenGL * > CreateTextures(MythRenderOpenGL *Context, VideoFrameType Type, VideoFrameType Format, std::vector< QSize > Sizes, GLenum Target=QOpenGLTexture::Target2D)
Create a set of textures suitable for the given Type and Format.
static void DeleteTexture(MythRenderOpenGL *Context, MythVideoTextureOpenGL *Texture)
unsigned int uint
Definition: compat.h:60
static const struct wl_interface * types[]
MythCoreContext * gCoreContext
This global variable contains the MythCoreContext instance for the app.
MythDeintType
Definition: mythframe.h:67
@ DEINT_HIGH
Definition: mythframe.h:71
@ DEINT_SHADER
Definition: mythframe.h:73
VideoFrameType
Definition: mythframe.h:20
@ FMT_NVDEC
Definition: mythframe.h:62
@ FMT_NV12
Definition: mythframe.h:52
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:39
#define LOC
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL)
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:15
MBASE_PUBLIC long long copy(QFile &dst, QFile &src, uint block_size=0)
Copies src file to dst file.
FrameScanType
Definition: videoouttypes.h:95
bool is_interlaced(FrameScanType Scan)
#define ALL_PICTURE_ATTRIBUTES