MythTV  master
mythnvdecinterop.cpp
Go to the documentation of this file.
1 // MythTV
2 #include "mythconfig.h"
3 #include "mythcorecontext.h"
4 #include "videocolourspace.h"
5 #include "mythnvdecinterop.h"
6 
7 // Std
8 #include <chrono>
9 #include <thread>
10 
11 #define LOC QString("NVDECInterop: ")
12 
// Invoke CUDA_CALL through the CUDA_FUNCS function table and log any failure
// (numeric result code plus the driver's description). The failure is only
// logged; no value is produced and control flow is unaffected.
//
// The body is wrapped in do { } while (false) so that the macro expands to a
// single statement and is safe in unbraced if/else. Call sites must supply the
// trailing semicolon.
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL) \
do { \
    CUresult res = (CUDA_FUNCS)->CUDA_CALL; \
    if (res != CUDA_SUCCESS) { \
        /* stays null if the driver cannot describe the error */ \
        const char * desc = nullptr; \
        (CUDA_FUNCS)->cuGetErrorString(res, &desc); \
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error %1 (%2)").arg(res).arg(desc ? desc : "unknown")); \
    } \
} while (false)
22 
24  : MythOpenGLInterop(Context, NVDEC),
25  m_cudaContext()
26 {
28 }
29 
31 {
32  m_referenceFrames.clear();
35 }
36 
38 {
39  if (!(m_cudaContext && m_cudaFuncs))
40  return;
41 
42  OpenGLLocker locker(m_context);
43  CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext));
44 
45  if (!m_openglTextures.isEmpty())
46  {
47  LOG(VB_PLAYBACK, LOG_INFO, LOC + "Deleting CUDA resources");
48  for (auto it = m_openglTextures.constBegin(); it != m_openglTextures.constEnd(); ++it)
49  {
50  vector<MythVideoTexture*> textures = it.value();
51  for (auto & texture : textures)
52  {
53  auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
54  if (data && data->second)
55  CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second));
56  delete data;
57  texture->m_data = nullptr;
58  }
59  }
60  }
61 
62  CUcontext dummy = nullptr;
63  CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy));
64 
66 }
67 
69 {
70  return m_cudaFuncs && m_cudaContext;
71 }
72 
74 {
75  return m_cudaContext;
76 }
77 
79 {
80  if (Context)
81  return new MythNVDECInterop(Context);
82  return nullptr;
83 }
84 
86 {
87  if ((FMT_NVDEC != Format) || !gCoreContext->IsUIThread())
88  return Unsupported;
89 
91  return Unsupported;
92  return NVDEC;
93 }
94 
103  VideoColourSpace *ColourSpace,
104  VideoFrame *Frame,
105  FrameScanType Scan)
106 {
107  vector<MythVideoTexture*> result;
108  if (!Frame || !m_cudaContext || !m_cudaFuncs)
109  return result;
110 
111  if (Context && (Context != m_context))
112  LOG(VB_GENERAL, LOG_WARNING, LOC + "Mismatched OpenGL contexts");
113 
114  // Check size
115  QSize surfacesize(Frame->width, Frame->height);
116  if (m_openglTextureSize != surfacesize)
117  {
118  if (!m_openglTextureSize.isEmpty())
119  {
120  LOG(VB_GENERAL, LOG_WARNING, LOC + QString("Video texture size changed! %1x%2->%3x%4")
121  .arg(m_openglTextureSize.width()).arg(m_openglTextureSize.height())
122  .arg(Frame->width).arg(Frame->height));
123  }
124  DeleteTextures();
125  m_openglTextureSize = surfacesize;
126  }
127 
128  // Lock
129  OpenGLLocker locker(m_context);
130 
131  // Update colourspace and initialise on first frame
132  if (ColourSpace)
133  {
134  if (m_openglTextures.isEmpty())
136  ColourSpace->UpdateColourSpace(Frame);
137  }
138 
139  // Retrieve hardware frames context and AVCUDADeviceContext
140  if ((Frame->pix_fmt != AV_PIX_FMT_CUDA) || (Frame->codec != FMT_NVDEC) ||
141  !Frame->buf || !Frame->priv[0] || !Frame->priv[1])
142  {
143  return result;
144  }
145 
146  auto cudabuffer = reinterpret_cast<CUdeviceptr>(Frame->buf);
147  if (!cudabuffer)
148  return result;
149 
150  // make the CUDA context current
151  CUcontext dummy = nullptr;
152  CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext));
153 
154  // create and map textures for a new buffer
155  VideoFrameType type = (Frame->sw_pix_fmt == AV_PIX_FMT_NONE) ? FMT_NV12 :
156  PixelFormatToFrameType(static_cast<AVPixelFormat>(Frame->sw_pix_fmt));
157  bool p010 = ColorDepth(type) > 8;
158  if (!m_openglTextures.contains(cudabuffer))
159  {
160  vector<QSize> sizes;
161  sizes.emplace_back(QSize(Frame->width, Frame->height));
162  sizes.emplace_back(QSize(Frame->width, Frame->height >> 1));
163  vector<MythVideoTexture*> textures =
165  if (textures.empty())
166  {
167  CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy));
168  return result;
169  }
170 
171  bool success = true;
172  for (uint plane = 0; plane < textures.size(); ++plane)
173  {
174  // N.B. I think the texture formats for P010 are not strictly compliant
175  // with OpenGL ES 3.X but the Nvidia driver does not complain.
176  MythVideoTexture *tex = textures[plane];
177  tex->m_allowGLSLDeint = true;
178  m_context->glBindTexture(tex->m_target, tex->m_textureId);
179  QOpenGLTexture::PixelFormat format = QOpenGLTexture::Red;
180  QOpenGLTexture::PixelType pixtype = p010 ? QOpenGLTexture::UInt16 : QOpenGLTexture::UInt8;
181  QOpenGLTexture::TextureFormat internal = p010 ? QOpenGLTexture::R16_UNorm : QOpenGLTexture::R8_UNorm;
182  int width = tex->m_size.width();
183 
184  if (plane)
185  {
186  internal = p010 ? QOpenGLTexture::RG16_UNorm : QOpenGLTexture::RG8_UNorm;
187  format = QOpenGLTexture::RG;
188  width /= 2;
189  }
190 
191  m_context->glTexImage2D(tex->m_target, 0, internal, width, tex->m_size.height(),
192  0, format, pixtype, nullptr);
193 
194  CUarray array = nullptr;
195  CUgraphicsResource graphicsResource = nullptr;
196  CUDA_CHECK(m_cudaFuncs, cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId,
197  QOpenGLTexture::Target2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
198  if (graphicsResource)
199  {
200  CUDA_CHECK(m_cudaFuncs, cuGraphicsMapResources(1, &graphicsResource, nullptr));
201  CUDA_CHECK(m_cudaFuncs, cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0));
202  CUDA_CHECK(m_cudaFuncs, cuGraphicsUnmapResources(1, &graphicsResource, nullptr));
203  tex->m_data = reinterpret_cast<unsigned char*>(new QPair<CUarray,CUgraphicsResource>(array, graphicsResource));
204  }
205  else
206  {
207  success = false;
208  break;
209  }
210  }
211 
212  if (success)
213  {
214  m_openglTextures.insert(cudabuffer, textures);
215  }
216  else
217  {
218  for (auto & texture : textures)
219  {
220  auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data);
221  if (data && data->second)
222  CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second));
223  delete data;
224  texture->m_data = nullptr;
225  if (texture->m_textureId)
226  m_context->glDeleteTextures(1, &texture->m_textureId);
228  }
229  }
230  }
231 
232  if (!m_openglTextures.contains(cudabuffer))
233  {
234  CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy));
235  return result;
236  }
237 
238  // Copy device data to array data (i.e. texture) - surely this can be avoided?
239  // In theory, asynchronous copies should not be required but we use async
240  // followed by stream synchronisation to ensure CUDA and OpenGL are in sync
241  // which avoids presenting old/stale frames when the GPU is under load.
242  result = m_openglTextures[cudabuffer];
243  for (uint i = 0; i < result.size(); ++i)
244  {
245  auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(result[i]->m_data);
246  CUDA_MEMCPY2D cpy;
247  memset(&cpy, 0, sizeof(cpy));
248  cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
249  cpy.srcDevice = cudabuffer + static_cast<CUdeviceptr>(Frame->offsets[i]);
250  cpy.srcPitch = static_cast<size_t>(Frame->pitches[i]);
251  cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
252  cpy.dstArray = data->first;
253  cpy.WidthInBytes = static_cast<size_t>(result[i]->m_size.width()) * (p010 ? 2 : 1);
254  cpy.Height = static_cast<size_t>(result[i]->m_size.height());
255  CUDA_CHECK(m_cudaFuncs, cuMemcpy2DAsync(&cpy, nullptr));
256  }
257 
258  CUDA_CHECK(m_cudaFuncs, cuStreamSynchronize(nullptr));
259  CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy));
260 
261  // GLSL deinterlacing. The decoder will pick up any CPU or driver preference
262  // and return a stream of deinterlaced frames. Just check for GLSL here.
263  bool needreferences = false;
264  if (is_interlaced(Scan) && !Frame->already_deinterlaced)
265  {
267  if (shader)
268  needreferences = shader == DEINT_HIGH;
269  else
270  needreferences = GetSingleRateOption(Frame, DEINT_SHADER) == DEINT_HIGH;
271  }
272 
273  if (needreferences)
274  {
275  if (abs(Frame->frameCounter - m_discontinuityCounter) > 1)
276  m_referenceFrames.clear();
277 
278  RotateReferenceFrames(cudabuffer);
279  int size = m_referenceFrames.size();
280 
281  CUdeviceptr next = m_referenceFrames[0];
282  CUdeviceptr current = m_referenceFrames[size > 1 ? 1 : 0];
283  CUdeviceptr last = m_referenceFrames[size > 2 ? 2 : 0];
284 
285  if (!m_openglTextures.contains(next) || !m_openglTextures.contains(current) ||
286  !m_openglTextures.contains(last))
287  {
288  LOG(VB_GENERAL, LOG_ERR, LOC + "Reference frame error");
289  return result;
290  }
291 
292  result = m_openglTextures[last];
293  for (MythVideoTexture* tex : qAsConst(m_openglTextures[current]))
294  result.push_back(tex);
295  for (MythVideoTexture* tex : qAsConst(m_openglTextures[next]))
296  result.push_back(tex);
297  return result;
298  }
299  m_referenceFrames.clear();
300  m_discontinuityCounter = Frame->frameCounter;
301 
302  return result;
303 }
304 
310 {
312 }
313 
314 bool MythNVDECInterop::CreateCUDAPriv(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs,
315  CUcontext &CudaContext, bool &Retry)
316 {
317  Retry = false;
318  if (!GLContext)
319  return false;
320 
321  // Make OpenGL context current
322  OpenGLLocker locker(GLContext);
323 
324  // retrieve CUDA entry points
325  if (cuda_load_functions(&CudaFuncs, nullptr) != 0)
326  {
327  LOG(VB_PLAYBACK, LOG_ERR, LOC + "Failed to load functions");
328  return false;
329  }
330 
331  // create a CUDA context for the current device
332  CUdevice cudevice = 0;
333  CUcontext dummy = nullptr;
334  CUresult res = CudaFuncs->cuInit(0);
335  if (res != CUDA_SUCCESS)
336  {
337  LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to initialise CUDA API");
338  return false;
339  }
340 
341  unsigned int devicecount = 0;
342  res = CudaFuncs->cuGLGetDevices(&devicecount, &cudevice, 1, CU_GL_DEVICE_LIST_ALL);
343  if (res != CUDA_SUCCESS)
344  {
345  LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to get CUDA device");
346  return false;
347  }
348 
349  if (devicecount < 1)
350  {
351  LOG(VB_GENERAL, LOG_ERR, LOC + "No CUDA devices");
352  return false;
353  }
354 
355  res = CudaFuncs->cuCtxCreate(&CudaContext, CU_CTX_SCHED_BLOCKING_SYNC, cudevice);
356  if (res != CUDA_SUCCESS)
357  {
358  LOG(VB_GENERAL, LOG_ERR, LOC + QString("Failed to create CUDA context (Err: %1)")
359  .arg(res));
360  Retry = true;
361  return false;
362  }
363 
364  CudaFuncs->cuCtxPopCurrent(&dummy);
365  LOG(VB_PLAYBACK, LOG_INFO, LOC + "Created CUDA context");
366  return true;
367 }
368 
369 bool MythNVDECInterop::CreateCUDAContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs,
370  CUcontext &CudaContext)
371 {
372  if (!gCoreContext->IsUIThread())
373  {
374  LOG(VB_GENERAL, LOG_ERR, LOC + "Must create CUDA context from main thread");
375  return false;
376  }
377 
378  int retries = 0;
379  bool retry = false;
380  while (retries++ < 5)
381  {
382  if (CreateCUDAPriv(GLContext, CudaFuncs, CudaContext, retry))
383  return true;
384  CleanupContext(GLContext, CudaFuncs, CudaContext);
385  if (!retry)
386  break;
387  LOG(VB_GENERAL, LOG_WARNING, LOC + "Will retry in 50ms");
388  std::this_thread::sleep_for(std::chrono::milliseconds(50));
389  }
390  return false;
391 }
392 
393 void MythNVDECInterop::CleanupContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs,
394  CUcontext &CudaContext)
395 {
396  if (!GLContext)
397  return;
398 
399  OpenGLLocker locker(GLContext);
400  if (CudaFuncs)
401  {
402  if (CudaContext)
403  CUDA_CHECK(CudaFuncs, cuCtxDestroy(CudaContext));
404  cuda_free_functions(&CudaFuncs);
405  }
406 }
407 
409 {
410  if (!Buffer)
411  return;
412 
413  // don't retain twice for double rate
414  if (!m_referenceFrames.empty() && (m_referenceFrames[0] == Buffer))
415  return;
416 
417  m_referenceFrames.push_front(Buffer);
418 
419  // release old frames
420  while (m_referenceFrames.size() > 3)
421  m_referenceFrames.pop_back();
422 }
static bool CreateCUDAPriv(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext, bool &Retry)
static Type GetInteropType(VideoFrameType Format)
VideoColourSpace contains a QMatrix4x4 that can convert YCbCr data to RGB.
static vector< MythVideoTexture * > CreateTextures(MythRenderOpenGL *Context, VideoFrameType Type, VideoFrameType Format, vector< QSize > Sizes, GLenum Target=QOpenGLTexture::Target2D)
Create a set of textures suitable for the given Type and Format.
MythDeintType GetDoubleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:847
bool InitialiseCuda(void)
Initialise a CUDA context.
long long m_discontinuityCounter
QHash< QString, Action * > Context
Definition: action.h:77
MythNVDECInterop(MythRenderOpenGL *Context)
CUcontext GetCUDAContext(void)
static void CleanupContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
FrameScanType
Definition: videoouttypes.h:78
VideoFrameType PixelFormatToFrameType(AVPixelFormat fmt)
Definition: mythavutil.cpp:68
VideoFrameType
Definition: mythframe.h:23
~MythNVDECInterop() override
MythCoreContext * gCoreContext
This global variable contains the MythCoreContext instance for the app.
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL)
static void DeleteTexture(MythRenderOpenGL *Context, MythVideoTexture *Texture)
int ColorDepth(int Format)
Return the color depth for the given MythTV frame format.
Definition: mythframe.cpp:808
MythDeintType
Definition: mythframe.h:120
CudaFunctions * m_cudaFuncs
#define LOC
CUcontext m_cudaContext
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
QVector< CUdeviceptr > m_referenceFrames
unsigned int uint
Definition: compat.h:140
#define ALL_PICTURE_ATTRIBUTES
void SetSupportedAttributes(PictureAttributeSupported Supported)
Enable the given set of picture attributes.
virtual void DeleteTextures(void)
MythRenderOpenGL * m_context
vector< MythVideoTexture * > Acquire(MythRenderOpenGL *Context, VideoColourSpace *ColourSpace, VideoFrame *Frame, FrameScanType Scan) override
Map CUDA video memory to OpenGL textures.
PictureAttribute next(PictureAttributeSupported Supported, PictureAttribute Attribute)
static MythNVDECInterop * Create(MythRenderOpenGL *Context)
void RotateReferenceFrames(CUdeviceptr Buffer)
static MythRenderOpenGL * GetOpenGLRender(void)
QHash< unsigned long long, vector< MythVideoTexture * > > m_openglTextures
static bool CreateCUDAContext(MythRenderOpenGL *GLContext, CudaFunctions *&CudaFuncs, CUcontext &CudaContext)
bool is_interlaced(FrameScanType Scan)
unsigned char * m_data
MythDeintType GetSingleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:834
bool UpdateColourSpace(const VideoFrame *Frame)
Set the current colourspace to use.
void DeleteTextures(void) override
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:23