MythTV  master
mythframe.cpp
Go to the documentation of this file.
1 //
2 // mythframe.cpp
3 // MythTV
4 //
5 // Created by Jean-Yves Avenard on 10/06/2014.
6 // Copyright (c) 2014 Bubblestuff Pty Ltd. All rights reserved.
7 //
8 // derived from copy.c: Fast YV12/NV12 copy from VLC project
9 // portion of SSE Code Copyright (C) 2010 Laurent Aimar
10 
11 /******************************************************************************
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU Lesser General Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this program; if not, write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26 
27 #include <mythtimer.h>
28 #include "mythconfig.h"
29 #include "mythframe.h"
30 #include "mythcorecontext.h"
31 #include "mythlogging.h"
32 
34 {
35  switch (Type)
36  {
37  case FMT_NONE: return "None";
38  case FMT_RGB24: return "RGB24";
39  case FMT_YV12: return "YUV420P";
40  case FMT_RGB32: return "RGB32";
41  case FMT_ARGB32: return "ARGB32";
42  case FMT_RGBA32: return "RGBA32";
43  case FMT_YUV422P: return "YUV422P";
44  case FMT_BGRA: return "BGRA";
45  case FMT_YUY2: return "YUY2";
46  case FMT_NV12: return "NV12";
47  case FMT_P010: return "P010";
48  case FMT_P016: return "P016";
49  case FMT_YUV420P9: return "YUV420P9";
50  case FMT_YUV420P10: return "YUV420P10";
51  case FMT_YUV420P12: return "YUV420P12";
52  case FMT_YUV420P14: return "YUV420P14";
53  case FMT_YUV420P16: return "YUV420P16";
54  case FMT_YUV422P9: return "YUV422P9";
55  case FMT_YUV422P10: return "YUV422P10";
56  case FMT_YUV422P12: return "YUV422P12";
57  case FMT_YUV422P14: return "YUV422P14";
58  case FMT_YUV422P16: return "YUV422P16";
59  case FMT_YUV444P: return "YUV444P";
60  case FMT_YUV444P9: return "YUV444P9";
61  case FMT_YUV444P10: return "YUV444P10";
62  case FMT_YUV444P12: return "YUV444P12";
63  case FMT_YUV444P14: return "YUV444P14";
64  case FMT_YUV444P16: return "YUV444P16";
65  case FMT_VDPAU: return "VDPAU";
66  case FMT_VAAPI: return "VAAPI";
67  case FMT_DXVA2: return "DXVA2";
68  case FMT_MMAL: return "MMAL";
69  case FMT_MEDIACODEC: return "MediaCodec";
70  case FMT_VTB: return "VideoToolBox";
71  case FMT_NVDEC: return "NVDec";
72  case FMT_DRMPRIME: return "DRM-PRIME";
73  }
74  return "?";
75 }
76 
77 #if ARCH_X86
78 
79 static bool features_detected = false;
80 static bool has_sse2 = false;
81 static bool has_sse3 = false;
82 static bool has_ssse3 = false;
83 static bool has_sse4 = false;
84 
85 #if defined _WIN32 && !defined __MINGW32__
86 // Windows
87 #define cpuid __cpuid
88 
89 #else
/**
 * \brief Execute the x86 CPUID instruction for leaf \a InfoType.
 *
 * On return CPUInfo[0], [1], [2] and [3] hold the values CPUID left in
 * EAX, EBX, ECX and EDX respectively. ECX is zeroed on entry so the
 * result never depends on whatever the register happened to contain.
 *
 * \param CPUInfo  Receives the four result registers.
 * \param InfoType CPUID leaf to query (loaded into EAX).
 */
/* NOLINTNEXTLINE(readability-non-const-parameter) */
inline void cpuid(int CPUInfo[4],int InfoType)
{
#if ARCH_X86_32
    // On 32-bit builds EBX may be reserved (PIC base), so it cannot be
    // named as an output. Swap it with a scratch register around CPUID:
    // EBX is restored and the scratch register ends up with CPUID's EBX.
    // This also works if the compiler picks EBX itself as the scratch
    // register. The early-clobber (&) is required because the scratch
    // register is written before the inputs are consumed.
    __asm__ __volatile__ (
        "xchgl %%ebx, %[ebx]\n"
        "cpuid\n"
        "xchgl %%ebx, %[ebx]\n"
        :"=a" (CPUInfo[0]),
         [ebx] "=&r"(CPUInfo[1]),
         "=c" (CPUInfo[2]),
         "=d" (CPUInfo[3])
        :"a" (InfoType),
         "c" (0)
    );
#else
    // On 64-bit builds RBX is an ordinary callee-saved register. Declaring
    // it as an output tells the compiler that CPUID overwrites it, so any
    // value it keeps there is saved and restored correctly.
    __asm__ __volatile__ (
        "cpuid\n"
        :"=a" (CPUInfo[0]),
         "=b" (CPUInfo[1]),
         "=c" (CPUInfo[2]),
         "=d" (CPUInfo[3])
        :"a" (InfoType),
         "c" (0)
    );
#endif
}
110 #endif
111 
112 static void cpu_detect_features()
113 {
114  int info[4];
115  cpuid(info, 0);
116  int nIds = info[0];
117 
118  // Detect Features
119  if (nIds >= 0x00000001)
120  {
121  cpuid(info,0x00000001);
122  has_sse2 = (info[3] & (1 << 26)) != 0;
123  has_sse3 = (info[2] & (1 << 0)) != 0;
124  has_ssse3 = (info[2] & (1 << 9)) != 0;
125  has_sse4 = (info[2] & (1 << 19)) != 0;
126  }
127  features_detected = true;
128 }
129 
130 static inline bool sse2_check()
131 {
132  if (!features_detected)
133  cpu_detect_features();
134  return has_sse2;
135 }
136 
137 static inline bool sse3_check()
138 {
139  if (!features_detected)
140  cpu_detect_features();
141  return has_sse3;
142 }
143 
144 static inline bool ssse3_check()
145 {
146  if (!features_detected)
147  cpu_detect_features();
148  return has_ssse3;
149 }
150 
151 static inline bool sse4_check()
152 {
153  if (!features_detected)
154  cpu_detect_features();
155  return has_sse4;
156 }
157 
/**
 * De-interleave a plane of byte pairs into two separate planes using SSE.
 *
 * For every row, byte 2*x of src is written to dstu[x] and byte 2*x+1 to
 * dstv[x]. Rows are processed 32 output bytes (64 source bytes) at a time
 * with inline assembly; the remaining bytes of each row are handled by a
 * scalar loop.
 *
 * \param dstu,dstu_pitch  Destination for the even source bytes and its row stride.
 * \param dstv,dstv_pitch  Destination for the odd source bytes and its row stride.
 * \param src,src_pitch    Interleaved source plane and its row stride.
 * \param width            Number of byte pairs per row.
 * \param height           Number of rows.
 */
158 static inline void SSE_splitplanes(uint8_t* dstu, int dstu_pitch,
159  uint8_t* dstv, int dstv_pitch,
160  const uint8_t* src, int src_pitch,
161  int width, int height)
162 {
// pshufb control: gathers the even bytes into the low 8 bytes of the
// register and the odd bytes into the high 8 bytes.
163  const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
164  1, 3, 5, 7, 9, 11, 13, 15 };
// Keeps the low byte of every 16-bit word (i.e. the even source bytes).
165  const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
166  0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
167  const bool sse3 = sse3_check();
168  const bool ssse3 = ssse3_check();
169 
170  asm volatile ("mfence");
171 
// Load 64 source bytes into xmm0-xmm3 with aligned loads.
172 #define LOAD64A \
173  "movdqa 0(%[src]), %%xmm0\n" \
174  "movdqa 16(%[src]), %%xmm1\n" \
175  "movdqa 32(%[src]), %%xmm2\n" \
176  "movdqa 48(%[src]), %%xmm3\n"
177 
// Same as LOAD64A but with unaligned loads.
178 #define LOAD64U \
179  "movdqu 0(%[src]), %%xmm0\n" \
180  "movdqu 16(%[src]), %%xmm1\n" \
181  "movdqu 32(%[src]), %%xmm2\n" \
182  "movdqu 48(%[src]), %%xmm3\n"
183 
// Store the low 8 bytes of xmm0-xmm3 to dst1 and the high 8 bytes to dst2
// (32 bytes to each destination).
184 #define STORE2X32 \
185  "movq %%xmm0, 0(%[dst1])\n" \
186  "movq %%xmm1, 8(%[dst1])\n" \
187  "movhpd %%xmm0, 0(%[dst2])\n" \
188  "movhpd %%xmm1, 8(%[dst2])\n" \
189  "movq %%xmm2, 16(%[dst1])\n" \
190  "movq %%xmm3, 24(%[dst1])\n" \
191  "movhpd %%xmm2, 16(%[dst2])\n" \
192  "movhpd %%xmm3, 24(%[dst2])\n"
193 
194  for (int y = 0; y < height; y++)
195  {
196  int x = 0;
197 
// The aligned-load variants are only used when this row starts on a
// 16-byte boundary; the check is repeated for every row.
198  if (((uintptr_t)src & 0xf) == 0)
199  {
// pshufb variant, taken only when both the SSE3 and SSSE3 flags are set.
200  if (sse3 && ssse3)
201  {
202  for (; x < (width & ~31); x += 32)
203  {
204  asm volatile (
205  "movdqu (%[shuffle]), %%xmm7\n"
206  LOAD64A
207  "pshufb %%xmm7, %%xmm0\n"
208  "pshufb %%xmm7, %%xmm1\n"
209  "pshufb %%xmm7, %%xmm2\n"
210  "pshufb %%xmm7, %%xmm3\n"
211  STORE2X32
212  : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
213  }
214  }
215  else
216  {
// Mask/shift/pack variant. After packuswb the odd source bytes sit in the
// low half of each register and the even bytes in the high half, which is
// why dst1/dst2 are bound to dstv/dstu here (the reverse of the pshufb
// variant).
217  for (; x < (width & ~31); x += 32)
218  {
219  asm volatile (
220  "movdqu (%[mask]), %%xmm7\n"
221  LOAD64A
222  "movdqa %%xmm0, %%xmm4\n"
223  "movdqa %%xmm1, %%xmm5\n"
224  "movdqa %%xmm2, %%xmm6\n"
225  "psrlw $8, %%xmm0\n"
226  "psrlw $8, %%xmm1\n"
227  "pand %%xmm7, %%xmm4\n"
228  "pand %%xmm7, %%xmm5\n"
229  "pand %%xmm7, %%xmm6\n"
230  "packuswb %%xmm4, %%xmm0\n"
231  "packuswb %%xmm5, %%xmm1\n"
232  "pand %%xmm3, %%xmm7\n"
233  "psrlw $8, %%xmm2\n"
234  "psrlw $8, %%xmm3\n"
235  "packuswb %%xmm6, %%xmm2\n"
236  "packuswb %%xmm7, %%xmm3\n"
237  STORE2X32
238  : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
239  }
240  }
241  }
242  else
243  {
// Same two variants as above, using unaligned loads from src.
244  if (sse3 && ssse3)
245  {
246  for (; x < (width & ~31); x += 32)
247  {
248  asm volatile (
249  "movdqu (%[shuffle]), %%xmm7\n"
250  LOAD64U
251  "pshufb %%xmm7, %%xmm0\n"
252  "pshufb %%xmm7, %%xmm1\n"
253  "pshufb %%xmm7, %%xmm2\n"
254  "pshufb %%xmm7, %%xmm3\n"
255  STORE2X32
256  : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
257  }
258  }
259  else
260  {
261  for (; x < (width & ~31); x += 32)
262  {
263  asm volatile (
264  "movdqu (%[mask]), %%xmm7\n"
265  LOAD64U
266  "movdqu %%xmm0, %%xmm4\n"
267  "movdqu %%xmm1, %%xmm5\n"
268  "movdqu %%xmm2, %%xmm6\n"
269  "psrlw $8, %%xmm0\n"
270  "psrlw $8, %%xmm1\n"
271  "pand %%xmm7, %%xmm4\n"
272  "pand %%xmm7, %%xmm5\n"
273  "pand %%xmm7, %%xmm6\n"
274  "packuswb %%xmm4, %%xmm0\n"
275  "packuswb %%xmm5, %%xmm1\n"
276  "pand %%xmm3, %%xmm7\n"
277  "psrlw $8, %%xmm2\n"
278  "psrlw $8, %%xmm3\n"
279  "packuswb %%xmm6, %%xmm2\n"
280  "packuswb %%xmm7, %%xmm3\n"
281  STORE2X32
282  : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
283  }
284  }
285  }
286 
// Scalar tail: the bytes left over after the last full 32-byte group.
287  for (; x < width; x++)
288  {
289  dstu[x] = src[2*x+0];
290  dstv[x] = src[2*x+1];
291  }
292  src += src_pitch;
293  dstu += dstu_pitch;
294  dstv += dstv_pitch;
295  }
296  asm volatile ("mfence");
297 
298 #undef STORE2X32
299 #undef LOAD64U
300 #undef LOAD64A
301 }
302 #endif /* ARCH_X86 */
303 
/**
 * Portable de-interleave of a plane of byte pairs into two planes.
 *
 * For every row, byte 2*x of src goes to dstu[x] and byte 2*x+1 goes to
 * dstv[x].
 *
 * \param dstu,dstu_pitch  Destination for the even source bytes and its row stride.
 * \param dstv,dstv_pitch  Destination for the odd source bytes and its row stride.
 * \param src,src_pitch    Interleaved source plane and its row stride.
 * \param width            Number of byte pairs per row.
 * \param height           Number of rows.
 */
static void splitplanes(uint8_t* dstu, int dstu_pitch,
                        uint8_t* dstv, int dstv_pitch,
                        const uint8_t* src, int src_pitch,
                        int width, int height)
{
    uint8_t*       u_row  = dstu;
    uint8_t*       v_row  = dstv;
    const uint8_t* uv_row = src;

    for (int rows_left = height; rows_left > 0; --rows_left)
    {
        const uint8_t* in = uv_row;
        for (int col = 0; col < width; ++col)
        {
            u_row[col] = *in++;
            v_row[col] = *in++;
        }

        uv_row += src_pitch;
        u_row  += dstu_pitch;
        v_row  += dstv_pitch;
    }
}
321 
/**
 * Copy frame metadata and, for YV12 destinations, pixel data from src to dst.
 *
 * Nothing is done unless both frames have the same codec, or src is
 * FMT_NV12 and dst is FMT_YV12. Pixel data is only copied when dst is
 * FMT_YV12; for any other destination format only the metadata fields
 * assigned below are copied.
 *
 * \param dst    Destination frame.
 * \param src    Source frame.
 * \param useSSE Allow the SSE plane splitter for NV12 -> YV12 (x86 builds
 *               only; unused otherwise).
 */
322 void framecopy(VideoFrame* dst, const VideoFrame* src, bool useSSE)
323 {
324  VideoFrameType codec = dst->codec;
325  if (!(dst->codec == src->codec ||
326  (src->codec == FMT_NV12 && dst->codec == FMT_YV12)))
327  return;
328 
330  dst->repeat_pict = src->repeat_pict;
331  dst->top_field_first = src->top_field_first;
333  dst->colorspace = src->colorspace;
334  dst->colorrange = src->colorrange;
335  dst->colorprimaries = src->colorprimaries;
336  dst->colortransfer = src->colortransfer;
337  dst->chromalocation = src->chromalocation;
338 
339  if (FMT_YV12 == codec)
340  {
341  int width = src->width;
342  int height = src->height;
343  int dwidth = dst->width;
344  int dheight = dst->height;
345 
// NV12 -> YV12: copy plane 0 as is, then split the interleaved plane 1 of
// src into planes 1 and 2 of dst. Only done when both frames have the
// same dimensions.
346  if (src->codec == FMT_NV12 &&
347  height == dheight && width == dwidth)
348  {
349  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
350  src->buf + src->offsets[0], src->pitches[0],
351  width, height);
352 #if ARCH_X86
353  if (useSSE && sse2_check())
354  {
355  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
356  dst->buf + dst->offsets[2], dst->pitches[2],
357  src->buf + src->offsets[1], src->pitches[1],
358  (width+1) / 2, (height+1) / 2);
359  asm volatile ("emms");
360  return;
361  }
362 #else
363  Q_UNUSED(useSSE);
364 #endif
365  splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
366  dst->buf + dst->offsets[2], dst->pitches[2],
367  src->buf + src->offsets[1], src->pitches[1],
368  (width+1) / 2, (height+1) / 2);
369  return;
370  }
371 
// NOTE(review): an NV12 source whose dimensions differ from dst also
// reaches the code below, which reads src->offsets[2] / src->pitches[2]
// as a separate third plane - confirm this is intended.
372  if (dst->pitches[0] != src->pitches[0] ||
373  dst->pitches[1] != src->pitches[1] ||
374  dst->pitches[2] != src->pitches[2])
375  {
376  // We have a different stride between the two frames
377  // drop the garbage data
378  height = (dst->height < src->height) ? dst->height : src->height;
379  width = (dst->width < src->width) ? dst->width : src->width;
380  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
381  src->buf + src->offsets[0], src->pitches[0],
382  width, height);
383  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
384  src->buf + src->offsets[1], src->pitches[1],
385  (width+1) / 2, (height+1) / 2);
386  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
387  src->buf + src->offsets[2], src->pitches[2],
388  (width+1) / 2, (height+1) / 2);
389  return;
390  }
391 
// All three pitches match: copy each plane with a single memcpy of
// pitch * rows bytes, using the smaller of the two frame heights.
392  int height0 = (dst->height < src->height) ? dst->height : src->height;
393  int height1 = (height0+1) >> 1;
394  int height2 = (height0+1) >> 1;
395  int pitch0 = ((dst->pitches[0] < src->pitches[0]) ?
396  dst->pitches[0] : src->pitches[0]);
397  int pitch1 = ((dst->pitches[1] < src->pitches[1]) ?
398  dst->pitches[1] : src->pitches[1]);
399  int pitch2 = ((dst->pitches[2] < src->pitches[2]) ?
400  dst->pitches[2] : src->pitches[2]);
401 
402  memcpy(dst->buf + dst->offsets[0],
403  src->buf + src->offsets[0], pitch0 * height0);
404  memcpy(dst->buf + dst->offsets[1],
405  src->buf + src->offsets[1], pitch1 * height1);
406  memcpy(dst->buf + dst->offsets[2],
407  src->buf + src->offsets[2], pitch2 * height2);
408  }
409 }
410 
411 /***************************************
412  * USWC Fast Copy
413  *
414  * https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers:
415  ***************************************/
416 #if ARCH_X86
// Copy 16 bytes from srcp to dstp through xmm1. 'load' and 'store' are the
// instruction mnemonics (string literals) used for the two moves, so the
// caller decides between aligned, unaligned and non-temporal accesses.
417 #define COPY16(dstp, srcp, load, store) \
418  asm volatile ( \
419  load " 0(%[src]), %%xmm1\n" \
420  store " %%xmm1, 0(%[dst])\n" \
421  : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1")
422 
// Copy 64 bytes from srcp to dstp through xmm1-xmm4: four loads followed by
// four stores, using the 'load' and 'store' instruction mnemonics (string
// literals) supplied by the caller.
423 #define COPY64(dstp, srcp, load, store) \
424  asm volatile ( \
425  load " 0(%[src]), %%xmm1\n" \
426  load " 16(%[src]), %%xmm2\n" \
427  load " 32(%[src]), %%xmm3\n" \
428  load " 48(%[src]), %%xmm4\n" \
429  store " %%xmm1, 0(%[dst])\n" \
430  store " %%xmm2, 16(%[dst])\n" \
431  store " %%xmm3, 32(%[dst])\n" \
432  store " %%xmm4, 48(%[dst])\n" \
433  : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
434 
435 /*
436  * Optimized copy from "Uncacheable Speculative Write Combining" memory
437  * as used by some hardware accelerated decoder (VAAPI and DXVA2).
438  */
439 static void CopyFromUswc(uint8_t *dst, int dst_pitch,
440  const uint8_t *src, int src_pitch,
441  int width, int height)
442 {
443  const bool sse4 = sse4_check();
444 
445  asm volatile ("mfence");
446 
447  for (int y = 0; y < height; y++)
448  {
449  const int unaligned = (-(uintptr_t)src) & 0x0f;
450  int x = unaligned;
451 
452  if (sse4)
453  {
454  if (!unaligned)
455  {
456  for (; x+63 < width; x += 64)
457  {
458  COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
459  }
460  }
461  else
462  {
463  COPY16(dst, src, "movdqu", "movdqa");
464  for (; x+63 < width; x += 64)
465  {
466  COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
467  }
468  }
469  }
470  else
471  {
472  if (!unaligned)
473  {
474  for (; x+63 < width; x += 64)
475  {
476  COPY64(&dst[x], &src[x], "movdqa", "movdqa");
477  }
478  }
479  else
480  {
481  COPY16(dst, src, "movdqu", "movdqa");
482  for (; x+63 < width; x += 64)
483  {
484  COPY64(&dst[x], &src[x], "movdqa", "movdqu");
485  }
486  }
487  }
488 
489  for (; x < width; x++)
490  {
491  dst[x] = src[x];
492  }
493 
494  src += src_pitch;
495  dst += dst_pitch;
496  }
497  asm volatile ("mfence");
498 }
499 
500 static void Copy2d(uint8_t *dst, int dst_pitch,
501  const uint8_t *src, int src_pitch,
502  int width, int height)
503 {
504  for (int y = 0; y < height; y++)
505  {
506  int x = 0;
507 
508  bool unaligned = ((intptr_t)dst & 0x0f) != 0;
509  if (!unaligned)
510  {
511  for (; x+63 < width; x += 64)
512  {
513  COPY64(&dst[x], &src[x], "movdqa", "movntdq");
514  }
515  }
516  else
517  {
518  for (; x+63 < width; x += 64)
519  {
520  COPY64(&dst[x], &src[x], "movdqa", "movdqu");
521  }
522  }
523 
524  for (; x < width; x++)
525  {
526  dst[x] = src[x];
527  }
528 
529  src += src_pitch;
530  dst += dst_pitch;
531  }
532 }
533 
534 static void SSE_copyplane(uint8_t *dst, int dst_pitch,
535  const uint8_t *src, int src_pitch,
536  uint8_t *cache, int cache_size,
537  int width, int height)
538 {
539  const int w16 = (width+15) & ~15;
540  const int hstep = cache_size / w16;
541 
542  for (int y = 0; y < height; y += hstep)
543  {
544  const int hblock = std::min(hstep, height - y);
545 
546  /* Copy a bunch of line into our cache */
547  CopyFromUswc(cache, w16,
548  src, src_pitch,
549  width, hblock);
550 
551  /* Copy from our cache to the destination */
552  Copy2d(dst, dst_pitch,
553  cache, w16,
554  width, hblock);
555 
556  /* */
557  src += src_pitch * hblock;
558  dst += dst_pitch * hblock;
559  }
560 }
561 
562 static void SSE_splitplanes(uint8_t *dstu, int dstu_pitch,
563  uint8_t *dstv, int dstv_pitch,
564  const uint8_t *src, int src_pitch,
565  uint8_t *cache, int cache_size,
566  int width, int height)
567 {
568  const int w16 = (2*width+15) & ~15;
569  const int hstep = cache_size / w16;
570 
571  for (int y = 0; y < height; y += hstep)
572  {
573  const int hblock = std::min(hstep, height - y);
574 
575  /* Copy a bunch of line into our cache */
576  CopyFromUswc(cache, w16, src, src_pitch,
577  2*width, hblock);
578 
579  /* Copy from our cache to the destination */
580  SSE_splitplanes(dstu, dstu_pitch, dstv, dstv_pitch,
581  cache, w16, width, hblock);
582 
583  /* */
584  src += src_pitch * hblock;
585  dstu += dstu_pitch * hblock;
586  dstv += dstv_pitch * hblock;
587  }
588 }
589 #endif // ARCH_X86
590 
591 MythUSWCCopy::MythUSWCCopy(int width, bool nocache)
592 {
593 #if ARCH_X86
594  if (!nocache)
595  {
596  allocateCache(width);
597  }
598 #else
599  Q_UNUSED(width);
600  Q_UNUSED(nocache);
601 #endif
602 }
603 
605 {
606  m_size = 0;
607 #if ARCH_X86
608  av_freep(&m_cache);
609 #endif
610 }
611 
623 {
625  dst->repeat_pict = src->repeat_pict;
626  dst->top_field_first = src->top_field_first;
628  dst->colorspace = src->colorspace;
629  dst->colorrange = src->colorrange;
630  dst->colorprimaries = src->colorprimaries;
631  dst->colortransfer = src->colortransfer;
632  dst->chromalocation = src->chromalocation;
633 
634  int width = src->width;
635  int height = src->height;
636 
637  if (src->codec == FMT_NV12)
638  {
639 #if ARCH_X86
640  if (sse2_check())
641  {
642  MythTimer *timer = nullptr;
643 
644  if ((m_uswc != uswcState::Use_SW) && m_cache)
645  {
646  if (m_uswc == uswcState::Detect)
647  {
648  timer = new MythTimer(MythTimer::kStartRunning);
649  }
650  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
651  src->buf + src->offsets[0], src->pitches[0],
652  m_cache, m_size,
653  width, height);
654  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
655  dst->buf + dst->offsets[2], dst->pitches[2],
656  src->buf + src->offsets[1], src->pitches[1],
657  m_cache, m_size,
658  (width+1) / 2, (height+1) / 2);
659  if (m_uswc == uswcState::Detect)
660  {
661  // Measure how long standard method takes
662  // if shorter, use it in the future
663  long sse_duration = timer->nsecsElapsed();
664  timer->restart();
665  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
666  src->buf + src->offsets[0], src->pitches[0],
667  width, height);
668  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
669  dst->buf + dst->offsets[2], dst->pitches[2],
670  src->buf + src->offsets[1], src->pitches[1],
671  (width+1) / 2, (height+1) / 2);
672  if (timer->nsecsElapsed() < sse_duration)
673  {
675  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
676  }
677  else
678  {
680  }
681  delete timer;
682  }
683  }
684  else
685  {
686  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
687  src->buf + src->offsets[0], src->pitches[0],
688  width, height);
689  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
690  dst->buf + dst->offsets[2], dst->pitches[2],
691  src->buf + src->offsets[1], src->pitches[1],
692  (width+1) / 2, (height+1) / 2);
693  }
694  asm volatile ("emms");
695  return;
696  }
697 #endif
698  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
699  src->buf + src->offsets[0], src->pitches[0],
700  width, height);
701  splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
702  dst->buf + dst->offsets[2], dst->pitches[2],
703  src->buf + src->offsets[1], src->pitches[1],
704  (width+1) / 2, (height+1) / 2);
705  return;
706  }
707 
708 #if ARCH_X86
709  if (sse2_check() && (m_uswc != uswcState::Use_SW) && m_cache)
710  {
711  MythTimer *timer = nullptr;
712 
713  if (m_uswc == uswcState::Detect)
714  {
715  timer = new MythTimer(MythTimer::kStartRunning);
716  }
717  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
718  src->buf + src->offsets[0], src->pitches[0],
719  m_cache, m_size,
720  width, height);
721  SSE_copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
722  src->buf + src->offsets[1], src->pitches[1],
723  m_cache, m_size,
724  (width+1) / 2, (height+1) / 2);
725  SSE_copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
726  src->buf + src->offsets[2], src->pitches[2],
727  m_cache, m_size,
728  (width+1) / 2, (height+1) / 2);
729  if (m_uswc == uswcState::Detect)
730  {
731  // Measure how long standard method takes
732  // if shorter, use it in the future
733  long sse_duration = timer->nsecsElapsed();
734  timer->restart();
735  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
736  src->buf + src->offsets[0], src->pitches[0],
737  width, height);
738  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
739  src->buf + src->offsets[1], src->pitches[1],
740  (width+1) / 2, (height+1) / 2);
741  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
742  src->buf + src->offsets[2], src->pitches[2],
743  (width+1) / 2, (height+1) / 2);
744  if (timer->nsecsElapsed() < sse_duration)
745  {
747  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
748  }
749  else
750  {
752  }
753  delete timer;
754  }
755  asm volatile ("emms");
756  return;
757  }
758 #endif
759  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
760  src->buf + src->offsets[0], src->pitches[0],
761  width, height);
762  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
763  src->buf + src->offsets[1], src->pitches[1],
764  (width+1) / 2, (height+1) / 2);
765  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
766  src->buf + src->offsets[2], src->pitches[2],
767  (width+1) / 2, (height+1) / 2);
768 }
769 
774 {
776 }
777 
779 {
780  av_freep(&m_cache);
781  m_size = std::max((width + 63) & ~63, 4096);
782  m_cache = (uint8_t*)av_malloc(m_size);
783 }
784 
789 void MythUSWCCopy::setUSWC(bool uswc)
790 {
792 }
793 
797 void MythUSWCCopy::reset(int width)
798 {
799 #if ARCH_X86
800  allocateCache(width);
801 #else
802  Q_UNUSED(width);
803 #endif
805 }
806 
809 {
810  switch (Format)
811  {
812  case FMT_YUV420P9:
813  case FMT_YUV422P9:
814  case FMT_YUV444P9: return 9;
815  case FMT_P010:
816  case FMT_YUV420P10:
817  case FMT_YUV422P10:
818  case FMT_YUV444P10: return 10;
819  case FMT_YUV420P12:
820  case FMT_YUV422P12:
821  case FMT_YUV444P12: return 12;
822  case FMT_YUV420P14:
823  case FMT_YUV422P14:
824  case FMT_YUV444P14: return 14;
825  case FMT_P016:
826  case FMT_YUV420P16:
827  case FMT_YUV422P16:
828  case FMT_YUV444P16: return 16;
829  default: break;
830  }
831  return 8;
832 }
833 
835  MythDeintType Override)
836 {
837  if (Frame)
838  {
839  MythDeintType options = Frame->deinterlace_single &
840  (Override ? Override : Frame->deinterlace_allowed);
841  if (options & Type)
842  return GetDeinterlacer(options);
843  }
844  return DEINT_NONE;
845 }
846 
848  MythDeintType Override)
849 {
850  if (Frame)
851  {
852  MythDeintType options = Frame->deinterlace_double &
853  (Override ? Override : Frame->deinterlace_allowed);
854  if (options & Type)
855  return GetDeinterlacer(options);
856  }
857  return DEINT_NONE;
858 }
859 
861 {
862  return Option & (DEINT_BASIC | DEINT_MEDIUM | DEINT_HIGH);
863 }
FMT_MEDIACODEC
@ FMT_MEDIACODEC
Definition: mythframe.h:65
GetSingleRateOption
MythDeintType GetSingleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:834
MythUSWCCopy::allocateCache
void allocateCache(int width)
Definition: mythframe.cpp:778
DEINT_MEDIUM
@ DEINT_MEDIUM
Definition: mythframe.h:125
MythUSWCCopy::m_size
int m_size
Definition: mythframe.h:214
FMT_YUV420P10
@ FMT_YUV420P10
Definition: mythframe.h:30
FMT_YUV420P16
@ FMT_YUV420P16
Definition: mythframe.h:33
VideoFrame::pitches
int pitches[3]
Y, U, & V pitches.
Definition: mythframe.h:161
FMT_VTB
@ FMT_VTB
Definition: mythframe.h:66
FMT_YUV420P14
@ FMT_YUV420P14
Definition: mythframe.h:32
MythTimer
A QElapsedTimer based timer to replace use of QTime as a timer.
Definition: mythtimer.h:13
FMT_DRMPRIME
@ FMT_DRMPRIME
Definition: mythframe.h:68
MythUSWCCopy::m_uswc
uswcState m_uswc
Definition: mythframe.h:215
FMT_YUV444P10
@ FMT_YUV444P10
Definition: mythframe.h:50
FMT_YUV422P14
@ FMT_YUV422P14
Definition: mythframe.h:45
MythUSWCCopy::resetUSWCDetection
void resetUSWCDetection(void)
reset USWC detection.
Definition: mythframe.cpp:773
Frame
Definition: zmdefines.h:93
FMT_P010
@ FMT_P010
Definition: mythframe.h:58
DEINT_NONE
@ DEINT_NONE
Definition: mythframe.h:123
GetDoubleRateOption
MythDeintType GetDoubleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:847
MythUSWCCopy::m_cache
uint8_t * m_cache
Definition: mythframe.h:213
MythDate::Format
Format
Definition: mythdate.h:12
MythTimer::nsecsElapsed
int64_t nsecsElapsed(void) const
Returns nanoseconds elapsed since last start() or restart()
Definition: mythtimer.cpp:118
FMT_YUV422P
@ FMT_YUV422P
Definition: mythframe.h:41
VideoFrame::buf
unsigned char * buf
Definition: mythframe.h:140
LOG
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:23
mythframe.h
VideoFrame
Definition: mythframe.h:137
VideoFrame::interlaced_reversed
bool interlaced_reversed
Definition: mythframe.h:155
FMT_YUV444P14
@ FMT_YUV444P14
Definition: mythframe.h:52
FMT_YUV444P
@ FMT_YUV444P
Definition: mythframe.h:48
VideoFrame::codec
VideoFrameType codec
Definition: mythframe.h:139
FMT_NONE
@ FMT_NONE
Definition: mythframe.h:26
VideoFrame::colorrange
int colorrange
Definition: mythframe.h:167
VideoFrame::repeat_pict
bool repeat_pict
used to unlock the scan type
Definition: mythframe.h:157
framecopy
void framecopy(VideoFrame *dst, const VideoFrame *src, bool useSSE)
Definition: mythframe.cpp:322
mythlogging.h
VideoFrame::interlaced_frame
int interlaced_frame
1 if interlaced. 0 if not interlaced. -1 if unknown.
Definition: mythframe.h:153
FMT_YUV444P9
@ FMT_YUV444P9
Definition: mythframe.h:49
FMT_YV12
@ FMT_YV12
Definition: mythframe.h:28
FMT_YUV422P10
@ FMT_YUV422P10
Definition: mythframe.h:43
FMT_YUV444P16
@ FMT_YUV444P16
Definition: mythframe.h:53
copyplane
static void copyplane(uint8_t *dst, int dst_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.h:540
MythUSWCCopy::~MythUSWCCopy
virtual ~MythUSWCCopy()
Definition: mythframe.cpp:604
splitplanes
static void splitplanes(uint8_t *dstu, int dstu_pitch, uint8_t *dstv, int dstv_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.cpp:304
VideoFrame::colorprimaries
int colorprimaries
Definition: mythframe.h:168
FMT_BGRA
@ FMT_BGRA
Definition: mythframe.h:36
FMT_YUV420P12
@ FMT_YUV420P12
Definition: mythframe.h:31
VideoFrame::top_field_first
bool top_field_first
true if top field is first.
Definition: mythframe.h:154
uswcState::Use_SSE
@ Use_SSE
VideoFrame::colortransfer
int colortransfer
Definition: mythframe.h:169
VideoFrame::chromalocation
int chromalocation
Definition: mythframe.h:170
FMT_YUV444P12
@ FMT_YUV444P12
Definition: mythframe.h:51
FMT_ARGB32
@ FMT_ARGB32
Definition: mythframe.h:38
VideoFrame::colorspace
int colorspace
Definition: mythframe.h:166
FMT_NVDEC
@ FMT_NVDEC
Definition: mythframe.h:67
FMT_YUV422P9
@ FMT_YUV422P9
Definition: mythframe.h:42
MythDeintType
MythDeintType
Definition: mythframe.h:121
VideoFrame::offsets
int offsets[3]
Y, U, & V offsets.
Definition: mythframe.h:162
MythTimer::kStartRunning
@ kStartRunning
Definition: mythtimer.h:17
ColorDepth
int ColorDepth(int Format)
Return the color depth for the given MythTV frame format.
Definition: mythframe.cpp:808
FMT_YUV420P9
@ FMT_YUV420P9
Definition: mythframe.h:29
mythcorecontext.h
FMT_VDPAU
@ FMT_VDPAU
Definition: mythframe.h:61
FMT_YUV422P16
@ FMT_YUV422P16
Definition: mythframe.h:46
FMT_YUY2
@ FMT_YUY2
Definition: mythframe.h:55
FMT_DXVA2
@ FMT_DXVA2
Definition: mythframe.h:63
FMT_RGB24
@ FMT_RGB24
Definition: mythframe.h:35
FMT_YUV422P12
@ FMT_YUV422P12
Definition: mythframe.h:44
DEINT_HIGH
@ DEINT_HIGH
Definition: mythframe.h:126
uswcState::Detect
@ Detect
VideoFrame::height
int height
Definition: mythframe.h:143
MythUSWCCopy::MythUSWCCopy
MythUSWCCopy(int width, bool nocache=false)
Definition: mythframe.cpp:591
mythtimer.h
av_malloc
void * av_malloc(unsigned int size)
FMT_RGB32
@ FMT_RGB32
endian dependent format, ARGB or BGRA
Definition: mythframe.h:37
FMT_MMAL
@ FMT_MMAL
Definition: mythframe.h:64
MythUSWCCopy::setUSWC
void setUSWC(bool uswc)
disable USWC detection.
Definition: mythframe.cpp:789
VideoFrameType
VideoFrameType
Definition: mythframe.h:24
FMT_VAAPI
@ FMT_VAAPI
Definition: mythframe.h:62
FMT_NV12
@ FMT_NV12
Definition: mythframe.h:57
DEINT_BASIC
@ DEINT_BASIC
Definition: mythframe.h:124
FMT_P016
@ FMT_P016
Definition: mythframe.h:59
format_description
const char * format_description(VideoFrameType Type)
Definition: mythframe.cpp:33
uswcState::Use_SW
@ Use_SW
MythUSWCCopy::copy
void copy(VideoFrame *dst, const VideoFrame *src)
Definition: mythframe.cpp:622
build_compdb.options
options
Definition: build_compdb.py:11
GetDeinterlacer
MythDeintType GetDeinterlacer(MythDeintType Option)
Definition: mythframe.cpp:860
VideoFrame::width
int width
Definition: mythframe.h:142
MythTimer::restart
int restart(void)
Returns milliseconds elapsed since last start() or restart() and resets the count.
Definition: mythtimer.cpp:62
MythUSWCCopy::reset
void reset(int width)
Will reset the cache for a frame with "width" and reset USWC detection.
Definition: mythframe.cpp:797
FMT_RGBA32
@ FMT_RGBA32
Definition: mythframe.h:39