MythTV  master
mythframe.cpp
Go to the documentation of this file.
1 //
2 // mythframe.cpp
3 // MythTV
4 //
5 // Created by Jean-Yves Avenard on 10/06/2014.
6 // Copyright (c) 2014 Bubblestuff Pty Ltd. All rights reserved.
7 //
8 // derived from copy.c: Fast YV12/NV12 copy from VLC project
9 // portion of SSE Code Copyright (C) 2010 Laurent Aimar
10 
11 /******************************************************************************
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU Lesser General Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this program; if not, write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26 
27 #include <mythtimer.h>
28 #include "mythconfig.h"
29 #include "mythframe.h"
30 #include "mythcorecontext.h"
31 #include "mythlogging.h"
32 
34 {
 // Maps a frame-format enumerator to a fixed, human-readable name.
 // NOTE(review): the listing numbers jump from 31 to 34, so the signature
 // line of this function is not visible in this text.
35  switch (Type)
36  {
37  case FMT_NONE: return "None";
38  case FMT_RGB24: return "RGB24";
39  case FMT_YV12: return "YUV420P";
40  case FMT_RGB32: return "RGB32";
41  case FMT_ARGB32: return "ARGB32";
42  case FMT_RGBA32: return "RGBA32";
43  case FMT_YUV422P: return "YUV422P";
44  case FMT_BGRA: return "BGRA";
45  case FMT_YUY2: return "YUY2";
46  case FMT_NV12: return "NV12";
47  case FMT_P010: return "P010";
48  case FMT_P016: return "P016";
49  case FMT_YUV420P9: return "YUV420P9";
50  case FMT_YUV420P10: return "YUV420P10";
51  case FMT_YUV420P12: return "YUV420P12";
52  case FMT_YUV420P14: return "YUV420P14";
53  case FMT_YUV420P16: return "YUV420P16";
54  case FMT_YUV422P9: return "YUV422P9";
55  case FMT_YUV422P10: return "YUV422P10";
56  case FMT_YUV422P12: return "YUV422P12";
57  case FMT_YUV422P14: return "YUV422P14";
58  case FMT_YUV422P16: return "YUV422P16";
59  case FMT_YUV444P: return "YUV444P";
60  case FMT_YUV444P9: return "YUV444P9";
61  case FMT_YUV444P10: return "YUV444P10";
62  case FMT_YUV444P12: return "YUV444P12";
63  case FMT_YUV444P14: return "YUV444P14";
64  case FMT_YUV444P16: return "YUV444P16";
65  case FMT_VDPAU: return "VDPAU";
66  case FMT_VAAPI: return "VAAPI";
67  case FMT_DXVA2: return "DXVA2";
68  case FMT_MMAL: return "MMAL";
69  case FMT_MEDIACODEC: return "MediaCodec";
70  case FMT_VTB: return "VideoToolBox";
71  case FMT_NVDEC: return "NVDec";
72  case FMT_DRMPRIME: return "DRM-PRIME";
73  }
 // Reached only when Type matches none of the cases above.
74  return "?";
75 }
76 
// Fallback max/min helpers, defined only when the names are not already taken.
// Each argument is expanded twice, so do not pass expressions with side effects.
77 #ifndef __MAX
78 # define __MAX(a, b) ( ((a) > (b)) ? (a) : (b) )
79 #endif
80 #ifndef __MIN
81 # define __MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
82 #endif
83 
84 #if ARCH_X86
85 
// Cached CPU capability flags. They are filled in by cpu_detect_features()
// and read through the *_check() helpers; features_detected records whether
// the probe has already run.
86 static bool features_detected = false;
87 static bool has_sse2 = false;
88 static bool has_sse3 = false;
89 static bool has_ssse3 = false;
90 static bool has_sse4 = false;
91 
92 #if defined _WIN32 && !defined __MINGW32__
93 // Windows
94 #define cpuid __cpuid
95 
96 #else
// Runs the CPUID instruction with EAX = InfoType and stores the resulting
// EAX, EBX, ECX and EDX in CPUInfo[0..3] respectively.
// ECX is not initialised on entry, so no particular sub-leaf is requested.
// NOTE(review): EBX is only pushed/popped when ARCH_X86_32 is set and is not
// listed as a clobber; on other builds the instruction still overwrites
// rbx - confirm the compiler cannot keep a live value there.
97 /* NOLINTNEXTLINE(readability-non-const-parameter) */
98 inline void cpuid(int CPUInfo[4],int InfoType)
99 {
100  __asm__ __volatile__ (
101  // pic requires to save ebx/rbx
102 #if ARCH_X86_32
103  "push %%ebx\n"
104 #endif
105  "cpuid\n"
106  "movl %%ebx ,%[ebx]\n"
107 #if ARCH_X86_32
108  "pop %%ebx\n"
109 #endif
110  :"=a" (CPUInfo[0]),
111  [ebx] "=r"(CPUInfo[1]),
112  "=c" (CPUInfo[2]),
113  "=d" (CPUInfo[3])
114  :"a" (InfoType)
115  );
116 }
117 #endif
118 
119 static void cpu_detect_features()
120 {
121  int info[4];
122  cpuid(info, 0);
123  int nIds = info[0];
124 
125  // Detect Features
126  if (nIds >= 0x00000001)
127  {
128  cpuid(info,0x00000001);
129  has_sse2 = (info[3] & (1 << 26)) != 0;
130  has_sse3 = (info[2] & (1 << 0)) != 0;
131  has_ssse3 = (info[2] & (1 << 9)) != 0;
132  has_sse4 = (info[2] & (1 << 19)) != 0;
133  }
134  features_detected = true;
135 }
136 
137 static inline bool sse2_check()
138 {
139  if (!features_detected)
140  cpu_detect_features();
141  return has_sse2;
142 }
143 
144 static inline bool sse3_check()
145 {
146  if (!features_detected)
147  cpu_detect_features();
148  return has_sse3;
149 }
150 
151 static inline bool ssse3_check()
152 {
153  if (!features_detected)
154  cpu_detect_features();
155  return has_ssse3;
156 }
157 
158 static inline bool sse4_check()
159 {
160  if (!features_detected)
161  cpu_detect_features();
162  return has_sse4;
163 }
164 
// De-interleaves a plane of byte pairs: for every output column x, source
// byte 2*x goes to dstu[x] and source byte 2*x+1 goes to dstv[x] (see the
// scalar tail loop). Columns are processed 32 at a time with SSE, and the
// remaining (width % 32) columns of each row are handled by plain C.
// width is the number of output columns per row; the pitches are the byte
// distances between consecutive rows of each buffer.
165 static inline void SSE_splitplanes(uint8_t* dstu, int dstu_pitch,
166  uint8_t* dstv, int dstv_pitch,
167  const uint8_t* src, int src_pitch,
168  int width, int height)
169 {
 // pshufb control vector: even-indexed bytes are gathered into the low
 // 8 bytes of the register, odd-indexed bytes into the high 8 bytes.
170  const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
171  1, 3, 5, 7, 9, 11, 13, 15 };
 // Keeps the low byte of every 16-bit word (the even-indexed bytes).
172  const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
173  0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
174  const bool sse3 = sse3_check();
175  const bool ssse3 = ssse3_check();
176 
177  asm volatile ("mfence");
178 
 // LOAD64A / LOAD64U: read 64 source bytes into xmm0-xmm3 with aligned
 // (movdqa) or unaligned (movdqu) loads.
179 #define LOAD64A \
180  "movdqa 0(%[src]), %%xmm0\n" \
181  "movdqa 16(%[src]), %%xmm1\n" \
182  "movdqa 32(%[src]), %%xmm2\n" \
183  "movdqa 48(%[src]), %%xmm3\n"
184 
185 #define LOAD64U \
186  "movdqu 0(%[src]), %%xmm0\n" \
187  "movdqu 16(%[src]), %%xmm1\n" \
188  "movdqu 32(%[src]), %%xmm2\n" \
189  "movdqu 48(%[src]), %%xmm3\n"
190 
 // STORE2X32: the low 8 bytes of xmm0-xmm3 are written to dst1 and the
 // high 8 bytes to dst2, i.e. 32 bytes to each destination.
191 #define STORE2X32 \
192  "movq %%xmm0, 0(%[dst1])\n" \
193  "movq %%xmm1, 8(%[dst1])\n" \
194  "movhpd %%xmm0, 0(%[dst2])\n" \
195  "movhpd %%xmm1, 8(%[dst2])\n" \
196  "movq %%xmm2, 16(%[dst1])\n" \
197  "movq %%xmm3, 24(%[dst1])\n" \
198  "movhpd %%xmm2, 16(%[dst2])\n" \
199  "movhpd %%xmm3, 24(%[dst2])\n"
200 
201  for (int y = 0; y < height; y++)
202  {
203  int x = 0;
204 
 // The aligned loads are only used when this row starts on a 16-byte
 // boundary; each step advances src by 64 bytes, which preserves that.
205  if (((uintptr_t)src & 0xf) == 0)
206  {
207  if (sse3 && ssse3)
208  {
 // Shuffle variant: pshufb sorts each register into
 // [even bytes | odd bytes]; low halves go to dstu, high halves to dstv.
209  for (; x < (width & ~31); x += 32)
210  {
211  asm volatile (
212  "movdqu (%[shuffle]), %%xmm7\n"
213  LOAD64A
214  "pshufb %%xmm7, %%xmm0\n"
215  "pshufb %%xmm7, %%xmm1\n"
216  "pshufb %%xmm7, %%xmm2\n"
217  "pshufb %%xmm7, %%xmm3\n"
218  STORE2X32
219  : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
220  }
221  }
222  else
223  {
 // Mask/shift variant: the shifted (odd) bytes end up in the low
 // half after packuswb and the masked (even) bytes in the high half,
 // which is why dst1 is bound to dstv and dst2 to dstu here.
 // xmm7 (the mask) is reused as scratch for xmm3 and reloaded on
 // every iteration.
224  for (; x < (width & ~31); x += 32)
225  {
226  asm volatile (
227  "movdqu (%[mask]), %%xmm7\n"
228  LOAD64A
229  "movdqa %%xmm0, %%xmm4\n"
230  "movdqa %%xmm1, %%xmm5\n"
231  "movdqa %%xmm2, %%xmm6\n"
232  "psrlw $8, %%xmm0\n"
233  "psrlw $8, %%xmm1\n"
234  "pand %%xmm7, %%xmm4\n"
235  "pand %%xmm7, %%xmm5\n"
236  "pand %%xmm7, %%xmm6\n"
237  "packuswb %%xmm4, %%xmm0\n"
238  "packuswb %%xmm5, %%xmm1\n"
239  "pand %%xmm3, %%xmm7\n"
240  "psrlw $8, %%xmm2\n"
241  "psrlw $8, %%xmm3\n"
242  "packuswb %%xmm6, %%xmm2\n"
243  "packuswb %%xmm7, %%xmm3\n"
244  STORE2X32
245  : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
246  }
247  }
248  }
249  else
250  {
 // Same two variants as above, using unaligned loads from src.
251  if (sse3 && ssse3)
252  {
253  for (; x < (width & ~31); x += 32)
254  {
255  asm volatile (
256  "movdqu (%[shuffle]), %%xmm7\n"
257  LOAD64U
258  "pshufb %%xmm7, %%xmm0\n"
259  "pshufb %%xmm7, %%xmm1\n"
260  "pshufb %%xmm7, %%xmm2\n"
261  "pshufb %%xmm7, %%xmm3\n"
262  STORE2X32
263  : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
264  }
265  }
266  else
267  {
268  for (; x < (width & ~31); x += 32)
269  {
270  asm volatile (
271  "movdqu (%[mask]), %%xmm7\n"
272  LOAD64U
273  "movdqu %%xmm0, %%xmm4\n"
274  "movdqu %%xmm1, %%xmm5\n"
275  "movdqu %%xmm2, %%xmm6\n"
276  "psrlw $8, %%xmm0\n"
277  "psrlw $8, %%xmm1\n"
278  "pand %%xmm7, %%xmm4\n"
279  "pand %%xmm7, %%xmm5\n"
280  "pand %%xmm7, %%xmm6\n"
281  "packuswb %%xmm4, %%xmm0\n"
282  "packuswb %%xmm5, %%xmm1\n"
283  "pand %%xmm3, %%xmm7\n"
284  "psrlw $8, %%xmm2\n"
285  "psrlw $8, %%xmm3\n"
286  "packuswb %%xmm6, %%xmm2\n"
287  "packuswb %%xmm7, %%xmm3\n"
288  STORE2X32
289  : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
290  }
291  }
292  }
293 
 // Scalar tail: columns not covered by the 32-wide vector loops.
294  for (; x < width; x++)
295  {
296  dstu[x] = src[2*x+0];
297  dstv[x] = src[2*x+1];
298  }
299  src += src_pitch;
300  dstu += dstu_pitch;
301  dstv += dstv_pitch;
302  }
303  asm volatile ("mfence");
304 
305 #undef STORE2X32
306 #undef LOAD64U
307 #undef LOAD64A
308 }
309 #endif /* ARCH_X86 */
310 
// Portable de-interleave of a plane of byte pairs.
// For each of `height` rows, the first byte of every source pair is written
// to dstu and the second byte to dstv, `width` pairs per row. The *_pitch
// arguments are the byte strides between consecutive rows of each buffer.
static void splitplanes(uint8_t* dstu, int dstu_pitch,
                        uint8_t* dstv, int dstv_pitch,
                        const uint8_t* src, int src_pitch,
                        int width, int height)
{
    const uint8_t* srcRow = src;
    uint8_t* uRow = dstu;
    uint8_t* vRow = dstv;

    for (int row = 0; row < height; ++row)
    {
        const uint8_t* pair = srcRow;

        for (int col = 0; col < width; ++col)
        {
            uRow[col] = pair[0];
            vRow[col] = pair[1];
            pair += 2;
        }

        srcRow += src_pitch;
        uRow += dstu_pitch;
        vRow += dstv_pitch;
    }
}
328 
// Copies frame attributes and, when dst is FMT_YV12, pixel data from src.
// Nothing happens unless both frames use the same codec or src is FMT_NV12
// and dst is FMT_YV12. For any other dst codec only the attribute fields
// below are copied.
// useSSE selects the SSE chroma de-interleave for the NV12 -> YV12 case when
// sse2_check() succeeds; it is unused on non-x86 builds.
329 void framecopy(VideoFrame* dst, const VideoFrame* src, bool useSSE)
330 {
331  VideoFrameType codec = dst->codec;
332  if (!(dst->codec == src->codec ||
333  (src->codec == FMT_NV12 && dst->codec == FMT_YV12)))
334  return;
335 
 // NOTE(review): listing lines 336 and 339 are absent from this text, so
 // one or two attribute assignments may be missing here - confirm against
 // the real file.
337  dst->repeat_pict = src->repeat_pict;
338  dst->top_field_first = src->top_field_first;
340  dst->colorspace = src->colorspace;
341  dst->colorrange = src->colorrange;
342  dst->colorprimaries = src->colorprimaries;
343  dst->colortransfer = src->colortransfer;
344  dst->chromalocation = src->chromalocation;
345 
346  if (FMT_YV12 == codec)
347  {
348  int width = src->width;
349  int height = src->height;
350  int dwidth = dst->width;
351  int dheight = dst->height;
352 
 // NV12 source with identical dimensions: copy plane 0 as is and split
 // the interleaved plane 1 into dst planes 1 and 2.
353  if (src->codec == FMT_NV12 &&
354  height == dheight && width == dwidth)
355  {
356  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
357  src->buf + src->offsets[0], src->pitches[0],
358  width, height);
359 #if ARCH_X86
360  if (useSSE && sse2_check())
361  {
362  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
363  dst->buf + dst->offsets[2], dst->pitches[2],
364  src->buf + src->offsets[1], src->pitches[1],
365  (width+1) / 2, (height+1) / 2);
366  asm volatile ("emms");
367  return;
368  }
369 #else
370  Q_UNUSED(useSSE);
371 #endif
372  splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
373  dst->buf + dst->offsets[2], dst->pitches[2],
374  src->buf + src->offsets[1], src->pitches[1],
375  (width+1) / 2, (height+1) / 2);
376  return;
377  }
378 
 // NOTE(review): an NV12 source whose dimensions differ from dst falls
 // through to the three-plane paths below, which read src plane 2 -
 // confirm this is intended.
379  if (dst->pitches[0] != src->pitches[0] ||
380  dst->pitches[1] != src->pitches[1] ||
381  dst->pitches[2] != src->pitches[2])
382  {
383  // We have a different stride between the two frames
384  // drop the garbage data
385  height = (dst->height < src->height) ? dst->height : src->height;
386  width = (dst->width < src->width) ? dst->width : src->width;
387  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
388  src->buf + src->offsets[0], src->pitches[0],
389  width, height);
390  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
391  src->buf + src->offsets[1], src->pitches[1],
392  (width+1) / 2, (height+1) / 2);
393  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
394  src->buf + src->offsets[2], src->pitches[2],
395  (width+1) / 2, (height+1) / 2);
396  return;
397  }
398 
 // Identical pitches: each plane is copied with a single memcpy of
 // pitch * rows bytes, using the smaller of the two frame heights.
399  int height0 = (dst->height < src->height) ? dst->height : src->height;
400  int height1 = (height0+1) >> 1;
401  int height2 = (height0+1) >> 1;
402  int pitch0 = ((dst->pitches[0] < src->pitches[0]) ?
403  dst->pitches[0] : src->pitches[0]);
404  int pitch1 = ((dst->pitches[1] < src->pitches[1]) ?
405  dst->pitches[1] : src->pitches[1]);
406  int pitch2 = ((dst->pitches[2] < src->pitches[2]) ?
407  dst->pitches[2] : src->pitches[2]);
408 
409  memcpy(dst->buf + dst->offsets[0],
410  src->buf + src->offsets[0], pitch0 * height0);
411  memcpy(dst->buf + dst->offsets[1],
412  src->buf + src->offsets[1], pitch1 * height1);
413  memcpy(dst->buf + dst->offsets[2],
414  src->buf + src->offsets[2], pitch2 * height2);
415  }
416 }
417 
418 /***************************************
419  * USWC Fast Copy
420  *
421  * https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers:
422  ***************************************/
423 #if ARCH_X86
// COPY16: move 16 bytes from srcp to dstp through xmm1. `load` and `store`
// are string literals naming the SSE move instructions to use, which lets
// the caller choose the aligned, unaligned or streaming form per side.
424 #define COPY16(dstp, srcp, load, store) \
425  asm volatile ( \
426  load " 0(%[src]), %%xmm1\n" \
427  store " %%xmm1, 0(%[dst])\n" \
428  : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1")
429 
// COPY64: same idea for 64 bytes, using xmm1-xmm4; all four loads are issued
// before the four stores.
430 #define COPY64(dstp, srcp, load, store) \
431  asm volatile ( \
432  load " 0(%[src]), %%xmm1\n" \
433  load " 16(%[src]), %%xmm2\n" \
434  load " 32(%[src]), %%xmm3\n" \
435  load " 48(%[src]), %%xmm4\n" \
436  store " %%xmm1, 0(%[dst])\n" \
437  store " %%xmm2, 16(%[dst])\n" \
438  store " %%xmm3, 32(%[dst])\n" \
439  store " %%xmm4, 48(%[dst])\n" \
440  : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
441 
442 /*
443  * Optimized copy from "Uncacheable Speculative Write Combining" memory
444  * as used by some hardware accelerated decoder (VAAPI and DXVA2).
445  */
446 static void CopyFromUswc(uint8_t *dst, int dst_pitch,
447  const uint8_t *src, int src_pitch,
448  int width, int height)
449 {
450  const bool sse4 = sse4_check();
451 
452  asm volatile ("mfence");
453 
454  for (int y = 0; y < height; y++)
455  {
456  const int unaligned = (-(uintptr_t)src) & 0x0f;
457  int x = unaligned;
458 
459  if (sse4)
460  {
461  if (!unaligned)
462  {
463  for (; x+63 < width; x += 64)
464  {
465  COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
466  }
467  }
468  else
469  {
470  COPY16(dst, src, "movdqu", "movdqa");
471  for (; x+63 < width; x += 64)
472  {
473  COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
474  }
475  }
476  }
477  else
478  {
479  if (!unaligned)
480  {
481  for (; x+63 < width; x += 64)
482  {
483  COPY64(&dst[x], &src[x], "movdqa", "movdqa");
484  }
485  }
486  else
487  {
488  COPY16(dst, src, "movdqu", "movdqa");
489  for (; x+63 < width; x += 64)
490  {
491  COPY64(&dst[x], &src[x], "movdqa", "movdqu");
492  }
493  }
494  }
495 
496  for (; x < width; x++)
497  {
498  dst[x] = src[x];
499  }
500 
501  src += src_pitch;
502  dst += dst_pitch;
503  }
504  asm volatile ("mfence");
505 }
506 
507 static void Copy2d(uint8_t *dst, int dst_pitch,
508  const uint8_t *src, int src_pitch,
509  int width, int height)
510 {
511  for (int y = 0; y < height; y++)
512  {
513  int x = 0;
514 
515  bool unaligned = ((intptr_t)dst & 0x0f) != 0;
516  if (!unaligned)
517  {
518  for (; x+63 < width; x += 64)
519  {
520  COPY64(&dst[x], &src[x], "movdqa", "movntdq");
521  }
522  }
523  else
524  {
525  for (; x+63 < width; x += 64)
526  {
527  COPY64(&dst[x], &src[x], "movdqa", "movdqu");
528  }
529  }
530 
531  for (; x < width; x++)
532  {
533  dst[x] = src[x];
534  }
535 
536  src += src_pitch;
537  dst += dst_pitch;
538  }
539 }
540 
541 static void SSE_copyplane(uint8_t *dst, int dst_pitch,
542  const uint8_t *src, int src_pitch,
543  uint8_t *cache, int cache_size,
544  int width, int height)
545 {
546  const int w16 = (width+15) & ~15;
547  const int hstep = cache_size / w16;
548 
549  for (int y = 0; y < height; y += hstep)
550  {
551  const int hblock = __MIN(hstep, height - y);
552 
553  /* Copy a bunch of line into our cache */
554  CopyFromUswc(cache, w16,
555  src, src_pitch,
556  width, hblock);
557 
558  /* Copy from our cache to the destination */
559  Copy2d(dst, dst_pitch,
560  cache, w16,
561  width, hblock);
562 
563  /* */
564  src += src_pitch * hblock;
565  dst += dst_pitch * hblock;
566  }
567 }
568 
569 static void SSE_splitplanes(uint8_t *dstu, int dstu_pitch,
570  uint8_t *dstv, int dstv_pitch,
571  const uint8_t *src, int src_pitch,
572  uint8_t *cache, int cache_size,
573  int width, int height)
574 {
575  const int w16 = (2*width+15) & ~15;
576  const int hstep = cache_size / w16;
577 
578  for (int y = 0; y < height; y += hstep)
579  {
580  const int hblock = __MIN(hstep, height - y);
581 
582  /* Copy a bunch of line into our cache */
583  CopyFromUswc(cache, w16, src, src_pitch,
584  2*width, hblock);
585 
586  /* Copy from our cache to the destination */
587  SSE_splitplanes(dstu, dstu_pitch, dstv, dstv_pitch,
588  cache, w16, width, hblock);
589 
590  /* */
591  src += src_pitch * hblock;
592  dstu += dstu_pitch * hblock;
593  dstv += dstv_pitch * hblock;
594  }
595 }
596 #endif // ARCH_X86
597 
598 MythUSWCCopy::MythUSWCCopy(int width, bool nocache)
599 {
600 #if ARCH_X86
601  if (!nocache)
602  {
603  allocateCache(width);
604  }
605 #else
606  Q_UNUSED(width);
607  Q_UNUSED(nocache);
608 #endif
609 }
610 
612 {
 // Clears the recorded cache size and, on x86 builds, frees the cache.
 // NOTE(review): the header line (listing line 611) is not visible here;
 // the cross-reference index points ~MythUSWCCopy() at that line.
613  m_size = 0;
614 #if ARCH_X86
615  av_freep(&m_cache);
616 #endif
617 }
618 
630 {
 // Copies frame attributes and three planes of pixel data from src to dst.
 // An FMT_NV12 source has its interleaved plane 1 split into dst planes 1
 // and 2; any other source is copied plane by plane with half-size planes
 // 1 and 2. On x86, while m_uswc is Detect, the cached SSE path and the
 // plain path are both run and timed.
 // NOTE(review): the header (listing lines 618-629) and listing lines 631,
 // 634, 681, 686, 753 and 758 are absent from this text. The index points
 // copy(VideoFrame *dst, const VideoFrame *src) at line 629.
632  dst->repeat_pict = src->repeat_pict;
633  dst->top_field_first = src->top_field_first;
635  dst->colorspace = src->colorspace;
636  dst->colorrange = src->colorrange;
637  dst->colorprimaries = src->colorprimaries;
638  dst->colortransfer = src->colortransfer;
639  dst->chromalocation = src->chromalocation;
640 
641  int width = src->width;
642  int height = src->height;
643 
644  if (src->codec == FMT_NV12)
645  {
646 #if ARCH_X86
647  if (sse2_check())
648  {
649  MythTimer *timer = nullptr;
650 
 // Cached path, taken unless the plain path was already selected
 // or no cache buffer exists.
651  if ((m_uswc != uswcState::Use_SW) && m_cache)
652  {
653  if (m_uswc == uswcState::Detect)
654  {
655  timer = new MythTimer(MythTimer::kStartRunning);
656  }
657  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
658  src->buf + src->offsets[0], src->pitches[0],
659  m_cache, m_size,
660  width, height);
661  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
662  dst->buf + dst->offsets[2], dst->pitches[2],
663  src->buf + src->offsets[1], src->pitches[1],
664  m_cache, m_size,
665  (width+1) / 2, (height+1) / 2);
666  if (m_uswc == uswcState::Detect)
667  {
668  // Measure how long standard method takes
669  // if shorter, use it in the future
670  long sse_duration = timer->nsecsElapsed();
671  timer->restart();
672  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
673  src->buf + src->offsets[0], src->pitches[0],
674  width, height);
675  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
676  dst->buf + dst->offsets[2], dst->pitches[2],
677  src->buf + src->offsets[1], src->pitches[1],
678  (width+1) / 2, (height+1) / 2);
 // NOTE(review): the statements that record the chosen mode
 // are missing from both branches below in this listing.
679  if (timer->nsecsElapsed() < sse_duration)
680  {
682  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
683  }
684  else
685  {
687  }
688  delete timer;
689  }
690  }
691  else
692  {
693  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
694  src->buf + src->offsets[0], src->pitches[0],
695  width, height);
696  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
697  dst->buf + dst->offsets[2], dst->pitches[2],
698  src->buf + src->offsets[1], src->pitches[1],
699  (width+1) / 2, (height+1) / 2);
700  }
701  asm volatile ("emms");
702  return;
703  }
704 #endif
 // Portable NV12 path (non-x86 builds, or sse2_check() failed).
705  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
706  src->buf + src->offsets[0], src->pitches[0],
707  width, height);
708  splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
709  dst->buf + dst->offsets[2], dst->pitches[2],
710  src->buf + src->offsets[1], src->pitches[1],
711  (width+1) / 2, (height+1) / 2);
712  return;
713  }
714 
 // Non-NV12 source: three separate planes.
715 #if ARCH_X86
716  if (sse2_check() && (m_uswc != uswcState::Use_SW) && m_cache)
717  {
718  MythTimer *timer = nullptr;
719 
720  if (m_uswc == uswcState::Detect)
721  {
722  timer = new MythTimer(MythTimer::kStartRunning);
723  }
724  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
725  src->buf + src->offsets[0], src->pitches[0],
726  m_cache, m_size,
727  width, height);
728  SSE_copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
729  src->buf + src->offsets[1], src->pitches[1],
730  m_cache, m_size,
731  (width+1) / 2, (height+1) / 2);
732  SSE_copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
733  src->buf + src->offsets[2], src->pitches[2],
734  m_cache, m_size,
735  (width+1) / 2, (height+1) / 2);
736  if (m_uswc == uswcState::Detect)
737  {
738  // Measure how long standard method takes
739  // if shorter, use it in the future
740  long sse_duration = timer->nsecsElapsed();
741  timer->restart();
742  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
743  src->buf + src->offsets[0], src->pitches[0],
744  width, height);
745  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
746  src->buf + src->offsets[1], src->pitches[1],
747  (width+1) / 2, (height+1) / 2);
748  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
749  src->buf + src->offsets[2], src->pitches[2],
750  (width+1) / 2, (height+1) / 2);
 // NOTE(review): as above, the mode assignments are missing from
 // both branches in this listing.
751  if (timer->nsecsElapsed() < sse_duration)
752  {
754  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
755  }
756  else
757  {
759  }
760  delete timer;
761  }
762  asm volatile ("emms");
763  return;
764  }
765 #endif
 // Plain three-plane copy.
766  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
767  src->buf + src->offsets[0], src->pitches[0],
768  width, height);
769  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
770  src->buf + src->offsets[1], src->pitches[1],
771  (width+1) / 2, (height+1) / 2);
772  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
773  src->buf + src->offsets[2], src->pitches[2],
774  (width+1) / 2, (height+1) / 2);
775 }
776 
781 {
 // NOTE(review): the header (listing line 780) and the single body line
 // (782) are absent from this text. The cross-reference index describes
 // resetUSWCDetection() as "reset USWC detection" - restore from the file.
783 }
784 
786 {
 // Releases any existing cache, then allocates a new one whose size is
 // width rounded up to a multiple of 64, but at least 4096 bytes.
 // The av_malloc() result is stored unchecked; copy() tests m_cache
 // before using the cached path.
 // NOTE(review): the header line (listing line 785) is not visible here;
 // the index points allocateCache(int width) at that line.
787  av_freep(&m_cache);
788  m_size = __MAX((width + 63) & ~63, 4096);
789  m_cache = (uint8_t*)av_malloc(m_size);
790 }
791 
// NOTE(review): the only body line (listing line 798) is absent from this
// text. The cross-reference index describes setUSWC() as "disable USWC
// detection" - restore the statement from the real file.
796 void MythUSWCCopy::setUSWC(bool uswc)
797 {
799 }
800 
// On x86 builds, reallocates the cache for the given width; elsewhere the
// argument is ignored.
804 void MythUSWCCopy::reset(int width)
805 {
806 #if ARCH_X86
807  allocateCache(width);
808 #else
809  Q_UNUSED(width);
810 #endif
 // NOTE(review): listing line 811 is absent from this text. The index says
 // reset() also resets USWC detection, so a call is presumably missing here.
812 }
813 
816 {
 // Returns the bit depth encoded in the format name for the listed
 // high-bit-depth formats (9, 10, 12, 14 or 16) and 8 for everything else.
 // NOTE(review): the header line (listing line 815) is not visible here;
 // the index points int ColorDepth(int Format) at that line.
817  switch (Format)
818  {
819  case FMT_YUV420P9:
820  case FMT_YUV422P9:
821  case FMT_YUV444P9: return 9;
822  case FMT_P010:
823  case FMT_YUV420P10:
824  case FMT_YUV422P10:
825  case FMT_YUV444P10: return 10;
826  case FMT_YUV420P12:
827  case FMT_YUV422P12:
828  case FMT_YUV444P12: return 12;
829  case FMT_YUV420P14:
830  case FMT_YUV422P14:
831  case FMT_YUV444P14: return 14;
832  case FMT_P016:
833  case FMT_YUV420P16:
834  case FMT_YUV422P16:
835  case FMT_YUV444P16: return 16;
836  default: break;
837  }
 // Every format not listed above is reported as 8 bits.
838  return 8;
839 }
840 
842  MythDeintType Override)
843 {
 // Combines the frame's deinterlace_single flags with Override when it
 // is non-zero, otherwise with the frame's deinterlace_allowed flags.
 // If the result contains any bit of Type, its deinterlacer bits are
 // returned; otherwise, or when Frame is null, DEINT_NONE.
 // NOTE(review): the first header line (listing line 841) is not visible
 // here; the index gives GetSingleRateOption(const VideoFrame *Frame,
 // MythDeintType Type, MythDeintType Override).
844  if (Frame)
845  {
846  MythDeintType options = Frame->deinterlace_single &
847  (Override ? Override : Frame->deinterlace_allowed);
848  if (options & Type)
849  return GetDeinterlacer(options);
850  }
851  return DEINT_NONE;
852 }
853 
855  MythDeintType Override)
856 {
 // Combines the frame's deinterlace_double flags with Override when it
 // is non-zero, otherwise with the frame's deinterlace_allowed flags.
 // If the result contains any bit of Type, its deinterlacer bits are
 // returned; otherwise, or when Frame is null, DEINT_NONE.
 // NOTE(review): the first header line (listing line 854) is not visible
 // here; the index gives GetDoubleRateOption(const VideoFrame *Frame,
 // MythDeintType Type, MythDeintType Override).
857  if (Frame)
858  {
859  MythDeintType options = Frame->deinterlace_double &
860  (Override ? Override : Frame->deinterlace_allowed);
861  if (options & Type)
862  return GetDeinterlacer(options);
863  }
864  return DEINT_NONE;
865 }
866 
868 {
 // Keeps only the DEINT_BASIC, DEINT_MEDIUM and DEINT_HIGH bits of Option.
 // NOTE(review): the header line (listing line 867) is not visible here;
 // the index points GetDeinterlacer(MythDeintType Option) at that line.
869  return Option & (DEINT_BASIC | DEINT_MEDIUM | DEINT_HIGH);
870 }
int pitches[3]
Y, U, & V pitches.
Definition: mythframe.h:160
int restart(void)
Returns milliseconds elapsed since last start() or restart() and resets the count.
Definition: mythtimer.cpp:62
A QElapsedTimer based timer to replace use of QTime as a timer.
Definition: mythtimer.h:13
MythDeintType GetDoubleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:854
unsigned char * buf
Definition: mythframe.h:139
static void splitplanes(uint8_t *dstu, int dstu_pitch, uint8_t *dstv, int dstv_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.cpp:311
void reset(int width)
Will reset the cache for a frame with "width" and reset USWC detection.
Definition: mythframe.cpp:804
VideoFrameType codec
Definition: mythframe.h:138
endian dependent format, ARGB or BGRA
Definition: mythframe.h:36
int colorrange
Definition: mythframe.h:166
int top_field_first
1 if top field is first.
Definition: mythframe.h:153
VideoFrameType
Definition: mythframe.h:23
void setUSWC(bool uswc)
disable USWC detection.
Definition: mythframe.cpp:796
int interlaced_frame
1 if interlaced.
Definition: mythframe.h:152
int ColorDepth(int Format)
Return the color depth for the given MythTV frame format.
Definition: mythframe.cpp:815
#define __MIN(a, b)
Definition: mythframe.cpp:81
MythDeintType
Definition: mythframe.h:120
int colorprimaries
Definition: mythframe.h:167
int colortransfer
Definition: mythframe.h:168
int chromalocation
Definition: mythframe.h:169
void copy(VideoFrame *dst, const VideoFrame *src)
Definition: mythframe.cpp:629
#define __MAX(a, b)
Definition: mythframe.cpp:78
void framecopy(VideoFrame *dst, const VideoFrame *src, bool useSSE)
Definition: mythframe.cpp:329
void resetUSWCDetection(void)
reset USWC detection.
Definition: mythframe.cpp:780
void * av_malloc(unsigned int size)
uint8_t * m_cache
Definition: mythframe.h:212
uswcState m_uswc
Definition: mythframe.h:214
static void copyplane(uint8_t *dst, int dst_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.h:539
int interlaced_reversed
Definition: mythframe.h:154
int repeat_pict
used to unlock the scan type
Definition: mythframe.h:156
int offsets[3]
Y, U, & V offsets.
Definition: mythframe.h:161
int64_t nsecsElapsed(void) const
Returns nanoseconds elapsed since last start() or restart()
Definition: mythtimer.cpp:118
int height
Definition: mythframe.h:142
int colorspace
Definition: mythframe.h:165
virtual ~MythUSWCCopy()
Definition: mythframe.cpp:611
const char * format_description(VideoFrameType Type)
Definition: mythframe.cpp:33
MythDeintType GetDeinterlacer(MythDeintType Option)
Definition: mythframe.cpp:867
MythUSWCCopy(int width, bool nocache=false)
Definition: mythframe.cpp:598
void allocateCache(int width)
Definition: mythframe.cpp:785
MythDeintType GetSingleRateOption(const VideoFrame *Frame, MythDeintType Type, MythDeintType Override)
Definition: mythframe.cpp:841
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:41