MythTV  master
mythframe.cpp
Go to the documentation of this file.
1 //
2 // mythframe.cpp
3 // MythTV
4 //
5 // Created by Jean-Yves Avenard on 10/06/2014.
6 // Copyright (c) 2014 Bubblestuff Pty Ltd. All rights reserved.
7 //
8 // derived from copy.c: Fast YV12/NV12 copy from VLC project
9 // portion of SSE Code Copyright (C) 2010 Laurent Aimar
10 
11 /******************************************************************************
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU Lesser General Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this program; if not, write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25  *****************************************************************************/
26 
27 #include <mythtimer.h>
28 #include "mythconfig.h"
29 #include "mythframe.h"
30 #include "mythcorecontext.h"
31 #include "mythlogging.h"
32 
33 extern "C" {
34 #include "libavcodec/avcodec.h"
35 }
36 
#ifndef __MAX
// NB: classic function-like macros — each argument is evaluated more than
// once, so never pass expressions with side effects.
# define __MAX(a, b) ( ((a) > (b)) ? (a) : (b) )
#endif
#ifndef __MIN
# define __MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
#endif
43 
#if ARCH_X86

// Cached results of the one-shot CPUID probe; filled in lazily by
// cpu_detect_features() the first time any *_check() helper runs.
static bool features_detected = false;
static bool has_sse2 = false;
static bool has_sse3 = false;
static bool has_ssse3 = false;
static bool has_sse4 = false;
51 
#if defined _WIN32 && !defined __MINGW32__
// Windows/MSVC: the compiler intrinsic __cpuid has the same signature.
#define cpuid __cpuid

#else
// Execute CPUID with EAX = InfoType and return EAX/EBX/ECX/EDX in
// CPUInfo[0..3].
inline void cpuid(int CPUInfo[4],int InfoType)
{
    __asm__ __volatile__ (
        // PIC reserves ebx/rbx (GOT pointer), but CPUID clobbers EBX,
        // so it must be saved/restored on 32-bit builds.
#if ARCH_X86_32
        "push %%ebx\n"
#endif
        "cpuid\n"
        // Copy EBX out through a scratch register before restoring it.
        "movl %%ebx ,%[ebx]\n"
#if ARCH_X86_32
        "pop %%ebx\n"
#endif
        :"=a" (CPUInfo[0]),
        [ebx] "=r"(CPUInfo[1]),
        "=c" (CPUInfo[2]),
        "=d" (CPUInfo[3])
        :"a" (InfoType)
        );
}
#endif
77 
78 static void cpu_detect_features()
79 {
80  int info[4];
81  cpuid(info, 0);
82  int nIds = info[0];
83 
84  // Detect Features
85  if (nIds >= 0x00000001)
86  {
87  cpuid(info,0x00000001);
88  has_sse2 = (info[3] & (1 << 26)) != 0;
89  has_sse3 = (info[2] & (1 << 0)) != 0;
90  has_ssse3 = (info[2] & (1 << 9)) != 0;
91  has_sse4 = (info[2] & (1 << 19)) != 0;
92  }
93  features_detected = true;
94 }
95 
96 static inline bool sse2_check()
97 {
98  if (!features_detected)
99  cpu_detect_features();
100  return has_sse2;
101 }
102 
103 static inline bool sse3_check()
104 {
105  if (!features_detected)
106  cpu_detect_features();
107  return has_sse3;
108 }
109 
110 static inline bool ssse3_check()
111 {
112  if (!features_detected)
113  cpu_detect_features();
114  return has_ssse3;
115 }
116 
117 static inline bool sse4_check()
118 {
119  if (!features_detected)
120  cpu_detect_features();
121  return has_sse4;
122 }
123 
// De-interleave an NV12 chroma (UVUV...) plane into separate U and V
// planes.  Requires SSE2; when SSE3+SSSE3 are available a single PSHUFB
// per register does the split, otherwise a shift/mask/pack sequence is
// used.  Derived from VLC's copy.c (see file header).
static inline void SSE_splitplanes(uint8_t* dstu, int dstu_pitch,
                                   uint8_t* dstv, int dstv_pitch,
                                   const uint8_t* src, int src_pitch,
                                   int width, int height)
{
    // PSHUFB control: even source bytes (U) to the low 8 bytes, odd
    // source bytes (V) to the high 8 bytes of each xmm register.
    const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
                                1, 3, 5, 7, 9, 11, 13, 15 };
    // PAND mask keeping the low byte of every 16-bit UV pair.
    const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
                             0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
    const bool sse3 = sse3_check();
    const bool ssse3 = ssse3_check();

    asm volatile ("mfence");

// Load 64 interleaved source bytes into xmm0-xmm3 (aligned variant).
#define LOAD64A \
    "movdqa 0(%[src]), %%xmm0\n" \
    "movdqa 16(%[src]), %%xmm1\n" \
    "movdqa 32(%[src]), %%xmm2\n" \
    "movdqa 48(%[src]), %%xmm3\n"

// Same as LOAD64A but tolerates an unaligned source row.
#define LOAD64U \
    "movdqu 0(%[src]), %%xmm0\n" \
    "movdqu 16(%[src]), %%xmm1\n" \
    "movdqu 32(%[src]), %%xmm2\n" \
    "movdqu 48(%[src]), %%xmm3\n"

// Store the split halves: low qwords (32 bytes total) to dst1, high
// qwords to dst2.
#define STORE2X32 \
    "movq %%xmm0, 0(%[dst1])\n" \
    "movq %%xmm1, 8(%[dst1])\n" \
    "movhpd %%xmm0, 0(%[dst2])\n" \
    "movhpd %%xmm1, 8(%[dst2])\n" \
    "movq %%xmm2, 16(%[dst1])\n" \
    "movq %%xmm3, 24(%[dst1])\n" \
    "movhpd %%xmm2, 16(%[dst2])\n" \
    "movhpd %%xmm3, 24(%[dst2])\n"

    for (int y = 0; y < height; y++)
    {
        int x = 0;

        // Use aligned loads when this source row sits on a 16-byte boundary.
        if (((uintptr_t)src & 0xf) == 0)
        {
            if (sse3 && ssse3)
            {
                // SSSE3 path: PSHUFB separates U/V in one instruction each.
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[shuffle]), %%xmm7\n"
                        LOAD64A
                        "pshufb %%xmm7, %%xmm0\n"
                        "pshufb %%xmm7, %%xmm1\n"
                        "pshufb %%xmm7, %%xmm2\n"
                        "pshufb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
                }
            }
            else
            {
                // SSE2-only path: isolate odd bytes with PSRLW, even bytes
                // with PAND, then PACKUSWB recombines each plane.
                // Note: dst1/dst2 are bound to dstv/dstu here — swapped
                // relative to the shuffle path — presumably because this
                // pack sequence emits the planes in the opposite order
                // (matches upstream VLC; verify against copy.c).
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[mask]), %%xmm7\n"
                        LOAD64A
                        "movdqa %%xmm0, %%xmm4\n"
                        "movdqa %%xmm1, %%xmm5\n"
                        "movdqa %%xmm2, %%xmm6\n"
                        "psrlw $8, %%xmm0\n"
                        "psrlw $8, %%xmm1\n"
                        "pand %%xmm7, %%xmm4\n"
                        "pand %%xmm7, %%xmm5\n"
                        "pand %%xmm7, %%xmm6\n"
                        "packuswb %%xmm4, %%xmm0\n"
                        "packuswb %%xmm5, %%xmm1\n"
                        "pand %%xmm3, %%xmm7\n"
                        "psrlw $8, %%xmm2\n"
                        "psrlw $8, %%xmm3\n"
                        "packuswb %%xmm6, %%xmm2\n"
                        "packuswb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
                }
            }
        }
        else
        {
            if (sse3 && ssse3)
            {
                // Unaligned source, SSSE3 path.
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[shuffle]), %%xmm7\n"
                        LOAD64U
                        "pshufb %%xmm7, %%xmm0\n"
                        "pshufb %%xmm7, %%xmm1\n"
                        "pshufb %%xmm7, %%xmm2\n"
                        "pshufb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
                }
            }
            else
            {
                // Unaligned source, SSE2-only path (dst1/dst2 swapped as
                // in the aligned mask path above).
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[mask]), %%xmm7\n"
                        LOAD64U
                        "movdqu %%xmm0, %%xmm4\n"
                        "movdqu %%xmm1, %%xmm5\n"
                        "movdqu %%xmm2, %%xmm6\n"
                        "psrlw $8, %%xmm0\n"
                        "psrlw $8, %%xmm1\n"
                        "pand %%xmm7, %%xmm4\n"
                        "pand %%xmm7, %%xmm5\n"
                        "pand %%xmm7, %%xmm6\n"
                        "packuswb %%xmm4, %%xmm0\n"
                        "packuswb %%xmm5, %%xmm1\n"
                        "pand %%xmm3, %%xmm7\n"
                        "psrlw $8, %%xmm2\n"
                        "psrlw $8, %%xmm3\n"
                        "packuswb %%xmm6, %%xmm2\n"
                        "packuswb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
                }
            }
        }

        // Scalar tail for the remaining width % 32 pixels.
        for (; x < width; x++)
        {
            dstu[x] = src[2*x+0];
            dstv[x] = src[2*x+1];
        }
        src += src_pitch;
        dstu += dstu_pitch;
        dstv += dstv_pitch;
    }
    asm volatile ("mfence");

#undef STORE2X32
#undef LOAD64U
#undef LOAD64A
}
268 #endif /* ARCH_X86 */
269 
// Copy one image plane row by row, honouring differing source and
// destination pitches.  Only 'width' bytes of each row are copied; any
// per-row padding in either buffer is left untouched.
static inline void copyplane(uint8_t* dst, int dst_pitch,
                             const uint8_t* src, int src_pitch,
                             int width, int height)
{
    const uint8_t* in = src;
    uint8_t* out = dst;

    for (int row = 0; row < height; row++)
    {
        memcpy(out, in, width);
        in  += src_pitch;
        out += dst_pitch;
    }
}
281 
// Scalar fallback: de-interleave an NV12-style UVUV... plane into separate
// U and V planes, one byte pair at a time.
static void splitplanes(uint8_t* dstu, int dstu_pitch,
                        uint8_t* dstv, int dstv_pitch,
                        const uint8_t* src, int src_pitch,
                        int width, int height)
{
    while (height-- > 0)
    {
        const uint8_t* in = src;
        for (int col = 0; col < width; col++)
        {
            dstu[col] = *in++;  // even byte -> U plane
            dstv[col] = *in++;  // odd byte  -> V plane
        }
        src  += src_pitch;
        dstu += dstu_pitch;
        dstv += dstv_pitch;
    }
}
299 
// Copy one video frame into another.  Supported combinations: identical
// codecs, or an NV12 source into a YV12 destination (same dimensions only,
// via UV-plane splitting).  Any other codec mismatch is silently ignored.
// 'useSSE' enables the SSE split path when the CPU supports SSE2.
// NOTE(review): one statement near the top (original line 307) is elided
// from this listing.
void framecopy(VideoFrame* dst, const VideoFrame* src, bool useSSE)
{
    VideoFrameType codec = dst->codec;
    if (!(dst->codec == src->codec ||
          (src->codec == FMT_NV12 && dst->codec == FMT_YV12)))
        return;

    dst->repeat_pict = src->repeat_pict;
    dst->top_field_first = src->top_field_first;

    if (FMT_YV12 == codec)
    {
        int width = src->width;
        int height = src->height;
        int dwidth = dst->width;
        int dheight = dst->height;

        if (src->codec == FMT_NV12 &&
            height == dheight && width == dwidth)
        {
            // NV12 -> YV12: copy the luma plane, then de-interleave the
            // combined UV plane into the two chroma planes.
            copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      width, height);
#if ARCH_X86
            if (useSSE && sse2_check())
            {
                SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                                dst->buf + dst->offsets[2], dst->pitches[2],
                                src->buf + src->offsets[1], src->pitches[1],
                                (width+1) / 2, (height+1) / 2);
                asm volatile ("emms");
                return;
            }
#else
            Q_UNUSED(useSSE);
#endif
            // Scalar fallback split.
            splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                        dst->buf + dst->offsets[2], dst->pitches[2],
                        src->buf + src->offsets[1], src->pitches[1],
                        (width+1) / 2, (height+1) / 2);
            return;
        }

        if (dst->pitches[0] != src->pitches[0] ||
            dst->pitches[1] != src->pitches[1] ||
            dst->pitches[2] != src->pitches[2])
        {
            // We have a different stride between the two frames
            // drop the garbage data
            // Copy row by row, clipped to the smaller of the two frames.
            height = (dst->height < src->height) ? dst->height : src->height;
            width = (dst->width < src->width) ? dst->width : src->width;
            copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      width, height);
            copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
                      src->buf + src->offsets[1], src->pitches[1],
                      (width+1) / 2, (height+1) / 2);
            copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
                      src->buf + src->offsets[2], src->pitches[2],
                      (width+1) / 2, (height+1) / 2);
            return;
        }

        // Matching strides: each plane is contiguous (pitch * rows), so a
        // single memcpy per plane suffices.  Chroma planes are half height.
        int height0 = (dst->height < src->height) ? dst->height : src->height;
        int height1 = (height0+1) >> 1;
        int height2 = (height0+1) >> 1;
        int pitch0 = ((dst->pitches[0] < src->pitches[0]) ?
                      dst->pitches[0] : src->pitches[0]);
        int pitch1 = ((dst->pitches[1] < src->pitches[1]) ?
                      dst->pitches[1] : src->pitches[1]);
        int pitch2 = ((dst->pitches[2] < src->pitches[2]) ?
                      dst->pitches[2] : src->pitches[2]);

        memcpy(dst->buf + dst->offsets[0],
               src->buf + src->offsets[0], pitch0 * height0);
        memcpy(dst->buf + dst->offsets[1],
               src->buf + src->offsets[1], pitch1 * height1);
        memcpy(dst->buf + dst->offsets[2],
               src->buf + src->offsets[2], pitch2 * height2);
    }
}
382 
383 /***************************************
384  * USWC Fast Copy
385  *
386  * https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers:
387  ***************************************/
#if ARCH_X86
// Copy 16 bytes through xmm1 using the given load/store mnemonics
// (passed as strings, e.g. "movdqu"/"movdqa").
#define COPY16(dstp, srcp, load, store) \
    asm volatile ( \
        load " 0(%[src]), %%xmm1\n" \
        store " %%xmm1, 0(%[dst])\n" \
        : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1")

// Copy 64 bytes through xmm1-xmm4.  Callers pick the mnemonics, e.g.
// "movntdqa" for streaming loads from USWC memory or "movntdq" for
// non-temporal stores.
#define COPY64(dstp, srcp, load, store) \
    asm volatile ( \
        load " 0(%[src]), %%xmm1\n" \
        load " 16(%[src]), %%xmm2\n" \
        load " 32(%[src]), %%xmm3\n" \
        load " 48(%[src]), %%xmm4\n" \
        store " %%xmm1, 0(%[dst])\n" \
        store " %%xmm2, 16(%[dst])\n" \
        store " %%xmm3, 32(%[dst])\n" \
        store " %%xmm4, 48(%[dst])\n" \
        : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
406 
407 /*
408  * Optimized copy from "Uncacheable Speculative Write Combining" memory
409  * as used by some hardware accelerated decoder (VAAPI and DXVA2).
410  */
411 static void CopyFromUswc(uint8_t *dst, int dst_pitch,
412  const uint8_t *src, int src_pitch,
413  int width, int height)
414 {
415  const bool sse4 = sse4_check();
416 
417  asm volatile ("mfence");
418 
419  for (int y = 0; y < height; y++)
420  {
421  const int unaligned = (-(uintptr_t)src) & 0x0f;
422  int x = unaligned;
423 
424  if (sse4)
425  {
426  if (!unaligned)
427  {
428  for (; x+63 < width; x += 64)
429  {
430  COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
431  }
432  }
433  else
434  {
435  COPY16(dst, src, "movdqu", "movdqa");
436  for (; x+63 < width; x += 64)
437  {
438  COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
439  }
440  }
441  }
442  else
443  {
444  if (!unaligned)
445  {
446  for (; x+63 < width; x += 64)
447  {
448  COPY64(&dst[x], &src[x], "movdqa", "movdqa");
449  }
450  }
451  else
452  {
453  COPY16(dst, src, "movdqu", "movdqa");
454  for (; x+63 < width; x += 64)
455  {
456  COPY64(&dst[x], &src[x], "movdqa", "movdqu");
457  }
458  }
459  }
460 
461  for (; x < width; x++)
462  {
463  dst[x] = src[x];
464  }
465 
466  src += src_pitch;
467  dst += dst_pitch;
468  }
469  asm volatile ("mfence");
470 }
471 
// Copy from the cacheable staging buffer to the destination.  Uses
// non-temporal stores (movntdq) when the destination row is 16-byte
// aligned, so the copied frame does not pollute the cache.
static void Copy2d(uint8_t *dst, int dst_pitch,
                   const uint8_t *src, int src_pitch,
                   int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        int x = 0;

        // Alignment is re-checked per row since dst_pitch may not be a
        // multiple of 16.
        bool unaligned = ((intptr_t)dst & 0x0f) != 0;
        if (!unaligned)
        {
            for (; x+63 < width; x += 64)
            {
                COPY64(&dst[x], &src[x], "movdqa", "movntdq");
            }
        }
        else
        {
            for (; x+63 < width; x += 64)
            {
                COPY64(&dst[x], &src[x], "movdqa", "movdqu");
            }
        }

        // Scalar tail for the remaining width % 64 bytes.
        for (; x < width; x++)
        {
            dst[x] = src[x];
        }

        src += src_pitch;
        dst += dst_pitch;
    }
}
505 
506 static void SSE_copyplane(uint8_t *dst, int dst_pitch,
507  const uint8_t *src, int src_pitch,
508  uint8_t *cache, int cache_size,
509  int width, int height)
510 {
511  const int w16 = (width+15) & ~15;
512  const int hstep = cache_size / w16;
513 
514  for (int y = 0; y < height; y += hstep)
515  {
516  const int hblock = __MIN(hstep, height - y);
517 
518  /* Copy a bunch of line into our cache */
519  CopyFromUswc(cache, w16,
520  src, src_pitch,
521  width, hblock);
522 
523  /* Copy from our cache to the destination */
524  Copy2d(dst, dst_pitch,
525  cache, w16,
526  width, hblock);
527 
528  /* */
529  src += src_pitch * hblock;
530  dst += dst_pitch * hblock;
531  }
532 }
533 
534 static void SSE_splitplanes(uint8_t *dstu, int dstu_pitch,
535  uint8_t *dstv, int dstv_pitch,
536  const uint8_t *src, int src_pitch,
537  uint8_t *cache, int cache_size,
538  int width, int height)
539 {
540  const int w16 = (2*width+15) & ~15;
541  const int hstep = cache_size / w16;
542 
543  for (int y = 0; y < height; y += hstep)
544  {
545  const int hblock = __MIN(hstep, height - y);
546 
547  /* Copy a bunch of line into our cache */
548  CopyFromUswc(cache, w16, src, src_pitch,
549  2*width, hblock);
550 
551  /* Copy from our cache to the destination */
552  SSE_splitplanes(dstu, dstu_pitch, dstv, dstv_pitch,
553  cache, w16, width, hblock);
554 
555  /* */
556  src += src_pitch * hblock;
557  dstu += dstu_pitch * hblock;
558  dstv += dstv_pitch * hblock;
559  }
560 }
561 #endif // ARCH_X86
562 
563 MythUSWCCopy::MythUSWCCopy(int width, bool nocache)
564 {
565 #if ARCH_X86
566  if (!nocache)
567  {
568  allocateCache(width);
569  }
570 #else
571  Q_UNUSED(width);
572  Q_UNUSED(nocache);
573 #endif
574 }
575 
577 {
578  m_size = 0;
579 #if ARCH_X86
580  av_freep(&m_cache);
581 #endif
582 }
583 
595 {
597  dst->repeat_pict = src->repeat_pict;
598  dst->top_field_first = src->top_field_first;
599 
600  int width = src->width;
601  int height = src->height;
602 
603  if (src->codec == FMT_NV12)
604  {
605 #if ARCH_X86
606  if (sse2_check())
607  {
608  MythTimer *timer;
609 
610  if ((m_uswc != uswcState::Use_SW) && m_cache)
611  {
612  if (m_uswc == uswcState::Detect)
613  {
614  timer = new MythTimer(MythTimer::kStartRunning);
615  }
616  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
617  src->buf + src->offsets[0], src->pitches[0],
618  m_cache, m_size,
619  width, height);
620  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
621  dst->buf + dst->offsets[2], dst->pitches[2],
622  src->buf + src->offsets[1], src->pitches[1],
623  m_cache, m_size,
624  (width+1) / 2, (height+1) / 2);
625  if (m_uswc == uswcState::Detect)
626  {
627  // Measure how long standard method takes
628  // if shorter, use it in the future
629  long sse_duration = timer->nsecsElapsed();
630  timer->restart();
631  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
632  src->buf + src->offsets[0], src->pitches[0],
633  width, height);
634  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
635  dst->buf + dst->offsets[2], dst->pitches[2],
636  src->buf + src->offsets[1], src->pitches[1],
637  (width+1) / 2, (height+1) / 2);
638  if (timer->nsecsElapsed() < sse_duration)
639  {
641  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
642  }
643  else
644  {
646  }
647  delete timer;
648  }
649  }
650  else
651  {
652  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
653  src->buf + src->offsets[0], src->pitches[0],
654  width, height);
655  SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
656  dst->buf + dst->offsets[2], dst->pitches[2],
657  src->buf + src->offsets[1], src->pitches[1],
658  (width+1) / 2, (height+1) / 2);
659  }
660  asm volatile ("emms");
661  return;
662  }
663 #endif
664  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
665  src->buf + src->offsets[0], src->pitches[0],
666  width, height);
667  splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
668  dst->buf + dst->offsets[2], dst->pitches[2],
669  src->buf + src->offsets[1], src->pitches[1],
670  (width+1) / 2, (height+1) / 2);
671  return;
672  }
673 
674 #if ARCH_X86
675  if (sse2_check() && (m_uswc != uswcState::Use_SW) && m_cache)
676  {
677  MythTimer *timer;
678 
679  if (m_uswc == uswcState::Detect)
680  {
681  timer = new MythTimer(MythTimer::kStartRunning);
682  }
683  SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
684  src->buf + src->offsets[0], src->pitches[0],
685  m_cache, m_size,
686  width, height);
687  SSE_copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
688  src->buf + src->offsets[1], src->pitches[1],
689  m_cache, m_size,
690  (width+1) / 2, (height+1) / 2);
691  SSE_copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
692  src->buf + src->offsets[2], src->pitches[2],
693  m_cache, m_size,
694  (width+1) / 2, (height+1) / 2);
695  if (m_uswc == uswcState::Detect)
696  {
697  // Measure how long standard method takes
698  // if shorter, use it in the future
699  long sse_duration = timer->nsecsElapsed();
700  timer->restart();
701  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
702  src->buf + src->offsets[0], src->pitches[0],
703  width, height);
704  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
705  src->buf + src->offsets[1], src->pitches[1],
706  (width+1) / 2, (height+1) / 2);
707  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
708  src->buf + src->offsets[2], src->pitches[2],
709  (width+1) / 2, (height+1) / 2);
710  if (timer->nsecsElapsed() < sse_duration)
711  {
713  LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
714  }
715  else
716  {
718  }
719  delete timer;
720  }
721  asm volatile ("emms");
722  return;
723  }
724 #endif
725  copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
726  src->buf + src->offsets[0], src->pitches[0],
727  width, height);
728  copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
729  src->buf + src->offsets[1], src->pitches[1],
730  (width+1) / 2, (height+1) / 2);
731  copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
732  src->buf + src->offsets[2], src->pitches[2],
733  (width+1) / 2, (height+1) / 2);
734 }
735 
740 {
742 }
743 
745 {
746  av_freep(&m_cache);
747  m_size = __MAX((width + 63) & ~63, 4096);
748  m_cache = (uint8_t*)av_malloc(m_size);
749 }
750 
// Force the USWC path on or off (per the cross-reference: "disable USWC
// detection").  NOTE(review): the body statement is elided in this
// listing — presumably it sets m_uswc from 'uswc'; verify in the repo.
void MythUSWCCopy::setUSWC(bool uswc)
{
}
759 
// Reset the copier for frames of a new width: reallocate the bounce
// buffer (x86 only).  NOTE(review): a final statement (original line 770,
// presumably the USWC-detection reset) is elided from this listing.
void MythUSWCCopy::reset(int width)
{
#if ARCH_X86
    allocateCache(width);
#else
    Q_UNUSED(width);
#endif
}
int pitches[3]
Y, U, & V pitches.
Definition: mythframe.h:63
int restart(void)
Returns milliseconds elapsed since last start() or restart() and resets the count.
Definition: mythtimer.cpp:62
A QElapsedTimer based timer to replace use of QTime as a timer.
Definition: mythtimer.h:13
static void splitplanes(uint8_t *dstu, int dstu_pitch, uint8_t *dstv, int dstv_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.cpp:282
void reset(int width)
Will reset the cache for a frame with "width" and reset USWC detection.
Definition: mythframe.cpp:763
int repeat_pict
Definition: mythframe.h:59
enum FrameType_ VideoFrameType
void setUSWC(bool uswc)
disable USWC detection.
Definition: mythframe.cpp:755
#define __MIN(a, b)
Definition: mythframe.cpp:41
int offsets[3]
Y, U, & V offsets.
Definition: mythframe.h:64
static void copyplane(uint8_t *dst, int dst_pitch, const uint8_t *src, int src_pitch, int width, int height)
Definition: mythframe.cpp:270
void copy(VideoFrame *dst, const VideoFrame *src)
Definition: mythframe.cpp:594
#define __MAX(a, b)
Definition: mythframe.cpp:38
int height
Definition: mythframe.h:42
void framecopy(VideoFrame *dst, const VideoFrame *src, bool useSSE)
Definition: mythframe.cpp:300
void resetUSWCDetection(void)
reset USWC detection.
Definition: mythframe.cpp:739
int top_field_first
1 if top field is first.
Definition: mythframe.h:58
void * av_malloc(unsigned int size)
uint8_t * m_cache
Definition: mythframe.h:96
uswcState m_uswc
Definition: mythframe.h:98
#define LOG(_MASK_, _LEVEL_, _STRING_)
Definition: mythlogging.h:41
int64_t nsecsElapsed(void) const
Returns nanoseconds elapsed since last start() or restart()
Definition: mythtimer.cpp:118
virtual ~MythUSWCCopy()
Definition: mythframe.cpp:576
MythUSWCCopy(int width, bool nocache=false)
Definition: mythframe.cpp:563
int interlaced_frame
1 if interlaced.
Definition: mythframe.h:57
void allocateCache(int width)
Definition: mythframe.cpp:744
unsigned char * buf
Definition: mythframe.h:39
VideoFrameType codec
Definition: mythframe.h:38