MythTV  master
util-opengl.cpp
Go to the documentation of this file.
1 // -*- Mode: c++ -*-
2 
3 #include <cstring>
4 #include <cstdint>
5 
6 #include <QSize>
7 
8 #include "compat.h"
9 #include "util-opengl.h"
10 
11 #ifdef MMX
12 extern "C" {
13 #include "ffmpeg-mmx.h"
14 }
15 
// 64-bit constant with every bit set. The pack helpers load it into a
// register and unpack luma bytes against it, so its 0xff bytes end up
// interleaved with the luma bytes of the packed output.
16 static const mmx_t mmx_1s = {0xffffffffffffffffULL};
17 
// Prepare the 0xff/luma byte pairs for the upper half of two luma rows.
//
// Loads mmx_1s into mm4 and mm7, then unpacks the high bytes of each
// register against the quadword at y1 (result in mm4) and y2 (result in mm7).
// mmx_pack_end() reads mm4 and mm7 afterwards.
//
// NOTE(review): assumes the ffmpeg-mmx.h macros take (source, destination)
// operands and map onto the MMX instructions of the same name -- confirm.
18 static inline void mmx_pack_alpha1s_high(const uint8_t *y1, const uint8_t *y2)
19 {
20  movq_m2r (mmx_1s, mm4);
21  punpckhbw_m2r (*y1, mm4);
22  movq_m2r (mmx_1s, mm7);
23  punpckhbw_m2r (*y2, mm7);
24 }
25 
// Prepare the 0xff/luma byte pairs for the lower half of two luma rows.
//
// Same as mmx_pack_alpha1s_high() but unpacks the low bytes: mm4 receives
// the pairs built from the quadword at y1, mm7 those built from y2.
// mmx_pack_middle() reads mm4 and mm7 afterwards.
//
// NOTE(review): assumes the ffmpeg-mmx.h macros take (source, destination)
// operands and map onto the MMX instructions of the same name -- confirm.
26 static inline void mmx_pack_alpha1s_low(const uint8_t *y1, const uint8_t *y2)
27 {
28  movq_m2r (mmx_1s, mm4);
29  punpcklbw_m2r (*y1, mm4);
30  movq_m2r (mmx_1s, mm7);
31  punpcklbw_m2r (*y2, mm7);
32 }
33 
// Store the first 16 packed bytes of two output rows.
//
// Register inputs (set up by the caller): mm2 and mm3 hold the chroma bytes
// loaded by mmx_pack_chroma(); mm4 and mm7 hold the 0xff/luma pairs for
// row 1 and row 2 (see mmx_pack_alpha1s_low()).
// Writes 16 bytes at dest1 and 16 bytes at dest2. Uses mm5 and mm6 as
// scratch; mm2, mm3, mm4 and mm7 are only read.
34 static inline void mmx_pack_middle(uint8_t *dest1, uint8_t *dest2)
35 {
// mm5 = low bytes of mm3 interleaved with low bytes of mm2 (chroma pairs).
36  movq_r2r (mm3, mm5);
37  punpcklbw_r2r (mm2, mm5);
38 
// Row 1: low half of the chroma pairs combined with mm4.
39  movq_r2r (mm5, mm6);
40  punpcklbw_r2r (mm4, mm6);
41  movq_r2m (mm6, *(dest1));
42 
// Row 1: high half of the chroma pairs combined with mm4.
43  movq_r2r (mm5, mm6);
44  punpckhbw_r2r (mm4, mm6);
45  movq_r2m (mm6, *(dest1 + 8));
46 
// Row 2: same chroma pairs, combined with mm7 instead of mm4.
47  movq_r2r (mm5, mm6);
48  punpcklbw_r2r (mm7, mm6);
49  movq_r2m (mm6, *(dest2));
50 
51  movq_r2r (mm5, mm6);
52  punpckhbw_r2r (mm7, mm6);
53  movq_r2m (mm6, *(dest2 + 8));
54 }
55 
// Store packed bytes 16..31 of two output rows.
//
// Register inputs (set up by the caller): mm2 and mm3 hold the chroma bytes
// loaded by mmx_pack_chroma(); mm4 and mm7 hold the 0xff/luma pairs for
// row 1 and row 2 (see mmx_pack_alpha1s_high()).
// Writes 16 bytes at dest1 + 16 and 16 bytes at dest2 + 16.
// Overwrites mm3 and mm6, so the chroma registers must be reloaded before
// the next group of pixels is packed.
56 static inline void mmx_pack_end(uint8_t *dest1, uint8_t *dest2)
57 {
// mm3 = high bytes of mm3 interleaved with high bytes of mm2.
58  punpckhbw_r2r (mm2, mm3);
59 
60  movq_r2r (mm3, mm6);
61  punpcklbw_r2r (mm4, mm6);
62  movq_r2m (mm6, *(dest1 + 16));
63 
64  movq_r2r (mm3, mm6);
65  punpckhbw_r2r (mm4, mm6);
66  movq_r2m (mm6, *(dest1 + 24));
67 
68  movq_r2r (mm3, mm6);
69  punpcklbw_r2r (mm7, mm6);
70  movq_r2m (mm6, *(dest2 + 16));
71 
// Last store: mm3 itself is used as the destination, no copy needed.
72  punpckhbw_r2r (mm7, mm3);
73  movq_r2m (mm3, *(dest2 + 24));
74 }
75 
// Pack one row: combine the quadword of luma at y with the chroma bytes
// already held in mm2 and mm3 and store 32 bytes at dest.
//
// This is the single-row counterpart of the mmx_pack_alpha1s_low() /
// mmx_pack_middle() / mmx_pack_alpha1s_high() / mmx_pack_end() sequence.
// Overwrites mm3, mm4, mm5 and mm6; mm2 is only read.
//
// NOTE(review): assumes the ffmpeg-mmx.h macros take (source, destination)
// operands and map onto the MMX instructions of the same name -- confirm.
76 static inline void mmx_pack_easy(uint8_t *dest, const uint8_t *y)
77 {
// mm4 = 0xff bytes interleaved with the low luma bytes.
78  movq_m2r (mmx_1s, mm4);
79  punpcklbw_m2r (*y, mm4);
80 
// mm5 = low bytes of mm3 interleaved with low bytes of mm2 (chroma pairs).
81  movq_r2r (mm3, mm5);
82  punpcklbw_r2r (mm2, mm5);
83 
84  movq_r2r (mm5, mm6);
85  punpcklbw_r2r (mm4, mm6);
86  movq_r2m (mm6, *(dest));
87 
88  movq_r2r (mm5, mm6);
89  punpckhbw_r2r (mm4, mm6);
90  movq_r2m (mm6, *(dest + 8));
91 
// Second half: rebuild mm4 from the high luma bytes.
92  movq_m2r (mmx_1s, mm4);
93  punpckhbw_m2r (*y, mm4);
94 
// mm3 = high bytes of mm3 interleaved with high bytes of mm2.
95  punpckhbw_r2r (mm2, mm3);
96 
97  movq_r2r (mm3, mm6);
98  punpcklbw_r2r (mm4, mm6);
99  movq_r2m (mm6, *(dest + 16));
100 
101  punpckhbw_r2r (mm4, mm3);
102  movq_r2m (mm3, *(dest + 24));
103 }
104 
// All-zero constant: mmx_interp_start() unpacks loaded bytes against it.
105 static const mmx_t mmx_0s = {0x0000000000000000LL};
// Four 16-bit words of value 2: added in mmx_interp_start() before the
// right shift by 2, i.e. the rounding term of the divide-by-four.
106 static const mmx_t mmx_round = {0x0002000200020002LL};
107 
// Blend four samples from two sources with weights 3:1.
//
// Loads a doubleword from left and from right, unpacks each against
// mmx_0s, and leaves (3 * left + right + 2) >> 2 for each of the four
// words in mm4. This is the same weighting c_interp() applies.
// Overwrites mm4 and mm5. Follow with mmx_interp_endu() or
// mmx_interp_endv() to move the result into a chroma register.
//
// NOTE(review): assumes the ffmpeg-mmx.h macros take (source, destination)
// operands and map onto the MMX instructions of the same name -- confirm.
108 static inline void mmx_interp_start(const uint8_t *left, const uint8_t *right)
109 {
110  movd_m2r (*left, mm5);
111  punpcklbw_m2r (mmx_0s, mm5);
112 
// mm4 = left + left + left + rounding term.
113  movq_r2r (mm5, mm4);
114  paddw_r2r (mm4, mm4);
115  paddw_r2r (mm5, mm4);
116  paddw_m2r (mmx_round, mm4);
117 
118  movd_m2r (*right, mm5);
119  punpcklbw_m2r (mmx_0s, mm5);
120  paddw_r2r (mm5, mm4);
121 
122  psrlw_i2r (2, mm4);
123 }
124 
// Move the result of mmx_interp_start() from mm4 into mm2.
//
// Copies mm4 to mm2, shifts every word left by 8 and adds mm4 back
// byte-wise, so each word of mm2 carries the value of the corresponding
// mm4 word in both of its bytes (as long as that value fits in one byte).
// mm4 is left unchanged.
125 static inline void mmx_interp_endu(void)
126 {
127  movq_r2r (mm4, mm2);
128  psllw_i2r (8, mm2);
129  paddb_r2r (mm4, mm2);
130 }
131 
// Move the result of mmx_interp_start() from mm4 into mm3.
//
// Identical to mmx_interp_endu() except that the destination register is
// mm3: each word of mm3 ends up with the value of the corresponding mm4
// word in both of its bytes. mm4 is left unchanged.
132 static inline void mmx_interp_endv(void)
133 {
134  movq_r2r (mm4, mm3);
135  psllw_i2r (8, mm3);
136  paddb_r2r (mm4, mm3);
137 }
138 
// Load the chroma registers used by the pack helpers.
//
// Loads a doubleword from u into mm2 and from v into mm3, then unpacks
// each register against itself so that every loaded byte appears twice
// in a row.
//
// NOTE(review): assumes the ffmpeg-mmx.h macros take (source, destination)
// operands and map onto the MMX instructions of the same name -- confirm.
139 static inline void mmx_pack_chroma(const uint8_t *u, const uint8_t *v)
140 {
141  movd_m2r (*u, mm2);
142  movd_m2r (*v, mm3);
143  punpcklbw_r2r (mm2, mm2);
144  punpcklbw_r2r (mm3, mm3);
145 }
146 #endif // MMX
147 
// Blend four samples pairwise with weights 3:1 and round-to-nearest.
//
// a/c and b/d form the two pairs. Each output is three parts of its own
// sample plus one part of its partner, divided by four:
//   dest[0] = (3a + c + 2) / 4      dest[1] = (3b + d + 2) / 4
//   dest[2] = (3c + a + 2) / 4      dest[3] = (3d + b + 2) / 4
static inline void c_interp(unsigned dest[4], unsigned a, unsigned b,
                            unsigned c, unsigned d)
{
    dest[0] = (a * 3 + 2 + c) >> 2;
    dest[1] = (b * 3 + 2 + d) >> 2;
    dest[2] = (c * 3 + 2 + a) >> 2;
    dest[3] = (d * 3 + 2 + b) >> 2;
}
175 
#ifdef __GNUC__
#define MYTH_PACKED __attribute__((packed))
#else
#define MYTH_PACKED
#endif
// Write two packed pixels that share one chroma sample.
//
// The eight bytes stored at dest are, in order:
//   v, 0xff, u, y1, v, 0xff, u, y2
//
// dest  destination; must have room for eight bytes, no alignment needed
// v, u  chroma samples shared by both pixels
// y1    luma of the first pixel
// y2    luma of the second pixel
//
// Returns the number of bytes written, so callers can advance dest by it.
static inline unsigned c_pack2(uint8_t dest[],
                               uint8_t v, uint8_t u, uint8_t y1, uint8_t y2)
{
    struct pack
    {
        uint8_t v1, a1, u1, y1;
        uint8_t v2, a2, u2, y2;
    } MYTH_PACKED tmp = {v, 0xff, u, y1, v, 0xff, u, y2};

    // Copy byte-wise instead of storing through a casted struct pointer:
    // dest is a plain byte buffer, and accessing it as a 'struct pack'
    // object is a type-punning violation.
    memcpy(dest, &tmp, sizeof tmp);

    return sizeof tmp;
}
194 
195 void pack_yv12progressive(const unsigned char *source,
196  const unsigned char *dest,
197  const int *offsets, const int *pitches,
198  const QSize &size)
199 {
200  const int width = size.width();
201  const int height = size.height();
202 
203  if (height % 2 || width % 2)
204  return;
205 
206 #ifdef MMX
207  int residual = width % 8;
208  int mmx_width = width - residual;
209  int c_start_w = mmx_width;
210 #else
211  int residual = 0;
212  int c_start_w = 0;
213 #endif
214 
215  uint bgra_width = width << 2;
216  uint chroma_width = width >> 1;
217 
218  uint y_extra = (pitches[0] << 1) - width + residual;
219  uint u_extra = pitches[1] - chroma_width + (residual >> 1);
220  uint v_extra = pitches[2] - chroma_width + (residual >> 1);
221  uint d_extra = bgra_width + (residual << 2);
222 
223  uint8_t *ypt_1 = (uint8_t *)source + offsets[0];
224  uint8_t *ypt_2 = ypt_1 + pitches[0];
225  uint8_t *upt = (uint8_t *)source + offsets[1];
226  uint8_t *vpt = (uint8_t *)source + offsets[2];
227  uint8_t *dst_1 = (uint8_t *) dest;
228  uint8_t *dst_2 = dst_1 + bgra_width;
229 
230 #ifdef MMX
231  for (int row = 0; row < height; row += 2)
232  {
233  for (int col = 0; col < mmx_width; col += 8)
234  {
235  mmx_pack_chroma(upt, vpt);
236  mmx_pack_alpha1s_low(ypt_1, ypt_2);
237  mmx_pack_middle(dst_1, dst_2);
238  mmx_pack_alpha1s_high(ypt_1, ypt_2);
239  mmx_pack_end(dst_1, dst_2);
240 
241  dst_1 += 32; dst_2 += 32;
242  ypt_1 += 8; ypt_2 += 8;
243  upt += 4; vpt += 4;
244 
245  }
246  ypt_1 += y_extra; ypt_2 += y_extra;
247  upt += u_extra; vpt += v_extra;
248  dst_1 += d_extra; dst_2 += d_extra;
249  }
250 
251  emms();
252 
253  if (residual)
254  {
255  y_extra = (pitches[0] << 1) - width + mmx_width;
256  u_extra = pitches[1] - chroma_width + (mmx_width >> 1);
257  v_extra = pitches[2] - chroma_width + (mmx_width >> 1);
258  d_extra = bgra_width + (mmx_width << 2);
259 
260  ypt_1 = (uint8_t *)source + offsets[0] + mmx_width;
261  ypt_2 = ypt_1 + pitches[0];
262  upt = (uint8_t *)source + offsets[1] + (mmx_width>>1);
263  vpt = (uint8_t *)source + offsets[2] + (mmx_width>>1);
264  dst_1 = (uint8_t *) dest + (mmx_width << 2);
265  dst_2 = dst_1 + bgra_width;
266  }
267  else
268  {
269  return;
270  }
271 #endif //MMX
272 
273  for (int row = 0; row < height; row += 2)
274  {
275  for (int col = c_start_w; col < width; col += 2)
276  {
277  *(dst_1++) = *vpt; *(dst_2++) = *vpt;
278  *(dst_1++) = 255; *(dst_2++) = 255;
279  *(dst_1++) = *upt; *(dst_2++) = *upt;
280  *(dst_1++) = *(ypt_1++);
281  *(dst_2++) = *(ypt_2++);
282 
283  *(dst_1++) = *vpt; *(dst_2++) = *(vpt++);
284  *(dst_1++) = 255; *(dst_2++) = 255;
285  *(dst_1++) = *upt; *(dst_2++) = *(upt++);
286  *(dst_1++) = *(ypt_1++);
287  *(dst_2++) = *(ypt_2++);
288  }
289  ypt_1 += y_extra; ypt_2 += y_extra;
290  upt += u_extra; vpt += v_extra;
291  dst_1 += d_extra; dst_2 += d_extra;
292  }
293 }
294 
295 void pack_yv12interlaced(const unsigned char *source,
296  unsigned char *dest,
297  const int offsets[3],
298  const int pitches[3],
299  const QSize &size)
300 {
301  const int width = size.width();
302  int height = size.height();
303 
304  if (height % 4 || width % 2)
305  return;
306 
307  const uint bgra_width = width << 2;
308  const uint chroma_width = width >> 1;
309  uint ywrap = (pitches[0] << 1) - width;
310  const uint uwrap = (pitches[1] << 1) - chroma_width;
311  const uint vwrap = (pitches[2] << 1) - chroma_width;
312 
313  const uint8_t *ypt_1 = source + offsets[0];
314  const uint8_t *ypt_2 = ypt_1 + pitches[0];
315  const uint8_t *ypt_3 = ypt_1 + (pitches[0] * (height - 2));
316  const uint8_t *ypt_4 = ypt_3 + pitches[0];
317 
318  const uint8_t *u1 = source + offsets[1];
319  const uint8_t *v1 = source + offsets[2];
320  const uint8_t *u2 = u1 + pitches[1];
321  const uint8_t *v2 = v1 + pitches[2];
322  const uint8_t *u3 = u1 + (pitches[1] * ((height - 4) >> 1));
323  const uint8_t *v3 = v1 + (pitches[2] * ((height - 4) >> 1));
324  const uint8_t *u4 = u3 + pitches[1];
325  const uint8_t *v4 = v3 + pitches[2];
326 
327  uint8_t *dst_1 = dest;
328  uint8_t *dst_3 = dst_1 + (bgra_width * (height - 2));
329 
330  // Allocate a 4 line packed data buffer
331  // NB dest is probably graphics memory so access may be slow
332  const uint bufsize = bgra_width * 4;
333  uint8_t *buf = new uint8_t[bufsize];
334 
335  uint8_t *b1 = buf;
336  uint8_t *b2 = b1 + bgra_width;
337  const uint len = bgra_width * 2; // 2 line buffer size
338  uint8_t *b3 = buf + len;
339  uint8_t *b4 = b3 + bgra_width;
340 
341 #ifdef MMX
342 
343  if (!(width % 8))
344  {
345  // pack first 2 and last 2 rows
346  for (int col = 0; col < width; col += 8)
347  {
348  mmx_pack_chroma(u1, v1);
349  mmx_pack_easy(b1, ypt_1);
350  mmx_pack_chroma(u2, v2);
351  mmx_pack_easy(b2, ypt_2);
352  mmx_pack_chroma(u3, v3);
353  mmx_pack_easy(b3, ypt_3);
354  mmx_pack_chroma(u4, v4);
355  mmx_pack_easy(b4, ypt_4);
356 
357  b1 += 32; b2 += 32; b3 += 32; b4 += 32;
358  ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8;
359  u1 += 4; v1 += 4; u2 += 4; v2 += 4;
360  u3 += 4; v3 += 4; u4 += 4; v4 += 4;
361  }
362 
363  memcpy(dst_1, buf, len);
364  dst_1 += len;
365  memcpy(dst_3, buf + len, len);
366 
367  ypt_1 += ywrap; ypt_2 += ywrap;
368  ypt_3 = ypt_2 + pitches[0];
369  ypt_4 = ypt_3 + pitches[0];
370 
371  ywrap = (pitches[0] << 2) - width;
372 
373  u1 = (uint8_t *)source + offsets[1];
374  v1 = (uint8_t *)source + offsets[2];
375  u2 = u1 + pitches[1]; v2 = v1 + pitches[2];
376  u3 = u2 + pitches[1]; v3 = v2 + pitches[2];
377  u4 = u3 + pitches[1]; v4 = v3 + pitches[2];
378 
379  height -= 4;
380 
381  // pack main body
382  for (int row = 0 ; row < height; row += 4)
383  {
384  b1 = buf;
385  b2 = b1 + bgra_width;
386  b3 = b2 + bgra_width;
387  b4 = b3 + bgra_width;
388 
389  for (int col = 0; col < width; col += 8)
390  {
393  mmx_pack_easy(b1, ypt_1);
394 
397  mmx_pack_easy(b2, ypt_2);
398 
401  mmx_pack_easy(b3, ypt_3);
402 
405  mmx_pack_easy(b4, ypt_4);
406 
407  b1 += 32; b2 += 32; b3 += 32; b4 += 32;
408  ypt_1 += 8; ypt_2 += 8; ypt_3 += 8; ypt_4 += 8;
409  u1 += 4; u2 += 4; u3 += 4; u4 += 4;
410  v1 += 4; v2 += 4; v3 += 4; v4 += 4;
411  }
412 
413  // Copy the packed data to dest
414  memcpy(dst_1, buf, bufsize);
415  dst_1 += bufsize;
416 
417  ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap;
418  u1 += uwrap; v1 += vwrap; u2 += uwrap; v2 += vwrap;
419  u3 += uwrap; v3 += vwrap; u4 += uwrap;v4 += vwrap;
420  }
421 
422  emms();
423 
424  delete[] buf;
425  return;
426  }
427 #endif //MMX
428 
429  // pack first 2 and last 2 rows
430  for (int col = 0; col < width; col += 2)
431  {
432  b1 += c_pack2(b1, *(v1++), *(u1++), ypt_1[col], ypt_1[col + 1]);
433  b2 += c_pack2(b2, *(v2++), *(u2++), ypt_2[col], ypt_2[col + 1]);
434  b3 += c_pack2(b3, *(v3++), *(u3++), ypt_3[col], ypt_3[col + 1]);
435  b4 += c_pack2(b4, *(v4++), *(u4++), ypt_4[col], ypt_4[col + 1]);
436  }
437 
438  // Copy the packed data to dest
439  memcpy(dst_1, buf, len);
440  dst_1 += len;
441  memcpy(dst_3, buf + len, len);
442 
443  ywrap = pitches[0] << 2;
444 
445  ypt_1 += ywrap; ypt_2 += ywrap;
446  ypt_3 = ypt_2 + pitches[0];
447  ypt_4 = ypt_3 + pitches[0];
448 
449  u1 = source + offsets[1];
450  v1 = source + offsets[2];
451  u2 = u1 + pitches[1]; v2 = v1 + pitches[2];
452  u3 = u2 + pitches[1]; v3 = v2 + pitches[2];
453  u4 = u3 + pitches[1]; v4 = v3 + pitches[2];
454 
455  height -= 4;
456 
457  // pack main body
458  for (int row = 0; row < height; row += 4)
459  {
460  b1 = buf;
461  b2 = b1 + bgra_width;
462  b3 = b2 + bgra_width;
463  b4 = b3 + bgra_width;
464 
465  for (int col = 0; col < width; col += 2)
466  {
467  unsigned v[4], u[4];
468 
469  c_interp(v, *(v1++), *(v2++), *(v3++), *(v4++));
470  c_interp(u, *(u1++), *(u2++), *(u3++), *(u4++));
471 
472  b1 += c_pack2(b1, v[0], u[0], ypt_1[col], ypt_1[col + 1]);
473  b2 += c_pack2(b2, v[1], u[1], ypt_2[col], ypt_2[col + 1]);
474  b3 += c_pack2(b3, v[2], u[2], ypt_3[col], ypt_3[col + 1]);
475  b4 += c_pack2(b4, v[3], u[3], ypt_4[col], ypt_4[col + 1]);
476  }
477 
478  // Copy the packed data to dest
479  memcpy(dst_1, buf, bufsize);
480  dst_1 += bufsize;
481 
482  ypt_1 += ywrap; ypt_2 += ywrap; ypt_3 += ywrap; ypt_4 += ywrap;
483  u1 += uwrap; u2 += uwrap; u3 += uwrap; u4 += uwrap;
484  v1 += vwrap; v2 += vwrap; v3 += vwrap; v4 += vwrap;
485  }
486 
487  delete[] buf;
488 }
static const mmx_t mmx_0s
static const mmx_t mmx_1s
Definition: util-opengl.cpp:16
static void mmx_interp_endu(void)
static void mmx_interp_start(const uint8_t *left, const uint8_t *right)
static void mmx_pack_alpha1s_low(const uint8_t *y1, const uint8_t *y2)
Definition: util-opengl.cpp:26
static void c_interp(unsigned dest[4], unsigned a, unsigned b, unsigned c, unsigned d)
unsigned int uint
Definition: compat.h:140
static void mmx_pack_end(uint8_t *dest1, uint8_t *dest2)
Definition: util-opengl.cpp:56
static void mmx_pack_easy(uint8_t *dest, const uint8_t *y)
Definition: util-opengl.cpp:76
#define MYTH_PACKED
static guint32 * tmp
Definition: goom_core.c:35
unsigned char b
Definition: ParseText.cpp:329
static const uint16_t * d
static void mmx_pack_alpha1s_high(const uint8_t *y1, const uint8_t *y2)
Definition: util-opengl.cpp:18
void pack_yv12progressive(const unsigned char *source, const unsigned char *dest, const int *offsets, const int *pitches, const QSize &size)
#define mmx_t
static void mmx_pack_middle(uint8_t *dest1, uint8_t *dest2)
Definition: util-opengl.cpp:34
static unsigned c_pack2(uint8_t dest[], uint8_t v, uint8_t u, uint8_t y1, uint8_t y2)
#define emms()
Definition: mm_arch.h:15
static void mmx_pack_chroma(const uint8_t *u, const uint8_t *v)
static void mmx_interp_endv(void)
void pack_yv12interlaced(const unsigned char *source, unsigned char *dest, const int offsets[3], const int pitches[3], const QSize &size)
static const mmx_t mmx_round