MythTV  master
filter_linearblend.c
Go to the documentation of this file.
1 // a linear blending deinterlacer yoinked from the mplayer sources.
2 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
5 
6 #include "mythconfig.h"
7 #if HAVE_STDINT_H
8 #include <stdint.h>
9 #endif
10 
11 #if HAVE_MMX || HAVE_AMD3DNOW
12 #include "ffmpeg-mmx.h"
13 #endif
14 
15 #include "../mm_arch.h"
16 #if HAVE_ALTIVEC_H
17  #include <altivec.h>
18 #endif
19 
20 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
21 #define PAVGUSB(a,b) "pavgusb " #a ", " #b " \n\t"
22 
23 #include "filter.h"
24 #include "mythframe.h"
25 
26 typedef struct LBFilter
27 {
29 
30  /* functions and variables below here considered "private" */
31  int mm_flags;
32  void (*subfilter)(unsigned char *, int);
34 } LBFilter;
35 
36 void linearBlend(unsigned char *src, int stride);
37 void linearBlendMMX(unsigned char *src, int stride);
38 void linearBlend3DNow(unsigned char *src, int stride);
39 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field);
40 
41 #if HAVE_ALTIVEC
42 inline void linearBlendAltivec(unsigned char *src, int stride);
43 #endif
44 
45 #ifdef MMX
46 
47 void linearBlendMMX(unsigned char *src, int stride)
48 {
49 // src += 4 * stride;
50  __asm__ volatile(
51  "lea (%0, %1), %%"FF_REG_a" \n\t"
52  "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
53 
54  "movq (%0), %%mm0 \n\t" // L0
55  "movq (%%"FF_REG_a", %1), %%mm1 \n\t" // L2
56  PAVGB(%%mm1, %%mm0) // L0+L2
57  "movq (%%"FF_REG_a"), %%mm2 \n\t" // L1
58  PAVGB(%%mm2, %%mm0)
59  "movq %%mm0, (%0) \n\t"
60  "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" // L3
61  PAVGB(%%mm0, %%mm2) // L1+L3
62  PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
63  "movq %%mm2, (%%"FF_REG_a") \n\t"
64  "movq (%0, %1, 4), %%mm2 \n\t" // L4
65  PAVGB(%%mm2, %%mm1) // L2+L4
66  PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
67  "movq %%mm1, (%%"FF_REG_a", %1) \n\t"
68  "movq (%%"FF_REG_d"), %%mm1 \n\t" // L5
69  PAVGB(%%mm1, %%mm0) // L3+L5
70  PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
71  "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t"
72  "movq (%%"FF_REG_d", %1), %%mm0 \n\t" // L6
73  PAVGB(%%mm0, %%mm2) // L4+L6
74  PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
75  "movq %%mm2, (%0, %1, 4) \n\t"
76  "movq (%%"FF_REG_d", %1, 2), %%mm2 \n\t" // L7
77  PAVGB(%%mm2, %%mm1) // L5+L7
78  PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
79  "movq %%mm1, (%%"FF_REG_d") \n\t"
80  "movq (%0, %1, 8), %%mm1 \n\t" // L8
81  PAVGB(%%mm1, %%mm0) // L6+L8
82  PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8
83  "movq %%mm0, (%%"FF_REG_d", %1) \n\t"
84  "movq (%%"FF_REG_d", %1, 4), %%mm0 \n\t" // L9
85  PAVGB(%%mm0, %%mm2) // L7+L9
86  PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
87  "movq %%mm2, (%%"FF_REG_d", %1, 2) \n\t"
88 
89  : : "r" (src), "r" ((long)stride)
90  : "%"FF_REG_a, "%"FF_REG_d
91  );
92 }
93 
94 void linearBlend3DNow(unsigned char *src, int stride)
95 {
96 // src += 4 * stride;
97  __asm__ volatile(
98  "lea (%0, %1), %%"FF_REG_a" \n\t"
99  "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d" \n\t"
100 
101  "movq (%0), %%mm0 \n\t" // L0
102  "movq (%%"FF_REG_a", %1), %%mm1 \n\t" // L2
103  PAVGUSB(%%mm1, %%mm0) // L0+L2
104  "movq (%%"FF_REG_a"), %%mm2 \n\t" // L1
105  PAVGUSB(%%mm2, %%mm0)
106  "movq %%mm0, (%0) \n\t"
107  "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" // L3
108  PAVGUSB(%%mm0, %%mm2) // L1+L3
109  PAVGUSB(%%mm1, %%mm2) // 2L2 + L1 + L3
110  "movq %%mm2, (%%"FF_REG_a") \n\t"
111  "movq (%0, %1, 4), %%mm2 \n\t" // L4
112  PAVGUSB(%%mm2, %%mm1) // L2+L4
113  PAVGUSB(%%mm0, %%mm1) // 2L3 + L2 + L4
114  "movq %%mm1, (%%"FF_REG_a", %1) \n\t"
115  "movq (%%"FF_REG_d"), %%mm1 \n\t" // L5
116  PAVGUSB(%%mm1, %%mm0) // L3+L5
117  PAVGUSB(%%mm2, %%mm0) // 2L4 + L3 + L5
118  "movq %%mm0, (%%"FF_REG_a", %1, 2) \n\t"
119  "movq (%%"FF_REG_d", %1), %%mm0 \n\t" // L6
120  PAVGUSB(%%mm0, %%mm2) // L4+L6
121  PAVGUSB(%%mm1, %%mm2) // 2L5 + L4 + L6
122  "movq %%mm2, (%0, %1, 4) \n\t"
123  "movq (%%"FF_REG_d", %1, 2), %%mm2 \n\t" // L7
124  PAVGUSB(%%mm2, %%mm1) // L5+L7
125  PAVGUSB(%%mm0, %%mm1) // 2L6 + L5 + L7
126  "movq %%mm1, (%%"FF_REG_d") \n\t"
127  "movq (%0, %1, 8), %%mm1 \n\t" // L8
128  PAVGUSB(%%mm1, %%mm0) // L6+L8
129  PAVGUSB(%%mm2, %%mm0) // 2L7 + L6 + L8
130  "movq %%mm0, (%%"FF_REG_d", %1) \n\t"
131  "movq (%%"FF_REG_d", %1, 4), %%mm0 \n\t" // L9
132  PAVGUSB(%%mm0, %%mm2) // L7+L9
133  PAVGUSB(%%mm1, %%mm2) // 2L8 + L7 + L9
134  "movq %%mm2, (%%"FF_REG_d", %1, 2) \n\t"
135 
136  : : "r" (src), "r" ((long)stride)
137  : "%"FF_REG_a, "%"FF_REG_d
138  );
139 }
140 
141 #endif
142 
143 #if HAVE_ALTIVEC
144 
145 inline void linearBlendAltivec(unsigned char *src, int stride)
146 {
147  vector unsigned char a, b, c;
148  int i;
149 
150  b = vec_ld(0, src);
151  c = vec_ld(stride, src);
152 
153  for (i = 2; i < 10; i++)
154  {
155  a = b;
156  b = c;
157  c = vec_ld(stride * i, src);
158  vec_st(vec_avg(vec_avg(a, c), b), stride * (i - 2), src);
159  }
160 }
161 
162 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field)
163 {
164  (void)field;
165  (void)f;
166  int height = frame->height;
167  unsigned char *yptr = frame->buf + frame->offsets[0];
168  int stride = frame->pitches[0];
169  int ymax = height - 8;
170  int x,y;
171  unsigned char *src = 0;
172  unsigned char *uoff = frame->buf + frame->offsets[1];
173  unsigned char *voff = frame->buf + frame->offsets[2];
174  TF_VARS;
175 
176  TF_START;
177 
178  if ((stride & 0xf) || ((unsigned int)yptr & 0xf))
179  {
180  for (y = 0; y < ymax; y += 8)
181  {
182  for (x = 0; x < stride; x += 8)
183  {
184  src = yptr + x + y * stride;
185  linearBlend(src, stride);
186  }
187  }
188  }
189  else
190  {
191  src = yptr;
192  for (y = 0; y < ymax; y += 8)
193  {
194  for (x = 0; x < stride; x += 16)
195  {
196  linearBlendAltivec(src, stride);
197  src += 16;
198  }
199  src += stride * 7;
200  }
201  }
202 
203  stride = frame->pitches[1];
204  ymax = height / 2 - 8;
205 
206  if ((stride & 0xf) || ((unsigned int)uoff & 0xf))
207  {
208  for (y = 0; y < ymax; y += 8)
209  {
210  for (x = 0; x < stride; x += 8)
211  {
212  src = uoff + x + y * stride;
213  linearBlend(src, stride);
214 
215  src = voff + x + y * stride;
216  linearBlend(src, stride);
217  }
218  }
219  }
220  else
221  {
222  for (y = 0; y < ymax; y += 8)
223  {
224  for (x = 0; x < stride; x += 16)
225  {
226  linearBlendAltivec(src, stride);
227  uoff += 16;
228 
229  linearBlendAltivec(src, stride);
230  voff += 16;
231  }
232  uoff += stride * 7;
233  voff += stride * 7;
234  }
235  }
236 
237  TF_END(vf, "LinearBlendAltivec: ");
238  return 0;
239 }
240 
241 #endif /* HAVE_ALTIVEC */
242 
/* Read four bytes as one 32-bit word; memcpy avoids alignment and aliasing assumptions. */
static inline uint32_t lb_load32(const unsigned char *p)
{
    uint32_t v;
    memcpy(&v, p, sizeof v);
    return v;
}

/* Write one 32-bit word as four bytes; counterpart of lb_load32(). */
static inline void lb_store32(unsigned char *p, uint32_t v)
{
    memcpy(p, &v, sizeof v);
}

/* Four packed bytes at once: floor((a + b) / 2) in every byte lane. */
static inline uint32_t lb_avg_floor(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & 0xFEFEFEFEu) >> 1);
}

/* Four packed bytes at once: ceil((a + b) / 2) in every byte lane. */
static inline uint32_t lb_avg_ceil(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEu) >> 1);
}

/**
 * Portable C version of the block blend.
 *
 * Works in place on a block 8 bytes wide, handled as two 4 byte columns.
 * Rows 0..9 below src are read and rows 0..7 are rewritten; for every byte
 *
 *     out[k] = ceil((floor((row[k] + row[k+2]) / 2) + row[k+1]) / 2)
 *
 * Every row is read before it is overwritten, so the result depends only on
 * the original contents.
 *
 * @param src    top-left byte of the block; no alignment required
 * @param stride distance in bytes between vertically adjacent pixels
 */
void linearBlend(unsigned char *src, int stride)
{
    for (int col = 0; col < 2; col++)
    {
        uint32_t above  = lb_load32(src);
        uint32_t centre = lb_load32(src + stride);

        for (int row = 0; row < 8; row++)
        {
            uint32_t below = lb_load32(src + stride * (row + 2));
            uint32_t outer = lb_avg_floor(above, below);

            lb_store32(src + stride * row, lb_avg_ceil(outer, centre));

            /* slide the three-row window down by one row */
            above  = centre;
            centre = below;
        }

        src += 4;
    }
}
284 
285 static int linearBlendFilter(VideoFilter *f, VideoFrame *frame, int field)
286 {
287  (void)field;
288  int height = frame->height;
289  unsigned char *yptr = frame->buf + frame->offsets[0];
290  int stride = frame->pitches[0];
291  int ymax = height - 8;
292  int x,y;
293  unsigned char *src;
294  unsigned char *uoff = frame->buf + frame->offsets[1];
295  unsigned char *voff = frame->buf + frame->offsets[2];
296  LBFilter *vf = (LBFilter *)f;
297  TF_VARS;
298 
299  TF_START;
300 
301  for (y = 0; y < ymax; y+=8)
302  {
303  for (x = 0; x < stride; x+=8)
304  {
305  src = yptr + x + y * stride;
306  (vf->subfilter)(src, stride);
307  }
308  }
309 
310  stride = frame->pitches[1];
311  ymax = height / 2 - 8;
312 
313  for (y = 0; y < ymax; y += 8)
314  {
315  for (x = 0; x < stride; x += 8)
316  {
317  src = uoff + x + y * stride;
318  (vf->subfilter)(src, stride);
319 
320  src = voff + x + y * stride;
321  (vf->subfilter)(src, stride);
322  }
323  }
324 
325 #if HAVE_MMX || HAVE_AMD3DNOW
326  if ((vf->mm_flags & AV_CPU_FLAG_MMX2) || (vf->mm_flags & AV_CPU_FLAG_3DNOW))
327  emms();
328 #endif
329 
330  TF_END(vf, "LinearBlend: ");
331  return 0;
332 }
333 
335  VideoFrameType outpixfmt,
336  const int *width, const int *height, const char *options,
337  int threads)
338 {
339  LBFilter *filter;
340  (void)width;
341  (void)height;
342  (void)options;
343  (void)threads;
344  if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
345  return NULL;
346 
347  filter = malloc(sizeof(LBFilter));
348 
349  if (filter == NULL)
350  {
351  fprintf(stderr,"Couldn't allocate memory for filter\n");
352  return NULL;
353  }
354 
355  filter->vf.filter = &linearBlendFilter;
356  filter->subfilter = &linearBlend; /* Default, non accellerated */
357  filter->mm_flags = av_get_cpu_flags();
358  if (HAVE_MMX && filter->mm_flags & AV_CPU_FLAG_MMX2)
359  filter->subfilter = &linearBlendMMX;
360  else if (HAVE_AMD3DNOW && filter->mm_flags & AV_CPU_FLAG_3DNOW)
361  filter->subfilter = &linearBlend3DNow;
362  else if (HAVE_ALTIVEC && filter->mm_flags & AV_CPU_FLAG_ALTIVEC)
364 
365  filter->vf.cleanup = NULL;
366  TF_INIT(filter);
367  return (VideoFilter *)filter;
368 }
369 
370 static FmtConv FmtList[] =
371 {
372  { FMT_YV12, FMT_YV12 },
373  FMT_NULL
374 };
375 
377 {
378  {
380  .name= (char*)"linearblend",
381  .descript= (char*)"fast blending deinterlace filter",
382  .formats= FmtList,
383  .libname= NULL
384  },
385  FILT_NULL
386 };
int pitches[3]
Y, U, & V pitches.
Definition: mythframe.h:63
int(* filter)(struct VideoFilter_ *, VideoFrame *, int)
Definition: filter.h:37
init_filter filter_init
Definition: filter.h:28
void(* subfilter)(unsigned char *, int)
stderr
Definition: ttvdb.py:1426
#define TF_VARS
Definition: filter.h:112
#define NULL
Definition: H264Parser.h:62
static int linearBlendFilter(VideoFilter *f, VideoFrame *frame, int field)
void(* cleanup)(struct VideoFilter_ *)
Definition: filter.h:38
#define HAVE_MMX
Definition: aclib.c:78
enum FrameType_ VideoFrameType
void linearBlend3DNow(unsigned char *src, int stride)
void linearBlendMMX(unsigned char *src, int stride)
unsigned char b
Definition: ParseText.cpp:329
struct LBFilter LBFilter
int offsets[3]
Y, U, & V offsets.
Definition: mythframe.h:64
#define PAVGB(a, b)
int height
Definition: mythframe.h:42
void linearBlend(unsigned char *src, int stride)
VideoFilter vf
#define FMT_NULL
Definition: filter.h:20
#define TF_END(filter, prefix)
Definition: filter.h:114
#define emms()
Definition: mm_arch.h:15
#define PAVGUSB(a, b)
#define FILT_NULL
Definition: filter.h:47
int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field)
const FilterInfo filter_table[]
#define TF_INIT(filter)
Definition: filter.h:110
static FmtConv FmtList[]
static VideoFilter * new_filter(VideoFrameType inpixfmt, VideoFrameType outpixfmt, const int *width, const int *height, const char *options, int threads)
#define TF_START
Definition: filter.h:113
unsigned char * buf
Definition: mythframe.h:39