MythTV  master
zoom_filter_xmmx.c
Go to the documentation of this file.
1 #include "mythconfig.h"
2 
3 /* Prototypes to keep gcc from spewing warnings */
/*
 * NOTE(review): this prototype const-qualifies expix1, expix2, brutS and
 * brutD, whereas the MMX definition later in this file declares the same
 * parameters as non-const pointers and stores a pixel through expix2.
 * The two declarations look incompatible whenever the MMX branch is
 * compiled -- confirm which signature is intended.
 */
4 void zoom_filter_xmmx (int prevX, int prevY, const unsigned int *expix1, const unsigned int *expix2, const int *brutS, const int *brutD, int buffratio, int precalCoef[16][16]);
6 
7 
/* MMX code path: compiled only when MMX is defined and ARCH_X86_64 is not. */
8 #if defined(MMX) && !defined(ARCH_X86_64)
9 /* define this to get exactly the same result as the C function
10  * (a tiny bit slower)
11  */
12 #define STRICT_COMPAT
/* When HAVE_ATHLON is defined, zoom_filter_xmmx finishes with the "femms"
 * instruction instead of calling emms(). */
13 //#define HAVE_ATHLON
14 
15 #define BUFFPOINTNB 16
16 #define BUFFPOINTMASK 0xffff
17 #define BUFFINCR 0xff
18 
19 #define sqrtperte 16
20 // a % sqrtperte is equivalent to a & PERTEMASK
21 #define PERTEMASK 0xf
22 // a / sqrtperte is equivalent to a >> PERTEDEC
23 #define PERTEDEC 4
24 
25 
26 //#define MMX_TRACE
27 #include "mmx.h"
28 #include "libavutil/cpu.h"
29 
31  return (av_get_cpu_flags() & AV_CPU_FLAG_SSE) >> 3;
32 }
33 
/*
 * zoom_filter_xmmx - MMX implementation of the zoom filter.
 *
 * For every index "loop" in [0, prevX * prevY) the body:
 *   1. loads one 64-bit entry from brutS and one from brutD (each entry is
 *      a pair of 32-bit values) and blends them with buffratio;
 *   2. compares the blended pair against
 *      ((prevX-1)<<PERTEDEC, (prevY-1)<<PERTEDEC) and zeroes it when it is
 *      not below that limit (with STRICT_COMPAT both halves are zeroed as
 *      soon as one of them fails the test);
 *   3. uses the low 4 bits of both halves to fetch one 32-bit entry of
 *      precalCoef, i.e. precalCoef[low & 15][high & 15], holding four
 *      packed 8-bit coefficients c1..c4;
 *   4. shifts both halves right by PERTEDEC, forms
 *      index = high * prevX + low, and loads 8 bytes at expix1[index] and
 *      8 bytes at expix1[index + prevX] (two adjacent pixels each);
 *   5. multiplies the four pixels by c1..c4 per byte channel, sums them,
 *      shifts right by 8 and stores the packed pixel in expix2[loop].
 *
 * Parameters:
 *   prevX, prevY  - dimensions; prevX is also used as the row stride of
 *                   expix1 and prevX * prevY as the number of iterations.
 *   expix1        - pixel buffer that is read.
 *   expix2        - pixel buffer that is written, one entry per iteration.
 *   lbruS, lbruD  - reinterpreted as arrays of mmx_t (64-bit entries).
 *   buffratio     - blend factor, replicated in both halves of mm6 and
 *                   shifted left by 16.
 *   precalCoef    - 16x16 table of packed coefficients.
 *
 * NOTE(review): the prototype near the top of this file declares the
 * pointer parameters as const, unlike this definition -- confirm.
 * NOTE(review): the x86 "mull" instruction used below also writes EDX,
 * which is not named in that asm statement's clobber list -- confirm
 * against the GCC extended-asm rules.
 * NOTE(review): values are passed between separate asm statements in MMX
 * registers that are never declared to the compiler; this presumably
 * relies on the compiler emitting no MMX code of its own in between.
 */
34 void zoom_filter_xmmx (int prevX, int prevY,
35  unsigned int *expix1, unsigned int *expix2,
36  int *lbruS, int *lbruD, int buffratio,
37  int precalCoef[16][16])
38 {
39  int bufsize = prevX * prevY; /* buffer size (number of iterations) */
40  volatile int loop; /* loop counter */
41 
42  mmx_t *brutS = (mmx_t*)lbruS; /* source transformation buffer */
43  mmx_t *brutD = (mmx_t*)lbruD; /* destination transformation buffer */
44 
45  volatile mmx_t prevXY; /* clipping limits, filled in below */
46  volatile mmx_t ratiox; /* buffratio duplicated in both halves */
47  //volatile mmx_t interpix;
48 
50 
 /* Clipping limits, expressed in the same PERTEDEC-shifted units as the
  * blended positions they are compared with. */
51  prevXY.ud[0] = (prevX-1)<<PERTEDEC;
52  prevXY.ud[1] = (prevY-1)<<PERTEDEC;
53 
54  ratiox.d[0] = buffratio;
55  ratiox.d[1] = buffratio;
56  movq_m2r (ratiox, mm6);
57  pslld_i2r (16,mm6);
58 
59  pxor_r2r (mm7,mm7); /* clear mm7 to zero */
60 
61  loop=0;
62 
63  /*
64  * NOTE: mm6 and mm7 are not modified inside the loop.
65  */
66  while (loop < bufsize)
67  {
68  /*
69  * pre : mm6 = [buffratio<<16|buffratio<<16]
70  * post : mm0 = S + ((D-S)*buffratio)>>16 format [X|Y]
71  * modified = mm0,mm1,mm2
72  */
73 
74  __asm__ __volatile__ (
75  "movq %0,%%mm0\n"
76  "movq %1,%%mm1\n"
77  : :"m"(brutS[loop]),"m"(brutD[loop])
78  ); /* mm0 = S (and mm1 = D) */
79 
80  psubd_r2r (mm0,mm1); /* mm1 = D - S */
81  movq_r2r (mm1, mm2); /* mm2 = D - S */
82 
83  pslld_i2r (16,mm1);
84  mmx_r2r (pmulhuw, mm6, mm1); /* mm1 = ?? (original author's note) */
85  pmullw_r2r (mm6, mm2);
86 
87  paddd_r2r (mm2, mm1); /* mm1 = (D - S) * buffratio >> 16 */
88  pslld_i2r (16,mm0);
89 
90  paddd_r2r (mm1, mm0); /* mm0 = S + mm1 */
91  psrld_i2r (16, mm0);
92 
93  /*
94  * pre : mm0 : position vector on screen
95  * prevXY : coordinate of the lower-right point on screen
96  * post : clipped mm0
97  * modified : mm0,mm1,mm2
98  */
99  movq_m2r (prevXY,mm1);
100  pcmpgtd_r2r (mm0, mm1); /* the X half of the result (mm1, which is used as the mask below) holds:
101  1111 if prevXY > px
102  0000 if prevXY <= px
103  (same for y) */
104 #ifdef STRICT_COMPAT
 /* Spread each half of the mask over a whole register so that both
  * coordinates are cleared when either one is out of range. */
105  movq_r2r (mm1,mm2);
106  punpckhdq_r2r (mm2,mm2);
107  punpckldq_r2r (mm1,mm1);
108  pand_r2r (mm2, mm0);
109 #endif
110  pand_r2r (mm1, mm0); /* zero out the part that overflows */
111 
112  /*
113  * pre : mm0 : clipped position on screen
114  *
115  * post : mm3 & mm4 : coefs for this position
116  * mm1 : X vector [0|X]
117  *
118  * modif : eax,ecx
119  */
 /* ecx = precalCoef + ((low half of mm0) & 15) * 64, eax = (high half) & 15,
  * then mm3 = 32-bit value at ecx + eax*4. The factor 64 matches one row of
  * 16 entries assuming 4-byte int -- i.e. precalCoef[low & 15][high & 15]. */
120  __asm__ __volatile__ (
121  "movd %%mm0,%%ecx\n"
122  "movq %%mm0,%%mm1\n"
123 
124  "andl $15,%%ecx\n"
125  "psrlq $32,%%mm1\n"
126 
127  "shll $6,%%ecx\n"
128  "movd %%mm1,%%eax\n"
129 
130  "addl %0,%%ecx\n"
131  "andl $15,%%eax\n"
132 
133  "movd (%%ecx,%%eax,4),%%mm3\n"
134  /* ::"X"(precalCoef):"eax","ecx"); */
135  ::"m"(precalCoef):"eax","ecx");
136 
137 
138  /*
139  * coefficient extraction...
140  *
141  * pre : coefficients in mm3
142  *
143  * post : coefficients extracted into mm3 (c1 & c2)
144  * and mm4 (c3 & c4)
145  *
146  * modif : mm5
147  */
148 // interleaved with the following section (see the '^' markers)
149 // movq_r2r (mm3, mm5); /* ??-??-??-??-c4-c3-c2-c1 */
150 // punpcklbw_r2r (mm5, mm3); /* c4-c4-c3-c3-c2-c2-c1-c1 */
151 // movq_r2r (mm3, mm4); /* c4-c4-c3-c3-c2-c2-c1-c1 */
152 // movq_r2r (mm3, mm5); /* c4-c4-c3-c3-c2-c2-c1-c1 */
153 
154 // punpcklbw_r2r (mm5, mm3); /* c2-c2-c2-c2-c1-c1-c1-c1 */
155 // punpckhbw_r2r (mm5, mm4); /* c4-c4-c4-c4-c3-c3-c3-c3 */
156 
157  /*
158  * pre : mm0 : Y pos [*|Y]
159  * mm1 : X pos [*|X]
160  *
161  * post : mm0 : expix1[position]
162  * mm2 : expix1[position+width]
163  *
164  * modif : eax,ecx
165  */
 /* NOTE(review): the value taken from mm1 is the one multiplied by prevX
  * (the row stride) and the value from mm0 is added as the column, so the
  * X/Y labels in the note above look swapped -- confirm.
  * Lines tagged with '^' are the coefficient extraction described earlier,
  * interleaved with the address computation.
  * NOTE(review): "mull" also writes EDX, which is missing from the clobber
  * list, and the "X" constraints place no restriction on how %0 and %1 are
  * materialised although they are used as instruction operands -- confirm. */
166  psrld_i2r (PERTEDEC,mm0);
167  psrld_i2r (PERTEDEC,mm1);
168  __asm__ __volatile__ (
169  "movd %%mm1,%%eax\n"
170  /*^*/ "movq %%mm3,%%mm5\n" /*^*/
171 
172  "mull %1\n"
173  "movd %%mm0,%%ecx\n"
174  /*^*/ "punpcklbw %%mm5, %%mm3\n" /*^*/
175 
176  "addl %%ecx,%%eax\n"
177  /*^*/ "movq %%mm3,%%mm4\n" /*^*/
178  /*^*/ "movq %%mm3,%%mm5\n" /*^*/
179 
180  "movl %0,%%ecx\n"
181  /*^*/ "punpcklbw %%mm5,%%mm3\n" /*^*/
182 
183  "movq (%%ecx,%%eax,4),%%mm0\n"
184  /*^*/ "punpckhbw %%mm5,%%mm4\n" /*^*/
185 
186  "addl %1,%%eax\n"
187  "movq (%%ecx,%%eax,4),%%mm2\n"
188 
189  : : "X"(expix1), "X"(prevX):"eax","ecx"
190  );
191 
192  /*
193  * pre : mm0 : expix1[position]
194  * mm2 : expix1[position+width]
195  * mm3 & mm4 : coefs
196  */
197 
198  /* copy the first two pixels into mm0 and mm1 */
199  movq_r2r (mm0, mm1); /* b1-v1-r1-a1-b2-v2-r2-a2 */
200 
201  /* unpack the first pixel */
202  punpcklbw_r2r (mm7, mm0); /* 00-b2-00-v2-00-r2-00-a2 */
203 
204  /* extract the coefficients... */
205 
206  movq_r2r (mm3, mm5); /* c2-c2-c2-c2-c1-c1-c1-c1 */
207 
208  /*^in parallel^*/ /* unpack the 2nd pixel */
209  /*^*/ punpckhbw_r2r (mm7, mm1); /* 00-b1-00-v1-00-r1-00-a1 */
210 
211  punpcklbw_r2r (mm7, mm5); /* 00-c1-00-c1-00-c1-00-c1 */
212  punpckhbw_r2r (mm7, mm3); /* 00-c2-00-c2-00-c2-00-c2 */
213 
214  /* multiply the pixels by the coefficients */
215  pmullw_r2r (mm5, mm0); /* c1*b2-c1*v2-c1*r2-c1*a2 */
216  pmullw_r2r (mm3, mm1); /* c2*b1-c2*v1-c2*r1-c2*a1 */
217  paddw_r2r (mm1, mm0);
218 
219  /* ...extract the last 2 coefficients */
220  movq_r2r (mm4, mm5); /* c4-c4-c4-c4-c3-c3-c3-c3 */
221  punpcklbw_r2r (mm7, mm4); /* 00-c3-00-c3-00-c3-00-c3 */
222  punpckhbw_r2r (mm7, mm5); /* 00-c4-00-c4-00-c4-00-c4 */
223 
224  /* fetch the last 2 pixels */
225  movq_r2r (mm2, mm1);
226 
227  /* unpack the pixels */
228  punpcklbw_r2r (mm7, mm1);
229  punpckhbw_r2r (mm7, mm2);
230 
231  /* multiply by the coefficients */
232  pmullw_r2r (mm4, mm1);
233  pmullw_r2r (mm5, mm2);
234 
235  /* add the results to the final value */
236  paddw_r2r (mm1, mm0);
237  paddw_r2r (mm2, mm0);
238 
239  /* divide by 256 = 16+16+16+16, then repack the final pixel */
240  psrlw_i2r (8, mm0);
241  packuswb_r2r (mm7, mm0);
242 
 /* store the low 32 bits of mm0 as the output pixel for this index */
243  movd_r2m (mm0,expix2[loop]);
244 
245  ++loop;
246  }
 /* Done with the MMX registers: issue femms when HAVE_ATHLON is defined,
  * otherwise emms() (presumably from mmx.h -- confirm). */
247 #ifdef HAVE_ATHLON
248  __asm__ __volatile__ ("femms\n");
249 #else
250  emms();
251 #endif
252 }
253 #else
255  return 0;
256 }
/*
 * zoom_filter_xmmx - fallback stub.
 *
 * This definition sits in the #else branch of
 * "#if defined(MMX) && !defined(ARCH_X86_64)", so it is the one compiled
 * when the MMX implementation is not built. It keeps the symbol available
 * to callers but performs no work: no buffer is read or written.
 *
 * All parameters are accepted only for signature compatibility and are
 * explicitly discarded to silence unused-parameter warnings.
 */
void zoom_filter_xmmx (int prevX, int prevY,
                       const unsigned int *expix1, const unsigned int *expix2,
                       const int *brutS, const int *brutD, int buffratio,
                       int precalCoef[16][16])
{
    (void) prevX;
    (void) prevY;
    (void) expix1;
    (void) expix2;
    (void) brutS;
    (void) brutD;
    (void) buffratio;
    (void) precalCoef;
}
267 #endif
int buffratio
modified by Jeko: fixed-point: buffratio is in 16:16 format (so 0 <= buffratio <= 2^16)
Definition: filters.c:125
int zoom_filter_xmmx_supported(void)
unsigned int prevY
Definition: filters.c:109
unsigned int prevX
Definition: filters.c:109
guint32 * expix2
Definition: filters.c:104
void zoom_filter_xmmx(int prevX, int prevY, const unsigned int *expix1, const unsigned int *expix2, const int *brutS, const int *brutD, int buffratio, int precalCoef[16][16])
int precalCoef[16][16]
optimisation by Jeko: precomputation of the 4 coefficients resulting from the 2 positions
Definition: filters.c:144
signed int * brutD
Definition: filters.c:99
signed int * brutS
Definition: filters.c:98
#define PERTEDEC
guint32 * expix1
Definition: filters.c:103