MythTV master
zoom_filter_xmmx.cpp
Go to the documentation of this file.
1#include "zoom_filters.h"
2#include "libmythbase/mythconfig.h"
3
4#include <QtGlobal>
5#if QT_VERSION >= QT_VERSION_CHECK(6,5,0)
6#include <QtProcessorDetection>
7#endif
8#if HAVE_MMX && !defined(Q_PROCESSOR_X86_64)
9/* Define this to get exactly the same result as the C implementation
10 * (at the cost of being very slightly slower).
11 */
12#define STRICT_COMPAT
13//#define HAVE_ATHLON  (when defined, the filter finishes with femms instead of emms)
14
15#define BUFFPOINTNB 16
16#define BUFFPOINTMASK 0xffff
17#define BUFFINCR 0xff
18
19#define sqrtperte 16
20// a % sqrtperte can be computed as a & PERTEMASK
21#define PERTEMASK 0xf
22// a / sqrtperte can be computed as a >> PERTEDEC
23#define PERTEDEC 4
24
25
26//#define MMX_TRACE
27#include "mmx.h"
28extern "C" {
29#include "libavutil/cpu.h"
30}
31
33 return (av_get_cpu_flags() & AV_CPU_FLAG_SSE) >> 3;
34}
35
36void zoom_filter_xmmx (int prevX, int prevY,
37 unsigned int *expix1, unsigned int *expix2,
38 const sintvec& lbruS,
39 const sintvec& lbruD,
40 int buffratio,
42{
43 int bufsize = prevX * prevY; /* taille du buffer */
44 volatile int loop; /* variable de boucle */
45
46 mmx_t *brutS = (mmx_t*)lbruS.data(); /* buffer de transformation source */
47 mmx_t *brutD = (mmx_t*)lbruD.data(); /* buffer de transformation dest */
48
49 volatile mmx_t prevXY;
50 volatile mmx_t ratiox;
51 //volatile mmx_t interpix;
52
54
55 prevXY.ud[0] = (prevX-1)<<PERTEDEC;
56 prevXY.ud[1] = (prevY-1)<<PERTEDEC;
57
58 ratiox.d[0] = buffratio;
59 ratiox.d[1] = buffratio;
60 movq_m2r (ratiox, mm6);
61 pslld_i2r (16,mm6);
62
63 pxor_r2r (mm7,mm7); /* mise a zero de mm7 */
64
65 loop=0;
66
67 /*
68 * NOTE : mm6 et mm7 ne sont pas modifies dans la boucle.
69 */
70 while (loop < bufsize)
71 {
72 /*
73 * pre : mm6 = [buffratio<<16|buffratio<<16]
74 * post : mm0 = S + ((D-S)*buffratio)>>16 format [X|Y]
75 * modified = mm0,mm1,mm2
76 */
77
78 __asm__ __volatile__ (
79 "movq %0,%%mm0\n"
80 "movq %1,%%mm1\n"
81 : :"m"(brutS[loop]),"m"(brutD[loop])
82 ); /* mm0 = S */
83
84 psubd_r2r (mm0,mm1); /* mm1 = D - S */
85 movq_r2r (mm1, mm2); /* mm2 = D - S */
86
87 pslld_i2r (16,mm1);
88 mmx_r2r (pmulhuw, mm6, mm1); /* mm1 = ?? */
89 pmullw_r2r (mm6, mm2);
90
91 paddd_r2r (mm2, mm1); /* mm1 = (D - S) * buffratio >> 16 */
92 pslld_i2r (16,mm0);
93
94 paddd_r2r (mm1, mm0); /* mm0 = S + mm1 */
95 psrld_i2r (16, mm0);
96
97 /*
98 * pre : mm0 : position vector on screen
99 * prevXY : coordinate of the lower-right point on screen
100 * post : clipped mm0
101 * modified : mm0,mm1,mm2
102 */
103 movq_m2r (prevXY,mm1);
104 pcmpgtd_r2r (mm0, mm1); /* mm0 en X contient :
105 1111 si prevXY > px
106 0000 si prevXY <= px
107 (idem pour y) */
108#ifdef STRICT_COMPAT
109 movq_r2r (mm1,mm2);
110 punpckhdq_r2r (mm2,mm2);
111 punpckldq_r2r (mm1,mm1);
112 pand_r2r (mm2, mm0);
113#endif
114 pand_r2r (mm1, mm0); /* on met a zero la partie qui deborde */
115
116 /*
117 * pre : mm0 : clipped position on screen
118 *
119 * post : mm3 & mm4 : coefs for this position
120 * mm1 : X vector [0|X]
121 *
122 * modif : eax,ecx
123 */
124 __asm__ __volatile__ (
125 "movd %%mm0,%%ecx\n"
126 "movq %%mm0,%%mm1\n"
127
128 "andl $15,%%ecx\n"
129 "psrlq $32,%%mm1\n"
130
131 "shll $6,%%ecx\n"
132 "movd %%mm1,%%eax\n"
133
134 "addl %0,%%ecx\n"
135 "andl $15,%%eax\n"
136
137 "movd (%%ecx,%%eax,4),%%mm3\n"
138 /* ::"X"(precalCoef):"eax","ecx"); */
139 ::"m"(precalCoef):"eax","ecx");
140
141
142 /*
143 * extraction des coefficients...
144 *
145 * pre : coef dans mm3
146 *
147 * post : coef extraits dans mm3 (c1 & c2)
148 * et mm4 (c3 & c4)
149 *
150 * modif : mm5
151 */
152// entrelace avec portion d'apres (cf les '^')
153// movq_r2r (mm3, mm5); /* ??-??-??-??-c4-c3-c2-c1 */
154// punpcklbw_r2r (mm5, mm3); /* c4-c4-c3-c3-c2-c2-c1-c1 */
155// movq_r2r (mm3, mm4); /* c4-c4-c3-c3-c2-c2-c1-c1 */
156// movq_r2r (mm3, mm5); /* c4-c4-c3-c3-c2-c2-c1-c1 */
157
158// punpcklbw_r2r (mm5, mm3); /* c2-c2-c2-c2-c1-c1-c1-c1 */
159// punpckhbw_r2r (mm5, mm4); /* c4-c4-c4-c4-c3-c3-c3-c3 */
160
161 /*
162 * pre : mm0 : Y pos [*|Y]
163 * mm1 : X pos [*|X]
164 *
165 * post : mm0 : expix1[position]
166 * mm2 : expix1[position+largeur]
167 *
168 * modif : eax,ecx
169 */
170 psrld_i2r (PERTEDEC,mm0);
171 psrld_i2r (PERTEDEC,mm1);
172 __asm__ __volatile__ (
173 "movd %%mm1,%%eax\n"
174 /*^*/ "movq %%mm3,%%mm5\n" /*^*/
175
176 "mull %1\n"
177 "movd %%mm0,%%ecx\n"
178 /*^*/ "punpcklbw %%mm5, %%mm3\n" /*^*/
179
180 "addl %%ecx,%%eax\n"
181 /*^*/ "movq %%mm3,%%mm4\n" /*^*/
182 /*^*/ "movq %%mm3,%%mm5\n" /*^*/
183
184 "movl %0,%%ecx\n"
185 /*^*/ "punpcklbw %%mm5,%%mm3\n" /*^*/
186
187 "movq (%%ecx,%%eax,4),%%mm0\n"
188 /*^*/ "punpckhbw %%mm5,%%mm4\n" /*^*/
189
190 "addl %1,%%eax\n"
191 "movq (%%ecx,%%eax,4),%%mm2\n"
192
193 : : "X"(expix1), "X"(prevX):"eax","ecx"
194 );
195
196 /*
197 * pre : mm0 : expix1[position]
198 * mm2 : expix1[position+largeur]
199 * mm3 & mm4 : coefs
200 */
201
202 /* recopie des deux premiers pixels dans mm0 et mm1 */
203 movq_r2r (mm0, mm1); /* b1-v1-r1-a1-b2-v2-r2-a2 */
204
205 /* depackage du premier pixel */
206 punpcklbw_r2r (mm7, mm0); /* 00-b2-00-v2-00-r2-00-a2 */
207
208 /* extraction des coefficients... */
209
210 movq_r2r (mm3, mm5); /* c2-c2-c2-c2-c1-c1-c1-c1 */
211
212 /*^en parrallele^*/ /* depackage du 2ieme pixel */
213 /*^*/ punpckhbw_r2r (mm7, mm1); /* 00-b1-00-v1-00-r1-00-a1 */
214
215 punpcklbw_r2r (mm7, mm5); /* 00-c1-00-c1-00-c1-00-c1 */
216 punpckhbw_r2r (mm7, mm3); /* 00-c2-00-c2-00-c2-00-c2 */
217
218 /* multiplication des pixels par les coefficients */
219 pmullw_r2r (mm5, mm0); /* c1*b2-c1*v2-c1*r2-c1*a2 */
220 pmullw_r2r (mm3, mm1); /* c2*b1-c2*v1-c2*r1-c2*a1 */
221 paddw_r2r (mm1, mm0);
222
223 /* ...extraction des 2 derniers coefficients */
224 movq_r2r (mm4, mm5); /* c4-c4-c4-c4-c3-c3-c3-c3 */
225 punpcklbw_r2r (mm7, mm4); /* 00-c3-00-c3-00-c3-00-c3 */
226 punpckhbw_r2r (mm7, mm5); /* 00-c4-00-c4-00-c4-00-c4 */
227
228 /* recuperation des 2 derniers pixels */
229 movq_r2r (mm2, mm1);
230
231 /* depackage des pixels */
232 punpcklbw_r2r (mm7, mm1);
233 punpckhbw_r2r (mm7, mm2);
234
235 /* multiplication pas les coeffs */
236 pmullw_r2r (mm4, mm1);
237 pmullw_r2r (mm5, mm2);
238
239 /* ajout des valeurs obtenues à la valeur finale */
240 paddw_r2r (mm1, mm0);
241 paddw_r2r (mm2, mm0);
242
243 /* division par 256 = 16+16+16+16, puis repackage du pixel final */
244 psrlw_i2r (8, mm0);
245 packuswb_r2r (mm7, mm0);
246
247 movd_r2m (mm0,expix2[loop]);
248
249 ++loop;
250 }
251#ifdef HAVE_ATHLON
252 __asm__ __volatile__ ("femms\n");
253#else
254 emms();
255#endif
256}
257#else
/**
 * Fallback used when the MMX implementation is not compiled in.
 *
 * @return always 0: the extended-MMX zoom filter is not available.
 */
int zoom_filter_xmmx_supported ()
{
    return 0;
}
261void zoom_filter_xmmx ([[maybe_unused]] int prevX,
262 [[maybe_unused]] int prevY,
263 [[maybe_unused]] unsigned int *expix1,
264 [[maybe_unused]] unsigned int *expix2,
265 [[maybe_unused]] const sintvec& brutS,
266 [[maybe_unused]] const sintvec& brutD,
267 [[maybe_unused]] int buffratio,
268 [[maybe_unused]] GoomCoefficients& precalCoef)
269{
270}
271#endif
sintvec brutS
Definition: filters.cpp:82
uint32_t * expix2
Definition: filters.cpp:88
unsigned int prevY
Definition: filters.cpp:93
unsigned int prevX
Definition: filters.cpp:93
GoomCoefficients precalCoef
Optimization by Jeko: precomputation of the 4 coefficients resulting from the 2 positions
Definition: filters.cpp:128
int buffratio
Modification by Jeko: fixed point: buffratio is in 16:16 format (so 0 <= buffratio <= 2^16)
Definition: filters.cpp:109
static constexpr uint8_t PERTEDEC
Definition: filters.cpp:119
uint32_t * expix1
Definition: filters.cpp:87
sintvec brutD
Definition: filters.cpp:83
#define punpckhbw_r2r(regs, regd)
Definition: mmx.h:203
#define emms()
Definition: mmx.h:82
#define psrld_i2r(imm, reg)
Definition: mmx.h:175
#define movq_m2r(var, reg)
Definition: mmx.h:94
#define psubd_r2r(regs, regd)
Definition: mmx.h:188
#define punpckhdq_r2r(regs, regd)
Definition: mmx.h:205
#define packuswb_r2r(regs, regd)
Definition: mmx.h:106
#define psrlw_i2r(imm, reg)
Definition: mmx.h:181
#define punpckldq_r2r(regs, regd)
Definition: mmx.h:212
#define punpcklbw_r2r(regs, regd)
Definition: mmx.h:210
#define mmx_r2r(op, regs, regd)
Definition: mmx.h:78
#define pslld_i2r(imm, reg)
Definition: mmx.h:158
#define pmullw_r2r(regs, regd)
Definition: mmx.h:153
#define movd_r2m(reg, var)
Definition: mmx.h:85
#define pcmpgtd_r2r(regs, regd)
Definition: mmx.h:141
#define movq_r2r(regs, regd)
Definition: mmx.h:97
#define paddd_r2r(regs, regd)
Definition: mmx.h:111
#define pxor_r2r(regs, regd)
Definition: mmx.h:217
#define paddw_r2r(regs, regd)
Definition: mmx.h:113
#define pand_r2r(regs, regd)
Definition: mmx.h:126
Definition: mmx.h:35
unsigned int ud[2]
Definition: mmx.h:39
int d[2]
Definition: mmx.h:38
void zoom_filter_xmmx(int prevX, int prevY, unsigned int *expix1, unsigned int *expix2, const sintvec &brutS, const sintvec &brutD, int buffratio, GoomCoefficients &precalCoef)
int zoom_filter_xmmx_supported()
std::vector< signed int > sintvec
Definition: zoom_filters.h:4
std::array< std::array< int, 16 >, 16 > GoomCoefficients
Definition: zoom_filters.h:5