8 using namespace soundtouch;
// Dot-product (correlation) fragment: SSE multiply-accumulate of two float
// streams, SSE3 horizontal reduction, then a scalar loop over the remainder.
// NOTE(review): the enclosing function signature, the asm loop label/branch
// and the pointer-advance instructions are not visible in this excerpt.
// count >> 4 == count / 16; each visible unrolled group below covers 16 floats
// (4 loads x 16 bytes).
14 int loops = count >> 4;
16 const float *mp = mPos;
17 const float *cp = cPos;
// xmm7 is the 4-lane float accumulator; start it at zero.
20 "xorpd %%xmm7, %%xmm7 \n\t"
// Operand map (from the output list below): %0 = corr, %1 = mp, %2 = cp.
// movups = unaligned 16-byte load from mp.
22 "movups (%1), %%xmm0 \n\t"
23 "movups 16(%1), %%xmm1 \n\t"
// mulps with a memory source faults on addresses that are not 16-byte aligned
// (legacy SSE encoding) - presumably cp is guaranteed aligned; TODO confirm.
24 "mulps (%2), %%xmm0 \n\t"
25 "movups 32(%1), %%xmm2 \n\t"
26 "addps %%xmm0, %%xmm7 \n\t"
27 "mulps 16(%2), %%xmm1 \n\t"
28 "movups 48(%1), %%xmm3 \n\t"
29 "mulps 32(%2), %%xmm2 \n\t"
30 "addps %%xmm1, %%xmm7 \n\t"
31 "mulps 48(%2), %%xmm3 \n\t"
32 "addps %%xmm2, %%xmm7 \n\t"
35 "addps %%xmm3, %%xmm7 \n\t"
// Horizontal reduction (SSE3): haddps leaves (a0+a1, a2+a3) in the low two
// lanes, cvtps2pd widens those two floats to doubles, haddpd adds them.
38 "haddps %%xmm7, %%xmm7 \n\t"
39 "cvtps2pd %%xmm7, %%xmm7 \n\t"
40 "haddpd %%xmm7, %%xmm7 \n\t"
// movsd stores the low double (8 bytes) into corr's memory location.
41 "movsd %%xmm7, %0 \n\t"
// corr is a write-only memory operand; mp and cp are read-write registers, so
// the asm may leave them modified for the scalar loop that follows.
42 :
"=m"(corr),
"+r"(mp),
"+r"(cp)
// Scalar tail: one product per iteration for the elements the asm did not
// cover. NOTE(review): the initialization of i is not visible here.
46 for (; i < count; i++)
47 corr += *mp++ * *cp++;
// Dot-product (correlation) fragment: SSE multiply-accumulate of two float
// streams, reduced with shuffles only (no SSE3 hadd instructions), followed by
// a scalar loop over the remainder.
// NOTE(review): the enclosing function signature, the asm loop label/branch
// and the pointer-advance instructions are not visible in this excerpt.
// count >> 4 == count / 16; each visible unrolled group below covers 16 floats.
56 int loops = count >> 4;
58 const float *mp = mPos;
59 const float *cp = cPos;
// xmm7 is the 4-lane float accumulator; start it at zero.
62 "xorpd %%xmm7, %%xmm7 \n\t"
// Operand map (from the output list below): %0 = corr, %1 = mp, %2 = cp.
64 "movups (%1), %%xmm0 \n\t"
65 "movups 16(%1), %%xmm1 \n\t"
// mulps with a memory source faults on addresses that are not 16-byte aligned
// (legacy SSE encoding) - presumably cp is guaranteed aligned; TODO confirm.
66 "mulps (%2), %%xmm0 \n\t"
67 "movups 32(%1), %%xmm2 \n\t"
68 "addps %%xmm0, %%xmm7 \n\t"
69 "mulps 16(%2), %%xmm1 \n\t"
70 "movups 48(%1), %%xmm3 \n\t"
71 "mulps 32(%2), %%xmm2 \n\t"
72 "addps %%xmm1, %%xmm7 \n\t"
73 "mulps 48(%2), %%xmm3 \n\t"
74 "addps %%xmm2, %%xmm7 \n\t"
77 "addps %%xmm3, %%xmm7 \n\t"
// Horizontal reduction, step 1: copy the accumulator, swap its 64-bit halves
// (shufps 0x4e on a copy of itself) and add -> low lanes hold (a0+a2, a1+a3).
80 "movaps %%xmm7, %%xmm6 \n\t"
81 "shufps $0x4e, %%xmm7, %%xmm6 \n\t"
82 "addps %%xmm6, %%xmm7 \n\t"
// Step 2: widen the two partial sums to doubles, swap them (shufpd 0x01) and
// add, so the low double holds the full sum.
83 "cvtps2pd %%xmm7, %%xmm7 \n\t"
84 "movapd %%xmm7, %%xmm6 \n\t"
85 "shufpd $0x01, %%xmm7, %%xmm6 \n\t"
86 "addpd %%xmm6, %%xmm7 \n\t"
// movsd stores the low double (8 bytes) into corr's memory location.
87 "movsd %%xmm7, %0 \n\t"
// corr is a write-only memory operand; mp and cp are read-write registers, so
// the asm may leave them modified for the scalar loop that follows.
88 :
"=m"(corr),
"+r"(mp),
"+r"(cp)
// Scalar tail: one product per iteration for the elements the asm did not
// cover. NOTE(review): the initialization of i is not visible here.
92 for (; i < count; i++)
93 corr += *mp++ * *cp++;
// Dot-product (correlation) fragment: SSE multiply-accumulate of two float
// streams, SSE3 horizontal reduction, then a scalar loop that consumes two
// elements per iteration.
// NOTE(review): the enclosing function signature, the asm loop label/branch
// and the pointer-advance instructions are not visible in this excerpt.
// count >> 4 == count / 16; each visible unrolled group below covers 16 floats.
102 int loops = count >> 4;
104 const float *mp = mPos;
105 const float *cp = cPos;
// xmm7 is the 4-lane float accumulator; start it at zero.
108 "xorpd %%xmm7, %%xmm7 \n\t"
// Operand map (from the output list below): %0 = corr, %1 = mp, %2 = cp.
110 "movups (%1), %%xmm0 \n\t"
111 "movups 16(%1), %%xmm1 \n\t"
// mulps with a memory source faults on addresses that are not 16-byte aligned
// (legacy SSE encoding) - presumably cp is guaranteed aligned; TODO confirm.
112 "mulps (%2), %%xmm0 \n\t"
113 "movups 32(%1), %%xmm2 \n\t"
114 "addps %%xmm0, %%xmm7 \n\t"
115 "mulps 16(%2), %%xmm1 \n\t"
116 "movups 48(%1), %%xmm3 \n\t"
117 "mulps 32(%2), %%xmm2 \n\t"
118 "addps %%xmm1, %%xmm7 \n\t"
119 "mulps 48(%2), %%xmm3 \n\t"
120 "addps %%xmm2, %%xmm7 \n\t"
123 "addps %%xmm3, %%xmm7 \n\t"
// Horizontal reduction (SSE3): haddps leaves (a0+a1, a2+a3) in the low two
// lanes, cvtps2pd widens those two floats to doubles, haddpd adds them.
126 "haddps %%xmm7, %%xmm7 \n\t"
127 "cvtps2pd %%xmm7, %%xmm7 \n\t"
128 "haddpd %%xmm7, %%xmm7 \n\t"
// movsd stores the low double (8 bytes) into corr's memory location.
129 "movsd %%xmm7, %0 \n\t"
// corr is a write-only memory operand; mp and cp are read-write registers, so
// the asm may leave them modified.
130 :
"=m"(corr),
"+r"(mp),
"+r"(cp)
// Scalar tail, two products per iteration, indexed by i relative to mp/cp.
// NOTE(review): mp/cp are in-out asm operands - if the omitted asm lines
// advance them, mp[i] is relative to the advanced pointer; verify how i is
// initialized. The body also reads element i + 1, so it presumably relies on
// an even number of remaining elements - TODO confirm.
134 for (; i < count; i += 2)
135 corr += (mp[i] * cp[i] + mp[i + 1] * cp[i + 1]);
// Dot-product (correlation) fragment: SSE multiply-accumulate of two float
// streams, reduced with shuffles only (no SSE3 hadd instructions), followed by
// a scalar loop that consumes two elements per iteration.
// NOTE(review): the enclosing function signature, the asm loop label/branch
// and the pointer-advance instructions are not visible in this excerpt.
// count >> 4 == count / 16; each visible unrolled group below covers 16 floats.
144 int loops = count >> 4;
146 const float *mp = mPos;
147 const float *cp = cPos;
// xmm7 is the 4-lane float accumulator; start it at zero.
150 "xorpd %%xmm7, %%xmm7 \n\t"
// Operand map (from the output list below): %0 = corr, %1 = mp, %2 = cp.
152 "movups (%1), %%xmm0 \n\t"
153 "movups 16(%1), %%xmm1 \n\t"
// mulps with a memory source faults on addresses that are not 16-byte aligned
// (legacy SSE encoding) - presumably cp is guaranteed aligned; TODO confirm.
154 "mulps (%2), %%xmm0 \n\t"
155 "movups 32(%1), %%xmm2 \n\t"
156 "addps %%xmm0, %%xmm7 \n\t"
157 "mulps 16(%2), %%xmm1 \n\t"
158 "movups 48(%1), %%xmm3 \n\t"
159 "mulps 32(%2), %%xmm2 \n\t"
160 "addps %%xmm1, %%xmm7 \n\t"
161 "mulps 48(%2), %%xmm3 \n\t"
162 "addps %%xmm2, %%xmm7 \n\t"
165 "addps %%xmm3, %%xmm7 \n\t"
// Horizontal reduction, step 1: copy the accumulator, swap its 64-bit halves
// (shufps 0x4e on a copy of itself) and add -> low lanes hold (a0+a2, a1+a3).
168 "movaps %%xmm7, %%xmm6 \n\t"
169 "shufps $0x4e, %%xmm7, %%xmm6 \n\t"
170 "addps %%xmm6, %%xmm7 \n\t"
// Step 2: widen the two partial sums to doubles, swap them (shufpd 0x01) and
// add, so the low double holds the full sum.
171 "cvtps2pd %%xmm7, %%xmm7 \n\t"
172 "movapd %%xmm7, %%xmm6 \n\t"
173 "shufpd $0x01, %%xmm7, %%xmm6 \n\t"
174 "addpd %%xmm6, %%xmm7 \n\t"
// movsd stores the low double (8 bytes) into corr's memory location.
175 "movsd %%xmm7, %0 \n\t"
// corr is a write-only memory operand; mp and cp are read-write registers, so
// the asm may leave them modified.
176 :
"=m"(corr),
"+r"(mp),
"+r"(cp)
// Scalar tail, two products per iteration, indexed by i relative to mp/cp.
// NOTE(review): mp/cp are in-out asm operands - if the omitted asm lines
// advance them, mp[i] is relative to the advanced pointer; verify how i is
// initialized. The body also reads element i + 1, so it presumably relies on
// an even number of remaining elements - TODO confirm.
180 for (; i < count; i += 2)
181 corr += (mp[i] * cp[i] + mp[i + 1] * cp[i + 1]);
// Cross-fade fragment: blends the stream at %1 (weight ramping up from 0) with
// the stream at %2 (weight ramping down from ~1) and stores the sum at %3.
// NOTE(review): the asm operand lists, loop labels/branches and any
// pointer-advance instructions are not visible in this excerpt, so the
// binding of %1/%2/%3 to C variables cannot be confirmed from here.
190 const float *i = input;
// --- First visible asm sequence: one weight step per 8 floats ---
// Convert the integer in ecx to float (low lane of xmm7); what ecx holds is
// set up outside the visible lines.
195 "cvtsi2ss %%ecx, %%xmm7 \n\t"
// Two punpckldq passes replicate the low lane into all four lanes.
197 "punpckldq %%xmm7, %%xmm7 \n\t"
// xmm6 = rising weight, starts at 0.
198 "xorpd %%xmm6, %%xmm6 \n\t"
199 "punpckldq %%xmm7, %%xmm7 \n\t"
// xmm1 = approximate reciprocal of that value (rcpps is a low-precision
// estimate) = per-step weight increment.
200 "rcpps %%xmm7, %%xmm1 \n\t"
// xmm7 = value * (1/value) ~= 1.0 = falling weight.
201 "mulps %%xmm1, %%xmm7 \n\t"
// Load 8 floats from each stream, scale by the rising / falling weight,
// add pairwise and store 8 floats at %3.
203 "movups (%1), %%xmm2 \n\t"
204 "movups 16(%1), %%xmm4 \n\t"
205 "mulps %%xmm6, %%xmm2 \n\t"
206 "movups (%2), %%xmm3 \n\t"
207 "movups 16(%2), %%xmm5 \n\t"
208 "mulps %%xmm7, %%xmm3 \n\t"
210 "mulps %%xmm6, %%xmm4 \n\t"
211 "addps %%xmm2, %%xmm3 \n\t"
212 "mulps %%xmm7, %%xmm5 \n\t"
213 "movups %%xmm3, (%3) \n\t"
214 "addps %%xmm4, %%xmm5 \n\t"
216 "movups %%xmm5, 16(%3) \n\t"
// Advance the ramps by one step: rising += step, falling -= step.
217 "addps %%xmm1, %%xmm6 \n\t"
219 "subps %%xmm1, %%xmm7 \n\t"
// --- Second visible asm sequence: one weight step per 4 floats ---
// Same weight setup as above: broadcast int->float, step = ~1/value,
// rising = 0, falling ~= 1.
227 "cvtsi2ss %%ecx, %%xmm7 \n\t"
230 "punpckldq %%xmm7, %%xmm7 \n\t"
231 "xorpd %%xmm6, %%xmm6 \n\t"
232 "punpckldq %%xmm7, %%xmm7 \n\t"
233 "rcpps %%xmm7, %%xmm1 \n\t"
234 "mulps %%xmm1, %%xmm7 \n\t"
// First group of 4 floats: blend with the current weights, then step them.
236 "movups (%1), %%xmm2 \n\t"
237 "movups 16(%1), %%xmm4 \n\t"
238 "mulps %%xmm6, %%xmm2 \n\t"
239 "movups (%2), %%xmm3 \n\t"
240 "movups 16(%2), %%xmm5 \n\t"
241 "mulps %%xmm7, %%xmm3 \n\t"
242 "addps %%xmm1, %%xmm6 \n\t"
244 "addps %%xmm2, %%xmm3 \n\t"
246 "subps %%xmm1, %%xmm7 \n\t"
247 "movups %%xmm3, (%3) \n\t"
// Second group of 4 floats: blend with the stepped weights, then step again.
249 "mulps %%xmm6, %%xmm4 \n\t"
251 "mulps %%xmm7, %%xmm5 \n\t"
252 "addps %%xmm1, %%xmm6 \n\t"
254 "addps %%xmm4, %%xmm5 \n\t"
255 "subps %%xmm1, %%xmm7 \n\t"
// NOTE(review): this store targets (%3), the same address expression as the
// store of the first group; presumably an omitted instruction advances %3 in
// between - confirm against the full file.
256 "movups %%xmm5, (%3) \n\t"
// Cross-fade fragment working on two floats at a time: blends the stream at %1
// (weight ramping up from 0) with the stream at %2 (weight ramping down from
// ~1) and stores two floats at %3 per group.
// NOTE(review): the asm operand lists, loop labels/branches and any
// pointer-advance instructions are not visible in this excerpt.
268 const float *i = input;
// Convert the integer in ecx to float (low lane of xmm7); what ecx holds is
// set up outside the visible lines.
272 "cvtsi2ss %%ecx, %%xmm7 \n\t"
// xmm6 = rising weight, starts at 0.
274 "xorpd %%xmm6, %%xmm6 \n\t"
// A single punpckldq duplicates the low lane into lanes 0 and 1 only; that is
// sufficient here because only the low two lanes are ever stored (movlps).
275 "punpckldq %%xmm7, %%xmm7 \n\t"
// xmm1 = approximate reciprocal (low-precision estimate) = per-step increment.
276 "rcpps %%xmm7, %%xmm1 \n\t"
// xmm7 = value * (1/value) ~= 1.0 in the low lanes = falling weight.
277 "mulps %%xmm1, %%xmm7 \n\t"
// Loads are 16 bytes wide, but the second load of each stream starts only
// 8 bytes (two floats) further on, matching the two-float group size.
279 "movups (%1), %%xmm2 \n\t"
280 "movups 8(%1), %%xmm4 \n\t"
281 "mulps %%xmm6, %%xmm2 \n\t"
282 "movups (%2), %%xmm3 \n\t"
283 "movups 8(%2), %%xmm5 \n\t"
284 "mulps %%xmm7, %%xmm3 \n\t"
// Step the ramps after the first group: rising += step, falling -= step.
285 "addps %%xmm1, %%xmm6 \n\t"
286 "addps %%xmm2, %%xmm3 \n\t"
287 "subps %%xmm1, %%xmm7 \n\t"
// movlps stores only the low 8 bytes (two floats).
288 "movlps %%xmm3, (%3) \n\t"
// Second group: blend with the stepped weights, then step again.
290 "mulps %%xmm6, %%xmm4 \n\t"
292 "mulps %%xmm7, %%xmm5 \n\t"
293 "addps %%xmm1, %%xmm6 \n\t"
295 "addps %%xmm4, %%xmm5 \n\t"
296 "subps %%xmm1, %%xmm7 \n\t"
// NOTE(review): this store targets (%3), the same address expression as the
// store of the first group; presumably an omitted instruction advances %3 in
// between - confirm against the full file.
297 "movlps %%xmm5, (%3) \n\t"
// Loop over newLen elements; its body is not visible in this excerpt.
// NOTE(review): it cannot be determined from here whether this loop belongs to
// the same function as the code below.
332 for (i = 0; i < newLen; i++)
// Multiply-accumulate fragment that produces four output floats per iteration
// from two overlapping 16-float windows of the data at %1 against the same 16
// floats at %2. The loop index advances by 2 per iteration.
// NOTE(review): the asm operand list and the remainder of the loop body are
// not visible in this excerpt.
343 for (
int i = 0; i < count; i += 2)
// xmm6 accumulates the window starting at %1 + 0 bytes, xmm7 the window
// starting at %1 + 8 bytes (two floats later).
346 "xorpd %%xmm6, %%xmm6 \n\t"
347 "xorpd %%xmm7, %%xmm7 \n\t"
// Each 16-byte group at %2 multiplies both windows. mulps with a memory source
// faults on addresses that are not 16-byte aligned (legacy SSE encoding) -
// presumably %2 is guaranteed aligned; TODO confirm.
349 "movups (%1), %%xmm1 \n\t"
350 "movups 8(%1), %%xmm2 \n\t"
351 "mulps (%2), %%xmm1 \n\t"
352 "movups 16(%1), %%xmm3 \n\t"
353 "mulps (%2), %%xmm2 \n\t"
354 "addps %%xmm1, %%xmm6 \n\t"
355 "movups 24(%1), %%xmm4 \n\t"
356 "addps %%xmm2, %%xmm7 \n\t"
357 "mulps 16(%2), %%xmm3 \n\t"
358 "movups 32(%1), %%xmm1 \n\t"
359 "mulps 16(%2), %%xmm4 \n\t"
360 "addps %%xmm3, %%xmm6 \n\t"
361 "movups 40(%1), %%xmm2 \n\t"
362 "addps %%xmm4, %%xmm7 \n\t"
363 "mulps 32(%2), %%xmm1 \n\t"
364 "movups 48(%1), %%xmm3 \n\t"
365 "mulps 32(%2), %%xmm2 \n\t"
366 "addps %%xmm1, %%xmm6 \n\t"
367 "movups 56(%1), %%xmm4 \n\t"
368 "addps %%xmm2, %%xmm7 \n\t"
369 "mulps 48(%2), %%xmm3 \n\t"
371 "mulps 48(%2), %%xmm4 \n\t"
372 "addps %%xmm3, %%xmm6 \n\t"
374 "addps %%xmm4, %%xmm7 \n\t"
// Reduction: build xmm0 = (xmm6[2], xmm6[3], xmm7[0], xmm7[1]) ...
377 "movhlps %%xmm6, %%xmm0 \n\t"
378 "movlhps %%xmm7, %%xmm0 \n\t"
// ... and xmm6 = (xmm6[0], xmm6[1], xmm7[2], xmm7[3]); their sum is
// (even-lane sum, odd-lane sum) of the first window followed by the same pair
// for the second window. Presumably even/odd lanes are the two channels of
// interleaved stereo - TODO confirm.
379 "shufps $0xe4, %%xmm7, %%xmm6 \n\t"
380 "addps %%xmm0, %%xmm6 \n\t"
// Unaligned store of the four results at %0.
381 "movups %%xmm6, (%0) \n\t"