diff --git a/mythtv/libs/libmythsoundtouch/sse_gcc.cpp b/mythtv/libs/libmythsoundtouch/sse_gcc.cpp
index c26d9a3..5ef4fd6 100644
a
|
b
|
long TDStretchSSE2::calcCrossCorrMulti(const short *mPos, const short *cPos) con |
17 | 17 | int count = (overlapLength * channels) - channels; |
18 | 18 | long loops = count >> 5; |
19 | 19 | long remainder = count - (loops<<5); |
| 20 | const short *mp = mPos; |
| 21 | const short *cp = cPos; |
20 | 22 | |
21 | | mPos += channels; |
22 | | cPos += channels; |
| 23 | mp += channels; |
| 24 | cp += channels; |
23 | 25 | |
24 | | asm( |
| 26 | asm volatile ( |
25 | 27 | "xorps %%xmm5, %%xmm5 \n\t" |
26 | 28 | "movd %4, %%xmm7 \n\t" |
27 | 29 | "1: \n\t" |
… |
… |
long TDStretchSSE2::calcCrossCorrMulti(const short *mPos, const short *cPos) con |
50 | 52 | "sub $1, %%ecx \n\t" |
51 | 53 | "jnz 1b \n\t" |
52 | 54 | "movdqa %%xmm5, %0 \n\t" |
53 | | :"=m"(out[0]) |
54 | | :"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits) |
| 55 | :"=m"(out[0]),"+r"(mp), "+r"(cp) |
| 56 | :"c"(loops), "m"(overlapDividerBits) |
55 | 57 | ); |
56 | 58 | |
57 | 59 | corr = out[0] + out[1] + out[2] + out[3]; |
58 | 60 | |
59 | | mPos += loops<<5; |
60 | | cPos += loops<<5; |
61 | | |
62 | 61 | for (i = 0; i < remainder; i++) |
63 | 62 | corr += (mPos[i] * cPos[i]) >> overlapDividerBits; |
64 | 63 | |
… |
… |
long TDStretchSSE2::calcCrossCorrStereo(const short *mPos, const short *cPos) co |
72 | 71 | int count = (overlapLength<<1) - 2; |
73 | 72 | long loops = count >> 5; |
74 | 73 | long remainder = count - (loops<<5); |
| 74 | const short *mp = mPos; |
| 75 | const short *cp = cPos; |
75 | 76 | |
76 | | mPos += 2; |
77 | | cPos += 2; |
| 77 | mp += 2; |
| 78 | cp += 2; |
78 | 79 | |
79 | | asm( |
| 80 | asm volatile ( |
80 | 81 | "xorps %%xmm5, %%xmm5 \n\t" |
81 | 82 | "movd %4, %%xmm7 \n\t" |
82 | 83 | "1: \n\t" |
… |
… |
long TDStretchSSE2::calcCrossCorrStereo(const short *mPos, const short *cPos) co |
105 | 106 | "sub $1, %%ecx \n\t" |
106 | 107 | "jnz 1b \n\t" |
107 | 108 | "movdqa %%xmm5, %0 \n\t" |
108 | | :"=m"(out[0]) |
109 | | :"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits) |
| 109 | :"=m"(out[0]),"+r"(mp),"+r"(cp) |
| 110 | :"c"(loops), "m"(overlapDividerBits) |
110 | 111 | ); |
111 | 112 | |
112 | 113 | corr = out[0] + out[1] + out[2] + out[3]; |
113 | 114 | |
114 | | mPos += loops<<5; |
115 | | cPos += loops<<5; |
116 | | |
117 | 115 | for (i = 0; i < remainder; i += 2) |
118 | 116 | corr += (mPos[i] * cPos[i] + |
119 | 117 | mPos[i+1] * cPos[i+1]) >> overlapDividerBits; |
… |
… |
long TDStretchSSE2::calcCrossCorrStereo(const short *mPos, const short *cPos) co |
121 | 119 | return corr; |
122 | 120 | } |
123 | 121 | |
124 | | __attribute__((noinline)) |
125 | 122 | void TDStretchSSE2::overlapMulti(short *output, const short *input) const |
126 | 123 | { |
127 | | asm( |
| 124 | |
| 125 | short *o = output; |
| 126 | const short *i = input; |
| 127 | const short *m = pMidBuffer; |
| 128 | long ch = (long)channels; |
| 129 | |
| 130 | asm volatile ( |
128 | 131 | "movd %%ecx, %%xmm0 \n\t" |
129 | 132 | "shl %6 \n\t" |
130 | 133 | "punpckldq %%xmm0, %%xmm0 \n\t" |
… |
… |
void TDStretchSSE2::overlapMulti(short *output, const short *input) const |
160 | 163 | "add %6, %5 \n\t" |
161 | 164 | "sub $1, %%ecx \n\t" |
162 | 165 | "jnz 1b \n\t" |
163 | | ::"c"(overlapLength),"m"(sadd),"m"(ones),"r"(input),"r"(pMidBuffer), |
164 | | "r"(output),"r"((long)channels) |
| 166 | ::"c"(overlapLength),"m"(sadd),"m"(ones),"r"(i),"r"(m),"r"(o),"r"(ch) |
| 167 | :"memory" |
165 | 168 | ); |
166 | 169 | } |
167 | 170 | |
168 | | __attribute__((noinline)) |
169 | 171 | void TDStretchSSE2::overlapStereo(short *output, const short *input) const |
170 | 172 | { |
171 | | asm( |
| 173 | short *o = output; |
| 174 | const short *i = input; |
| 175 | const short *m = pMidBuffer; |
| 176 | |
| 177 | asm volatile ( |
172 | 178 | "movd %%ecx, %%mm0 \n\t" |
173 | 179 | "pxor %%mm7, %%mm7 \n\t" |
174 | 180 | "punpckldq %%mm0, %%mm0 \n\t" |
… |
… |
void TDStretchSSE2::overlapStereo(short *output, const short *input) const |
206 | 212 | "sub $1, %%ecx \n\t" |
207 | 213 | "jnz 1b \n\t" |
208 | 214 | "emms \n\t" |
209 | | ::"c"(overlapLength),"m"(sadd),"m"(ones),"r"(input),"r"(pMidBuffer), |
210 | | "r"(output) |
| 215 | ::"c"(overlapLength),"m"(sadd),"m"(ones),"r"(i),"r"(m),"r"(o) |
| 216 | :"memory" |
211 | 217 | ); |
212 | 218 | } |
213 | 219 | #endif // ALLOW_SSE2 |