Ticket #5890: soundtouch-update.patch
File soundtouch-update.patch, 11.3 KB (added by , 15 years ago) |
---|
-
mythtv/libs/libmythsoundtouch/STTypes.h
diff --git a/mythtv/libs/libmythsoundtouch/STTypes.h b/mythtv/libs/libmythsoundtouch/STTypes.h index acba355..58472de 100644
--- a/mythtv/libs/libmythsoundtouch/STTypes.h
+++ b/mythtv/libs/libmythsoundtouch/STTypes.h
@@ -116,6 +116,7 @@ namespace soundtouch
 #if _WIN32 || __MMX__ || MMX
 // Allow MMX optimizations
 #define ALLOW_MMX 1
+#define ALLOW_SSE 1
 #endif
 #endif
 
-
mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro
diff --git a/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro b/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro index 3c98681..01aabbf 100644
--- a/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro
+++ b/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro
@@ -11,8 +11,6 @@ INCLUDEPATH += ../../libs/libavcodec ../..
 #build position independent code since the library is linked into a shared library
 QMAKE_CXXFLAGS += -fPIC -DPIC
 
-contains(ARCH_X86_64, yes) { DEFINES += ALLOW_SSE }
-
 QMAKE_CLEAN += $(TARGET) $(TARGETA) $(TARGETD) $(TARGET0) $(TARGET1) $(TARGET2)
 
 # Input
@@ -30,7 +28,6 @@ SOURCES += SoundTouch.cpp
 SOURCES += TDStretch.cpp
 SOURCES += cpu_detect_x86_gcc.cpp
 SOURCES += mmx_gcc.cpp
-
-contains(ARCH_X86_64, yes) { SOURCES += sse_gcc.cpp }
+SOURCES += sse_gcc.cpp
 
 include ( ../libs-targetfix.pro )
-
mythtv/libs/libmythsoundtouch/sse_gcc.cpp
diff --git a/mythtv/libs/libmythsoundtouch/sse_gcc.cpp b/mythtv/libs/libmythsoundtouch/sse_gcc.cpp index 872f441..732f4eb 100644
--- a/mythtv/libs/libmythsoundtouch/sse_gcc.cpp
+++ b/mythtv/libs/libmythsoundtouch/sse_gcc.cpp
@@ -1,13 +1,19 @@
 // SSE2 version of the expensive routines for 16 bit integer samples
+#include "../../config.h"
 #include "STTypes.h"
 #include "TDStretch.h"
 using namespace std;
 using namespace soundtouch;
 
+#if defined(ARCH_X86_32)
+static unsigned long long ones = 0x0001ffff0001ffffULL;
+#elif defined(ARCH_X86_64)
+static unsigned long ones = 0x0001ffff0001ffffUL;
+#endif
+
 #ifdef ALLOW_SSE
 long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) const
 {
-
 long corr = 0;
 int i, out[4];
 int count = (overlapLength * channels) - channels;
@@ -18,35 +24,35 @@ long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) cons
 cPos += channels;
 
 asm(
-"xorps %%xmm8, %%xmm8 \n\t"
-"movd %4, %%xmm9 \n\t"
+"xorps %%xmm5, %%xmm5 \n\t"
+"movd %4, %%xmm7 \n\t"
 "1: \n\t"
 "movupd (%1), %%xmm0 \n\t"
+"movupd (%2), %%xmm1 \n\t"
 "movupd 16(%1), %%xmm2 \n\t"
+"pmaddwd %%xmm0, %%xmm1 \n\t"
 "movupd 32(%1), %%xmm4 \n\t"
 "movupd 48(%1), %%xmm6 \n\t"
-"movupd (%2), %%xmm1 \n\t"
+"psrad %%xmm7, %%xmm1 \n\t"
 "movupd 16(%2), %%xmm3 \n\t"
-"movupd 32(%2), %%xmm5 \n\t"
-"movupd 48(%2), %%xmm7 \n\t"
-"pmaddwd %%xmm0, %%xmm1 \n\t"
+"paddd %%xmm1, %%xmm5 \n\t"
+"movupd 32(%2), %%xmm0 \n\t"
 "pmaddwd %%xmm2, %%xmm3 \n\t"
-"pmaddwd %%xmm4, %%xmm5 \n\t"
-"pmaddwd %%xmm6, %%xmm7 \n\t"
-"psrad %%xmm9, %%xmm1 \n\t"
-"psrad %%xmm9, %%xmm3 \n\t"
-"paddd %%xmm1, %%xmm8 \n\t"
-"psrad %%xmm9, %%xmm5 \n\t"
-"paddd %%xmm3, %%xmm8 \n\t"
-"psrad %%xmm9, %%xmm7 \n\t"
+"movupd 48(%2), %%xmm1 \n\t"
+"pmaddwd %%xmm4, %%xmm0 \n\t"
+"psrad %%xmm7, %%xmm3 \n\t"
+"pmaddwd %%xmm6, %%xmm1 \n\t"
+"psrad %%xmm7, %%xmm0 \n\t"
+"paddd %%xmm3, %%xmm5 \n\t"
+"psrad %%xmm7, %%xmm1 \n\t"
+"paddd %%xmm0, %%xmm5 \n\t"
 "add $64, %1 \n\t"
-"paddd %%xmm5, %%xmm8 \n\t"
+"paddd %%xmm1, %%xmm5 \n\t"
 "add $64, %2 \n\t"
-"paddd %%xmm7, %%xmm8 \n\t"
 "loop 1b \n\t"
-"movdqa %%xmm8, %0 \n\t"
-:"=m"(out)
-:"r"(mPos), "r"(cPos), "c"(loops), "r"(overlapDividerBits)
+"movdqa %%xmm5, %0 \n\t"
+:"=m"(out[0])
+:"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits)
 );
 
 corr = out[0] + out[1] + out[2] + out[3];
@@ -58,12 +64,10 @@ long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) cons
 corr += (mPos[i] * cPos[i]) >> overlapDividerBits;
 
 return corr;
-
 }
 
 long TDStretchSSE::calcCrossCorrStereo(const short *mPos, const short *cPos) const
 {
-
 long corr = 0;
 int i, out[4];
 int count = (overlapLength<<1) - 2;
@@ -74,35 +78,35 @@ long TDStretchSSE::calcCrossCorrStereo(const short *mPos, const short *cPos) con
 cPos += 2;
 
 asm(
-"xorps %%xmm8, %%xmm8 \n\t"
-"movd %4, %%xmm9 \n\t"
+"xorps %%xmm5, %%xmm5 \n\t"
+"movd %4, %%xmm7 \n\t"
 "1: \n\t"
 "movupd (%1), %%xmm0 \n\t"
+"movupd (%2), %%xmm1 \n\t"
 "movupd 16(%1), %%xmm2 \n\t"
+"pmaddwd %%xmm0, %%xmm1 \n\t"
 "movupd 32(%1), %%xmm4 \n\t"
 "movupd 48(%1), %%xmm6 \n\t"
-"movupd (%2), %%xmm1 \n\t"
+"psrad %%xmm7, %%xmm1 \n\t"
 "movupd 16(%2), %%xmm3 \n\t"
-"movupd 32(%2), %%xmm5 \n\t"
-"movupd 48(%2), %%xmm7 \n\t"
-"pmaddwd %%xmm0, %%xmm1 \n\t"
+"paddd %%xmm1, %%xmm5 \n\t"
+"movupd 32(%2), %%xmm0 \n\t"
 "pmaddwd %%xmm2, %%xmm3 \n\t"
-"pmaddwd %%xmm4, %%xmm5 \n\t"
-"pmaddwd %%xmm6, %%xmm7 \n\t"
-"psrad %%xmm9, %%xmm1 \n\t"
-"psrad %%xmm9, %%xmm3 \n\t"
-"paddd %%xmm1, %%xmm8 \n\t"
-"psrad %%xmm9, %%xmm5 \n\t"
-"paddd %%xmm3, %%xmm8 \n\t"
-"psrad %%xmm9, %%xmm7 \n\t"
+"movupd 48(%2), %%xmm1 \n\t"
+"pmaddwd %%xmm4, %%xmm0 \n\t"
+"psrad %%xmm7, %%xmm3 \n\t"
+"pmaddwd %%xmm6, %%xmm1 \n\t"
+"psrad %%xmm7, %%xmm0 \n\t"
+"paddd %%xmm3, %%xmm5 \n\t"
+"psrad %%xmm7, %%xmm1 \n\t"
+"paddd %%xmm0, %%xmm5 \n\t"
 "add $64, %1 \n\t"
-"paddd %%xmm5, %%xmm8 \n\t"
+"paddd %%xmm1, %%xmm5 \n\t"
 "add $64, %2 \n\t"
-"paddd %%xmm7, %%xmm8 \n\t"
 "loop 1b \n\t"
-"movdqa %%xmm8, %0 \n\t"
-:"=m"(out)
-:"r"(mPos), "r"(cPos), "c"(loops), "r"(overlapDividerBits)
+"movdqa %%xmm5, %0 \n\t"
+:"=m"(out[0])
+:"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits)
 );
 
 corr = out[0] + out[1] + out[2] + out[3];
@@ -115,40 +119,37 @@ long TDStretchSSE::calcCrossCorrStereo(const short *mPos, const short *cPos) con
 mPos[i+1] * cPos[i+1]) >> overlapDividerBits;
 
 return corr;
-
 }
 
 void TDStretchSSE::overlapMulti(short *output, const short *input) const
 {
-unsigned long ones = 0x0001ffff0001ffffUL;
-
 asm(
 "movd %%ecx, %%xmm0 \n\t"
-"punpckldq %%xmm0, %%xmm0 \n\t"
 "shl %6 \n\t"
 "punpckldq %%xmm0, %%xmm0 \n\t"
-"movd %1, %%xmm1 \n\t"
 "movq %2, %%xmm2 \n\t"
+"punpckldq %%xmm0, %%xmm0 \n\t"
+"movd %1, %%xmm1 \n\t"
 "punpckldq %%xmm2, %%xmm2 \n\t"
 "1: \n\t"
 "movdqu (%3), %%xmm3 \n\t"
 "movdqu (%4), %%xmm4 \n\t"
-"movdqu %%xmm4, %%xmm5 \n\t"
+"movdqa %%xmm4, %%xmm5 \n\t"
 "punpcklwd %%xmm3, %%xmm4 \n\t"
-"punpckhwd %%xmm3, %%xmm5 \n\t"
 "add %6, %3 \n\t"
+"punpckhwd %%xmm3, %%xmm5 \n\t"
 "pmaddwd %%xmm0, %%xmm4 \n\t"
+"add %6, %4 \n\t"
 "pmaddwd %%xmm0, %%xmm5 \n\t"
 "psrad %%xmm1, %%xmm4 \n\t"
 "psrad %%xmm1, %%xmm5 \n\t"
-"add %6, %4 \n\t"
 "packssdw %%xmm5, %%xmm4 \n\t"
+"paddw %%xmm2, %%xmm0 \n\t"
 "movdqu %%xmm4, (%5) \n\t"
 "add %6, %5 \n\t"
-"paddw %%xmm2, %%xmm0 \n\t"
 "loop 1b \n\t"
-::"c"(overlapLength),"r"(overlapDividerBits),
-"r"(ones),"r"(input),"r"(pMidBuffer),"r"(output),
+::"c"(overlapLength),"m"(overlapDividerBits),
+"m"(ones),"r"(input),"r"(pMidBuffer),"r"(output),
 "r"((long)channels)
 );
 }
@@ -156,39 +157,36 @@ void TDStretchSSE::overlapMulti(short *output, const short *input) const
 void TDStretchSSE::overlapStereo(short *output, const short *input) const
 {
 // 4 bytes per sample - use MMX
-unsigned long ones = 0x0001ffff0001ffffUL;
-
 asm(
 "movd %%ecx, %%mm0 \n\t"
-"shr $1, %%ecx \n\t"
 "punpckldq %%mm0, %%mm0 \n\t"
-"movq %1, %%mm1 \n\t"
+"shr $1, %%ecx \n\t"
 "movq %%mm0, %%mm6 \n\t"
 "movq %2, %%mm2 \n\t"
 "paddw %%mm2, %%mm6 \n\t"
+"movd %1, %%mm1 \n\t"
 "paddw %%mm2, %%mm2 \n\t"
 "1: \n\t"
-"movq (%3), %%mm3 \n\t"
 "movq (%4), %%mm4 \n\t"
+"movq (%3), %%mm3 \n\t"
 "movq %%mm4, %%mm5 \n\t"
 "punpcklwd %%mm3, %%mm4 \n\t"
-"punpckhwd %%mm3, %%mm5 \n\t"
 "add $8, %3 \n\t"
 "pmaddwd %%mm0, %%mm4 \n\t"
-"pmaddwd %%mm6, %%mm5 \n\t"
+"punpckhwd %%mm3, %%mm5 \n\t"
 "psrad %%mm1, %%mm4 \n\t"
-"psrad %%mm1, %%mm5 \n\t"
+"pmaddwd %%mm6, %%mm5 \n\t"
 "add $8, %4 \n\t"
+"psrad %%mm1, %%mm5 \n\t"
+"paddw %%mm2, %%mm0 \n\t"
 "packssdw %%mm5, %%mm4 \n\t"
+"paddw %%mm2, %%mm6 \n\t"
 "movq %%mm4, (%5) \n\t"
 "add $8, %5 \n\t"
-"paddw %%mm2, %%mm0 \n\t"
-"paddw %%mm2, %%mm6 \n\t"
 "loop 1b \n\t"
 "emms \n\t"
-::"c"(overlapLength),"r"((long)overlapDividerBits),
-"r"(ones),"r"(input),"r"(pMidBuffer),"r"(output)
+::"c"(overlapLength),"m"(overlapDividerBits),
+"m"(ones),"r"(input),"r"(pMidBuffer),"r"(output)
 );
-
 }
 #endif // ALLOW_SSE