Ticket #5890: soundtouch-update-1.patch
File soundtouch-update-1.patch, 13.1 KB (added by , 15 years ago) |
---|
-
mythtv/libs/libmythsoundtouch/STTypes.h
diff --git a/mythtv/libs/libmythsoundtouch/STTypes.h b/mythtv/libs/libmythsoundtouch/STTypes.h index acba355..eee6030 100644
a b namespace soundtouch 116 116 #if _WIN32 || __MMX__ || MMX 117 117 // Allow MMX optimizations 118 118 #define ALLOW_MMX 1 119 #define ALLOW_SSE2 1 119 120 #endif 120 121 #endif 121 122 … … namespace soundtouch 133 134 #endif 134 135 135 136 #if WIN32 || __MMX__ || MMX 136 #define ALLOW_SSE 137 #define ALLOW_SSE_FLOAT 1 137 138 #endif 138 139 #endif 139 140 -
mythtv/libs/libmythsoundtouch/TDStretch.cpp
diff --git a/mythtv/libs/libmythsoundtouch/TDStretch.cpp b/mythtv/libs/libmythsoundtouch/TDStretch.cpp index fda9c31..a96c14e 100644
a b TDStretch * TDStretch::newInstance() 804 804 805 805 // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU 806 806 807 #ifdef ALLOW_SSE 808 if (uExtensions & MM_SSE )807 #ifdef ALLOW_SSE2 808 if (uExtensions & MM_SSE2) 809 809 { 810 810 // SSE support 811 return ::new TDStretchSSE ;811 return ::new TDStretchSSE2; 812 812 } 813 813 else 814 #endif // ALLOW_SSE 814 #endif // ALLOW_SSE2 815 815 816 816 817 817 #ifdef ALLOW_MMX -
mythtv/libs/libmythsoundtouch/TDStretch.h
diff --git a/mythtv/libs/libmythsoundtouch/TDStretch.h b/mythtv/libs/libmythsoundtouch/TDStretch.h index 48a0cc8..a365790 100644
a b public: 268 268 #endif /// ALLOW_3DNOW 269 269 270 270 271 #ifdef ALLOW_SSE 271 #ifdef ALLOW_SSE2 272 272 /// Class that implements SSE optimized routines for 16bit integer samples type. 273 class TDStretchSSE : public TDStretch273 class TDStretchSSE2 : public TDStretch 274 274 { 275 275 protected: 276 276 #ifdef MULTICHANNEL … … public: 281 281 virtual void overlapStereo(short *output, const short *input) const; 282 282 }; 283 283 284 #endif /// ALLOW_SSE 284 #endif /// ALLOW_SSE2 285 285 286 286 } 287 287 #endif /// TDStretch_H -
mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro
diff --git a/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro b/mythtv/libs/libmythsoundtouch/libmythsoundtouch.pro index 3c98681..01aabbf 100644
a b INCLUDEPATH += ../../libs/libavcodec ../.. 11 11 #build position independent code since the library is linked into a shared library 12 12 QMAKE_CXXFLAGS += -fPIC -DPIC 13 13 14 contains(ARCH_X86_64, yes) { DEFINES += ALLOW_SSE }15 16 14 QMAKE_CLEAN += $(TARGET) $(TARGETA) $(TARGETD) $(TARGET0) $(TARGET1) $(TARGET2) 17 15 18 16 # Input … … SOURCES += SoundTouch.cpp 30 28 SOURCES += TDStretch.cpp 31 29 SOURCES += cpu_detect_x86_gcc.cpp 32 30 SOURCES += mmx_gcc.cpp 33 34 contains(ARCH_X86_64, yes) { SOURCES += sse_gcc.cpp } 31 SOURCES += sse_gcc.cpp 35 32 36 33 include ( ../libs-targetfix.pro ) -
mythtv/libs/libmythsoundtouch/sse_gcc.cpp
diff --git a/mythtv/libs/libmythsoundtouch/sse_gcc.cpp b/mythtv/libs/libmythsoundtouch/sse_gcc.cpp index 872f441..129d1ca 100644
a b 1 1 // SSE2 version of the expensive routines for 16 bit integer samples 2 #include "../../config.h" 2 3 #include "STTypes.h" 3 4 #include "TDStretch.h" 4 5 using namespace std; 5 6 using namespace soundtouch; 6 7 7 #ifdef ALLOW_SSE 8 long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) const 8 #if defined(ARCH_X86_32) 9 static unsigned long long ones = 0x0001ffff0001ffffULL; 10 #elif defined(ARCH_X86_64) 11 static unsigned long ones = 0x0001ffff0001ffffUL; 12 #endif 13 14 #ifdef ALLOW_SSE2 15 long TDStretchSSE2::calcCrossCorrMulti(const short *mPos, const short *cPos) const 9 16 { 10 11 17 long corr = 0; 12 18 int i, out[4]; 13 19 int count = (overlapLength * channels) - channels; … … long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) cons 18 24 cPos += channels; 19 25 20 26 asm( 21 "xorps %%xmm 8, %%xmm8\n\t"22 "movd %4, %%xmm 9\n\t"27 "xorps %%xmm5, %%xmm5 \n\t" 28 "movd %4, %%xmm7 \n\t" 23 29 "1: \n\t" 24 30 "movupd (%1), %%xmm0 \n\t" 31 "movupd (%2), %%xmm1 \n\t" 25 32 "movupd 16(%1), %%xmm2 \n\t" 33 "pmaddwd %%xmm0, %%xmm1 \n\t" 26 34 "movupd 32(%1), %%xmm4 \n\t" 27 35 "movupd 48(%1), %%xmm6 \n\t" 28 " movupd (%2), %%xmm1\n\t"36 "psrad %%xmm7, %%xmm1 \n\t" 29 37 "movupd 16(%2), %%xmm3 \n\t" 30 "movupd 32(%2), %%xmm5 \n\t" 31 "movupd 48(%2), %%xmm7 \n\t" 32 "pmaddwd %%xmm0, %%xmm1 \n\t" 38 "paddd %%xmm1, %%xmm5 \n\t" 39 "movupd 32(%2), %%xmm0 \n\t" 33 40 "pmaddwd %%xmm2, %%xmm3 \n\t" 34 " pmaddwd %%xmm4, %%xmm5\n\t"35 "pmaddwd %%xmm 6, %%xmm7\n\t"36 "psrad %%xmm 9, %%xmm1\n\t"37 "p srad %%xmm9, %%xmm3\n\t"38 "p addd %%xmm1, %%xmm8\n\t"39 "p srad %%xmm9, %%xmm5 \n\t"40 "p addd %%xmm3, %%xmm8\n\t"41 "p srad %%xmm9, %%xmm7\n\t"41 "movupd 48(%2), %%xmm1 \n\t" 42 "pmaddwd %%xmm4, %%xmm0 \n\t" 43 "psrad %%xmm7, %%xmm3 \n\t" 44 "pmaddwd %%xmm6, %%xmm1 \n\t" 45 "psrad %%xmm7, %%xmm0 \n\t" 46 "paddd %%xmm3, %%xmm5 \n\t" 47 "psrad %%xmm7, %%xmm1 \n\t" 48 "paddd %%xmm0, %%xmm5 \n\t" 42 49 "add $64, %1 \n\t" 43 "paddd %%xmm 5, %%xmm8\n\t"50 "paddd %%xmm1, %%xmm5 \n\t" 44 51 "add $64, %2 \n\t" 45 "paddd %%xmm7, %%xmm8 \n\t"46 52 "loop 1b \n\t" 47 "movdqa %%xmm 8, %0 \n\t"48 :"=m"(out )49 :"r"(mPos), "r"(cPos), "c"(loops), " r"(overlapDividerBits)53 "movdqa %%xmm5, %0 \n\t" 54 :"=m"(out[0]) 55 :"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits) 50 56 ); 51 57 52 58 corr = out[0] + out[1] + out[2] + out[3]; … … long TDStretchSSE::calcCrossCorrMulti(const short *mPos, const short *cPos) cons 58 64 corr += (mPos[i] * cPos[i]) >> overlapDividerBits; 59 65 60 66 return corr; 61 62 67 } 63 68 64 long TDStretchSSE ::calcCrossCorrStereo(const short *mPos, const short *cPos) const69 long TDStretchSSE2::calcCrossCorrStereo(const short *mPos, const short *cPos) const 65 70 { 66 67 71 long corr = 0; 68 72 int i, out[4]; 69 73 int count = (overlapLength<<1) - 2; … … long TDStretchSSE::calcCrossCorrStereo(const short *mPos, const short *cPos) con 74 78 cPos += 2; 75 79 76 80 asm( 77 "xorps %%xmm 8, %%xmm8\n\t"78 "movd %4, %%xmm 9\n\t"81 "xorps %%xmm5, %%xmm5 \n\t" 82 "movd %4, %%xmm7 \n\t" 79 83 "1: \n\t" 80 84 "movupd (%1), %%xmm0 \n\t" 85 "movupd (%2), %%xmm1 \n\t" 81 86 "movupd 16(%1), %%xmm2 \n\t" 87 "pmaddwd %%xmm0, %%xmm1 \n\t" 82 88 "movupd 32(%1), %%xmm4 \n\t" 83 89 "movupd 48(%1), %%xmm6 \n\t" 84 " movupd (%2), %%xmm1\n\t"90 "psrad %%xmm7, %%xmm1 \n\t" 85 91 "movupd 16(%2), %%xmm3 \n\t" 86 "movupd 32(%2), %%xmm5 \n\t" 87 "movupd 48(%2), %%xmm7 \n\t" 88 "pmaddwd %%xmm0, %%xmm1 \n\t" 92 "paddd %%xmm1, %%xmm5 \n\t" 93 "movupd 32(%2), %%xmm0 \n\t" 89 94 "pmaddwd %%xmm2, %%xmm3 \n\t" 90 " pmaddwd %%xmm4, %%xmm5\n\t"91 "pmaddwd %%xmm 6, %%xmm7\n\t"92 "psrad %%xmm 9, %%xmm1\n\t"93 "p srad %%xmm9, %%xmm3\n\t"94 "p addd %%xmm1, %%xmm8\n\t"95 "p srad %%xmm9, %%xmm5 \n\t"96 "p addd %%xmm3, %%xmm8\n\t"97 "p srad %%xmm9, %%xmm7\n\t"95 "movupd 48(%2), %%xmm1 \n\t" 96 "pmaddwd %%xmm4, %%xmm0 \n\t" 97 "psrad %%xmm7, %%xmm3 \n\t" 98 "pmaddwd %%xmm6, %%xmm1 \n\t" 99 "psrad %%xmm7, %%xmm0 \n\t" 100 "paddd %%xmm3, %%xmm5 \n\t" 101 "psrad %%xmm7, %%xmm1 \n\t" 102 "paddd %%xmm0, %%xmm5 \n\t" 98 103 "add $64, %1 \n\t" 99 "paddd %%xmm 5, %%xmm8\n\t"104 "paddd %%xmm1, %%xmm5 \n\t" 100 105 "add $64, %2 \n\t" 101 "paddd %%xmm7, %%xmm8 \n\t"102 106 "loop 1b \n\t" 103 "movdqa %%xmm 8, %0 \n\t"104 :"=m"(out )105 :"r"(mPos), "r"(cPos), "c"(loops), " r"(overlapDividerBits)107 "movdqa %%xmm5, %0 \n\t" 108 :"=m"(out[0]) 109 :"r"(mPos), "r"(cPos), "c"(loops), "m"(overlapDividerBits) 106 110 ); 107 111 108 112 corr = out[0] + out[1] + out[2] + out[3]; … … long TDStretchSSE::calcCrossCorrStereo(const short *mPos, const short *cPos) con 115 119 mPos[i+1] * cPos[i+1]) >> overlapDividerBits; 116 120 117 121 return corr; 118 119 122 } 120 123 121 void TDStretchSSE ::overlapMulti(short *output, const short *input) const124 void TDStretchSSE2::overlapMulti(short *output, const short *input) const 122 125 { 123 unsigned long ones = 0x0001ffff0001ffffUL;124 125 126 asm( 126 127 "movd %%ecx, %%xmm0 \n\t" 127 "punpckldq %%xmm0, %%xmm0 \n\t"128 128 "shl %6 \n\t" 129 129 "punpckldq %%xmm0, %%xmm0 \n\t" 130 "movd %1, %%xmm1 \n\t"131 130 "movq %2, %%xmm2 \n\t" 131 "punpckldq %%xmm0, %%xmm0 \n\t" 132 "movd %1, %%xmm1 \n\t" 132 133 "punpckldq %%xmm2, %%xmm2 \n\t" 133 134 "1: \n\t" 134 135 "movdqu (%3), %%xmm3 \n\t" 135 136 "movdqu (%4), %%xmm4 \n\t" 136 "movdq u%%xmm4, %%xmm5 \n\t"137 "movdqa %%xmm4, %%xmm5 \n\t" 137 138 "punpcklwd %%xmm3, %%xmm4 \n\t" 138 "punpckhwd %%xmm3, %%xmm5 \n\t"139 139 "add %6, %3 \n\t" 140 "punpckhwd %%xmm3, %%xmm5 \n\t" 140 141 "pmaddwd %%xmm0, %%xmm4 \n\t" 142 "add %6, %4 \n\t" 141 143 "pmaddwd %%xmm0, %%xmm5 \n\t" 142 144 "psrad %%xmm1, %%xmm4 \n\t" 143 145 "psrad %%xmm1, %%xmm5 \n\t" 144 "add %6, %4 \n\t"145 146 "packssdw %%xmm5, %%xmm4 \n\t" 147 "paddw %%xmm2, %%xmm0 \n\t" 146 148 "movdqu %%xmm4, (%5) \n\t" 147 149 "add %6, %5 \n\t" 148 "paddw %%xmm2, %%xmm0 \n\t"149 150 "loop 1b \n\t" 150 ::"c"(overlapLength)," r"(overlapDividerBits),151 " r"(ones),"r"(input),"r"(pMidBuffer),"r"(output),151 ::"c"(overlapLength),"m"(overlapDividerBits), 152 "m"(ones),"r"(input),"r"(pMidBuffer),"r"(output), 152 153 "r"((long)channels) 153 154 ); 154 155 } 155 156 156 void TDStretchSSE ::overlapStereo(short *output, const short *input) const157 void TDStretchSSE2::overlapStereo(short *output, const short *input) const 157 158 { 158 159 // 4 bytes per sample - use MMX 159 unsigned long ones = 0x0001ffff0001ffffUL;160 161 160 asm( 162 161 "movd %%ecx, %%mm0 \n\t" 163 "shr $1, %%ecx \n\t"164 162 "punpckldq %%mm0, %%mm0 \n\t" 165 " movq %1, %%mm1\n\t"163 "shr $1, %%ecx \n\t" 166 164 "movq %%mm0, %%mm6 \n\t" 167 165 "movq %2, %%mm2 \n\t" 168 166 "paddw %%mm2, %%mm6 \n\t" 167 "movd %1, %%mm1 \n\t" 169 168 "paddw %%mm2, %%mm2 \n\t" 170 169 "1: \n\t" 171 "movq (%3), %%mm3 \n\t"172 170 "movq (%4), %%mm4 \n\t" 171 "movq (%3), %%mm3 \n\t" 173 172 "movq %%mm4, %%mm5 \n\t" 174 173 "punpcklwd %%mm3, %%mm4 \n\t" 175 "punpckhwd %%mm3, %%mm5 \n\t"176 174 "add $8, %3 \n\t" 177 175 "pmaddwd %%mm0, %%mm4 \n\t" 178 "p maddwd %%mm6, %%mm5 \n\t"176 "punpckhwd %%mm3, %%mm5 \n\t" 179 177 "psrad %%mm1, %%mm4 \n\t" 180 "p srad %%mm1, %%mm5 \n\t"178 "pmaddwd %%mm6, %%mm5 \n\t" 181 179 "add $8, %4 \n\t" 180 "psrad %%mm1, %%mm5 \n\t" 181 "paddw %%mm2, %%mm0 \n\t" 182 182 "packssdw %%mm5, %%mm4 \n\t" 183 "paddw %%mm2, %%mm6 \n\t" 183 184 "movq %%mm4, (%5) \n\t" 184 185 "add $8, %5 \n\t" 185 "paddw %%mm2, %%mm0 \n\t"186 "paddw %%mm2, %%mm6 \n\t"187 186 "loop 1b \n\t" 188 187 "emms \n\t" 189 ::"c"(overlapLength)," r"((long)overlapDividerBits),190 " r"(ones),"r"(input),"r"(pMidBuffer),"r"(output)188 ::"c"(overlapLength),"m"(overlapDividerBits), 189 "m"(ones),"r"(input),"r"(pMidBuffer),"r"(output) 191 190 ); 192 193 191 } 194 #endif // ALLOW_SSE 192 #endif // ALLOW_SSE2