Ticket #4200: intelleopardmmx.patch
File intelleopardmmx.patch, 22.7 KB (added 16 years ago; uploader name missing from this extract)
-
configure
1564 1564   enable backend
1565 1565   enable darwin
1566 1566   disable ivtv
1567      - osxver=$(uname -r | cut -c 1)
1568      - if [ ${osxver} = "9" ]; then
1569      - disable mmx
1570      - fi
1571 1567   disable need_memalign
1572 1568   disable opengl_video
1573 1569   disable opengl_vsync
-
libs/libavcodec/i386/motion_est_mmx.c
167 167   static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
168 168   {
169 169   asm volatile(
170     - "movq "MANGLE(bone)", %%mm5 \n\t"
    170 + "movq %4, %%mm5 \n\t"
171 171   "movq (%1), %%mm0 \n\t"
172 172   "pavgb 1(%1), %%mm0 \n\t"
173 173   "add %3, %1 \n\t"
… …
190 190   "sub $2, %0 \n\t"
191 191   " jg 1b \n\t"
192 192   : "+r" (h), "+r" (blk1), "+r" (blk2)
193     - : "r" ((long)stride)
    193 + : "r" ((long)stride), "m" (bone)
194 194   );
195 195   }
196 196
… …
258 258   "punpckhbw %%mm7, %%mm5 \n\t"
259 259   "paddw %%mm4, %%mm2 \n\t"
260 260   "paddw %%mm5, %%mm3 \n\t"
261     - "movq 16+ "MANGLE(round_tab)", %%mm5 \n\t"
    261 + "movq 16+%5, %%mm5 \n\t"
262 262   "paddw %%mm2, %%mm0 \n\t"
263 263   "paddw %%mm3, %%mm1 \n\t"
264 264   "paddw %%mm5, %%mm0 \n\t"
… …
281 281   "add %4, %%"REG_a" \n\t"
282 282   " js 1b \n\t"
283 283   : "+a" (len)
284     - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
    284 + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
285 285   );
286 286   }
287 287
-
libs/libavcodec/i386/dsputil_h264_template_mmx.c
188 188   "pxor %%mm7, %%mm7 \n\t"
189 189   "movd %5, %%mm2 \n\t"
190 190   "movd %6, %%mm3 \n\t"
191     - "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
192     - "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
    191 + "movq %7, %%mm4\n\t"
    192 + "movq %7, %%mm5\n\t"
193 193   "punpcklwd %%mm2, %%mm2 \n\t"
194 194   "punpcklwd %%mm3, %%mm3 \n\t"
195 195   "punpcklwd %%mm2, %%mm2 \n\t"
… …
246 246   "sub $2, %2 \n\t"
247 247   "jnz 1b \n\t"
248 248   : "+r"(dst), "+r"(src), "+r"(h)
249     - : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
    249 + : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
250 250   );
251 251   }
252 252
-
libs/libavcodec/i386/dsputil_mmx.c
1923 1923 1924 1924 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ 1925 1925 "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ 1926 "movq " MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\1926 "movq "#pw_20", %%mm4 \n\t" /* 20 */\ 1927 1927 "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ 1928 1928 "movq "#in7", " #m3 " \n\t" /* d */\ 1929 1929 "movq "#in0", %%mm5 \n\t" /* D */\ … … 1935 1935 "paddw " #m5 ", %%mm6 \n\t" /* x2 */\ 1936 1936 "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ 1937 1937 "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ 1938 "pmullw " MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\1938 "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\ 1939 1939 "paddw " #rnd ", %%mm4 \n\t" /* x2 */\ 1940 1940 "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ 1941 1941 "psraw $5, %%mm5 \n\t"\ … … 1969 1969 "paddw %%mm5, %%mm5 \n\t" /* 2b */\ 1970 1970 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ 1971 1971 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ 1972 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\1972 "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\ 1973 1973 "paddw %%mm4, %%mm0 \n\t" /* a */\ 1974 1974 "paddw %%mm1, %%mm5 \n\t" /* d */\ 1975 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\1975 "pmullw %7, %%mm0 \n\t" /* 20a */\ 1976 1976 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ 1977 1977 "paddw %6, %%mm6 \n\t"\ 1978 1978 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ … … 1995 1995 "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ 1996 1996 "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ 1997 1997 "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ 1998 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\1998 "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\ 1999 1999 "paddw %%mm2, %%mm1 \n\t" /* a */\ 2000 2000 "paddw %%mm6, %%mm4 \n\t" /* d */\ 2001 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\2001 "pmullw %7, %%mm1 \n\t" /* 20a */\ 2002 2002 "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ 2003 2003 "paddw %6, %%mm1 \n\t"\ 2004 2004 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ 
… … 2021 2021 "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ 2022 2022 "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ 2023 2023 "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ 2024 "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\2024 "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\ 2025 2025 "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ 2026 2026 "paddw %%mm3, %%mm2 \n\t" /* d */\ 2027 2027 "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ … … 2029 2029 "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ 2030 2030 "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ 2031 2031 "paddw %%mm2, %%mm6 \n\t" /* a */\ 2032 "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\2032 "pmullw %7, %%mm6 \n\t" /* 20a */\ 2033 2033 "paddw %6, %%mm0 \n\t"\ 2034 2034 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ 2035 2035 "psraw $5, %%mm0 \n\t"\ … … 2044 2044 "paddw %%mm2, %%mm5 \n\t" /* d */\ 2045 2045 "paddw %%mm6, %%mm6 \n\t" /* 2b */\ 2046 2046 "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ 2047 "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\2048 "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\2047 "pmullw %7, %%mm3 \n\t" /* 20a */\ 2048 "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\ 2049 2049 "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ 2050 2050 "paddw %6, %%mm4 \n\t"\ 2051 2051 "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ … … 2058 2058 "decl %2 \n\t"\ 2059 2059 " jnz 1b \n\t"\ 2060 2060 : "+a"(src), "+c"(dst), "+m"(h)\ 2061 : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ 2061 : "d"((long)srcStride), "S"((long)dstStride),\ 2062 "m"(temp), "m"(ROUNDER),\ 2063 "m"(ff_pw_20), "m"(ff_pw_3)\ 2062 2064 : "memory"\ 2063 2065 );\ 2064 2066 }\ … … 2136 2138 "paddw %%mm5, %%mm5 \n\t" /* 2b */\ 2137 2139 "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ 2138 2140 "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ 2139 "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\2141 "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\ 2140 2142 "paddw %%mm4, %%mm0 \n\t" /* a */\ 2141 2143 "paddw %%mm1, 
%%mm5 \n\t" /* d */\ 2142 "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\2144 "pmullw %7, %%mm0 \n\t" /* 20a */\ 2143 2145 "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ 2144 2146 "paddw %6, %%mm6 \n\t"\ 2145 2147 "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ … … 2157 2159 "paddw %%mm5, %%mm4 \n\t" /* d */\ 2158 2160 "paddw %%mm2, %%mm2 \n\t" /* 2b */\ 2159 2161 "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ 2160 "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\2161 "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\2162 "pmullw %7, %%mm1 \n\t" /* 20a */\ 2163 "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\ 2162 2164 "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ 2163 2165 "paddw %6, %%mm1 \n\t"\ 2164 2166 "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ … … 2171 2173 "decl %2 \n\t"\ 2172 2174 " jnz 1b \n\t"\ 2173 2175 : "+a"(src), "+c"(dst), "+m"(h)\ 2174 : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ 2176 : "S"((long)srcStride), "D"((long)dstStride),\ 2177 "m"(temp), "m"(ROUNDER),\ 2178 "m"(ff_pw_20), "m"(ff_pw_3)\ 2175 2179 : "memory"\ 2176 2180 );\ 2177 2181 }\ … … 2250 2254 "movq 8(%0), %%mm1 \n\t"\ 2251 2255 "movq 16(%0), %%mm2 \n\t"\ 2252 2256 "movq 24(%0), %%mm3 \n\t"\ 2253 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\2254 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\2257 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ 2258 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ 2255 2259 "add %4, %1 \n\t"\ 2256 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\2260 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ 2257 2261 \ 2258 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\2262 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, 
%%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ 2259 2263 "add %4, %1 \n\t"\ 2260 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\2261 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\2264 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ 2265 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ 2262 2266 "add %4, %1 \n\t"\ 2263 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\2264 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\2267 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ 2268 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ 2265 2269 "add %4, %1 \n\t"\ 2266 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\2267 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\2270 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ 2271 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ 2268 2272 "add %4, %1 \n\t"\ 2269 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\2270 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\2273 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ 2274 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ 2271 2275 "add %4, %1 \n\t"\ 2272 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\2276 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 
88(%0),128(%0), (%1), OP)\ 2273 2277 \ 2274 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\2278 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ 2275 2279 "add %4, %1 \n\t" \ 2276 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\2277 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\2280 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ 2281 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ 2278 2282 \ 2279 2283 "add $136, %0 \n\t"\ 2280 2284 "add %6, %1 \n\t"\ … … 2282 2286 " jnz 1b \n\t"\ 2283 2287 \ 2284 2288 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ 2285 : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\ 2289 : "r"((long)dstStride), "r"(2*(long)dstStride),\ 2290 "m"(ROUNDER), "g"(4-14*(long)dstStride),\ 2291 "m"(ff_pw_20), "m"(ff_pw_3)\ 2286 2292 :"memory"\ 2287 2293 );\ 2288 2294 }\ … … 2322 2328 "movq 8(%0), %%mm1 \n\t"\ 2323 2329 "movq 16(%0), %%mm2 \n\t"\ 2324 2330 "movq 24(%0), %%mm3 \n\t"\ 2325 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\2326 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\2331 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ 2332 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ 2327 2333 "add %4, %1 \n\t"\ 2328 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\2334 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ 2329 2335 \ 2330 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\2336 
QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ 2331 2337 "add %4, %1 \n\t"\ 2332 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, % 5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\2338 QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ 2333 2339 \ 2334 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, % 5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\2340 QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ 2335 2341 "add %4, %1 \n\t"\ 2336 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, % 5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\2337 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, % 5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\2342 QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ 2343 QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ 2338 2344 \ 2339 2345 "add $72, %0 \n\t"\ 2340 2346 "add %6, %1 \n\t"\ … … 2342 2348 " jnz 1b \n\t"\ 2343 2349 \ 2344 2350 : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ 2345 : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\ 2351 : "r"((long)dstStride), "r"(2*(long)dstStride),\ 2352 "m"(ROUNDER), "g"(4-6*(long)dstStride),\ 2353 "m"(ff_pw_20), "m"(ff_pw_3)\ 2346 2354 : "memory"\ 2347 2355 );\ 2348 2356 }\ … … 2977 2985 double c = 2.0 / (len-1.0); 2978 2986 int n2 = len>>1; 2979 2987 long i = -n2*sizeof(int32_t); 2980 long j = n2*sizeof(int32_t);2981 2988 asm volatile( 2982 2989 "movsd %0, %%xmm7 \n\t" 2983 2990 "movapd %1, %%xmm6 \n\t" … … 2995 3002 "movapd %%xmm6, %%xmm0 \n\t"\ 2996 3003 "subpd %%xmm1, %%xmm0 \n\t"\ 2997 3004 "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ 2998 "cvtpi2pd (%4,%0), %%xmm2 \n\t"\ 2999 "cvtpi2pd (%5,%1), %%xmm3 \n\t"\ 3005 "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ 3000 3006 "mulpd %%xmm0, %%xmm2 \n\t"\ 3007 "movapd %%xmm2, (%1,%0,2) 
\n\t"\ 3008 "negl %0\n\t"\ 3009 "cvtpi2pd (%4,%0), %%xmm3 \n\t"\ 3001 3010 "mulpd %%xmm1, %%xmm3 \n\t"\ 3002 "movapd %%xmm2, (%2,%0,2) \n\t"\ 3003 MOVPD" %%xmm3, (%3,%1,2) \n\t"\ 3011 MOVPD" %%xmm3, (%2,%0,2) \n\t"\ 3004 3012 "subpd %%xmm5, %%xmm7 \n\t"\ 3005 " sub $8, %1\n\t"\3013 "negl %0\n\t"\ 3006 3014 "add $8, %0 \n\t"\ 3007 3015 "jl 1b \n\t"\ 3008 :"+&r"(i) , "+&r"(j)\3016 :"+&r"(i)\ 3009 3017 :"r"(w_data+n2), "r"(w_data+len-2-n2),\ 3010 3018 "r"(data+n2), "r"(data+len-2-n2)\ 3011 3019 ); -
libs/libavcodec/i386/h264dsp_mmx.c
341 341 // in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) 342 342 // out: mm1=p0\' mm2=q0\' 343 343 // clobbers: mm0,3-6 344 #define H264_DEBLOCK_P0_Q0(pb_01, pb_3 f)\344 #define H264_DEBLOCK_P0_Q0(pb_01, pb_3, pb_a1)\ 345 345 "movq %%mm1 , %%mm5 \n\t"\ 346 346 "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\ 347 347 "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\ 348 348 "pcmpeqb %%mm4 , %%mm4 \n\t"\ 349 349 "pxor %%mm4 , %%mm3 \n\t"\ 350 350 "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\ 351 "pavgb " MANGLE(ff_pb_3)", %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\351 "pavgb "#pb_3" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\ 352 352 "pxor %%mm1 , %%mm4 \n\t"\ 353 353 "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\ 354 354 "pavgb %%mm5 , %%mm3 \n\t"\ 355 355 "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\ 356 "movq " MANGLE(ff_pb_A1)", %%mm6 \n\t"\356 "movq "#pb_a1" , %%mm6 \n\t"\ 357 357 "psubusb %%mm3 , %%mm6 \n\t"\ 358 "psubusb " MANGLE(ff_pb_A1)", %%mm3 \n\t"\358 "psubusb "#pb_a1" , %%mm3 \n\t"\ 359 359 "pminub %%mm7 , %%mm6 \n\t"\ 360 360 "pminub %%mm7 , %%mm3 \n\t"\ 361 361 "psubusb %%mm6 , %%mm1 \n\t"\ … … 422 422 H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6) 423 423 424 424 /* filter p0, q0 */ 425 H264_DEBLOCK_P0_Q0(%8, unused)425 H264_DEBLOCK_P0_Q0(%8, %9, %10) 426 426 "movq %%mm1, (%1,%3,2) \n\t" 427 427 "movq %%mm2, (%2) \n\t" 428 428 429 429 : "=m"(*tmp0) 430 430 : "r"(pix-3*stride), "r"(pix), "r"((long)stride), 431 431 "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1), 432 "m"(mm_bone) 432 "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1) 433 433 ); 434 434 } 435 435 … … 470 470 "movd %3, %%mm6 \n\t" 471 471 "punpcklbw %%mm6, %%mm6 \n\t" 472 472 "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask 473 H264_DEBLOCK_P0_Q0(%6, %7 )473 H264_DEBLOCK_P0_Q0(%6, %7, %8) 474 474 "movq %%mm1, (%0,%2) \n\t" 475 475 "movq %%mm2, (%1) \n\t" 476 476 477 477 :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), 478 478 
"r"(*(uint32_t*)tc0), 479 "m"(alpha1), "m"(beta1), "m"(mm_bone), "m" (ff_pb_3F)479 "m"(alpha1), "m"(beta1), "m"(mm_bone), "m" (ff_pb_3), "m" (ff_pb_A1) 480 480 ); 481 481 } 482 482 -
libs/libavcodec/i386/simple_idct_mmx.c
363 363 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ 364 364 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ 365 365 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ 366 "movq "MANGLE(wm1010)", %%mm4\n\t"\366 "movq %3, %%mm4 \n\t"\ 367 367 "pand %%mm0, %%mm4 \n\t"\ 368 368 "por %%mm1, %%mm4 \n\t"\ 369 369 "por %%mm2, %%mm4 \n\t"\ … … 437 437 "jmp 2f \n\t"\ 438 438 "1: \n\t"\ 439 439 "pslld $16, %%mm0 \n\t"\ 440 "#paddd "MANGLE(d40000)", %%mm0\n\t"\440 "#paddd %4, %%mm0 \n\t"\ 441 441 "psrad $13, %%mm0 \n\t"\ 442 442 "packssdw %%mm0, %%mm0 \n\t"\ 443 443 "movq %%mm0, " #dst " \n\t"\ … … 471 471 "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ 472 472 "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ 473 473 "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ 474 "movq "MANGLE(wm1010)", %%mm4\n\t"\474 "movq %3, %%mm4 \n\t"\ 475 475 "pand %%mm0, %%mm4 \n\t"\ 476 476 "por %%mm1, %%mm4 \n\t"\ 477 477 "por %%mm2, %%mm4 \n\t"\ … … 545 545 "jmp 2f \n\t"\ 546 546 "1: \n\t"\ 547 547 "pslld $16, %%mm0 \n\t"\ 548 "paddd "MANGLE(d40000)", %%mm0\n\t"\548 "paddd %4, %%mm0 \n\t"\ 549 549 "psrad $13, %%mm0 \n\t"\ 550 550 "packssdw %%mm0, %%mm0 \n\t"\ 551 551 "movq %%mm0, " #dst " \n\t"\ … … 1270 1270 */ 1271 1271 1272 1272 "9: \n\t" 1273 :: "r" (block), "r" (temp), "r" (coeffs) 1273 :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000) 1274 1274 : "%eax" 1275 1275 ); 1276 1276 } -
filters/filters.pro
2 2
3 3   # Directories
4 4   SUBDIRS += invert linearblend denoise3d quickdnr kerneldeint crop force convert
5   - SUBDIRS += adjust onefield bobdeint ivtc greedyhdeint yadif
  5 + SUBDIRS += adjust onefield bobdeint ivtc yadif
6 6
7 7   # This filter is currently broken, because the FFmpeg code that
8 8   # it depends on was moved into a seperate library (libpostproc).