| 1 | /* |
| 2 | * Copyright (C) MPlayer Project license GPL v2 |
| 3 | */ |
| 4 | |
| 5 | |
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pullup.h"
#include "config.h"
| 11 | |
| 12 | |
| 13 | |
| 14 | #ifdef HAVE_MMX |
/*
 * MMX version of diff_y(): sum of absolute byte differences between two
 * blocks that are 8 bytes wide and 4 rows tall.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows of each block
 *
 * Returns the accumulated sum.
 *
 * The asm uses compiler-allocated, pointer-width operands instead of fixed
 * 32-bit registers so that it is also correct on x86-64, and it declares the
 * pointers as read-write operands because the loop advances them.
 * NOTE(review): the MMX registers are not listed as clobbers; "emms" at the
 * end resets the MMX/x87 state before returning to C code.
 */
static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
{
    int ret;
    int rows = 4;           /* loop counter, reused as scratch afterwards */
    ptrdiff_t stride = s;   /* widened so it can be added to a pointer */

    __asm__ volatile (
        "pxor %%mm4, %%mm4 \n\t"        /* mm4: four 16-bit accumulators */
        "pxor %%mm7, %%mm7 \n\t"        /* mm7: constant zero */

        ".balign 16 \n\t"
        "1: \n\t"

        "movq (%[a]), %%mm0 \n\t"
        "movq (%[a]), %%mm2 \n\t"
        "add %[s], %[a] \n\t"
        "movq (%[b]), %%mm1 \n\t"
        "add %[s], %[b] \n\t"
        /* |a-b| as two saturating subtractions: one of them is zero */
        "psubusb %%mm1, %%mm2 \n\t"
        "psubusb %%mm0, %%mm1 \n\t"
        "movq %%mm2, %%mm0 \n\t"
        "movq %%mm1, %%mm3 \n\t"
        /* widen bytes to words and accumulate */
        "punpcklbw %%mm7, %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm2 \n\t"
        "punpckhbw %%mm7, %%mm3 \n\t"
        "paddw %%mm0, %%mm4 \n\t"
        "paddw %%mm1, %%mm4 \n\t"
        "paddw %%mm2, %%mm4 \n\t"
        "paddw %%mm3, %%mm4 \n\t"

        "decl %[n] \n\t"
        "jnz 1b \n\t"

        /* horizontal add of the four word accumulators */
        "movq %%mm4, %%mm3 \n\t"
        "punpcklwd %%mm7, %%mm4 \n\t"
        "punpckhwd %%mm7, %%mm3 \n\t"
        "paddd %%mm4, %%mm3 \n\t"
        "movd %%mm3, %[ret] \n\t"
        "psrlq $32, %%mm3 \n\t"
        "movd %%mm3, %[n] \n\t"
        "addl %[n], %[ret] \n\t"
        "emms \n\t"
        : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b), [n] "+r" (rows)
        : [s] "r" (stride)
        : "memory", "cc"
    );
    return ret;
}
| 62 | |
/*
 * MMX version of licomb_y(): combing metric over a block that is 8 bytes
 * wide and 4 rows tall.
 *
 * For every byte position it accumulates
 *     |2*a[j] - b[j-s] - b[j]|  +  |2*b[j] - a[j] - a[j+s]|
 * so the row before b and the row after the last row of a are read too.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows of each block
 *
 * Returns the accumulated sum.
 *
 * Operands are compiler-allocated and pointer-width (works on x86-64), and
 * the pointers are declared read-write because the asm modifies them.
 * NOTE(review): the MMX registers are not listed as clobbers; "emms" at the
 * end resets the MMX/x87 state before returning to C code.
 */
static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
{
    int ret;
    int rows = 4;           /* loop counter, reused as scratch afterwards */
    ptrdiff_t stride = s;   /* widened so it can be used in addressing */

    __asm__ volatile (
        "pxor %%mm6, %%mm6 \n\t"        /* mm6: four 16-bit accumulators */
        "pxor %%mm7, %%mm7 \n\t"        /* mm7: constant zero */
        "sub %[s], %[b] \n\t"           /* b now points at the previous row */

        ".balign 16 \n\t"
        "2: \n\t"

        /* low 4 bytes: |2*a - (b[-s] + b)| */
        "movq (%[a]), %%mm0 \n\t"
        "movq (%[b]), %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm0 \n\t"
        "movq (%[b],%[s]), %%mm2 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm2 \n\t"
        "paddw %%mm0, %%mm0 \n\t"
        "paddw %%mm2, %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "psubusw %%mm1, %%mm0 \n\t"
        "psubusw %%mm2, %%mm1 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "paddw %%mm1, %%mm6 \n\t"

        /* high 4 bytes: |2*a - (b[-s] + b)| */
        "movq (%[a]), %%mm0 \n\t"
        "movq (%[b]), %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm0 \n\t"
        "movq (%[b],%[s]), %%mm2 \n\t"
        "punpckhbw %%mm7, %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm2 \n\t"
        "paddw %%mm0, %%mm0 \n\t"
        "paddw %%mm2, %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "psubusw %%mm1, %%mm0 \n\t"
        "psubusw %%mm2, %%mm1 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "paddw %%mm1, %%mm6 \n\t"

        /* low 4 bytes: |2*b - (a + a[+s])| */
        "movq (%[b],%[s]), %%mm0 \n\t"
        "movq (%[a]), %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm0 \n\t"
        "movq (%[a],%[s]), %%mm2 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm2 \n\t"
        "paddw %%mm0, %%mm0 \n\t"
        "paddw %%mm2, %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "psubusw %%mm1, %%mm0 \n\t"
        "psubusw %%mm2, %%mm1 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "paddw %%mm1, %%mm6 \n\t"

        /* high 4 bytes: |2*b - (a + a[+s])| */
        "movq (%[b],%[s]), %%mm0 \n\t"
        "movq (%[a]), %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm0 \n\t"
        "movq (%[a],%[s]), %%mm2 \n\t"
        "punpckhbw %%mm7, %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm2 \n\t"
        "paddw %%mm0, %%mm0 \n\t"
        "paddw %%mm2, %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "psubusw %%mm1, %%mm0 \n\t"
        "psubusw %%mm2, %%mm1 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "paddw %%mm1, %%mm6 \n\t"

        "add %[s], %[a] \n\t"
        "add %[s], %[b] \n\t"
        "decl %[n] \n\t"
        "jnz 2b \n\t"

        /* horizontal add of the four word accumulators */
        "movq %%mm6, %%mm5 \n\t"
        "punpcklwd %%mm7, %%mm6 \n\t"
        "punpckhwd %%mm7, %%mm5 \n\t"
        "paddd %%mm6, %%mm5 \n\t"
        "movd %%mm5, %[ret] \n\t"
        "psrlq $32, %%mm5 \n\t"
        "movd %%mm5, %[n] \n\t"
        "addl %[n], %[ret] \n\t"

        "emms \n\t"
        : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b), [n] "+r" (rows)
        : [s] "r" (stride)
        : "memory", "cc"
    );
    return ret;
}
| 152 | |
/*
 * MMX version of var_y(): vertical variation inside one block.
 *
 * Sums the absolute byte differences between each of the first three rows
 * of an 8-byte-wide block and the row below it, then multiplies by 4.
 *
 * a  start of the block (rows a, a+s, a+2s and a+3s are read)
 * b  unused; present only so the signature matches the other metrics
 * s  distance in bytes between consecutive rows
 *
 * Operands are compiler-allocated and pointer-width (works on x86-64), and
 * the pointer is declared read-write because the loop advances it.
 * NOTE(review): the MMX registers are not listed as clobbers; "emms" at the
 * end resets the MMX/x87 state before returning to C code.
 */
static int var_y_mmx(unsigned char *a, unsigned char *b, int s)
{
    int ret;
    int rows = 3;           /* loop counter, reused as scratch afterwards */
    ptrdiff_t stride = s;   /* widened so it can be used in addressing */

    (void)b;

    __asm__ volatile (
        "pxor %%mm4, %%mm4 \n\t"        /* mm4: four 16-bit accumulators */
        "pxor %%mm7, %%mm7 \n\t"        /* mm7: constant zero */

        ".balign 16 \n\t"
        "1: \n\t"

        "movq (%[a]), %%mm0 \n\t"
        "movq (%[a]), %%mm2 \n\t"
        "movq (%[a],%[s]), %%mm1 \n\t"
        "add %[s], %[a] \n\t"
        /* |row - next row| as two saturating subtractions */
        "psubusb %%mm1, %%mm2 \n\t"
        "psubusb %%mm0, %%mm1 \n\t"
        "movq %%mm2, %%mm0 \n\t"
        "movq %%mm1, %%mm3 \n\t"
        "punpcklbw %%mm7, %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpckhbw %%mm7, %%mm2 \n\t"
        "punpckhbw %%mm7, %%mm3 \n\t"
        "paddw %%mm0, %%mm4 \n\t"
        "paddw %%mm1, %%mm4 \n\t"
        "paddw %%mm2, %%mm4 \n\t"
        "paddw %%mm3, %%mm4 \n\t"

        "decl %[n] \n\t"
        "jnz 1b \n\t"

        /* horizontal add of the four word accumulators */
        "movq %%mm4, %%mm3 \n\t"
        "punpcklwd %%mm7, %%mm4 \n\t"
        "punpckhwd %%mm7, %%mm3 \n\t"
        "paddd %%mm4, %%mm3 \n\t"
        "movd %%mm3, %[ret] \n\t"
        "psrlq $32, %%mm3 \n\t"
        "movd %%mm3, %[n] \n\t"
        "addl %[n], %[ret] \n\t"
        "emms \n\t"
        : [ret] "=&r" (ret), [a] "+r" (a), [n] "+r" (rows)
        : [s] "r" (stride)
        : "memory", "cc"
    );
    return 4*ret;
}
| 199 | #endif |
| 200 | |
/*
 * Absolute value of a signed integer expression.
 * Written as a plain comparison so it does not depend on the width of int
 * or on how the implementation right-shifts negative values.
 * The argument is evaluated more than once; do not pass expressions with
 * side effects.
 */
#define ABS(a) ((a) < 0 ? -(a) : (a))
| 202 | |
/*
 * Sum of absolute byte differences between two blocks that are 8 bytes wide
 * and 4 rows tall.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows of each block
 *
 * Returns the accumulated sum.
 */
static int diff_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            diff += abs(a[col] - b[col]);
        a += s;
        b += s;
    }
    return diff;
}
| 212 | |
/*
 * Combing metric over a block that is 8 bytes wide and 4 rows tall.
 *
 * For every byte position it accumulates
 *     |2*a[j] - b[j-s] - b[j]|  +  |2*b[j] - a[j] - a[j+s]|
 * so the row before b and the row after the last row of a are read too.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows of each block
 *
 * Returns the accumulated sum.
 */
static int licomb_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++) {
            diff += abs((a[col] << 1) - b[col - s] - b[col]);
            diff += abs((b[col] << 1) - a[col] - a[col + s]);
        }
        a += s;
        b += s;
    }
    return diff;
}
| 224 | |
/*
 * Alternative combing metric over a block that is 8 bytes wide and 4 rows
 * tall.
 *
 * For every byte position it accumulates
 *     |a[j] - 3*b[j-s] + 3*a[j+s] - b[j]|
 * so the row before b and the row after the last row of a are read too.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows of each block
 *
 * Returns the accumulated sum.
 */
static int qpcomb_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            diff += abs(a[col] - 3*b[col - s] + 3*a[col + s] - b[col]);
        a += s;
        b += s;
    }
    return diff;
}
| 235 | |
#if 0
/*
 * Debug helper (compiled out): runs the C and the MMX combing metric on the
 * same input, prints both values when they disagree, and returns the MMX
 * result.
 */
static int licomb_y_test(unsigned char *a, unsigned char *b, int s)
{
    const int c_result = licomb_y(a, b, s);
    const int mmx_result = licomb_y_mmx(a, b, s);

    if (c_result != mmx_result) {
        printf("%d != %d\n", c_result, mmx_result);
    }
    return mmx_result;
}
#endif
| 245 | |
/*
 * Vertical variation inside one block.
 *
 * Sums the absolute byte differences between each of the first three rows
 * of an 8-byte-wide block and the row below it, then multiplies by 4.
 *
 * a  start of the block (rows a, a+s, a+2s and a+3s are read)
 * b  unused; present only so the signature matches the other metrics
 * s  distance in bytes between consecutive rows
 */
static int var_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, var = 0;

    (void)b;

    for (row = 0; row < 3; row++) {
        for (col = 0; col < 8; col++)
            var += abs(a[col] - a[col + s]);
        a += s;
    }
    return 4*var; /* match comb scaling */
}
| 257 | |
| 258 | |
| 259 | |
| 260 | |
| 261 | |
| 262 | |
| 263 | |
| 264 | |
| 265 | |
| 266 | static void alloc_buffer(struct pullup_context *c, struct pullup_buffer *b) |
| 267 | { |
| 268 | int i; |
| 269 | if (b->planes) return; |
| 270 | b->planes = calloc(c->nplanes, sizeof(unsigned char *)); |
| 271 | for (i = 0; i < c->nplanes; i++) { |
| 272 | b->planes[i] = malloc(c->h[i]*c->stride[i]); |
| 273 | /* Deal with idiotic 128=0 for chroma: */ |
| 274 | memset(b->planes[i], c->background[i], c->h[i]*c->stride[i]); |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | struct pullup_buffer *pullup_lock_buffer(struct pullup_buffer *b, int parity) |
| 279 | { |
| 280 | if (!b) return 0; |
| 281 | if ((parity+1) & 1) b->lock[0]++; |
| 282 | if ((parity+1) & 2) b->lock[1]++; |
| 283 | return b; |
| 284 | } |
| 285 | |
| 286 | void pullup_release_buffer(struct pullup_buffer *b, int parity) |
| 287 | { |
| 288 | if (!b) return; |
| 289 | if ((parity+1) & 1) b->lock[0]--; |
| 290 | if ((parity+1) & 2) b->lock[1]--; |
| 291 | } |
| 292 | |
| 293 | struct pullup_buffer *pullup_get_buffer(struct pullup_context *c, int parity) |
| 294 | { |
| 295 | int i; |
| 296 | |
| 297 | /* Try first to get the sister buffer for the previous field */ |
| 298 | if (parity < 2 && c->last && parity != c->last->parity |
| 299 | && !c->last->buffer->lock[parity]) { |
| 300 | alloc_buffer(c, c->last->buffer); |
| 301 | return pullup_lock_buffer(c->last->buffer, parity); |
| 302 | } |
| 303 | |
| 304 | /* Prefer a buffer with both fields open */ |
| 305 | for (i = 0; i < c->nbuffers; i++) { |
| 306 | if (c->buffers[i].lock[0]) continue; |
| 307 | if (c->buffers[i].lock[1]) continue; |
| 308 | alloc_buffer(c, &c->buffers[i]); |
| 309 | return pullup_lock_buffer(&c->buffers[i], parity); |
| 310 | } |
| 311 | |
| 312 | if (parity == 2) return 0; |
| 313 | |
| 314 | /* Search for any half-free buffer */ |
| 315 | for (i = 0; i < c->nbuffers; i++) { |
| 316 | if (((parity+1) & 1) && c->buffers[i].lock[0]) continue; |
| 317 | if (((parity+1) & 2) && c->buffers[i].lock[1]) continue; |
| 318 | alloc_buffer(c, &c->buffers[i]); |
| 319 | return pullup_lock_buffer(&c->buffers[i], parity); |
| 320 | } |
| 321 | |
| 322 | return 0; |
| 323 | } |
| 324 | |
| 325 | |
| 326 | |
| 327 | |
| 328 | |
| 329 | |
| 330 | static void compute_metric(struct pullup_context *c, |
| 331 | struct pullup_field *fa, int pa, |
| 332 | struct pullup_field *fb, int pb, |
| 333 | int (*func)(unsigned char *, unsigned char *, int), int *dest) |
| 334 | { |
| 335 | unsigned char *a, *b; |
| 336 | int x, y; |
| 337 | int mp = c->metric_plane; |
| 338 | int xstep = c->bpp[mp]; |
| 339 | int ystep = c->stride[mp]<<3; |
| 340 | int s = c->stride[mp]<<1; /* field stride */ |
| 341 | int w = c->metric_w*xstep; |
| 342 | |
| 343 | if (!fa->buffer || !fb->buffer) return; |
| 344 | |
| 345 | /* Shortcut for duplicate fields (e.g. from RFF flag) */ |
| 346 | if (fa->buffer == fb->buffer && pa == pb) { |
| 347 | memset(dest, 0, c->metric_len * sizeof(int)); |
| 348 | return; |
| 349 | } |
| 350 | |
| 351 | a = fa->buffer->planes[mp] + pa * c->stride[mp] + c->metric_offset; |
| 352 | b = fb->buffer->planes[mp] + pb * c->stride[mp] + c->metric_offset; |
| 353 | |
| 354 | for (y = c->metric_h; y; y--) { |
| 355 | for (x = 0; x < w; x += xstep) { |
| 356 | *dest++ = func(a + x, b + x, s); |
| 357 | } |
| 358 | a += ystep; b += ystep; |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | |
| 363 | |
| 364 | |
| 365 | |
| 366 | static void alloc_metrics(struct pullup_context *c, struct pullup_field *f) |
| 367 | { |
| 368 | f->diffs = calloc(c->metric_len, sizeof(int)); |
| 369 | f->comb = calloc(c->metric_len, sizeof(int)); |
| 370 | f->var = calloc(c->metric_len, sizeof(int)); |
| 371 | /* add more metrics here as needed */ |
| 372 | } |
| 373 | |
| 374 | static struct pullup_field *make_field_queue(struct pullup_context *c, int len) |
| 375 | { |
| 376 | struct pullup_field *head, *f; |
| 377 | f = head = calloc(1, sizeof(struct pullup_field)); |
| 378 | alloc_metrics(c, f); |
| 379 | for (; len > 0; len--) { |
| 380 | f->next = calloc(1, sizeof(struct pullup_field)); |
| 381 | f->next->prev = f; |
| 382 | f = f->next; |
| 383 | alloc_metrics(c, f); |
| 384 | } |
| 385 | f->next = head; |
| 386 | head->prev = f; |
| 387 | return head; |
| 388 | } |
| 389 | |
| 390 | static void check_field_queue(struct pullup_context *c) |
| 391 | { |
| 392 | if (c->head->next == c->first) { |
| 393 | struct pullup_field *f = calloc(1, sizeof(struct pullup_field)); |
| 394 | alloc_metrics(c, f); |
| 395 | f->prev = c->head; |
| 396 | f->next = c->first; |
| 397 | c->head->next = f; |
| 398 | c->first->prev = f; |
| 399 | } |
| 400 | } |
| 401 | |
| 402 | void pullup_submit_field(struct pullup_context *c, struct pullup_buffer *b, int parity) |
| 403 | { |
| 404 | struct pullup_field *f; |
| 405 | |
| 406 | /* Grow the circular list if needed */ |
| 407 | check_field_queue(c); |
| 408 | |
| 409 | /* Cannot have two fields of same parity in a row; drop the new one */ |
| 410 | if (c->last && c->last->parity == parity) return; |
| 411 | |
| 412 | f = c->head; |
| 413 | f->parity = parity; |
| 414 | f->buffer = pullup_lock_buffer(b, parity); |
| 415 | f->flags = 0; |
| 416 | f->breaks = 0; |
| 417 | f->affinity = 0; |
| 418 | |
| 419 | compute_metric(c, f, parity, f->prev->prev, parity, c->diff, f->diffs); |
| 420 | compute_metric(c, parity?f->prev:f, 0, parity?f:f->prev, 1, c->comb, f->comb); |
| 421 | compute_metric(c, f, parity, f, -1, c->var, f->var); |
| 422 | |
| 423 | /* Advance the circular list */ |
| 424 | if (!c->first) c->first = c->head; |
| 425 | c->last = c->head; |
| 426 | c->head = c->head->next; |
| 427 | } |
| 428 | |
| 429 | void pullup_flush_fields(struct pullup_context *c) |
| 430 | { |
| 431 | struct pullup_field *f; |
| 432 | |
| 433 | for (f = c->first; f && f != c->head; f = f->next) { |
| 434 | pullup_release_buffer(f->buffer, f->parity); |
| 435 | f->buffer = 0; |
| 436 | } |
| 437 | c->first = c->last = 0; |
| 438 | } |
| 439 | |
| 440 | |
| 441 | |
| 442 | |
| 443 | |
| 444 | |
| 445 | |
| 446 | |
/* Bits of pullup_field.flags: which analyses have already been done. */
enum {
    F_HAVE_BREAKS = 1,
    F_HAVE_AFFINITY = 2
};

/* Bits of pullup_field.breaks: a break on the left/right side of a field. */
enum {
    BREAK_LEFT = 1,
    BREAK_RIGHT = 2
};
| 453 | |
| 454 | |
| 455 | |
| 456 | |
| 457 | static int queue_length(struct pullup_field *begin, struct pullup_field *end) |
| 458 | { |
| 459 | int count = 1; |
| 460 | struct pullup_field *f; |
| 461 | |
| 462 | if (!begin || !end) return 0; |
| 463 | for (f = begin; f != end; f = f->next) count++; |
| 464 | return count; |
| 465 | } |
| 466 | |
| 467 | static int find_first_break(struct pullup_field *f, int max) |
| 468 | { |
| 469 | int i; |
| 470 | for (i = 0; i < max; i++) { |
| 471 | if (f->breaks & BREAK_RIGHT || f->next->breaks & BREAK_LEFT) |
| 472 | return i+1; |
| 473 | f = f->next; |
| 474 | } |
| 475 | return 0; |
| 476 | } |
| 477 | |
| 478 | static void compute_breaks(struct pullup_context *c, struct pullup_field *f0) |
| 479 | { |
| 480 | int i; |
| 481 | struct pullup_field *f1 = f0->next; |
| 482 | struct pullup_field *f2 = f1->next; |
| 483 | struct pullup_field *f3 = f2->next; |
| 484 | int l, max_l=0, max_r=0; |
| 485 | //struct pullup_field *ff; |
| 486 | //for (i=0, ff=c->first; ff != f0; i++, ff=ff->next); |
| 487 | |
| 488 | if (f0->flags & F_HAVE_BREAKS) return; |
| 489 | //printf("\n%d: ", i); |
| 490 | f0->flags |= F_HAVE_BREAKS; |
| 491 | |
| 492 | /* Special case when fields are 100% identical */ |
| 493 | if (f0->buffer == f2->buffer && f1->buffer != f3->buffer) { |
| 494 | f2->breaks |= BREAK_RIGHT; |
| 495 | return; |
| 496 | } |
| 497 | if (f0->buffer != f2->buffer && f1->buffer == f3->buffer) { |
| 498 | f1->breaks |= BREAK_LEFT; |
| 499 | return; |
| 500 | } |
| 501 | |
| 502 | for (i = 0; i < c->metric_len; i++) { |
| 503 | l = f2->diffs[i] - f3->diffs[i]; |
| 504 | if (l > max_l) max_l = l; |
| 505 | if (-l > max_r) max_r = -l; |
| 506 | } |
| 507 | /* Don't get tripped up when differences are mostly quant error */ |
| 508 | //printf("%d %d\n", max_l, max_r); |
| 509 | if (max_l + max_r < 128) return; |
| 510 | if (max_l > 4*max_r) f1->breaks |= BREAK_LEFT; |
| 511 | if (max_r > 4*max_l) f2->breaks |= BREAK_RIGHT; |
| 512 | } |
| 513 | |
| 514 | static void compute_affinity(struct pullup_context *c, struct pullup_field *f) |
| 515 | { |
| 516 | int i; |
| 517 | int max_l=0, max_r=0, l; |
| 518 | if (f->flags & F_HAVE_AFFINITY) return; |
| 519 | f->flags |= F_HAVE_AFFINITY; |
| 520 | if (f->buffer == f->next->next->buffer) { |
| 521 | f->affinity = 1; |
| 522 | f->next->affinity = 0; |
| 523 | f->next->next->affinity = -1; |
| 524 | f->next->flags |= F_HAVE_AFFINITY; |
| 525 | f->next->next->flags |= F_HAVE_AFFINITY; |
| 526 | return; |
| 527 | } |
| 528 | if (1) { |
| 529 | for (i = 0; i < c->metric_len; i++) { |
| 530 | int lv = f->prev->var[i]; |
| 531 | int rv = f->next->var[i]; |
| 532 | int v = f->var[i]; |
| 533 | int lc = f->comb[i] - (v+lv) + ABS(v-lv); |
| 534 | int rc = f->next->comb[i] - (v+rv) + ABS(v-rv); |
| 535 | lc = lc>0 ? lc : 0; |
| 536 | rc = rc>0 ? rc : 0; |
| 537 | l = lc - rc; |
| 538 | if (l > max_l) max_l = l; |
| 539 | if (-l > max_r) max_r = -l; |
| 540 | } |
| 541 | if (max_l + max_r < 64) return; |
| 542 | if (max_r > 6*max_l) f->affinity = -1; |
| 543 | else if (max_l > 6*max_r) f->affinity = 1; |
| 544 | } else { |
| 545 | for (i = 0; i < c->metric_len; i++) { |
| 546 | l = f->comb[i] - f->next->comb[i]; |
| 547 | if (l > max_l) max_l = l; |
| 548 | if (-l > max_r) max_r = -l; |
| 549 | } |
| 550 | if (max_l + max_r < 64) return; |
| 551 | if (max_r > 2*max_l) f->affinity = -1; |
| 552 | else if (max_l > 2*max_r) f->affinity = 1; |
| 553 | } |
| 554 | } |
| 555 | |
| 556 | static void foo(struct pullup_context *c) |
| 557 | { |
| 558 | struct pullup_field *f = c->first; |
| 559 | int i, n = queue_length(f, c->last); |
| 560 | for (i = 0; i < n-1; i++) { |
| 561 | if (i < n-3) compute_breaks(c, f); |
| 562 | compute_affinity(c, f); |
| 563 | f = f->next; |
| 564 | } |
| 565 | } |
| 566 | |
| 567 | static int decide_frame_length(struct pullup_context *c) |
| 568 | { |
| 569 | struct pullup_field *f0 = c->first; |
| 570 | struct pullup_field *f1 = f0->next; |
| 571 | struct pullup_field *f2 = f1->next; |
| 572 | struct pullup_field *f3 = f2->next; |
| 573 | int l; |
| 574 | |
| 575 | if (queue_length(c->first, c->last) < 4) return 0; |
| 576 | foo(c); |
| 577 | |
| 578 | if (f0->affinity == -1) return 1; |
| 579 | |
| 580 | l = find_first_break(f0, 3); |
| 581 | if (l == 1 && c->strict_breaks < 0) l = 0; |
| 582 | |
| 583 | switch (l) { |
| 584 | case 1: |
| 585 | if (c->strict_breaks < 1 && f0->affinity == 1 && f1->affinity == -1) |
| 586 | return 2; |
| 587 | else return 1; |
| 588 | case 2: |
| 589 | /* FIXME: strictly speaking, f0->prev is no longer valid... :) */ |
| 590 | if (c->strict_pairs |
| 591 | && (f0->prev->breaks & BREAK_RIGHT) && (f2->breaks & BREAK_LEFT) |
| 592 | && (f0->affinity != 1 || f1->affinity != -1) ) |
| 593 | return 1; |
| 594 | if (f1->affinity == 1) return 1; |
| 595 | else return 2; |
| 596 | case 3: |
| 597 | if (f2->affinity == 1) return 2; |
| 598 | else return 3; |
| 599 | default: |
| 600 | /* 9 possibilities covered before switch */ |
| 601 | if (f1->affinity == 1) return 1; /* covers 6 */ |
| 602 | else if (f1->affinity == -1) return 2; /* covers 6 */ |
| 603 | else if (f2->affinity == -1) { /* covers 2 */ |
| 604 | if (f0->affinity == 1) return 3; |
| 605 | else return 1; |
| 606 | } |
| 607 | else return 2; /* the remaining 6 */ |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | |
| 612 | static void print_aff_and_breaks(struct pullup_context *c, struct pullup_field *f) |
| 613 | { |
| 614 | int i; |
| 615 | struct pullup_field *f0 = f; |
| 616 | const char aff_l[] = "+..", aff_r[] = "..+"; |
| 617 | printf("\naffinity: "); |
| 618 | for (i = 0; i < 4; i++) { |
| 619 | printf("%c%d%c", aff_l[1+f->affinity], i, aff_r[1+f->affinity]); |
| 620 | f = f->next; |
| 621 | } |
| 622 | f = f0; |
| 623 | printf("\nbreaks: "); |
| 624 | for (i=0; i<4; i++) { |
| 625 | printf("%c%d%c", f->breaks & BREAK_LEFT ? '|' : '.', i, f->breaks & BREAK_RIGHT ? '|' : '.'); |
| 626 | f = f->next; |
| 627 | } |
| 628 | printf("\n"); |
| 629 | } |
| 630 | |
| 631 | |
| 632 | |
| 633 | |
| 634 | |
| 635 | struct pullup_frame *pullup_get_frame(struct pullup_context *c) |
| 636 | { |
| 637 | int i; |
| 638 | struct pullup_frame *fr = c->frame; |
| 639 | int n = decide_frame_length(c); |
| 640 | int aff = c->first->next->affinity; |
| 641 | |
| 642 | if (!n) return 0; |
| 643 | if (fr->lock) return 0; |
| 644 | |
| 645 | if (c->verbose) { |
| 646 | print_aff_and_breaks(c, c->first); |
| 647 | printf("duration: %d \n", n); |
| 648 | } |
| 649 | |
| 650 | fr->lock++; |
| 651 | fr->length = n; |
| 652 | fr->parity = c->first->parity; |
| 653 | fr->buffer = 0; |
| 654 | for (i = 0; i < n; i++) { |
| 655 | /* We cheat and steal the buffer without release+relock */ |
| 656 | fr->ifields[i] = c->first->buffer; |
| 657 | c->first->buffer = 0; |
| 658 | c->first = c->first->next; |
| 659 | } |
| 660 | |
| 661 | if (n == 1) { |
| 662 | fr->ofields[fr->parity] = fr->ifields[0]; |
| 663 | fr->ofields[fr->parity^1] = 0; |
| 664 | } else if (n == 2) { |
| 665 | fr->ofields[fr->parity] = fr->ifields[0]; |
| 666 | fr->ofields[fr->parity^1] = fr->ifields[1]; |
| 667 | } else if (n == 3) { |
| 668 | if (aff == 0) |
| 669 | aff = (fr->ifields[0] == fr->ifields[1]) ? -1 : 1; |
| 670 | /* else if (c->verbose) printf("forced aff: %d \n", aff); */ |
| 671 | fr->ofields[fr->parity] = fr->ifields[1+aff]; |
| 672 | fr->ofields[fr->parity^1] = fr->ifields[1]; |
| 673 | } |
| 674 | pullup_lock_buffer(fr->ofields[0], 0); |
| 675 | pullup_lock_buffer(fr->ofields[1], 1); |
| 676 | |
| 677 | if (fr->ofields[0] == fr->ofields[1]) { |
| 678 | fr->buffer = fr->ofields[0]; |
| 679 | pullup_lock_buffer(fr->buffer, 2); |
| 680 | return fr; |
| 681 | } |
| 682 | return fr; |
| 683 | } |
| 684 | |
| 685 | static void copy_field(struct pullup_context *c, struct pullup_buffer *dest, |
| 686 | struct pullup_buffer *src, int parity) |
| 687 | { |
| 688 | int i, j; |
| 689 | unsigned char *d, *s; |
| 690 | for (i = 0; i < c->nplanes; i++) { |
| 691 | s = src->planes[i] + parity*c->stride[i]; |
| 692 | d = dest->planes[i] + parity*c->stride[i]; |
| 693 | for (j = c->h[i]>>1; j; j--) { |
| 694 | memcpy(d, s, c->stride[i]); |
| 695 | s += c->stride[i]<<1; |
| 696 | d += c->stride[i]<<1; |
| 697 | } |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | void pullup_pack_frame(struct pullup_context *c, struct pullup_frame *fr) |
| 702 | { |
| 703 | int i; |
| 704 | int par = fr->parity; |
| 705 | if (fr->buffer) return; |
| 706 | if (fr->length < 2) return; /* FIXME: deal with this */ |
| 707 | for (i = 0; i < 2; i++) |
| 708 | { |
| 709 | if (fr->ofields[i]->lock[i^1]) continue; |
| 710 | fr->buffer = fr->ofields[i]; |
| 711 | pullup_lock_buffer(fr->buffer, 2); |
| 712 | copy_field(c, fr->buffer, fr->ofields[i^1], i^1); |
| 713 | return; |
| 714 | } |
| 715 | fr->buffer = pullup_get_buffer(c, 2); |
| 716 | copy_field(c, fr->buffer, fr->ofields[0], 0); |
| 717 | copy_field(c, fr->buffer, fr->ofields[1], 1); |
| 718 | } |
| 719 | |
| 720 | void pullup_release_frame(struct pullup_frame *fr) |
| 721 | { |
| 722 | int i; |
| 723 | for (i = 0; i < fr->length; i++) |
| 724 | pullup_release_buffer(fr->ifields[i], fr->parity ^ (i&1)); |
| 725 | pullup_release_buffer(fr->ofields[0], 0); |
| 726 | pullup_release_buffer(fr->ofields[1], 1); |
| 727 | if (fr->buffer) pullup_release_buffer(fr->buffer, 2); |
| 728 | fr->lock--; |
| 729 | } |
| 730 | |
| 731 | |
| 732 | |
| 733 | |
| 734 | |
| 735 | |
| 736 | struct pullup_context *pullup_alloc_context(void) |
| 737 | { |
| 738 | struct pullup_context *c; |
| 739 | |
| 740 | c = calloc(1, sizeof(struct pullup_context)); |
| 741 | |
| 742 | return c; |
| 743 | } |
| 744 | |
| 745 | void pullup_preinit_context(struct pullup_context *c) |
| 746 | { |
| 747 | c->bpp = calloc(c->nplanes, sizeof(int)); |
| 748 | c->w = calloc(c->nplanes, sizeof(int)); |
| 749 | c->h = calloc(c->nplanes, sizeof(int)); |
| 750 | c->stride = calloc(c->nplanes, sizeof(int)); |
| 751 | c->background = calloc(c->nplanes, sizeof(int)); |
| 752 | } |
| 753 | |
| 754 | void pullup_init_context(struct pullup_context *c) |
| 755 | { |
| 756 | int mp = c->metric_plane; |
| 757 | if (c->nbuffers < 10) c->nbuffers = 10; |
| 758 | c->buffers = calloc(c->nbuffers, sizeof (struct pullup_buffer)); |
| 759 | |
| 760 | c->metric_w = (c->w[mp] - ((c->junk_left + c->junk_right) << 3)) >> 3; |
| 761 | c->metric_h = (c->h[mp] - ((c->junk_top + c->junk_bottom) << 1)) >> 3; |
| 762 | c->metric_offset = c->junk_left*c->bpp[mp] + (c->junk_top<<1)*c->stride[mp]; |
| 763 | c->metric_len = c->metric_w * c->metric_h; |
| 764 | |
| 765 | c->head = make_field_queue(c, 8); |
| 766 | |
| 767 | c->frame = calloc(1, sizeof (struct pullup_frame)); |
| 768 | c->frame->ifields = calloc(3, sizeof (struct pullup_buffer *)); |
| 769 | |
| 770 | switch(c->format) { |
| 771 | case PULLUP_FMT_Y: |
| 772 | c->diff = diff_y; |
| 773 | c->comb = licomb_y; |
| 774 | c->var = var_y; |
| 775 | #ifdef HAVE_MMX |
| 776 | if (c->cpu & PULLUP_CPU_MMX) { |
| 777 | c->diff = diff_y_mmx; |
| 778 | c->comb = licomb_y_mmx; |
| 779 | c->var = var_y_mmx; |
| 780 | } |
| 781 | #endif |
| 782 | /* c->comb = qpcomb_y; */ |
| 783 | break; |
| 784 | #if 0 |
| 785 | case PULLUP_FMT_YUY2: |
| 786 | c->diff = diff_yuy2; |
| 787 | break; |
| 788 | case PULLUP_FMT_RGB32: |
| 789 | c->diff = diff_rgb32; |
| 790 | break; |
| 791 | #endif |
| 792 | } |
| 793 | } |
| 794 | |
| 795 | void pullup_free_context(struct pullup_context *c) |
| 796 | { |
| 797 | struct pullup_field *f; |
| 798 | free(c->buffers); |
| 799 | f = c->head; |
| 800 | do { |
| 801 | free(f->diffs); |
| 802 | free(f->comb); |
| 803 | f = f->next; |
| 804 | free(f->prev); |
| 805 | } while (f != c->head); |
| 806 | free(c->frame); |
| 807 | free(c); |
| 808 | } |
| 809 | |
| 810 | |
| 811 | |
| 812 | |
| 813 | |
| 814 | |
| 815 | |
| 816 | |
| 817 | |