| 93 | | movq_r2r (mm3, mm4); // copy 8 src2 bytes |
|---|
| 94 | | |
|---|
| 95 | | punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes |
|---|
| 96 | | punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes |
|---|
| 97 | | |
|---|
| 98 | | punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes |
|---|
| 99 | | punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes |
|---|
| 100 | | |
|---|
| 101 | | paddw_r2r (mm3, mm1); // add lows to mm1 |
|---|
| 102 | | paddw_m2r (round1, mm1); |
|---|
| 103 | | psraw_i2r (1, mm1); // /2 |
|---|
| 104 | | |
|---|
| 105 | | paddw_r2r (mm4, mm2); // add highs to mm2 |
|---|
| 106 | | paddw_m2r (round1, mm2); |
|---|
| 107 | | psraw_i2r (1, mm2); // /2 |
|---|
| 108 | | |
|---|
| 109 | | packuswb_r2r (mm2, mm1); // pack (w/ saturation) |
|---|
| 110 | | movq_r2m (mm1, *dest); // store result in dest |
|---|
| | 98 | por_r2r (mm3, mm2); |
|---|
| | 99 | psrlq_i2r (1, mm3); |
|---|
| | 100 | pand_m2r (mask7f, mm3); |
|---|
| | 101 | |
|---|
| | 102 | paddb_r2r (mm1, mm3); |
|---|
| | 103 | pand_m2r (mask1, mm2); |
|---|
| | 104 | paddb_r2r (mm3, mm2); |
|---|
| | 105 | movq_r2m (mm2, *dest); // store result in dest |
|---|
| 409 | | static inline void MC_avg_xy_mmx (int width, int height, |
|---|
| 410 | | yuv_data_t * dest, yuv_data_t * ref, int stride) |
|---|
| 411 | | { |
|---|
| 412 | | yuv_data_t * ref_next = ref+stride; |
|---|
| 413 | | |
|---|
| 414 | | mmx_zero_reg (); |
|---|
| 415 | | |
|---|
| 416 | | do { |
|---|
| 417 | | mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); |
|---|
| 418 | | |
|---|
| 419 | | if (width == 16) |
|---|
| 420 | | mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, |
|---|
| 421 | | ref_next+8, ref_next+9); |
|---|
| 422 | | |
|---|
| 423 | | dest += stride; |
|---|
| 424 | | ref += stride; |
|---|
| 425 | | ref_next += stride; |
|---|
| | 404 | static inline void MC_avg_xy_8wide_mmx (int height, yuv_data_t * dest, |
|---|
| | 405 | yuv_data_t * ref, int stride) |
|---|
| | 406 | { |
|---|
| | 407 | pxor_r2r (mm0, mm0); |
|---|
| | 408 | movq_m2r (round4, mm7); |
|---|
| | 409 | |
|---|
| | 410 | movq_m2r (*ref, mm1); // calculate first row ref[0] + ref[1] |
|---|
| | 411 | movq_r2r (mm1, mm2); |
|---|
| | 412 | |
|---|
| | 413 | punpcklbw_r2r (mm0, mm1); |
|---|
| | 414 | punpckhbw_r2r (mm0, mm2); |
|---|
| | 415 | |
|---|
| | 416 | movq_m2r (*(ref+1), mm3); |
|---|
| | 417 | movq_r2r (mm3, mm4); |
|---|
| | 418 | |
|---|
| | 419 | punpcklbw_r2r (mm0, mm3); |
|---|
| | 420 | punpckhbw_r2r (mm0, mm4); |
|---|
| | 421 | |
|---|
| | 422 | paddw_r2r (mm3, mm1); |
|---|
| | 423 | paddw_r2r (mm4, mm2); |
|---|
| | 424 | |
|---|
| | 425 | ref += stride; |
|---|
| | 426 | |
|---|
| | 427 | do { |
|---|
| | 428 | |
|---|
| | 429 | movq_m2r (*ref, mm5); // calculate next row ref[0] + ref[1] |
|---|
| | 430 | movq_r2r (mm5, mm6); |
|---|
| | 431 | |
|---|
| | 432 | punpcklbw_r2r (mm0, mm5); |
|---|
| | 433 | punpckhbw_r2r (mm0, mm6); |
|---|
| | 434 | |
|---|
| | 435 | movq_m2r (*(ref+1), mm3); |
|---|
| | 436 | movq_r2r (mm3, mm4); |
|---|
| | 437 | |
|---|
| | 438 | punpcklbw_r2r (mm0, mm3); |
|---|
| | 439 | punpckhbw_r2r (mm0, mm4); |
|---|
| | 440 | |
|---|
| | 441 | paddw_r2r (mm3, mm5); |
|---|
| | 442 | paddw_r2r (mm4, mm6); |
|---|
| | 443 | |
|---|
| | 444 | movq_r2r (mm7, mm3); // calculate round4 + previous row + current row |
|---|
| | 445 | movq_r2r (mm7, mm4); |
|---|
| | 446 | |
|---|
| | 447 | paddw_r2r (mm1, mm3); |
|---|
| | 448 | paddw_r2r (mm2, mm4); |
|---|
| | 449 | |
|---|
| | 450 | paddw_r2r (mm5, mm3); |
|---|
| | 451 | paddw_r2r (mm6, mm4); |
|---|
| | 452 | |
|---|
| | 453 | psraw_i2r (2, mm3); // /4 |
|---|
| | 454 | psraw_i2r (2, mm4); // /4 |
|---|
| | 455 | |
|---|
| | 456 | movq_m2r (*dest, mm1); // calculate (subtotal + dest[0] + round1) / 2 |
|---|
| | 457 | movq_r2r (mm1, mm2); |
|---|
| | 458 | |
|---|
| | 459 | punpcklbw_r2r (mm0, mm1); |
|---|
| | 460 | punpckhbw_r2r (mm0, mm2); |
|---|
| | 461 | |
|---|
| | 462 | paddw_r2r (mm1, mm3); |
|---|
| | 463 | paddw_r2r (mm2, mm4); |
|---|
| | 464 | |
|---|
| | 465 | paddw_m2r (round1, mm3); |
|---|
| | 466 | paddw_m2r (round1, mm4); |
|---|
| | 467 | |
|---|
| | 468 | psraw_i2r (1, mm3); // /2 |
|---|
| | 469 | psraw_i2r (1, mm4); // /2 |
|---|
| | 470 | |
|---|
| | 471 | packuswb_r2r (mm4, mm3); // pack (w/ saturation) |
|---|
| | 472 | movq_r2m (mm3, *dest); // store result in dest |
|---|
| | 473 | |
|---|
| | 474 | movq_r2r (mm5, mm1); // remember current row for the next pass |
|---|
| | 475 | movq_r2r (mm6, mm2); |
|---|
| | 476 | |
|---|
| | 477 | ref += stride; |
|---|
| | 478 | dest += stride; |
|---|
| | 479 | |
|---|
| 438 | | MC_avg_xy_mmx (8, height, dest, ref, stride); |
|---|
| 439 | | } |
|---|
| 440 | | |
|---|
| 441 | | //----------------------------------------------------------------------- |
|---|
| 442 | | |
|---|
| 443 | | static inline void MC_put_xy_mmx (int width, int height, |
|---|
| 444 | | yuv_data_t * dest, yuv_data_t * ref, int stride) |
|---|
| 445 | | { |
|---|
| 446 | | yuv_data_t * ref_next = ref+stride; |
|---|
| 447 | | |
|---|
| 448 | | mmx_zero_reg (); |
|---|
| 449 | | |
|---|
| 450 | | do { |
|---|
| 451 | | mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); |
|---|
| 452 | | |
|---|
| 453 | | if (width == 16) |
|---|
| 454 | | mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); |
|---|
| 455 | | |
|---|
| 456 | | dest += stride; |
|---|
| 457 | | ref += stride; |
|---|
| 458 | | ref_next += stride; |
|---|
| | 493 | MC_avg_xy_8wide_mmx(height, dest, ref, stride); |
|---|
| | 494 | } |
|---|
| | 495 | |
|---|
| | 496 | //----------------------------------------------------------------------- |
|---|
| | 497 | |
|---|
| | 498 | static inline void MC_put_xy_8wide_mmx (int height, yuv_data_t * dest, |
|---|
| | 499 | yuv_data_t * ref, int stride) |
|---|
| | 500 | { |
|---|
| | 501 | pxor_r2r (mm0, mm0); |
|---|
| | 502 | movq_m2r (round4, mm7); |
|---|
| | 503 | |
|---|
| | 504 | movq_m2r (*ref, mm1); // calculate first row ref[0] + ref[1] |
|---|
| | 505 | movq_r2r (mm1, mm2); |
|---|
| | 506 | |
|---|
| | 507 | punpcklbw_r2r (mm0, mm1); |
|---|
| | 508 | punpckhbw_r2r (mm0, mm2); |
|---|
| | 509 | |
|---|
| | 510 | movq_m2r (*(ref+1), mm3); |
|---|
| | 511 | movq_r2r (mm3, mm4); |
|---|
| | 512 | |
|---|
| | 513 | punpcklbw_r2r (mm0, mm3); |
|---|
| | 514 | punpckhbw_r2r (mm0, mm4); |
|---|
| | 515 | |
|---|
| | 516 | paddw_r2r (mm3, mm1); |
|---|
| | 517 | paddw_r2r (mm4, mm2); |
|---|
| | 518 | |
|---|
| | 519 | ref += stride; |
|---|
| | 520 | |
|---|
| | 521 | do { |
|---|
| | 522 | |
|---|
| | 523 | movq_m2r (*ref, mm5); // calculate next row ref[0] + ref[1] |
|---|
| | 524 | movq_r2r (mm5, mm6); |
|---|
| | 525 | |
|---|
| | 526 | punpcklbw_r2r (mm0, mm5); |
|---|
| | 527 | punpckhbw_r2r (mm0, mm6); |
|---|
| | 528 | |
|---|
| | 529 | movq_m2r (*(ref+1), mm3); |
|---|
| | 530 | movq_r2r (mm3, mm4); |
|---|
| | 531 | |
|---|
| | 532 | punpcklbw_r2r (mm0, mm3); |
|---|
| | 533 | punpckhbw_r2r (mm0, mm4); |
|---|
| | 534 | |
|---|
| | 535 | paddw_r2r (mm3, mm5); |
|---|
| | 536 | paddw_r2r (mm4, mm6); |
|---|
| | 537 | |
|---|
| | 538 | movq_r2r (mm7, mm3); // calculate round4 + previous row + current row |
|---|
| | 539 | movq_r2r (mm7, mm4); |
|---|
| | 540 | |
|---|
| | 541 | paddw_r2r (mm1, mm3); |
|---|
| | 542 | paddw_r2r (mm2, mm4); |
|---|
| | 543 | |
|---|
| | 544 | paddw_r2r (mm5, mm3); |
|---|
| | 545 | paddw_r2r (mm6, mm4); |
|---|
| | 546 | |
|---|
| | 547 | psraw_i2r (2, mm3); // /4 |
|---|
| | 548 | psraw_i2r (2, mm4); // /4 |
|---|
| | 549 | |
|---|
| | 550 | packuswb_r2r (mm4, mm3); // pack (w/ saturation) |
|---|
| | 551 | movq_r2m (mm3, *dest); // store result in dest |
|---|
| | 552 | |
|---|
| | 553 | movq_r2r (mm5, mm1); // advance to the next row |
|---|
| | 554 | movq_r2r (mm6, mm2); |
|---|
| | 555 | |
|---|
| | 556 | ref += stride; |
|---|
| | 557 | dest += stride; |
|---|
| | 558 | |
|---|