| 348 | | Index: libavcodec/i386/flacdsp_mmx.c |
|---|
| 349 | | =================================================================== |
|---|
| 350 | | --- libavcodec/i386/flacdsp_mmx.c (revision 11270) |
|---|
| 351 | | +++ libavcodec/i386/flacdsp_mmx.c (working copy) |
|---|
| 352 | | @@ -26,7 +26,6 @@ |
|---|
| 353 | | double c = 2.0 / (len-1.0); |
|---|
| 354 | | int n2 = len>>1; |
|---|
| 355 | | long i = -n2*sizeof(int32_t); |
|---|
| 356 | | - long j = n2*sizeof(int32_t); |
|---|
| 357 | | asm volatile( |
|---|
| 358 | | "movsd %0, %%xmm7 \n\t" |
|---|
| 359 | | "movapd %1, %%xmm6 \n\t" |
|---|
| 360 | | @@ -44,17 +43,18 @@ |
|---|
| 361 | | "movapd %%xmm6, %%xmm0 \n\t"\ |
|---|
| 362 | | "subpd %%xmm1, %%xmm0 \n\t"\ |
|---|
| 363 | | "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ |
|---|
| 364 | | - "cvtpi2pd (%4,%0), %%xmm2 \n\t"\ |
|---|
| 365 | | - "cvtpi2pd (%5,%1), %%xmm3 \n\t"\ |
|---|
| 366 | | + "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ |
|---|
| 367 | | "mulpd %%xmm0, %%xmm2 \n\t"\ |
|---|
| 368 | | + "movapd %%xmm2, (%1,%0,2) \n\t"\ |
|---|
| 369 | | + "negl %0\n\t"\ |
|---|
| 370 | | + "cvtpi2pd (%4,%0), %%xmm3 \n\t"\ |
|---|
| 371 | | "mulpd %%xmm1, %%xmm3 \n\t"\ |
|---|
| 372 | | - "movapd %%xmm2, (%2,%0,2) \n\t"\ |
|---|
| 373 | | - MOVPD" %%xmm3, (%3,%1,2) \n\t"\ |
|---|
| 374 | | + MOVPD" %%xmm3, (%2,%0,2) \n\t"\ |
|---|
| 375 | | "subpd %%xmm5, %%xmm7 \n\t"\ |
|---|
| 376 | | - "sub $8, %1 \n\t"\ |
|---|
| 377 | | + "negl %0\n\t"\ |
|---|
| 378 | | "add $8, %0 \n\t"\ |
|---|
| 379 | | "jl 1b \n\t"\ |
|---|
| 380 | | - :"+&r"(i), "+&r"(j)\ |
|---|
| 381 | | + :"+&r"(i)\ |
|---|
| 382 | | :"r"(w_data+n2), "r"(w_data+len-2-n2),\ |
|---|
| 383 | | "r"(data+n2), "r"(data+len-2-n2)\ |
|---|
| 384 | | ); |
|---|
| 385 | | @@ -85,9 +85,11 @@ |
|---|
| 386 | | long i = -len*sizeof(double); |
|---|
| 387 | | if(j == lag-2) { |
|---|
| 388 | | asm volatile( |
|---|
| 389 | | - "movsd %6, %%xmm0 \n\t" |
|---|
| 390 | | - "movsd %6, %%xmm1 \n\t" |
|---|
| 391 | | - "movsd %6, %%xmm2 \n\t" |
|---|
| 392 | | + "movsd %0, %%xmm0 \n\t" |
|---|
| 393 | | + "movsd %0, %%xmm1 \n\t" |
|---|
| 394 | | + "movsd %0, %%xmm2 \n\t" |
|---|
| 395 | | + :: "m"(*ff_pd_1) ); |
|---|
| 396 | | + asm volatile( |
|---|
| 397 | | "1: \n\t" |
|---|
| 398 | | "movapd (%4,%0), %%xmm3 \n\t" |
|---|
| 399 | | "movupd -8(%5,%0), %%xmm4 \n\t" |
|---|
| 400 | | @@ -110,12 +112,14 @@ |
|---|
| 401 | | "movsd %%xmm1, %2 \n\t" |
|---|
| 402 | | "movsd %%xmm2, %3 \n\t" |
|---|
| 403 | | :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) |
|---|
| 404 | | - :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1) |
|---|
| 405 | | + :"r"(data1+len), "r"(data1+len-j) |
|---|
| 406 | | ); |
|---|
| 407 | | } else { |
|---|
| 408 | | asm volatile( |
|---|
| 409 | | - "movsd %5, %%xmm0 \n\t" |
|---|
| 410 | | - "movsd %5, %%xmm1 \n\t" |
|---|
| 411 | | + "movsd %0, %%xmm0 \n\t" |
|---|
| 412 | | + "movsd %0, %%xmm1 \n\t" |
|---|
| 413 | | + :: "m"(*ff_pd_1) ); |
|---|
| 414 | | + asm volatile( |
|---|
| 415 | | "1: \n\t" |
|---|
| 416 | | "movapd (%3,%0), %%xmm3 \n\t" |
|---|
| 417 | | "movupd -8(%4,%0), %%xmm4 \n\t" |
|---|
| 418 | | @@ -132,7 +136,7 @@ |
|---|
| 419 | | "movsd %%xmm0, %1 \n\t" |
|---|
| 420 | | "movsd %%xmm1, %2 \n\t" |
|---|
| 421 | | :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) |
|---|
| 422 | | - :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1) |
|---|
| 423 | | + :"r"(data1+len), "r"(data1+len-j) |
|---|
| 424 | | ); |
|---|
| 425 | | } |
|---|
| 426 | | } |
|---|