Changeset 6c50d21cebc1c6c809e56400f4cc5ed7aa6856a4
- Timestamp:
- 26/03/08 07:28:36
(8 months ago)
- Author:
- Rafaël Carré <funman@videolan.org>
- git-committer:
- Rafaël Carré <funman@videolan.org> 1206512916 +0100
- git-parent:
[df95113d3e501442e5362c6d763e2decb6ec0211]
- git-author:
- Rafaël Carré <funman@videolan.org> 1206512916 +0100
- Message:
let gcc choose how to reference memory addresses in i420_rgx mmx asm
patch by Alexis Ballier, fix #1383
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r6ee1e19 |
r6c50d21 |
|
| 35 | 35 | static const uint64_t foo __asm__ (#foo) __attribute__((unused)) |
|---|
| 36 | 36 | #endif |
|---|
| 37 | | USED_U64(mmx_80w) = 0x0080008000800080ULL; |
|---|
| 38 | | USED_U64(mmx_10w) = 0x1010101010101010ULL; |
|---|
| 39 | | USED_U64(mmx_00ffw) = 0x00ff00ff00ff00ffULL; |
|---|
| 40 | | USED_U64(mmx_Y_coeff) = 0x253f253f253f253fULL; |
|---|
| 41 | | |
|---|
| 42 | | USED_U64(mmx_U_green) = 0xf37df37df37df37dULL; |
|---|
| 43 | | USED_U64(mmx_U_blue) = 0x4093409340934093ULL; |
|---|
| 44 | | USED_U64(mmx_V_red) = 0x3312331233123312ULL; |
|---|
| 45 | | USED_U64(mmx_V_green) = 0xe5fce5fce5fce5fcULL; |
|---|
| 46 | | |
|---|
| 47 | | USED_U64(mmx_mask_f8) = 0xf8f8f8f8f8f8f8f8ULL; |
|---|
| 48 | | USED_U64(mmx_mask_fc) = 0xfcfcfcfcfcfcfcfcULL; |
|---|
| | 37 | USED_U64(mmx_80w) = 0x0080008000800080ULL; /* Will be referenced as %4 |
|---|
| | 38 | * in inline asm */ |
|---|
| | 39 | USED_U64(mmx_10w) = 0x1010101010101010ULL; /* -- as %5 */ |
|---|
| | 40 | USED_U64(mmx_00ffw) = 0x00ff00ff00ff00ffULL; /* -- as %6 */ |
|---|
| | 41 | USED_U64(mmx_Y_coeff) = 0x253f253f253f253fULL; /* -- as %7 */ |
|---|
| | 42 | |
|---|
| | 43 | USED_U64(mmx_U_green) = 0xf37df37df37df37dULL; /* -- as %8 */ |
|---|
| | 44 | USED_U64(mmx_U_blue) = 0x4093409340934093ULL; /* -- as %9 */ |
|---|
| | 45 | USED_U64(mmx_V_red) = 0x3312331233123312ULL; /* -- as %10 */ |
|---|
| | 46 | USED_U64(mmx_V_green) = 0xe5fce5fce5fce5fcULL; /* -- as %11 */ |
|---|
| | 47 | |
|---|
| | 48 | USED_U64(mmx_mask_f8) = 0xf8f8f8f8f8f8f8f8ULL; /* -- as %12 */ |
|---|
| | 49 | USED_U64(mmx_mask_fc) = 0xfcfcfcfcfcfcfcfcULL; /* -- as %13 */ |
|---|
| 49 | 50 | #undef USED_U64 |
|---|
| 50 | 51 | |
|---|
| … | … | |
| 60 | 61 | : \ |
|---|
| 61 | 62 | : "r" (p_y), "r" (p_u), \ |
|---|
| 62 | | "r" (p_v), "r" (p_buffer) ); \ |
|---|
| | 63 | "r" (p_v), "r" (p_buffer), \ |
|---|
| | 64 | "m" (mmx_80w), "m" (mmx_10w), \ |
|---|
| | 65 | "m" (mmx_00ffw), "m" (mmx_Y_coeff), \ |
|---|
| | 66 | "m" (mmx_U_green), "m" (mmx_U_blue), \ |
|---|
| | 67 | "m" (mmx_V_red), "m" (mmx_V_green), \ |
|---|
| | 68 | "m" (mmx_mask_f8), "m" (mmx_mask_fc) ); \ |
|---|
| 63 | 69 | } while(0) |
|---|
| 64 | 70 | |
|---|
| 65 | 71 | #define MMX_END __asm__ __volatile__ ( "emms" ) |
|---|
| 66 | | |
|---|
| 67 | | /* Use RIP-relative code in PIC mode on amd64 */ |
|---|
| 68 | | #if defined(__x86_64__) && defined(__PIC__) |
|---|
| 69 | | # define G "(%%rip)" |
|---|
| 70 | | #else |
|---|
| 71 | | # define G |
|---|
| 72 | | #endif |
|---|
| 73 | 72 | |
|---|
| 74 | 73 | #define MMX_INIT_16 " \n\ |
|---|
| … | … | |
| 104 | 103 | punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0 \n\ |
|---|
| 105 | 104 | punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0 \n\ |
|---|
| 106 | | psubsw mmx_80w"G", %%mm0 # Cb -= 128 \n\ |
|---|
| 107 | | psubsw mmx_80w"G", %%mm1 # Cr -= 128 \n\ |
|---|
| | 105 | psubsw %4, %%mm0 # Cb -= 128 \n\ |
|---|
| | 106 | psubsw %4, %%mm1 # Cr -= 128 \n\ |
|---|
| 108 | 107 | psllw $3, %%mm0 # Promote precision \n\ |
|---|
| 109 | 108 | psllw $3, %%mm1 # Promote precision \n\ |
|---|
| 110 | 109 | movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0 \n\ |
|---|
| 111 | 110 | movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0 \n\ |
|---|
| 112 | | pmulhw mmx_U_green"G", %%mm2 # Mul Cb with green coeff -> Cb green \n\ |
|---|
| 113 | | pmulhw mmx_V_green"G", %%mm3 # Mul Cr with green coeff -> Cr green \n\ |
|---|
| 114 | | pmulhw mmx_U_blue"G", %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 \n\ |
|---|
| 115 | | pmulhw mmx_V_red"G", %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 \n\ |
|---|
| | 111 | pmulhw %8, %%mm2 # Mul Cb with green coeff -> Cb green \n\ |
|---|
| | 112 | pmulhw %11, %%mm3 # Mul Cr with green coeff -> Cr green \n\ |
|---|
| | 113 | pmulhw %9, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 \n\ |
|---|
| | 114 | pmulhw %10, %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 \n\ |
|---|
| 116 | 115 | paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen \n\ |
|---|
| 117 | 116 | \n\ |
|---|
| 118 | 117 | # convert the luma part \n\ |
|---|
| 119 | | psubusb mmx_10w"G", %%mm6 # Y -= 16 \n\ |
|---|
| | 118 | psubusb %5, %%mm6 # Y -= 16 \n\ |
|---|
| 120 | 119 | movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\ |
|---|
| 121 | | pand mmx_00ffw"G", %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0 \n\ |
|---|
| | 120 | pand %6, %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0 \n\ |
|---|
| 122 | 121 | psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 \n\ |
|---|
| 123 | 122 | psllw $3, %%mm6 # Promote precision \n\ |
|---|
| 124 | 123 | psllw $3, %%mm7 # Promote precision \n\ |
|---|
| 125 | | pmulhw mmx_Y_coeff"G", %%mm6 # Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 \n\ |
|---|
| 126 | | pmulhw mmx_Y_coeff"G", %%mm7 # Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 \n\ |
|---|
| | 124 | pmulhw %7, %%mm6 # Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 \n\ |
|---|
| | 125 | pmulhw %7, %%mm7 # Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 \n\ |
|---|
| 127 | 126 | " |
|---|
| 128 | 127 | |
|---|
| … | … | |
| 169 | 168 | #define MMX_YUV_GRAY " \n\ |
|---|
| 170 | 169 | # convert the luma part \n\ |
|---|
| 171 | | psubusb mmx_10w"G", %%mm6 \n\ |
|---|
| | 170 | psubusb %5, %%mm6 \n\ |
|---|
| 172 | 171 | movq %%mm6, %%mm7 \n\ |
|---|
| 173 | | pand mmx_00ffw"G", %%mm6 \n\ |
|---|
| | 172 | pand %6, %%mm6 \n\ |
|---|
| 174 | 173 | psrlw $8, %%mm7 \n\ |
|---|
| 175 | 174 | psllw $3, %%mm6 \n\ |
|---|
| 176 | 175 | psllw $3, %%mm7 \n\ |
|---|
| 177 | | pmulhw mmx_Y_coeff"G", %%mm6 \n\ |
|---|
| 178 | | pmulhw mmx_Y_coeff"G", %%mm7 \n\ |
|---|
| | 176 | pmulhw %7, %%mm6 \n\ |
|---|
| | 177 | pmulhw %7, %%mm7 \n\ |
|---|
| 179 | 178 | packuswb %%mm6, %%mm6 \n\ |
|---|
| 180 | 179 | packuswb %%mm7, %%mm7 \n\ |
|---|
| … | … | |
| 184 | 183 | #define MMX_UNPACK_16_GRAY " \n\ |
|---|
| 185 | 184 | movq %%mm6, %%mm5 \n\ |
|---|
| 186 | | pand mmx_mask_f8"G", %%mm6 \n\ |
|---|
| 187 | | pand mmx_mask_fc"G", %%mm5 \n\ |
|---|
| | 185 | pand %12, %%mm6 \n\ |
|---|
| | 186 | pand %13, %%mm5 \n\ |
|---|
| 188 | 187 | movq %%mm6, %%mm7 \n\ |
|---|
| 189 | 188 | psrlw $3, %%mm7 \n\ |
|---|
| … | … | |
| 214 | 213 | #define MMX_UNPACK_15 " \n\ |
|---|
| 215 | 214 | # mask unneeded bits off \n\ |
|---|
| 216 | | pand mmx_mask_f8"G", %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\ |
|---|
| | 215 | pand %12, %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\ |
|---|
| 217 | 216 | psrlw $3,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\ |
|---|
| 218 | | pand mmx_mask_f8"G", %%mm2 # g7g6g5g4 g3______ g7g6g5g4 g3______ \n\ |
|---|
| 219 | | pand mmx_mask_f8"G", %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\ |
|---|
| | 217 | pand %12, %%mm2 # g7g6g5g4 g3______ g7g6g5g4 g3______ \n\ |
|---|
| | 218 | pand %12, %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\ |
|---|
| 220 | 219 | psrlw $1,%%mm1 # __r7r6r5 r4r3____ __r7r6r5 r4r3____ \n\ |
|---|
| 221 | 220 | pxor %%mm4, %%mm4 # zero mm4 \n\ |
|---|
| … | … | |
| 250 | 249 | #define MMX_UNPACK_16 " \n\ |
|---|
| 251 | 250 | # mask unneeded bits off \n\ |
|---|
| 252 | | pand mmx_mask_f8"G", %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\ |
|---|
| 253 | | pand mmx_mask_fc"G", %%mm2 # g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\ |
|---|
| 254 | | pand mmx_mask_f8"G", %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\ |
|---|
| | 251 | pand %12, %%mm0 # b7b6b5b4 b3______ b7b6b5b4 b3______ \n\ |
|---|
| | 252 | pand %13, %%mm2 # g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\ |
|---|
| | 253 | pand %12, %%mm1 # r7r6r5r4 r3______ r7r6r5r4 r3______ \n\ |
|---|
| 255 | 254 | psrlw $3,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\ |
|---|
| 256 | 255 | pxor %%mm4, %%mm4 # zero mm4 \n\ |
|---|