Changeset df3b5eec76004d2dcfbabc36ac65f64755bdbc33
- Timestamp:
- 08/15/07 18:15:45 (11 months ago)
- git-parent:
- Files:
-
- AUTHORS (modified) (1 diff)
- NEWS (modified) (1 diff)
- modules/video_chroma/i420_yuy2.h (modified) (3 diffs)
- modules/video_chroma/i422_yuy2.c (modified) (3 diffs)
- modules/video_chroma/i422_yuy2.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
AUTHORS
r7bcb462 rdf3b5ee 191 191 D: ActiveX control 192 192 D: Safari/Firefox plugin for MacOS X 193 D: Direct3D Video output 194 D: SSE2 chroma converters 195 D: improved MMX chroma converters 193 196 S: Ireland 194 197 NEWS
r360c0d9 rdf3b5ee 82 82 * Fix random characters problem in RSS filter. 83 83 * Add rotate-deciangle for more precision on rotate filter 84 * Support for Intel SSE2 instruction set in chroma converters 85 * Improved use of Intel MMX instruction set in chroma converters 84 86 85 87 Audio output modules/video_chroma/i420_yuy2.h
r5e4dc54 rdf3b5ee 367 367 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 368 368 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 369 xmm0 = _mm_load_si128((__m128i *)p_y1); \ 370 xmm3 = _mm_load_si128((__m128i *)p_y2); \ 369 xmm0 = _mm_loadu_si128((__m128i *)p_y1); \ 370 xmm3 = _mm_loadu_si128((__m128i *)p_y2); \ 371 371 _mm_prefetch(p_line1, _MM_HINT_NTA); \ 372 372 _mm_prefetch(p_line2, _MM_HINT_NTA); \ … … 403 403 xmm1 = _mm_loadl_epi64((__m128i *)p_v); \ 404 404 xmm2 = _mm_loadl_epi64((__m128i *)p_u); \ 405 xmm0 = _mm_load_si128((__m128i *)p_y1); \ 406 xmm3 = _mm_load_si128((__m128i *)p_y2); \ 405 xmm0 = _mm_loadu_si128((__m128i *)p_y1); \ 406 xmm3 = _mm_loadu_si128((__m128i *)p_y2); \ 407 407 _mm_prefetch(p_line1, _MM_HINT_NTA); \ 408 408 _mm_prefetch(p_line2, _MM_HINT_NTA); \ … … 440 440 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 441 441 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 442 xmm0 = _mm_load_si128((__m128i *)p_y1); \ 443 xmm3 = _mm_load_si128((__m128i *)p_y2); \ 442 xmm0 = _mm_loadu_si128((__m128i *)p_y1); \ 443 xmm3 = _mm_loadu_si128((__m128i *)p_y2); \ 444 444 _mm_prefetch(p_line1, _MM_HINT_NTA); \ 445 445 _mm_prefetch(p_line2, _MM_HINT_NTA); \ modules/video_chroma/i422_yuy2.c
r93a719b rdf3b5ee 443 443 int i_x, i_y; 444 444 445 const int i_source_margin = p_source->p[0].i_pitch 446 - p_source->p[0].i_visible_pitch; 447 const int i_source_margin_c = p_source->p[1].i_pitch 448 - p_source->p[1].i_visible_pitch; 449 const int i_dest_margin = p_dest->p->i_pitch 450 - p_dest->p->i_visible_pitch; 451 452 #if defined (MODULE_NAME_IS_i422_yuy2_sse2) 453 454 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch| 455 ((int)p_line|(int)p_y))) ) 456 { 457 /* use faster SSE2 aligned fetch and store */ 458 for( i_y = p_vout->render.i_height ; i_y-- ; ) 459 { 460 p_line -= 2 * p_dest->p->i_pitch; 461 462 for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) 463 { 464 SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED ); 465 } 466 for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) 467 { 468 C_YUV422_UYVY( p_line, p_y, p_u, p_v ); 469 } 470 p_y += i_source_margin; 471 p_u += i_source_margin_c; 472 p_v += i_source_margin_c; 473 p_line += i_dest_margin; 474 } 475 } 476 else { 477 /* use slower SSE2 unaligned fetch and store */ 478 for( i_y = p_vout->render.i_height ; i_y-- ; ) 479 { 480 p_line -= 2 * p_dest->p->i_pitch; 481 482 for( i_x = p_vout->render.i_width / 16 ; i_x-- ; ) 483 { 484 SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED ); 485 } 486 for( i_x = ( p_vout->render.i_width % 16 ) / 2; i_x-- ; ) 487 { 488 C_YUV422_UYVY( p_line, p_y, p_u, p_v ); 489 } 490 p_y += i_source_margin; 491 p_u += i_source_margin_c; 492 p_v += i_source_margin_c; 493 p_line += i_dest_margin; 494 } 495 } 496 SSE2_END; 497 498 #else 499 445 500 for( i_y = p_vout->render.i_height ; i_y-- ; ) 446 501 { … … 458 513 #endif 459 514 } 515 p_y += i_source_margin; 516 p_u += i_source_margin_c; 517 p_v += i_source_margin_c; 518 p_line += i_dest_margin; 460 519 } 461 520 #if defined (MODULE_NAME_IS_i422_yuy2_mmx) … … 463 522 #elif defined (MODULE_NAME_IS_i422_yuy2_sse2) 464 523 SSE2_END; 524 #endif 525 465 526 #endif 466 527 } modules/video_chroma/i422_yuy2.h
r262b177 rdf3b5ee 234 234 #include <emmintrin.h> 235 235 236 #define SSE2_CALL(SSE2_INSTRUCTIONS) \ 237 do { \ 238 __m128i xmm0, xmm1, xmm2; \ 239 SSE2_INSTRUCTIONS \ 240 p_line += 32; p_y += 16; \ 241 p_u += 8; p_v += 8; \ 242 } while(0) 236 243 237 244 #define SSE2_END _mm_sfence() 245 246 #define SSE2_YUV422_YUYV_ALIGNED \ 247 xmm0 = _mm_load_si128((__m128i *)p_y); \ 248 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 249 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 250 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 251 xmm2 = xmm0; \ 252 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 253 _mm_stream_si128((__m128i*)(p_line), xmm2); \ 254 xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \ 255 _mm_stream_si128((__m128i*)(p_line+16), xmm0); 256 257 #define SSE2_YUV422_YUYV_UNALIGNED \ 258 xmm0 = _mm_loadu_si128((__m128i *)p_y); \ 259 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 260 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 261 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 262 xmm2 = xmm0; \ 263 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 264 _mm_storeu_si128((__m128i*)(p_line), xmm2); \ 265 xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \ 266 _mm_storeu_si128((__m128i*)(p_line+16), xmm0); 267 268 #define SSE2_YUV422_YVYU_ALIGNED \ 269 xmm0 = _mm_load_si128((__m128i *)p_y); \ 270 xmm2 = _mm_loadl_epi64((__m128i *)p_u); \ 271 xmm1 = _mm_loadl_epi64((__m128i *)p_v); \ 272 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 273 xmm2 = xmm0; \ 274 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 275 _mm_stream_si128((__m128i*)(p_line), xmm2); \ 276 xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \ 277 _mm_stream_si128((__m128i*)(p_line+16), xmm0); 278 279 #define SSE2_YUV422_YVYU_UNALIGNED \ 280 xmm0 = _mm_loadu_si128((__m128i *)p_y); \ 281 xmm2 = _mm_loadl_epi64((__m128i *)p_u); \ 282 xmm1 = _mm_loadl_epi64((__m128i *)p_v); \ 283 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 284 xmm2 = xmm0; \ 285 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 286 _mm_storeu_si128((__m128i*)(p_line), xmm2); \ 287 xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \ 288 
_mm_storeu_si128((__m128i*)(p_line+16), xmm0); 289 290 #define SSE2_YUV422_UYVY_ALIGNED \ 291 xmm0 = _mm_load_si128((__m128i *)p_y); \ 292 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 293 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 294 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 295 xmm2 = xmm1; \ 296 xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \ 297 _mm_stream_si128((__m128i*)(p_line), xmm2); \ 298 xmm1 = _mm_unpackhi_epi8(xmm1, xmm0); \ 299 _mm_stream_si128((__m128i*)(p_line+16), xmm1); 300 301 #define SSE2_YUV422_UYVY_UNALIGNED \ 302 xmm0 = _mm_loadu_si128((__m128i *)p_y); \ 303 xmm1 = _mm_loadl_epi64((__m128i *)p_u); \ 304 xmm2 = _mm_loadl_epi64((__m128i *)p_v); \ 305 xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \ 306 xmm2 = xmm1; \ 307 xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \ 308 _mm_storeu_si128((__m128i*)(p_line), xmm2); \ 309 xmm1 = _mm_unpackhi_epi8(xmm1, xmm0); \ 310 _mm_storeu_si128((__m128i*)(p_line+16), xmm1); 238 311 239 312 #endif
