Lines Matching refs:mm0

335                   movq       mm0,mask0
337 pand mm0,mm7 //nonzero if keep byte
338 pcmpeqb mm0,mm6 //zeros->1s, v versa
348 pand mm4,mm0
349 movq mm6,mm0
439 movq mm0,mask0
442 pand mm0,mm7
445 pcmpeqb mm0,mm6
456 pand mm4,mm0
457 movq mm6,mm0
558 movq mm0,mask0
562 pand mm0,mm7
566 pcmpeqb mm0,mm6
578 pand mm4,mm0
579 movq mm6,mm0
694 movq mm0,mask0
699 pand mm0,mm7
704 pcmpeqb mm0,mm6
718 pand mm4,mm0
719 movq mm6,mm0
839 movq mm0,mask0
846 pand mm0,mm7
853 pcmpeqb mm0,mm6
869 pand mm7,mm0
870 movq mm6,mm0
1235 movd mm0, [esi] ; X X X X X v2 v1 v0
1236 pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
1237 movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
1238 psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
1239 movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
1240 psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
1242 por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
1243 por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
1244 movq mm3, mm0 ; v2 v1 v0 v2 v1 v0 v2 v1
1245 psllq mm0, 16 ; v0 v2 v1 v0 v2 v1 0 0
1247 punpckhdq mm3, mm0 ; v0 v2 v1 v0 v2 v1 v0 v2
1249 psrlq mm0, 32 ; 0 0 0 0 v0 v2 v1 v0
1251 punpckldq mm0, mm4 ; v1 v0 v2 v1 v0 v2 v1 v0
1253 movq [edi], mm0
1270 movd mm0, [esi] ; X X X X X v2 v1 v0
1271 pand mm0, const4 ; 0 0 0 0 0 v2 v1 v0
1272 movq mm1, mm0 ; 0 0 0 0 0 v2 v1 v0
1273 psllq mm0, 16 ; 0 0 0 v2 v1 v0 0 0
1274 movq mm2, mm0 ; 0 0 0 v2 v1 v0 0 0
1275 psllq mm0, 24 ; v2 v1 v0 0 0 0 0 0
1277 por mm0, mm2 ; v2 v1 v0 v2 v1 v0 0 0
1278 por mm0, mm1 ; v2 v1 v0 v2 v1 v0 v2 v1
1279 movq [edi+4], mm0 ; move to memory
1280 psrlq mm0, 16 ; 0 0 v2 v1 v0 v2 v1 v0
1281 movd [edi], mm0 ; move to memory
1305 movq mm0, [esi] ; X X v2 v1 v0 v5 v4 v3
1306 movq mm7, mm0 ; X X v2 v1 v0 v5 v4 v3
1307 movq mm6, mm0 ; X X v2 v1 v0 v5 v4 v3
1308 psllq mm0, 24 ; v1 v0 v5 v4 v3 0 0 0
1311 por mm0, mm7 ; v1 v0 v5 v4 v3 v5 v4 v3
1314 movq [edi], mm0 ; move quad to memory
1361 movd mm0, [esi] ; X X X X v0 v1 v2 v3
1362 movq mm1, mm0 ; X X X X v0 v1 v2 v3
1363 punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1364 movq mm2, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1365 punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1366 movq mm3, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1367 punpckldq mm0, mm0 ; v3 v3 v3 v3 v3 v3 v3 v3
1369 movq [edi], mm0 ; move to memory v3
1428 movd mm0, [esi] ; X X X X v0 v1 v2 v3
1429 punpcklbw mm0, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1430 movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1431 punpcklwd mm0, mm0 ; v2 v2 v2 v2 v3 v3 v3 v3
1433 movq [edi], mm0 ; move to memory v2 and v3
1470 movq mm0, [esi] ; v0 v1 v2 v3 v4 v5 v6 v7
1471 movq mm1, mm0 ; v0 v1 v2 v3 v4 v5 v6 v7
1472 punpcklbw mm0, mm0 ; v4 v4 v5 v5 v6 v6 v7 v7
1473 //movq mm1, mm0 ; v0 v0 v1 v1 v2 v2 v3 v3
1477 movq [edi], mm0 ; move to memory v4 v5 v6 and v7
1517 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1518 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1519 movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1520 punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
1522 movq [edi], mm0
1523 movq [edi + 8], mm0
1563 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1564 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1565 movq mm1, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1566 punpckldq mm0, mm0 ; v3 v2 v3 v2 v3 v2 v3 v2
1568 movq [edi], mm0
1608 movd mm0, [esi] ; X X X X v1 v0 v3 v2
1609 punpcklwd mm0, mm0 ; v1 v0 v1 v0 v3 v2 v3 v2
1611 movq [edi], mm0
1652 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1653 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1654 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1656 movq [edi], mm0
1657 movq [edi + 8], mm0
1658 movq [edi + 16], mm0
1659 movq [edi + 24], mm0
1701 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1702 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1703 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1705 movq [edi], mm0
1706 movq [edi + 8], mm0
1746 movq mm0, [esi] ; v3 v2 v1 v0 v7 v6 v5 v4
1747 movq mm1, mm0 ; v3 v2 v1 v0 v7 v6 v5 v4
1748 punpckldq mm0, mm0 ; v7 v6 v5 v4 v7 v6 v5 v4
1750 movq [edi], mm0
2010 movq mm0, [edi + ebx] // Load mm0 with Avg(x)
2019 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2028 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2032 movq mm2, mm0 // mov updated Raws to mm2
2041 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2047 movq mm2, mm0 // mov updated Raws to mm2
2059 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2063 movq [edi + ebx - 8], mm0
2066 movq mm2, mm0 // mov updated Raw(x) to mm2
2097 movq mm0, [edi + ebx]
2105 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2114 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2117 movq mm2, mm0 // mov updated Raws to mm2
2127 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active
2131 movq [edi + ebx - 8], mm0
2133 movq mm2, mm0 // mov updated Raws to mm2
2156 movq mm0, [edi + ebx]
2165 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2174 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2177 movq mm2, mm0 // mov updated Raws to mm2
2186 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2190 movq mm2, mm0 // mov updated Raws to mm2
2201 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2205 movq mm2, mm0 // mov updated Raws to mm2
2217 paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
2221 movq [edi + ebx - 8], mm0
2223 movq mm2, mm0 // mov updated Raws to mm2
2273 movq mm0, [edi + ebx]
2283 paddb mm0, mm3 // add LBCarrys to Avg for each byte
2285 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2286 paddb mm0, mm2 // add (Raw/2) to Avg for each byte
2288 movq [edi + ebx - 8], mm0
2289 movq mm2, mm0 // reuse as Raw(x-bpp)
2306 movq mm0, [edi + ebx]
2316 paddb mm0, mm3 // add LBCarrys to Avg for each byte
2318 paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
2320 paddb mm0, mm2 // add (Raw/2) to Avg for each byte
2322 movq [edi + ebx - 8], mm0
2486 pxor mm0, mm0
2492 punpcklbw mm1, mm0 // Unpack High bytes of a
2494 punpcklbw mm2, mm0 // Unpack High bytes of b
2498 punpcklbw mm3, mm0 // Unpack High bytes of c
2510 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2512 pand mm0, mm4 // Only pav bytes < 0 in mm7
2514 psubw mm4, mm0
2515 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2516 psubw mm4, mm0
2518 pxor mm0, mm0
2519 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2520 pand mm0, mm6 // Only pav bytes < 0 in mm7
2522 psubw mm6, mm0
2525 psubw mm6, mm0
2527 movq mm0, mm7
2530 // use mm0 mask copy to merge a & b
2531 pand mm2, mm0
2533 pandn mm0, mm1
2535 paddw mm0, mm2
2540 pandn mm7, mm0
2542 pxor mm0, mm0
2548 punpcklbw mm3, mm0 // Unpack High bytes of c
2553 punpcklbw mm1, mm0 // Unpack High bytes of a
2555 punpcklbw mm2, mm0 // Unpack High bytes of b
2570 pcmpgtw mm0, mm5 // Create mask pbv bytes < 0
2572 pand mm0, mm5 // Only pbv bytes < 0 in mm0
2574 psubw mm5, mm0
2576 psubw mm5, mm0
2578 pxor mm0, mm0
2579 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2580 pand mm0, mm6 // Only pav bytes < 0 in mm7
2581 psubw mm6, mm0
2584 psubw mm6, mm0
2586 movq mm0, mm7
2589 // use mm0 mask copy to merge a & b
2590 pand mm2, mm0
2592 pandn mm0, mm1
2594 paddw mm0, mm2
2599 pandn mm7, mm0
2602 pxor mm0, mm0
2606 punpckhbw mm2, mm0 // Unpack High bytes of b
2614 punpckhbw mm3, mm0 // Unpack High bytes of c
2619 punpckhbw mm1, mm0 // Unpack High bytes of a
2626 pxor mm0, mm0
2632 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2634 pand mm0, mm4 // Only pav bytes < 0 in mm7
2635 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2636 psubw mm4, mm0
2638 psubw mm4, mm0
2640 pxor mm0, mm0
2641 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2642 pand mm0, mm6 // Only pav bytes < 0 in mm7
2643 psubw mm6, mm0
2646 psubw mm6, mm0
2648 movq mm0, mm7
2649 // use mm0 mask copy to merge a & b
2650 pand mm2, mm0
2653 pandn mm0, mm1
2655 paddw mm0, mm2
2660 pandn mm7, mm0
2670 pxor mm0, mm0 // pxor does not affect flags
2694 pxor mm0, mm0
2700 punpcklbw mm1, mm0 // Unpack Low bytes of a
2702 punpcklbw mm2, mm0 // Unpack Low bytes of b
2707 punpcklbw mm3, mm0 // Unpack Low bytes of c
2718 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2720 pand mm0, mm4 // Only pav bytes < 0 in mm7
2722 psubw mm4, mm0
2723 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2724 psubw mm4, mm0
2726 pxor mm0, mm0
2727 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2728 pand mm0, mm6 // Only pav bytes < 0 in mm7
2730 psubw mm6, mm0
2733 psubw mm6, mm0
2735 movq mm0, mm7
2738 // use mm0 mask copy to merge a & b
2739 pand mm2, mm0
2741 pandn mm0, mm1
2743 paddw mm0, mm2
2748 pandn mm7, mm0
2750 pxor mm0, mm0
2765 punpckhbw mm3, mm0 // Unpack High bytes of c
2768 punpckhbw mm2, mm0 // Unpack High bytes of b
2769 punpckhbw mm1, mm0 // Unpack High bytes of a
2782 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2784 pand mm0, mm4 // Only pav bytes < 0 in mm7
2786 psubw mm4, mm0
2787 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2788 psubw mm4, mm0
2790 pxor mm0, mm0
2791 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2792 pand mm0, mm6 // Only pav bytes < 0 in mm7
2794 psubw mm6, mm0
2797 psubw mm6, mm0
2799 movq mm0, mm7
2802 // use mm0 mask copy to merge a & b
2803 pand mm2, mm0
2805 pandn mm0, mm1
2807 paddw mm0, mm2
2812 pandn mm7, mm0
2815 pxor mm0, mm0
2835 pxor mm0, mm0
2842 punpckhbw mm1, mm0 // Unpack Low bytes of a
2844 punpcklbw mm2, mm0 // Unpack High bytes of b
2847 punpckhbw mm3, mm0 // Unpack High bytes of c
2858 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2860 pand mm0, mm4 // Only pav bytes < 0 in mm7
2862 psubw mm4, mm0
2863 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2864 psubw mm4, mm0
2866 pxor mm0, mm0
2867 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2868 pand mm0, mm6 // Only pav bytes < 0 in mm7
2870 psubw mm6, mm0
2873 psubw mm6, mm0
2875 movq mm0, mm7
2878 // use mm0 mask copy to merge a & b
2879 pand mm2, mm0
2881 pandn mm0, mm1
2883 paddw mm0, mm2
2888 pandn mm7, mm0
2890 pxor mm0, mm0
2896 punpcklbw mm3, mm0 // Unpack High bytes of c
2900 punpckhbw mm2, mm0 // Unpack Low bytes of b
2901 punpcklbw mm1, mm0 // Unpack Low bytes of a
2914 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2916 pand mm0, mm4 // Only pav bytes < 0 in mm7
2918 psubw mm4, mm0
2919 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2920 psubw mm4, mm0
2922 pxor mm0, mm0
2923 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2924 pand mm0, mm6 // Only pav bytes < 0 in mm7
2926 psubw mm6, mm0
2929 psubw mm6, mm0
2931 movq mm0, mm7
2934 // use mm0 mask copy to merge a & b
2935 pand mm2, mm0
2937 pandn mm0, mm1
2939 paddw mm0, mm2
2944 pandn mm7, mm0
2947 pxor mm0, mm0
2966 pxor mm0, mm0
2973 punpcklbw mm1, mm0 // Unpack Low bytes of a
2975 punpcklbw mm2, mm0 // Unpack Low bytes of b
2978 punpcklbw mm3, mm0 // Unpack Low bytes of c
2989 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
2991 pand mm0, mm4 // Only pav bytes < 0 in mm7
2993 psubw mm4, mm0
2994 pand mm7, mm5 // Only pbv bytes < 0 in mm0
2995 psubw mm4, mm0
2997 pxor mm0, mm0
2998 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
2999 pand mm0, mm6 // Only pav bytes < 0 in mm7
3001 psubw mm6, mm0
3004 psubw mm6, mm0
3006 movq mm0, mm7
3009 // use mm0 mask copy to merge a & b
3010 pand mm2, mm0
3012 pandn mm0, mm1
3014 paddw mm0, mm2
3019 pandn mm7, mm0
3021 pxor mm0, mm0
3027 punpckhbw mm3, mm0 // Unpack High bytes of c
3032 punpckhbw mm2, mm0 // Unpack High bytes of b
3033 punpckhbw mm1, mm0 // Unpack High bytes of a
3046 pcmpgtw mm0, mm4 // Create mask pav bytes < 0
3048 pand mm0, mm4 // Only pav bytes < 0 in mm7
3050 psubw mm4, mm0
3051 pand mm7, mm5 // Only pbv bytes < 0 in mm0
3052 psubw mm4, mm0
3054 pxor mm0, mm0
3055 pcmpgtw mm0, mm6 // Create mask pcv bytes < 0
3056 pand mm0, mm6 // Only pav bytes < 0 in mm7
3058 psubw mm6, mm0
3061 psubw mm6, mm0
3063 movq mm0, mm7
3066 // use mm0 mask copy to merge a & b
3067 pand mm2, mm0
3069 pandn mm0, mm1
3071 paddw mm0, mm2
3076 pandn mm7, mm0
3079 pxor mm0, mm0
3320 movq mm0, [edi+ebx]
3321 paddb mm0, mm1
3323 movq mm1, mm0 // mov updated Raws to mm1
3326 paddb mm0, mm1
3328 movq mm1, mm0 // mov updated Raws to mm1
3332 paddb mm0, mm1
3334 movq [edi+ebx-8], mm0 // Write updated Raws back to array
3336 movq mm1, mm0
3392 movq mm0, [edi+ebx]
3393 paddb mm0, mm1
3395 movq mm1, mm0 // mov updated Raws to mm1
3400 paddb mm0, mm1
3402 movq [edi+ebx-8], mm0
3403 movq mm1, mm0 // Prep for doing 1st add at top of loop
3433 movq mm0, [edi+ebx]
3434 paddb mm0, mm1
3436 movq mm1, mm0 // mov updated Raws to mm1
3439 paddb mm0, mm1
3441 movq mm1, mm0 // mov updated Raws to mm1
3444 paddb mm0, mm1
3446 movq mm1, mm0 // mov updated Raws to mm1
3450 paddb mm0, mm1
3452 movq [edi+ebx-8], mm0 // Write updated Raws back to array
3453 movq mm1, mm0 // Prep for doing 1st add at top of loop
3470 movq mm0, [edi+ebx] // Load Sub(x) for 1st 8 bytes
3471 paddb mm0, mm7
3473 movq [edi+ebx], mm0 // Write Raw(x) for 1st 8 bytes
3474 // Now mm0 will be used as Raw(x-bpp) for
3479 paddb mm1, mm0
3505 movq mm0, [edi+ebx]
3507 paddb mm0, mm7
3509 movq [edi+ebx-8], mm0 // use -8 to offset early add to ebx
3510 movq mm7, mm0 // Move calculated Raw(x) data to mm1 to
3526 movq mm0, [edi+ebx]
3529 paddb mm0, mm1
3531 movq [edi+ebx-8], mm0 // mov does not affect flags; -8 to offset
3595 movq mm0, [edi+ebx]
3597 paddb mm0, mm1
3599 movq [edi+ebx], mm0
3611 movq mm0, [edi+ebx+32]
3613 paddb mm0, mm1
3615 movq [edi+ebx+32], mm0
3645 // Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously
3648 movq mm0, [edi+ebx]
3650 paddb mm0, mm1
3652 movq [edi+ebx-8], mm0 // movq does not affect flags; -8 to offset add ebx