20814N/Adiff --git a/gfx/ycbcr/Makefile.in b/gfx/ycbcr/Makefile.in
20814N/Aindex 4e72694..d4e9646 100644
20814N/A--- a/gfx/ycbcr/Makefile.in
20814N/A+++ b/gfx/ycbcr/Makefile.in
20814N/A@@ -44,7 +44,7 @@ yuv_convert_sse2.$(OBJ_SUFFIX): CXXFLAGS += -msse2
20814N/A endif
20814N/A
20814N/A ifdef SOLARIS_SUNPRO_CXX
20814N/A-yuv_convert_mmx.$(OBJ_SUFFIX): CXXFLAGS += -xarch=mmx -xO4
20814N/A+yuv_convert_mmx.$(OBJ_SUFFIX): CXXFLAGS += -xarch=sse -xO4
20814N/A yuv_convert_sse2.$(OBJ_SUFFIX): CXXFLAGS += -xarch=sse2 -xO4
20814N/A endif
20814N/A
20814N/A@@ -87,3 +87,10 @@ EXTRA_DSO_LDOPTS += \
20814N/A $(NULL)
20814N/A
20814N/A include $(topsrcdir)/config/rules.mk
20814N/A+# Sun Studio builds whose OS_TEST contains "86": compile yuv_row_posix with its inline-assembly template.
20814N/A+ifeq (86,$(findstring 86,$(OS_TEST)))
20814N/A+ifdef SOLARIS_SUNPRO_CXX
20814N/A+yuv_row_posix.$(OBJ_SUFFIX): yuv_row_posix.il
20814N/A+yuv_row_posix.$(OBJ_SUFFIX): OS_CXXFLAGS += -xarch=sse -xO4 $(srcdir)/yuv_row_posix.il
20814N/A+endif
20814N/A+endif
20814N/Adiff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
20814N/Aindex 0e9e329..ae724a0 100644
20814N/A--- a/gfx/ycbcr/yuv_convert.cpp
20814N/A+++ b/gfx/ycbcr/yuv_convert.cpp
20814N/A@@ -120,9 +120,11 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
20814N/A }
20814N/A }
20814N/A
20814N/A+#ifdef ARCH_CPU_X86_FAMILY
20814N/A // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
20814N/A if (has_sse)
20814N/A EMMS();
20814N/A+#endif
20814N/A }
20814N/A
20814N/A // C version does 8 at a time to mimic MMX code
20814N/A@@ -362,9 +364,12 @@ NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
20814N/A #endif
20814N/A }
20814N/A }
20814N/A+
20814N/A+#ifdef ARCH_CPU_X86_FAMILY
20814N/A // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
20814N/A if (has_mmx)
20814N/A EMMS();
20814N/A+#endif
20814N/A }
20814N/A
20814N/A } // namespace gfx
20814N/Adiff --git a/gfx/ycbcr/yuv_row_posix.cpp b/gfx/ycbcr/yuv_row_posix.cpp
20814N/Aindex b359db4..eed4c15 100644
20814N/A--- a/gfx/ycbcr/yuv_row_posix.cpp
20814N/A+++ b/gfx/ycbcr/yuv_row_posix.cpp
20814N/A@@ -258,7 +258,7 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A );
20814N/A }
20814N/A
20814N/A-#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
20814N/A+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__) && !defined(__SUNPRO_CC)
20814N/A
20814N/A // PIC version is slower because less registers are available, so
20814N/A // non-PIC is used on platforms where it is possible.
20814N/A@@ -564,7 +564,7 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A width, source_dx);
20814N/A }
20814N/A
20814N/A-#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)
20814N/A+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__) && !defined(__SUNPRO_CC)
20814N/A
20814N/A void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf,
20814N/A const uint8* u_buf,
20814N/A@@ -884,6 +884,128 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A
20814N/A LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
20814N/A }
20814N/A+
20814N/A+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__SUNPRO_CC)
20814N/A+
20814N/A+void FastConvertYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+                                 const uint8* u_buf,
20814N/A+                                 const uint8* v_buf,
20814N/A+                                 uint8* rgb_buf,
20814N/A+                                 int width);
20814N/A+// Converts one row: C fallback when SSE is missing, otherwise the template from yuv_row_posix.il.
20814N/A+void FastConvertYUVToRGB32Row(const uint8* y_buf,
20814N/A+                              const uint8* u_buf,
20814N/A+                              const uint8* v_buf,
20814N/A+                              uint8* rgb_buf,
20814N/A+                              int width) {
20814N/A+  if (!mozilla::supports_sse()) {
20814N/A+    FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
20814N/A+    return;
20814N/A+  }
20814N/A+  // SSE is available: run the inline-assembly template.
20814N/A+  FastConvertYUVToRGB32Row_IL(y_buf, u_buf, v_buf, rgb_buf, width);
20814N/A+}
20814N/A+
20814N/A+void ScaleYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+                           const uint8* u_buf,
20814N/A+                           const uint8* v_buf,
20814N/A+                           uint8* rgb_buf,
20814N/A+                           int width,
20814N/A+                           int source_dx);
20814N/A+// Scales one row: C fallback when SSE is missing, otherwise the template from yuv_row_posix.il.
20814N/A+void ScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+                        const uint8* u_buf,
20814N/A+                        const uint8* v_buf,
20814N/A+                        uint8* rgb_buf,
20814N/A+                        int width,
20814N/A+                        int source_dx) {
20814N/A+  if (!mozilla::supports_sse()) {
20814N/A+    ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
20814N/A+                         width, source_dx);
20814N/A+    return;
20814N/A+  }
20814N/A+  // SSE is available: run the inline-assembly template.
20814N/A+  ScaleYUVToRGB32Row_IL(y_buf, u_buf, v_buf, rgb_buf,
20814N/A+                        width, source_dx);
20814N/A+}
20814N/A+
20814N/A+void LinearScaleYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+                                 const uint8* u_buf,
20814N/A+                                 const uint8* v_buf,
20814N/A+                                 uint8* rgb_buf,
20814N/A+                                 int width,
20814N/A+                                 int source_dx);
20814N/A+// Filtered scaling of one row: C fallback when SSE is missing, otherwise the yuv_row_posix.il template.
20814N/A+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+                              const uint8* u_buf,
20814N/A+                              const uint8* v_buf,
20814N/A+                              uint8* rgb_buf,
20814N/A+                              int width,
20814N/A+                              int source_dx) {
20814N/A+  if (!mozilla::supports_sse()) {
20814N/A+    LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
20814N/A+                               width, source_dx);
20814N/A+    return;
20814N/A+  }
20814N/A+  // SSE is available: run the inline-assembly template.
20814N/A+  LinearScaleYUVToRGB32Row_IL(y_buf, u_buf, v_buf, rgb_buf,
20814N/A+                              width, source_dx);
20814N/A+}
20814N/A+
20814N/A+/* Disabled alternative: wrappers that pass &kCoefficientsRgbY[0][0] to the PIC* inline templates.
20814N/A+void PICConvertYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width,
20814N/A+ int16 *kCoefficientsRgbY);
20814N/A+
20814N/A+void FastConvertYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width) {
20814N/A+ PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
20814N/A+ &kCoefficientsRgbY[0][0]);
20814N/A+}
20814N/A+
20814N/A+void PICScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width,
20814N/A+ int source_dx,
20814N/A+ int16 *kCoefficientsRgbY);
20814N/A+
20814N/A+void ScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width,
20814N/A+ int source_dx) {
20814N/A+ PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
20814N/A+ &kCoefficientsRgbY[0][0]);
20814N/A+}
20814N/A+
20814N/A+void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width,
20814N/A+ int source_dx,
20814N/A+ int16 *kCoefficientsRgbY);
20814N/A+
20814N/A+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+ const uint8* u_buf,
20814N/A+ const uint8* v_buf,
20814N/A+ uint8* rgb_buf,
20814N/A+ int width,
20814N/A+ int source_dx) {
20814N/A+ PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
20814N/A+ &kCoefficientsRgbY[0][0]);
20814N/A+}
20814N/A+*/
20814N/A+
20814N/A #else
20814N/A void FastConvertYUVToRGB32Row(const uint8* y_buf,
20814N/A const uint8* u_buf,
20814N/Adiff --git a/gfx/ycbcr/yuv_row_posix.il b/gfx/ycbcr/yuv_row_posix.il
20814N/Anew file mode 100644
20814N/Aindex 0000000..faf6463
20814N/A--- /dev/null
20814N/A+++ b/gfx/ycbcr/yuv_row_posix.il
20814N/A@@ -0,0 +1,480 @@
20814N/A+! void FastConvertYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width);
20814N/A+ .inline FastConvertYUVToRGB32Row_IL, 20
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x24(%esp),%edi
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+ mov 0x30(%esp),%ecx
20814N/A+ jmp 1f
20814N/A+ ! Main loop: one byte each from edi and esi plus two bytes from edx yield 8 output bytes at ebp.
20814N/A+0:
20814N/A+ movzbl (%edi),%eax
20814N/A+ add $0x1,%edi
20814N/A+ movzbl (%esi),%ebx
20814N/A+ add $0x1,%esi
20814N/A+ movq kCoefficientsRgbY+2048(,%eax,8),%mm0
20814N/A+ movzbl (%edx),%eax
20814N/A+ paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0
20814N/A+ movzbl 0x1(%edx),%ebx
20814N/A+ movq kCoefficientsRgbY(,%eax,8),%mm1
20814N/A+ add $0x2,%edx
20814N/A+ movq kCoefficientsRgbY(,%ebx,8),%mm2
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+1:
20814N/A+ sub $0x2,%ecx
20814N/A+ jns 0b
20814N/A+ ! Low bit of the remaining count set (odd width): convert one more pixel with a 4-byte store.
20814N/A+ and $0x1,%ecx
20814N/A+ je 2f
20814N/A+
20814N/A+ movzbl (%edi),%eax
20814N/A+ movq kCoefficientsRgbY+2048(,%eax,8),%mm0
20814N/A+ movzbl (%esi),%eax
20814N/A+ paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0
20814N/A+ movzbl (%edx),%eax
20814N/A+ movq kCoefficientsRgbY(,%eax,8),%mm1
20814N/A+ paddsw %mm0,%mm1
20814N/A+ psraw $0x6,%mm1
20814N/A+ packuswb %mm1,%mm1
20814N/A+ movd %mm1,0x0(%ebp)
20814N/A+2:
20814N/A+ popa
20814N/A+ .end
20814N/A+
20814N/A+! void ScaleYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width,
20814N/A+! int source_dx);
20814N/A+ .inline ScaleYUVToRGB32Row_IL, 24
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x24(%esp),%edi
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+ mov 0x30(%esp),%ecx
20814N/A+ xor %ebx,%ebx
20814N/A+ jmp 1f
20814N/A+ ! Main loop: ebx is the source position; edx is indexed by ebx>>16, edi/esi by ebx>>17; ebx += 0x34(%esp) per pixel.
20814N/A+0:
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%edi,%eax,1),%eax
20814N/A+ movq kCoefficientsRgbY+2048(,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%esi,%eax,1),%eax
20814N/A+ paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq kCoefficientsRgbY(,%eax,8),%mm1
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq kCoefficientsRgbY(,%eax,8),%mm2
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+1:
20814N/A+ sub $0x2,%ecx
20814N/A+ jns 0b
20814N/A+ ! Low bit of the remaining count set (odd width): convert one more pixel with a 4-byte store.
20814N/A+ and $0x1,%ecx
20814N/A+ je 2f
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%edi,%eax,1),%eax
20814N/A+ movq kCoefficientsRgbY+2048(,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%esi,%eax,1),%eax
20814N/A+ paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq kCoefficientsRgbY(,%eax,8),%mm1
20814N/A+ paddsw %mm0,%mm1
20814N/A+ psraw $0x6,%mm1
20814N/A+ packuswb %mm1,%mm1
20814N/A+ movd %mm1,0x0(%ebp)
20814N/A+
20814N/A+2:
20814N/A+ popa
20814N/A+ .end
20814N/A+
20814N/A+! void LinearScaleYUVToRGB32Row_IL(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width,
20814N/A+! int source_dx);
20814N/A+ .inline LinearScaleYUVToRGB32Row_IL, 24
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x24(%esp),%edi
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+
20814N/A+ ! source_width = width * source_dx (ebx is not added); the product overwrites the width slot at 0x30(%esp)
20814N/A+ mov 0x30(%esp), %ecx
20814N/A+ imull 0x34(%esp), %ecx
20814N/A+ mov %ecx, 0x30(%esp)
20814N/A+
20814N/A+ mov 0x34(%esp), %ecx
20814N/A+ xor %ebx,%ebx
20814N/A+ ! ebx = x = 0 (16.16 fixed point: 0x20000 below is 2.0)
20814N/A+ cmp $0x20000,%ecx
20814N/A+ ! keep x = 0 unless source_dx >= 2.0
20814N/A+ jl 1f
20814N/A+ mov $0x8000,%ebx
20814N/A+ ! ebx = x = 0.5 when scaling down to 1/2 or less
20814N/A+ jmp 1f
20814N/A+ ! Main loop: every table index is a blend of two adjacent bytes weighted by the fractional bits of ebx.
20814N/A+0:
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+
20814N/A+ movzbl (%edi,%eax,1),%ecx
20814N/A+ movzbl 1(%edi,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ andl $0x1fffe, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0x1fffe, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $17, %ecx
20814N/A+ movq kCoefficientsRgbY+2048(,%ecx,8),%mm0
20814N/A+ ! esi was used as scratch above, so the 0x28(%esp) argument is loaded fresh for the second blend.
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+
20814N/A+ movzbl (%esi,%eax,1),%ecx
20814N/A+ movzbl 1(%esi,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ andl $0x1fffe, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0x1fffe, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $17, %ecx
20814N/A+ paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%ecx
20814N/A+ movzbl 1(%edx,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ andl $0xffff, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0xffff, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $16, %ecx
20814N/A+ movq kCoefficientsRgbY(,%ecx,8),%mm1
20814N/A+ ! If x has reached source_width, finish with a single pixel at label 2.
20814N/A+ cmp 0x30(%esp), %ebx
20814N/A+ jge 2f
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%ecx
20814N/A+ movzbl 1(%edx,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ andl $0xffff, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0xffff, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $16, %ecx
20814N/A+ movq kCoefficientsRgbY(,%ecx,8),%mm2
20814N/A+
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+
20814N/A+1:
20814N/A+ cmp 0x30(%esp), %ebx
20814N/A+ jl 0b
20814N/A+ jmp 3f
20814N/A+ ! Single trailing pixel: only mm1 was computed on this path, so 4 bytes are stored.
20814N/A+2:
20814N/A+ paddsw %mm0, %mm1
20814N/A+ psraw $6, %mm1
20814N/A+ packuswb %mm1, %mm1
20814N/A+ movd %mm1, (%ebp)
20814N/A+
20814N/A+3:
20814N/A+ popa
20814N/A+ .end
20814N/A+
20814N/A+! void PICConvertYUVToRGB32Row(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width,
20814N/A+! int16 *kCoefficientsRgbY);
20814N/A+! Variant of FastConvertYUVToRGB32Row_IL that indexes the table through the pointer argument.
20814N/A+ .inline PICConvertYUVToRGB32Row, 24
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x24(%esp),%edi
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+ mov 0x34(%esp),%ecx
20814N/A+ ! ecx holds the table pointer, so the width is counted down in place at 0x30(%esp).
20814N/A+ jmp 1f
20814N/A+
20814N/A+0:
20814N/A+ movzbl (%edi),%eax
20814N/A+ add $0x1,%edi
20814N/A+ movzbl (%esi),%ebx
20814N/A+ add $0x1,%esi
20814N/A+ movq 2048(%ecx,%eax,8),%mm0
20814N/A+ movzbl (%edx),%eax
20814N/A+ paddsw 4096(%ecx,%ebx,8),%mm0
20814N/A+ movzbl 0x1(%edx),%ebx
20814N/A+ movq 0(%ecx,%eax,8),%mm1
20814N/A+ add $0x2,%edx
20814N/A+ movq 0(%ecx,%ebx,8),%mm2
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+1:
20814N/A+ subl $0x2,0x30(%esp)
20814N/A+ jns 0b
20814N/A+ ! Low bit of the remaining count set (odd width): convert one more pixel with a 4-byte store.
20814N/A+ andl $0x1,0x30(%esp)
20814N/A+ je 2f
20814N/A+
20814N/A+ movzbl (%edi),%eax
20814N/A+ movq 2048(%ecx,%eax,8),%mm0
20814N/A+ movzbl (%esi),%eax
20814N/A+ paddsw 4096(%ecx,%eax,8),%mm0
20814N/A+ movzbl (%edx),%eax
20814N/A+ movq 0(%ecx,%eax,8),%mm1
20814N/A+ paddsw %mm0,%mm1
20814N/A+ psraw $0x6,%mm1
20814N/A+ packuswb %mm1,%mm1
20814N/A+ movd %mm1,0x0(%ebp)
20814N/A+2:
20814N/A+ popa
20814N/A+ .end
20814N/A+
20814N/A+
20814N/A+! void PICScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width,
20814N/A+! int source_dx,
20814N/A+! int16 *kCoefficientsRgbY);
20814N/A+! Variant of ScaleYUVToRGB32Row_IL that indexes the table through the pointer argument (kept in ecx).
20814N/A+ .inline PICScaleYUVToRGB32Row, 28
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x24(%esp),%edi
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+ mov 0x38(%esp),%ecx
20814N/A+ xor %ebx,%ebx
20814N/A+ jmp 1f
20814N/A+ ! Main loop: ebx is the source position; edx is indexed by ebx>>16, edi/esi by ebx>>17; ebx += 0x34(%esp) per pixel.
20814N/A+0:
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%edi,%eax,1),%eax
20814N/A+ movq 2048(%ecx,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%esi,%eax,1),%eax
20814N/A+ paddsw 4096(%ecx,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq 0(%ecx,%eax,8),%mm1
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq 0(%ecx,%eax,8),%mm2
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+1:
20814N/A+ subl $0x2,0x30(%esp)
20814N/A+ jns 0b
20814N/A+ ! Low bit of the remaining count set (odd width): convert one more pixel with a 4-byte store.
20814N/A+ andl $0x1,0x30(%esp)
20814N/A+ je 2f
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%edi,%eax,1),%eax
20814N/A+ movq 2048(%ecx,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+ movzbl (%esi,%eax,1),%eax
20814N/A+ paddsw 4096(%ecx,%eax,8),%mm0
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%eax
20814N/A+ movq 0(%ecx,%eax,8),%mm1
20814N/A+ paddsw %mm0,%mm1
20814N/A+ psraw $0x6,%mm1
20814N/A+ packuswb %mm1,%mm1
20814N/A+ movd %mm1,0x0(%ebp)
20814N/A+
20814N/A+2:
20814N/A+ popa
20814N/A+ .end
20814N/A+
20814N/A+
20814N/A+! void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
20814N/A+! const uint8* u_buf,
20814N/A+! const uint8* v_buf,
20814N/A+! uint8* rgb_buf,
20814N/A+! int width,
20814N/A+! int source_dx,
20814N/A+! int16 *kCoefficientsRgbY);
20814N/A+ .inline PICLinearScaleYUVToRGB32Row, 28
20814N/A+ pusha
20814N/A+ mov 0x20(%esp),%edx
20814N/A+ mov 0x2c(%esp),%ebp
20814N/A+ mov 0x30(%esp),%ecx
20814N/A+ mov 0x38(%esp),%edi
20814N/A+ xor %ebx,%ebx
20814N/A+ ! NOTE(review): the ecx load and ebx clear just above are repeated below; they look redundant.
20814N/A+ ! source_width = width * source_dx (ebx is not added); the product overwrites the width slot at 0x30(%esp)
20814N/A+ mov 0x30(%esp), %ecx
20814N/A+ imull 0x34(%esp), %ecx
20814N/A+ mov %ecx, 0x30(%esp)
20814N/A+
20814N/A+ mov 0x34(%esp), %ecx
20814N/A+ xor %ebx,%ebx
20814N/A+ ! ebx = x = 0 (16.16 fixed point: 0x20000 below is 2.0)
20814N/A+ cmp $0x20000,%ecx
20814N/A+ ! keep x = 0 unless source_dx >= 2.0
20814N/A+ jl 1f
20814N/A+ mov $0x8000,%ebx
20814N/A+ ! ebx = x = 0.5 when scaling down to 1/2 or less
20814N/A+ jmp 1f
20814N/A+ ! Main loop: edi holds the table pointer, so the 0x24/0x28(%esp) arguments are loaded into esi each pass.
20814N/A+0:
20814N/A+ mov 0x24(%esp),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+
20814N/A+ movzbl (%esi,%eax,1),%ecx
20814N/A+ movzbl 1(%esi,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ andl $0x1fffe, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0x1fffe, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $17, %ecx
20814N/A+ movq 2048(%edi,%ecx,8),%mm0
20814N/A+
20814N/A+ mov 0x28(%esp),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x11,%eax
20814N/A+
20814N/A+ movzbl (%esi,%eax,1),%ecx
20814N/A+ movzbl 1(%esi,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ andl $0x1fffe, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0x1fffe, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $17, %ecx
20814N/A+ paddsw 4096(%edi,%ecx,8),%mm0
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%ecx
20814N/A+ movzbl 1(%edx,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ andl $0xffff, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0xffff, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $16, %ecx
20814N/A+ movq (%edi,%ecx,8),%mm1
20814N/A+ ! If x has reached source_width, finish with a single pixel at label 2.
20814N/A+ cmp 0x30(%esp), %ebx
20814N/A+ jge 2f
20814N/A+
20814N/A+ mov %ebx,%eax
20814N/A+ sar $0x10,%eax
20814N/A+ movzbl (%edx,%eax,1),%ecx
20814N/A+ movzbl 1(%edx,%eax,1),%esi
20814N/A+ mov %ebx,%eax
20814N/A+ add 0x34(%esp),%ebx
20814N/A+ andl $0xffff, %eax
20814N/A+ imul %eax, %esi
20814N/A+ xorl $0xffff, %eax
20814N/A+ imul %eax, %ecx
20814N/A+ addl %esi, %ecx
20814N/A+ shrl $16, %ecx
20814N/A+ movq (%edi,%ecx,8),%mm2
20814N/A+
20814N/A+ paddsw %mm0,%mm1
20814N/A+ paddsw %mm0,%mm2
20814N/A+ psraw $0x6,%mm1
20814N/A+ psraw $0x6,%mm2
20814N/A+ packuswb %mm2,%mm1
20814N/A+ movntq %mm1,0x0(%ebp)
20814N/A+ add $0x8,%ebp
20814N/A+
20814N/A+1:
20814N/A+ cmp %ebx, 0x30(%esp)
20814N/A+ jg 0b
20814N/A+ jmp 3f
20814N/A+ ! Single trailing pixel: only mm1 was computed on this path, so 4 bytes are stored.
20814N/A+2:
20814N/A+ paddsw %mm0, %mm1
20814N/A+ psraw $6, %mm1
20814N/A+ packuswb %mm1, %mm1
20814N/A+ movd %mm1, (%ebp)
20814N/A+
20814N/A+3:
20814N/A+ popa
20814N/A+ .end
20814N/Adiff --git a/gfx/ycbcr/yuv_row_table.cpp b/gfx/ycbcr/yuv_row_table.cpp
20814N/Aindex ad71341..518e947 100644
20814N/A--- a/gfx/ycbcr/yuv_row_table.cpp
20814N/A+++ b/gfx/ycbcr/yuv_row_table.cpp
20814N/A@@ -226,6 +226,10 @@ SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = {
20814N/A RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
20814N/A };
20814N/A
20814N/A+#ifdef __SUNPRO_CC
20814N/A+#pragma align 16 (kCoefficientsRgbY)
20814N/A+#endif
20814N/A+// NOTE(review): Studio docs say "#pragma align" must precede the variable's declaration; this one follows the definition -- confirm it has any effect.
20814N/A #undef RGBY
20814N/A #undef RGBU
20814N/A #undef RGBV