[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments
Timo Kreuzer
timo.kreuzer at web.de
Mon Aug 3 05:31:05 CEST 2009
I hereby challenge you to provide portable C code that - compiled with
gcc - is faster than this assembly code.
It should be doable in a few lines.
I bet my ass on it: You will fail! No matter what optimization you choose.
You would also fail with msvc or Intel compiler.
Regards,
Timo
Alex Ionescu wrote:
> The version that GCC 4.4 and CL 15 will generate would be way more optimized
> than this unportable/slower assembly code.
> This isn't 1994 anymore. You can't beat the compiler anymore.
>
> Best regards,
> Alex Ionescu
>
>
> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>
>
>> Author: tkreuzer
>> Date: Mon Aug 3 00:31:29 2009
>> New Revision: 42353
>>
>> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
>> Log:
>> asm version of DIB_32BPP_ColorFill:
>> - Add frame pointer
>> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
>> - Optimize the loop
>> - Add comments
>>
>> Modified:
>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>
>> Modified:
>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>> URL:
>> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>>
>> ==============================================================================
>> --- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>> [iso-8859-1] (original)
>> +++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>> [iso-8859-1] Mon Aug 3 00:31:29 2009
>> @@ -4,78 +4,62 @@
>> * FILE: subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
>> * PURPOSE: ASM optimised 32bpp ColorFill
>> * PROGRAMMERS: Magnus Olsen
>> + * Timo Kreuzer (timo.kreuzer at reactos.org)
>> */
>>
>> - .globl _DIB_32BPP_ColorFill
>> - .intel_syntax noprefix
>> +.intel_syntax noprefix
>>
>> - .def _DIB_32BPP_ColorFill;
>> - .scl 2;
>> - .type 32;
>> - .endef
>> -
>> - _DIB_32BPP_ColorFill:
>> - sub esp, 24
>> - mov ecx, [esp+32]
>> - mov [esp+8], ebx
>> - mov ebx, [esp+28]
>> - mov [esp+20], ebp
>> - mov ebp, [esp+36]
>> - mov [esp+12], esi
>> - mov [esp+16], edi
>> - mov edi, [ecx]
>> - mov esi, [ecx+8]
>> - mov edx, [ebx+36]
>> - sub esi, edi
>> - mov edi, [ecx+4]
>> - mov eax, edi
>> - imul eax, edx
>> - add eax, [ebx+32]
>> - mov ebx, [ecx]
>> - lea eax, [eax+ebx*4]
>> - mov [esp+4], eax
>> - mov eax, [ecx+12]
>> - cmp eax, edi
>> - jbe end
>> - sub eax, edi
>> - mov [esp], eax
>> - lea esi, [esi+0]
>> +/*
>> + * BOOLEAN
>> + * _cdecl
>> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
>> +*/
>>
>> - for_loop:
>> - mov eax, ebp
>> - cld
>> - mov ebx, esi
>> - mov edi, [esp+4]
>> - test edi, 3
>> - jnz algin_draw
>> - mov ecx, esi
>> - rep stosd
>> - add [esp+4], edx
>> - dec dword ptr [esp]
>> - jnz for_loop
>> - end:
>> - mov ebx, [esp+8]
>> - mov eax, 1
>> - mov esi, [esp+12]
>> - mov edi, [esp+16]
>> - mov ebp, [esp+20]
>> - add esp, 24
>> - ret
>> +.globl _DIB_32BPP_ColorFill
>> +_DIB_32BPP_ColorFill:
>> + push ebp
>> + mov ebp, esp
>> + push ebx
>> + push esi
>> + push edi
>> + sub esp, 4 /* Space for lDelta */
>>
>> - algin_draw:
>> - stosd
>> - dec ebx
>> - mov ecx, ebx
>> - rol eax, 16
>> - stosd
>> - add [esp+4], edx
>> - dec dword ptr [esp]
>> - jnz for_loop
>> + mov edx, [ebp+12] /* edx = prcl */
>> + mov ecx, [ebp+8] /* ecx = pso */
>>
>> - mov ebx, [esp+8]
>> - mov eax, 1
>> - mov esi, [esp+12]
>> - mov edi, [esp+16]
>> - mov ebp, [esp+20]
>> - add esp, 24
>> - ret
>> + mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
>> + mov [esp], ebx /* lDelta = pso->lDelta; */
>> + mov edi, [edx+4] /* edi = prcl->top; */
>> + mov eax, edi /* eax = prcl->top; */
>> + imul eax, ebx /* eax = prcl->top * pso->lDelta; */
>> + add eax, [ecx+0x20] /* eax += pso->pvScan0; */
>> + mov ebx, [edx] /* ebx = prcl->left; */
>> + lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 * prcl->left; */
>> +
>> + mov ebx, [edx+8] /* ebx = prcl->right; */
>> + sub ebx, [edx] /* ebx = prcl->right - prcl->left; */
>> + jbe end /* if (ebx <= 0) goto end; */
>> +
>> + mov edx, [edx+12] /* edx = prcl->bottom; */
>> + sub edx, edi /* edx -= prcl->top; */
>> + jbe end /* if (edx <= 0) goto end; */
>> +
>> + mov eax, [ebp+16] /* eax = iColor; */
>> + cld
>> +
>> +for_loop: /* do { */
>> + mov edi, esi /* edi = pvLine0; */
>> + mov ecx, ebx /* ecx = cx; */
>> + rep stosd /* memset(pvLine0, iColor, cx); */
>> + add esi, [esp] /* pvLine0 += lDelta; */
>> + dec edx /* cy--; */
>> + jnz for_loop /* } while (cy > 0); */
>> +
>> +end:
>> + mov eax, 1
>> + add esp, 4
>> + pop edi
>> + pop esi
>> + pop ebx
>> + pop ebp
>> + ret
>>
>>
>>
>>
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.reactos.org/pipermail/ros-dev/attachments/20090803/f3f777e3/attachment.htm
More information about the Ros-dev
mailing list