[ros-dev] [ros-diffs] [tkreuzer] 42353: asm version of DIB_32BPP_ColorFill: - Add frame pointer - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned - Optimize the loop - Add comments

Timo Kreuzer timo.kreuzer at web.de
Mon Aug 3 14:35:10 CEST 2009


That would be a few lines, wouldn't it?
Ok, let me do the work for you.
And now compile and show me how the loop would be optimized anywhere
near the asm code.
Or can you do better?

/*
 * Fill the rectangle *prcl of the 32bpp surface *pso with the pixel value
 * iColor. Mirrors the algorithm of the assembly version: compute the
 * address of the first pixel, then store cx pixels on each of cy scanlines.
 *
 * pso    - surface; pvScan0 (first scanline) and lDelta (byte stride) are read.
 * prcl   - rectangle to fill; right/bottom are treated as exclusive.
 * iColor - 32-bit pixel value written to every pixel in the rectangle.
 *
 * Always returns TRUE; an empty or inverted rectangle is a no-op.
 */
BOOLEAN
DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor)
{
    /* Signed on purpose: an inverted rectangle must yield a count <= 0
       instead of wrapping, and a negative stride must step backwards. */
    LONG lDelta, cx, cy, x;
    PULONG pulLine;

    cx = prcl->right - prcl->left;
    if (cx <= 0)
        return TRUE;

    cy = prcl->bottom - prcl->top;
    if (cy <= 0)
        return TRUE;

    lDelta = pso->lDelta;
    pulLine = (PULONG)((PCHAR)pso->pvScan0 + prcl->top * lDelta + prcl->left * 4);

    do
    {
        /* Store whole 32-bit pixels; memset() would only replicate the low
           byte of iColor and would count bytes, not pixels. */
        for (x = 0; x < cx; x++)
            pulLine[x] = iColor;

        /* lDelta is a byte stride, so advance through a byte pointer. */
        pulLine = (PULONG)((PCHAR)pulLine + lDelta);
        cy--;
    } while (cy > 0);

    return TRUE;
}



Aleksey Bragin schrieb:
> "in a few lines" - and what if about using the same algorithm you used
> in this assembly, but without pretending to be compiler?
>
>
> WBR,
> Aleksey.
>
> On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
>
>> I hereby challenge you to provide portable C code, that - compiled
>> with gcc - is faster than this assembly code.
>> Should be done in a few lines.
>>
>> I bet my ass on it: You will fail! No matter what optimization you
>> choose.
>> You would also fail with msvc or Intel compiler.
>>
>> Regards,
>> Timo
>>
>> Alex Ionescu wrote:
>>> The version that GCC 4.4 and CL 15 will generate would be way more
>>> optimized
>>> than this unportable/slower assembly code.
>>> This isn't 1994 anymore. You can't beat the compiler anymore.
>>>
>>> Best regards,
>>> Alex Ionescu
>>>
>>>
>>> On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer at svn.reactos.org> wrote:
>>>
>>>
>>>> Author: tkreuzer
>>>> Date: Mon Aug  3 00:31:29 2009
>>>> New Revision: 42353
>>>>
>>>> URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev
>>>> Log:
>>>> asm version of DIB_32BPP_ColorFill:
>>>> - Add frame pointer
>>>> - Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
>>>> - Optimize the loop
>>>> - Add comments
>>>>
>>>> Modified:
>>>>    trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>>
>>>> Modified:
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> URL:
>>>> http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s?rev=42353&r1=42352&r2=42353&view=diff
>>>>
>>>>
>>>> ==============================================================================
>>>>
>>>> ---
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> [iso-8859-1] (original)
>>>> +++
>>>> trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
>>>> [iso-8859-1] Mon Aug  3 00:31:29 2009
>>>> @@ -4,78 +4,62 @@
>>>>  * FILE:           
>>>> subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
>>>>  * PURPOSE:         ASM optimised 32bpp ColorFill
>>>>  * PROGRAMMERS:     Magnus Olsen
>>>> + *                  Timo Kreuzer (timo.kreuzer at reactos.org)
>>>>  */
>>>>
>>>> -  .globl _DIB_32BPP_ColorFill
>>>> -  .intel_syntax noprefix
>>>> +.intel_syntax noprefix
>>>>
>>>> -  .def   _DIB_32BPP_ColorFill;
>>>> -  .scl 2;
>>>> -  .type        32;
>>>> -  .endef
>>>> -
>>>> -  _DIB_32BPP_ColorFill:
>>>> -                        sub     esp, 24
>>>> -                        mov     ecx, [esp+32]
>>>> -                        mov     [esp+8], ebx
>>>> -                        mov     ebx, [esp+28]
>>>> -                        mov     [esp+20], ebp
>>>> -                        mov     ebp, [esp+36]
>>>> -                        mov     [esp+12], esi
>>>> -                        mov     [esp+16], edi
>>>> -                        mov     edi, [ecx]
>>>> -                        mov     esi, [ecx+8]
>>>> -                        mov     edx, [ebx+36]
>>>> -                        sub     esi, edi
>>>> -                        mov     edi, [ecx+4]
>>>> -                        mov     eax, edi
>>>> -                        imul    eax, edx
>>>> -                        add     eax, [ebx+32]
>>>> -                        mov     ebx, [ecx]
>>>> -                        lea     eax, [eax+ebx*4]
>>>> -                        mov     [esp+4], eax
>>>> -                        mov     eax, [ecx+12]
>>>> -                        cmp     eax, edi
>>>> -                        jbe     end
>>>> -                        sub     eax, edi
>>>> -                        mov     [esp], eax
>>>> -                        lea     esi, [esi+0]
>>>> +/*
>>>> + * BOOLEAN
>>>> + * _cdecl
>>>> + * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
>>>> +*/
>>>>
>>>> -               for_loop:
>>>> -                        mov     eax, ebp
>>>> -                        cld
>>>> -                        mov     ebx, esi
>>>> -                        mov     edi, [esp+4]
>>>> -                        test    edi, 3
>>>> -                        jnz     algin_draw
>>>> -                        mov     ecx, esi
>>>> -                        rep stosd
>>>> -                        add     [esp+4], edx
>>>> -                        dec     dword ptr [esp]
>>>> -                        jnz     for_loop
>>>> -               end:
>>>> -                        mov     ebx, [esp+8]
>>>> -                        mov     eax, 1
>>>> -                        mov     esi, [esp+12]
>>>> -                        mov     edi, [esp+16]
>>>> -                        mov     ebp, [esp+20]
>>>> -                        add     esp, 24
>>>> -                        ret
>>>> +.globl _DIB_32BPP_ColorFill
>>>> +_DIB_32BPP_ColorFill:
>>>> +        push    ebp
>>>> +        mov     ebp, esp
>>>> +        push    ebx
>>>> +        push    esi
>>>> +        push    edi
>>>> +        sub     esp, 4            /* Space for lDelta */
>>>>
>>>> -               algin_draw:
>>>> -                        stosd
>>>> -                        dec     ebx
>>>> -                        mov     ecx, ebx
>>>> -                        rol     eax, 16
>>>> -                        stosd
>>>> -                        add     [esp+4], edx
>>>> -                        dec     dword ptr [esp]
>>>> -                        jnz     for_loop
>>>> +        mov     edx, [ebp+12]     /* edx = prcl */
>>>> +        mov     ecx, [ebp+8]      /* ecx = pso */
>>>>
>>>> -                        mov     ebx, [esp+8]
>>>> -                        mov     eax, 1
>>>> -                        mov     esi, [esp+12]
>>>> -                        mov     edi, [esp+16]
>>>> -                        mov     ebp, [esp+20]
>>>> -                        add     esp, 24
>>>> -                        ret
>>>> +        mov     ebx, [ecx+0x24]   /* ebx = pso->lDelta; */
>>>> +        mov     [esp], ebx        /* lDelta = pso->lDelta; */
>>>> +        mov     edi, [edx+4]      /* edi = prcl->top; */
>>>> +        mov     eax, edi          /* eax = prcl->top; */
>>>> +        imul    eax, ebx          /* eax = prcl->top *
>>>> pso->lDelta; */
>>>> +        add     eax, [ecx+0x20]   /* eax += pso->pvScan0; */
>>>> +        mov     ebx, [edx]        /* ebx = prcl->left; */
>>>> +        lea     esi, [eax+ebx*4]  /* esi = pvLine0 = eax + 4 *
>>>> prcl->left;
>>>> */
>>>> +
>>>> +        mov     ebx, [edx+8]      /* ebx = prcl->right; */
>>>> +        sub     ebx, [edx]        /* ebx = prcl->right -
>>>> prcl->left; */
>>>> +        jbe     end               /* if (ebx <= 0) goto end; */
>>>> +
>>>> +        mov     edx, [edx+12]     /* edx = prcl->bottom; */
>>>> +        sub     edx, edi          /* edx -= prcl->top; */
>>>> +        jbe     end               /* if (edx <= 0) goto end; */
>>>> +
>>>> +        mov     eax, [ebp+16]     /* eax = iColor; */
>>>> +        cld
>>>> +
>>>> +for_loop:                         /* do { */
>>>> +        mov     edi, esi          /*   edi = pvLine0; */
>>>> +        mov     ecx, ebx          /*   ecx = cx; */
>>>> +        rep stosd                 /*   memset(pvLine0, iColor,
>>>> cx); */
>>>> +        add     esi, [esp]        /*   pvLine0 += lDelta; */
>>>> +        dec     edx               /*   cy--; */
>>>> +        jnz     for_loop          /* } while (cy > 0); */
>>>> +
>>>> +end:
>>>> +        mov     eax, 1
>>>> +        add     esp, 4
>>>> +        pop     edi
>>>> +        pop     esi
>>>> +        pop     ebx
>>>> +        pop     ebp
>>>> +        ret
>>>>
>>>>
>>>>
>>>>
>>>
>>> _______________________________________________
>>> Ros-dev mailing list
>>> Ros-dev at reactos.org
>>> http://www.reactos.org/mailman/listinfo/ros-dev
>>
>> _______________________________________________
>> Ros-dev mailing list
>> Ros-dev at reactos.org
>> http://www.reactos.org/mailman/listinfo/ros-dev
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Ros-dev mailing list
> Ros-dev at reactos.org
> http://www.reactos.org/mailman/listinfo/ros-dev

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.reactos.org/pipermail/ros-dev/attachments/20090803/26e38f0a/attachment.htm 


More information about the Ros-dev mailing list