[ros-diffs] [sir_richard] 45064: Perf improvements: [NTOS]: Optimize trap entry/exit by manually copying registers to the trap frame the correct way from the first time. Avoids conversion between PUSHA and KTRAP_FRAMEs and makes trap frames compatible the whole time (other than being slightly faster). [NTOS]: Provide compiler with hints on likely code paths during trap entry and exit, which makes the code more linear and improves performance. The following assumptions (known to be true) are made: (1) Interrupts happen more often than system calls (per unit of time), so prioritize paths we take during interrupts. (2) The CPU spends most of its time in Ring 3, so prioritize traps from user-mode. (3) V8086 mode, debugging, 16-bit stacks, are uncommon, so de-prioritize them. [NTOS]: Use KTRAP_FRAME offset names recommended by Timo instead of substraction which was confusing some people (still seems clearer to me).

sir_richard at svn.reactos.org sir_richard at svn.reactos.org
Wed Jan 13 23:01:20 CET 2010


Author: sir_richard
Date: Wed Jan 13 23:01:20 2010
New Revision: 45064

URL: http://svn.reactos.org/svn/reactos?rev=45064&view=rev
Log:
Perf improvements:
    [NTOS]: Optimize trap entry/exit by manually copying registers to the trap frame the correct way from the first time. Avoids conversion between PUSHA and KTRAP_FRAMEs and makes trap frames compatible the whole time (other than being slightly faster).
    [NTOS]: Provide compiler with hints on likely code paths during trap entry and exit, which makes the code more linear and improves performance. The following assumptions (known to be true) are made: (1) Interrupts happen more often than system calls (per unit of time), so prioritize paths we take during interrupts. (2) The CPU spends most of its time in Ring 3, so prioritize traps from user-mode. (3) V8086 mode, debugging, 16-bit stacks, are uncommon, so de-prioritize them.
    [NTOS]: Use KTRAP_FRAME offset names recommended by Timo instead of substraction which was confusing some people (still seems clearer to me).

Modified:
    trunk/reactos/ntoskrnl/include/internal/i386/asmmacro.S
    trunk/reactos/ntoskrnl/include/internal/trap_x.h
    trunk/reactos/ntoskrnl/ke/i386/traphdlr.c

Modified: trunk/reactos/ntoskrnl/include/internal/i386/asmmacro.S
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/include/internal/i386/asmmacro.S?rev=45064&r1=45063&r2=45064&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/include/internal/i386/asmmacro.S [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/include/internal/i386/asmmacro.S [iso-8859-1] Wed Jan 13 23:01:20 2010
@@ -237,9 +237,17 @@
         push 0
     .endif
     
-    /* Save all register state before we touch it */
-    pushad
-    
+    /* Make space for trap frame and save registers before we enter C code */
+    sub esp, KTRAP_FRAME_ERROR_CODE
+    mov [esp+KTRAP_FRAME_EAX], eax
+    mov [esp+KTRAP_FRAME_EBX], ebx
+    mov [esp+KTRAP_FRAME_ECX], ecx
+    mov [esp+KTRAP_FRAME_EDX], edx
+    mov [esp+KTRAP_FRAME_ESI], esi
+    mov [esp+KTRAP_FRAME_EDI], edi
+    mov [esp+KTRAP_FRAME_EBP], ebp
+    mov ecx, esp
+        
     /*
      * The GPF and Invalid Opcode handlers are performance critical when talking
      * about V8086 traps, because they control the main flow of execution during
@@ -270,14 +278,9 @@
      * up DS/ES and lets us go on our merry way...
      */
     .if \FastV86
-        /* ESP+12 is EFLAGS on interrupt frame, add 8*4 for the PUSHAD frame */
-        mov edx, [esp+12+8*4]
+        mov edx, [esp+KTRAP_FRAME_EFLAGS]
     .endif
-    
-    /* Now make space for the trap frame and store the pointer as first arg */
-    sub esp, KTRAP_FRAME_LENGTH - KTRAP_FRAME_PREVIOUS_MODE
-    mov ecx, esp
-    
+
     /* Normally we just have one parameter, but fast V86 handlers need two */
     .if \FastV86
         jmp @&Name&Handler at 8

Modified: trunk/reactos/ntoskrnl/include/internal/trap_x.h
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/include/internal/trap_x.h?rev=45064&r1=45063&r2=45064&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/include/internal/trap_x.h [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/include/internal/trap_x.h [iso-8859-1] Wed Jan 13 23:01:20 2010
@@ -139,44 +139,6 @@
 
 VOID
 FORCEINLINE
-KiTrapFrameFromPushaStack(IN PKTRAP_FRAME TrapFrame)
-{
-    /*
-     * This sequence is Bavarian Alchemist Black Magic
-     *
-     * *** DO NOT MODIFY ***
-     */
-    TrapFrame->Edx = TrapFrame->Esi;
-    TrapFrame->Esi = TrapFrame->PreviousPreviousMode;
-    TrapFrame->Ecx = TrapFrame->Ebx;
-    TrapFrame->Ebx = TrapFrame->Edi;
-    TrapFrame->Edi = TrapFrame->Eax;
-    TrapFrame->Eax = TrapFrame->Ebp;
-    TrapFrame->Ebp = (ULONG)TrapFrame->ExceptionList;
-    TrapFrame->TempEsp = TrapFrame->SegFs;
-}
-
-VOID
-FORCEINLINE
-KiPushaStackFromTrapFrame(IN PKTRAP_FRAME TrapFrame)
-{
-    /*
-     * This sequence is Bavarian Alchemist Black Magic
-     *
-     * *** DO NOT MODIFY ***
-     */
-    TrapFrame->SegFs = TrapFrame->TempEsp;
-    TrapFrame->ExceptionList = (PVOID)TrapFrame->Ebp;
-    TrapFrame->Ebp = TrapFrame->Eax;
-    TrapFrame->Eax = TrapFrame->Edi;
-    TrapFrame->Edi = TrapFrame->Ebx;
-    TrapFrame->Ebx = TrapFrame->Ecx;
-    TrapFrame->PreviousPreviousMode = TrapFrame->Esi;
-    TrapFrame->Esi = TrapFrame->Edx;
-}    
-
-VOID
-FORCEINLINE
 KiCheckForApcDelivery(IN PKTRAP_FRAME TrapFrame)
 {
     PKTHREAD Thread;
@@ -247,19 +209,29 @@
 VOID
 KiTrapReturn(IN PKTRAP_FRAME TrapFrame)
 {
-    /* Restore registers */
-    KiPushaStackFromTrapFrame(TrapFrame);
-
     /* Regular interrupt exit */
     __asm__ __volatile__
     (
         "movl %0, %%esp\n"
-        "addl %1, %%esp\n"
-        "popa\n"
-        "addl $4, %%esp\n"
+        "movl %c[a](%%esp), %%eax\n"
+        "movl %c[b](%%esp), %%ebx\n"
+        "movl %c[c](%%esp), %%ecx\n"
+        "movl %c[d](%%esp), %%edx\n"
+        "movl %c[s](%%esp), %%esi\n"
+        "movl %c[i](%%esp), %%edi\n"
+        "movl %c[p](%%esp), %%ebp\n"
+        "addl $%c[e],%%esp\n"
         "iret\n"
         :
-        : "r"(TrapFrame), "i"(KTRAP_FRAME_LENGTH - KTRAP_FRAME_PREVIOUS_MODE)
+        : "r"(TrapFrame),
+          [a] "i"(KTRAP_FRAME_EAX),
+          [b] "i"(KTRAP_FRAME_EBX),
+          [c] "i"(KTRAP_FRAME_ECX),
+          [d] "i"(KTRAP_FRAME_EDX),
+          [s] "i"(KTRAP_FRAME_ESI),
+          [i] "i"(KTRAP_FRAME_EDI),
+          [p] "i"(KTRAP_FRAME_EBP),
+          [e] "i"(KTRAP_FRAME_EIP)
         : "%esp"
     );
 }

Modified: trunk/reactos/ntoskrnl/ke/i386/traphdlr.c
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/ke/i386/traphdlr.c?rev=45064&r1=45063&r2=45064&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/ke/i386/traphdlr.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/ke/i386/traphdlr.c [iso-8859-1] Wed Jan 13 23:01:20 2010
@@ -56,14 +56,11 @@
     KTRAP_EXIT_SKIP_BITS SkipBits = { .Bits = Skip };
     KiExitTrapDebugChecks(TrapFrame, SkipBits);
 
-    /* If you skip volatile reload, you must skip segment reload */
-    ASSERT((SkipBits.SkipVolatiles == FALSE) || (SkipBits.SkipSegments == TRUE));
-
     /* Restore the SEH handler chain */
     KeGetPcr()->Tib.ExceptionList = TrapFrame->ExceptionList;
     
     /* Check if the previous mode must be restored */
-    if (!SkipBits.SkipPreviousMode)
+    if (__builtin_expect(!SkipBits.SkipPreviousMode, 0)) /* More INTS than SYSCALLs */
     {
         /* Not handled yet */
         UNIMPLEMENTED;
@@ -71,7 +68,7 @@
     }
 
     /* Check if there are active debug registers */
-    if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
+    if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
     {
         /* Not handled yet */
         UNIMPLEMENTED;
@@ -79,10 +76,10 @@
     }
     
     /* Check if this was a V8086 trap */
-    if (TrapFrame->EFlags & EFLAGS_V86_MASK) KiTrapReturn(TrapFrame);
+    if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0)) KiTrapReturn(TrapFrame);
 
     /* Check if the trap frame was edited */
-    if (!(TrapFrame->SegCs & FRAME_EDITED))
+    if (__builtin_expect(!(TrapFrame->SegCs & FRAME_EDITED), 0))
     {
         /* Not handled yet */
         UNIMPLEMENTED;
@@ -90,7 +87,7 @@
     }
     
     /* Check if this is a user trap */
-    if (KiUserTrap(TrapFrame))
+    if (__builtin_expect(KiUserTrap(TrapFrame), 1)) /* Ring 3 is where we spend time */
     {
         /* Check if segments should be restored */
         if (!SkipBits.SkipSegments)
@@ -105,37 +102,30 @@
         /* Always restore FS since it goes from KPCR to TEB */
         Ke386SetFs(TrapFrame->SegFs);
     }
-    
-    /* Check if the caller wants to skip volatiles */
-    if (SkipBits.SkipVolatiles)
-    {
+
+    /* Check for ABIOS code segment */
+    if (__builtin_expect(TrapFrame->SegCs == 0x80, 0))
+    {
+        /* Not handled yet */
+        UNIMPLEMENTED;
+        while (TRUE);
+    }
+    
+    /* Check for system call -- a system call skips volatiles! */
+    if (__builtin_expect(SkipBits.SkipVolatiles, 0)) /* More INTs than SYSCALLs */
+    {
+        /* Not handled yet */
         /* 
          * When we do the system call handler through this path, we need
          * to have some sort to restore the kernel EAX instead of pushing
          * back the user EAX. We'll figure it out...
          */
-        DPRINT1("Warning: caller doesn't want volatiles restored\n"); 
-    }
-
-    /* Check for ABIOS code segment */
-    if (TrapFrame->SegCs == 0x80)
-    {
-        /* Not handled yet */
-        UNIMPLEMENTED;
-        while (TRUE);
-    }
-    
-    /* Check for system call -- a system call skips volatiles! */
-    if (SkipBits.SkipVolatiles)
-    {
-        /* Not handled yet */
-        UNIMPLEMENTED;
-        while (TRUE);
+        DPRINT1("Warning: caller doesn't want volatiles restored\n");
     }
     else
     {
         /* Return from interrupt */
-        KiTrapReturn(TrapFrame); 
+        KiTrapReturn(TrapFrame);
     }
 }
 
@@ -154,7 +144,7 @@
         Thread->Alerted[KernelMode] = FALSE;
 
         /* Are there pending user APCs? */
-        if (!Thread->ApcState.UserApcPending) break;
+        if (__builtin_expect(!Thread->ApcState.UserApcPending, 1)) break;
 
         /* Raise to APC level and enable interrupts */
         OldIrql = KfRaiseIrql(APC_LEVEL);
@@ -168,11 +158,11 @@
         _disable();
         
         /* Return if this isn't V86 mode anymore */
-        if (TrapFrame->EFlags & EFLAGS_V86_MASK) return;
+        if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0)) return;
     }
      
     /* If we got here, we're still in a valid V8086 context, so quit it */
-    if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
+    if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
     {
         /* Not handled yet */
         UNIMPLEMENTED;
@@ -207,10 +197,7 @@
     Ke386SetFs(KGDT_R0_PCR);
     Ke386SetDs(KGDT_R3_DATA | RPL_MASK);
     Ke386SetEs(KGDT_R3_DATA | RPL_MASK);
-    
-    /* Save registers */
-    KiTrapFrameFromPushaStack(TrapFrame);
-    
+
     /* Save exception list and bogus previous mode */
     TrapFrame->PreviousPreviousMode = -1;
     TrapFrame->ExceptionList = KeGetPcr()->Tib.ExceptionList;
@@ -220,7 +207,7 @@
     
     /* Save DR7 and check for debugging */
     TrapFrame->Dr7 = __readdr(7);
-    if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
+    if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
     {
         UNIMPLEMENTED;
         while (TRUE);
@@ -231,21 +218,18 @@
 FASTCALL
 KiEnterInterruptTrap(IN PKTRAP_FRAME TrapFrame)
 {
-    /* Save registers */
-    KiTrapFrameFromPushaStack(TrapFrame);
-    
     /* Set bogus previous mode */
     TrapFrame->PreviousPreviousMode = -1;
     
     /* Check for V86 mode */
-    if (TrapFrame->EFlags & EFLAGS_V86_MASK)
+    if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0))
     {
         UNIMPLEMENTED;
         while (TRUE);
     }
     
     /* Check if this wasn't kernel code */
-    if (TrapFrame->SegCs != KGDT_R0_CODE)
+    if (__builtin_expect(TrapFrame->SegCs != KGDT_R0_CODE, 1)) /* Ring 3 is more common */
     {
         /* Save segments and then switch to correct ones */
         TrapFrame->SegFs = Ke386GetFs();
@@ -269,7 +253,7 @@
     
     /* Flush DR7 and check for debugging */
     TrapFrame->Dr7 = 0;
-    if (KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF)
+    if (__builtin_expect(KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF, 0))
     {
         UNIMPLEMENTED;
         while (TRUE);
@@ -307,22 +291,19 @@
     TrapFrame->SegGs = Ke386GetGs();
     Ke386SetFs(KGDT_R0_PCR);
 
-    /* Save registers */
-    KiTrapFrameFromPushaStack(TrapFrame);
-    
     /* Save exception list and bogus previous mode */
     TrapFrame->PreviousPreviousMode = -1;
     TrapFrame->ExceptionList = KeGetPcr()->Tib.ExceptionList;
     
     /* Check for 16-bit stack */
-    if ((ULONG_PTR)TrapFrame < 0x10000)
+    if (__builtin_expect((ULONG_PTR)TrapFrame < 0x10000, 0))
     {
         UNIMPLEMENTED;
         while (TRUE);
     }
     
     /* Check for V86 mode */
-    if (TrapFrame->EFlags & EFLAGS_V86_MASK)
+    if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0))
     {
         /* Restore V8086 segments into Protected Mode segments */
         TrapFrame->SegFs = TrapFrame->V86Fs;
@@ -336,7 +317,7 @@
     
     /* Flush DR7 and check for debugging */
     TrapFrame->Dr7 = 0;
-    if (KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF)
+    if (__builtin_expect(KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF, 0))
     {
         UNIMPLEMENTED;
         while (TRUE);
@@ -1041,13 +1022,13 @@
     KIRQL OldIrql;
     
     /* Check for V86 GPF */
-    if (EFlags & EFLAGS_V86_MASK)
+    if (__builtin_expect(EFlags & EFLAGS_V86_MASK, 1))
     {
         /* Enter V86 trap */
         KiEnterV86Trap(TrapFrame);
         
         /* Must be a VDM process */
-        if (!PsGetCurrentProcess()->VdmObjects)
+        if (__builtin_expect(!PsGetCurrentProcess()->VdmObjects, 0))
         {
             /* Enable interrupts */
             _enable();
@@ -1063,7 +1044,7 @@
         _enable();
         
         /* Handle the V86 opcode */
-        if (Ki386HandleOpcodeV86(TrapFrame) == 0xFF)
+        if (__builtin_expect(Ki386HandleOpcodeV86(TrapFrame) == 0xFF, 0))
         {
             /* Should only happen in VDM mode */
             UNIMPLEMENTED;
@@ -1075,7 +1056,7 @@
         _disable();
         
         /* Do a quick V86 exit if possible */
-        if (TrapFrame->EFlags & EFLAGS_V86_MASK) KiExitV86Trap(TrapFrame);
+        if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 1)) KiExitV86Trap(TrapFrame);
         
         /* Exit trap the slow way */
         KiEoiHelper(TrapFrame);




More information about the Ros-diffs mailing list