[ros-dev] Patch - somebody please review (FPU state saving)

Gregor Anich blight at blight.eu.org
Thu Nov 4 15:21:07 CET 2004


Hi!

Here's a patch which mainly adds support for saving the FPU state on
task switches. It does not yet protect against user-mode/kernel-mode FPU
state "mixup", because I want this to be discussed further before it is
implemented.
I have also attached a program which tests whether the FPU state is
saved: it creates 2 threads which both do some FPU calculations, and
then checks the results.
One thing which is changed in the patch is that the task-switching
function (Ki386ContextSwitch) no longer calculates TSS->Esp0 from
NewThread->InitialStack; instead it saves TSS->Esp0 on the stack and
restores it from there. For a v86 call there no longer seems to be any
reason to change the InitialStack of the thread: InitialStack seems to
be used only for IoGetInitialStack and (before this patch) for setting
up TSS->Esp0 on a task switch. So a v86 call now saves and changes
TSS->Esp0 instead of Thread->InitialStack.

I have been talking with Filip about having 2 FPU save areas, one for
kernel mode and one for user mode. He was against it because of the
stack space it would take away. He also told me that the stack size
cannot be changed, because some drivers rely on it to calculate the
remaining stack. What we could do is:
1) Use KeSaveFloatingPointState()/KeRestoreFloatingPointState() in
kernel mode whenever the FPU is used.
2) Compile kernel-mode code with the soft-FPU switch, to make the
compiler call a soft-FPU library instead of using the x87.
3) Put the FPU save areas somewhere else. Maybe we could allocate
MM_STACK_SIZE + 2 * sizeof (FX_SAVE_AREA) bytes and set InitialStack to
StackBase + MM_STACK_SIZE (like now), so the stack size available for
code would not change.

It would also be cool to save the state only when needed. Maybe this
could be done with a per-processor pointer to the FX_SAVE_AREA of the
last thread which used the FPU.
When a device-not-present exception happens (after a task switch which
set TS in %cr0), we check whether the pointer is NULL. If it is not, we
save the current FPU state into the area it points to. Then we set the
pointer to the current thread's FX_SAVE_AREA and load the saved state
(if any) of the current thread.
When a thread terminates and the per-processor FX_SAVE_AREA pointer
points to the terminated thread's FX_SAVE_AREA, we set it to NULL.

Thanks,
  blight
-------------- next part --------------
A non-text attachment was scrubbed...
Name: fputest.zip
Type: application/octet-stream
Size: 112893 bytes
Desc: not available
Url : http://reactos.com:8080/pipermail/ros-dev/attachments/20041104/9fcb68a6/fputest-0001.obj
-------------- next part --------------
Index: include/ntos/tss.h
===================================================================
RCS file: /CVS/ReactOS/reactos/include/ntos/tss.h,v
retrieving revision 1.4
diff -u -r1.4 tss.h
--- include/ntos/tss.h	5 Jan 2004 14:28:19 -0000	1.4
+++ include/ntos/tss.h	29 Oct 2004 14:55:49 -0000
@@ -5,7 +5,8 @@
 #ifndef __INCLUDE_DDK_I386_TSS_H
 #define __INCLUDE_DDK_I386_TSS_H
 
-#define KTSS_ESP0 (0x4)
+#define KTSS_ESP0      (0x4)
+#define KTSS_EFLAGS    (0x24)
 #define KTSS_IOMAPBASE (0x66)
 
 #ifndef __ASM__
Index: ntoskrnl/include/internal/trap.h
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/include/internal/trap.h,v
retrieving revision 1.4
diff -u -r1.4 trap.h
--- ntoskrnl/include/internal/trap.h	8 Sep 2002 10:23:23 -0000	1.4
+++ ntoskrnl/include/internal/trap.h	3 Nov 2004 17:43:44 -0000
@@ -27,10 +27,9 @@
 #ifndef __NTOSKRNL_INCLUDE_INTERNAL_TRAP_H
 #define __NTOSKRNL_INCLUDE_INTERNAL_TRAP_H
 
-#define TF_SAVED_ORIG_STACK (0x8C)
-#define TF_REGS         (0x90)
-#define TF_ORIG_EBP     (0x94)
-
+#define TF_SAVED_EXCEPTION_STACK (0x8C)
+#define TF_REGS                  (0x90)
+#define TF_ORIG_EBP              (0x94)
 
 #ifndef __ASM__
 
@@ -40,7 +39,7 @@
 {
   KTRAP_FRAME Tf;
   
-  ULONG SavedInitialStack;
+  ULONG SavedExceptionStack;
 
   /*
    * These are put on the top of the stack by the routine that entered
Index: ntoskrnl/include/internal/i386/fpu.h
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/include/internal/i386/fpu.h,v
retrieving revision 1.5
diff -u -r1.5 fpu.h
--- ntoskrnl/include/internal/i386/fpu.h	12 Oct 2004 00:33:30 -0000	1.5
+++ ntoskrnl/include/internal/i386/fpu.h	22 Oct 2004 22:37:26 -0000
@@ -20,11 +20,20 @@
 #ifndef __NTOSKRNL_INCLUDE_INTERNAL_I386_FPU_H
 #define __NTOSKRNL_INCLUDE_INTERNAL_I386_FPU_H
 
+#define SIZEOF_FX_SAVE_AREA    528
+
+#ifndef __ASM__
+
+#include <internal/i386/ke.h>
+
 extern ULONG HardwareMathSupport;
 
 VOID
 KiCheckFPU(VOID);
 
+NTSTATUS
+KiHandleFpuFault(PKTRAP_FRAME Tf, ULONG ExceptionNr);
+
 typedef struct _FNSAVE_FORMAT {
 	ULONG ControlWord;
 	ULONG StatusWord;
@@ -62,4 +71,7 @@
 	ULONG Cr0NpxState;
 } FX_SAVE_AREA, *PFX_SAVE_AREA;
 
+#endif /* !__ASM__ */
+
 #endif /* __NTOSKRNL_INCLUDE_INTERNAL_I386_FPU_H */
+
Index: ntoskrnl/include/internal/i386/ke.h
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/include/internal/i386/ke.h,v
retrieving revision 1.15
diff -u -r1.15 ke.h
--- ntoskrnl/include/internal/i386/ke.h	31 Oct 2004 12:49:36 -0000	1.15
+++ ntoskrnl/include/internal/i386/ke.h	4 Nov 2004 00:17:41 -0000
@@ -69,9 +69,17 @@
 #define KTRAP_FRAME_RESERVED9      (0x8A)
 #define KTRAP_FRAME_SIZE           (0x8C)
 
+#define X86_EFLAGS_IF           0x00000200 /* Interrupt Enable flag */
+#define X86_EFLAGS_IOPL         0x00003000 /* I/O Privilege Level bits */
+#define X86_EFLAGS_NT           0x00004000 /* Nested Task flag */
 #define X86_EFLAGS_VM           0x00020000 /* Virtual Mode */
 #define X86_EFLAGS_ID           0x00200000 /* CPUID detection flag */
 
+#define X86_CR0_NE              0x00000020 /* enable native FPU error reporting */
+#define X86_CR0_TS              0x00000008 /* enable exception on FPU instruction for task switch */
+#define X86_CR0_EM              0x00000004 /* enable FPU emulation (disable FPU) */
+#define X86_CR0_MP              0x00000002 /* enable FPU monitoring */
+
 #define X86_CR4_PAE             0x00000020 /* enable physical address extensions */
 #define X86_CR4_PGE             0x00000080 /* enable global pages */
 
@@ -85,6 +93,11 @@
 
 #define X86_EXT_FEATURE_3DNOW   0x40000000 /* 3DNOW! extension present */
 
+/* Possible values for KTHREAD's NpxState */
+#define NPX_STATE_INVALID   0x01
+#define NPX_STATE_VALID     0x02
+#define NPX_STATE_DIRTY     0x04
+
 #ifndef __ASM__
 
 typedef struct _KTRAP_FRAME
@@ -187,12 +200,20 @@
 	                                 : "r" (X));
 #define Ke386SaveFlags(x)        __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
 #define Ke386RestoreFlags(x)     __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory")
-#define Ke386GetCr4()            ({ \
+
+#define _Ke386GetCr(N)           ({ \
                                      unsigned int __d; \
-                                     __asm__("movl %%cr4,%0\n\t" :"=r" (__d)); \
+                                     __asm__("movl %%cr" #N ",%0\n\t" :"=r" (__d)); \
                                      __d; \
                                  })
-#define Ke386SetCr4(X)           __asm__ __volatile__("movl %0,%%cr4": :"r" (X));
+#define _Ke386SetCr(N,X)         __asm__ __volatile__("movl %0,%%cr" #N : :"r" (X));
+
+#define Ke386GetCr0()            _Ke386GetCr(0)
+#define Ke386SetCr0(X)           _Ke386SetCr(0,X)
+#define Ke386GetCr2()            _Ke386GetCr(2)
+#define Ke386SetCr2(X)           _Ke386SetCr(2,X)
+#define Ke386GetCr4()            _Ke386GetCr(4)
+#define Ke386SetCr4(X)           _Ke386SetCr(4,X)
 
 static inline void Ki386Cpuid(ULONG Op, PULONG Eax, PULONG Ebx, PULONG Ecx, PULONG Edx)
 {
@@ -207,6 +228,7 @@
 
 
 #elif defined(_MSC_VER)
+
 #define Ke386DisableInterrupts() __asm cli
 #define Ke386EnableInterrupts()  __asm sti
 #define Ke386HaltProcessor()     __asm hlt
Index: ntoskrnl/include/internal/i386/ps.h
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/include/internal/i386/ps.h,v
retrieving revision 1.16
diff -u -r1.16 ps.h
--- ntoskrnl/include/internal/i386/ps.h	30 Oct 2004 23:48:56 -0000	1.16
+++ ntoskrnl/include/internal/i386/ps.h	3 Nov 2004 19:02:51 -0000
@@ -26,6 +26,7 @@
 #define KTHREAD_STACK_LIMIT       0x1C
 #define KTHREAD_TEB               0x20
 #define KTHREAD_KERNEL_STACK      0x28
+#define KTHREAD_NPX_STATE         0x31
 #define KTHREAD_APCSTATE_PROCESS  0x44
 #define KTHREAD_SERVICE_TABLE     0xDC
 #define KTHREAD_PREVIOUS_MODE     0x137
@@ -51,12 +52,12 @@
 
 #pragma pack(push,4)
 
-// Fixme: Use correct types?
+/* Fixme: Use correct types? */
 typedef struct _KPROCESSOR_STATE {
    PCONTEXT ContextFrame;
    PVOID SpecialRegisters;
 } KPROCESSOR_STATE;
-  
+
 /* ProcessoR Control Block */ 
 typedef struct _KPRCB {
 	USHORT MinorVersion;
Index: ntoskrnl/ke/main.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/main.c,v
retrieving revision 1.202
diff -u -r1.202 main.c
--- ntoskrnl/ke/main.c	24 Oct 2004 15:26:14 -0000	1.202
+++ ntoskrnl/ke/main.c	3 Nov 2004 17:43:51 -0000
@@ -297,6 +297,7 @@
   ASSERT(FIELD_OFFSET(KTHREAD, InitialStack) == KTHREAD_INITIAL_STACK);
   ASSERT(FIELD_OFFSET(KTHREAD, Teb) == KTHREAD_TEB);
   ASSERT(FIELD_OFFSET(KTHREAD, KernelStack) == KTHREAD_KERNEL_STACK);
+  ASSERT(FIELD_OFFSET(KTHREAD, NpxState) == KTHREAD_NPX_STATE);
   ASSERT(FIELD_OFFSET(KTHREAD, ServiceTable) == KTHREAD_SERVICE_TABLE);
   ASSERT(FIELD_OFFSET(KTHREAD, PreviousMode) == KTHREAD_PREVIOUS_MODE);
   ASSERT(FIELD_OFFSET(KTHREAD, TrapFrame) == KTHREAD_TRAP_FRAME);
@@ -307,6 +308,7 @@
   ASSERT(FIELD_OFFSET(KPROCESS, IopmOffset) == KPROCESS_IOPM_OFFSET);
   ASSERT(FIELD_OFFSET(KPROCESS, LdtDescriptor) == KPROCESS_LDT_DESCRIPTOR0);
   ASSERT(FIELD_OFFSET(KTRAP_FRAME, Reserved9) == KTRAP_FRAME_RESERVED9);
+  ASSERT(FIELD_OFFSET(KV86M_TRAP_FRAME, SavedExceptionStack) == TF_SAVED_EXCEPTION_STACK);
   ASSERT(FIELD_OFFSET(KV86M_TRAP_FRAME, regs) == TF_REGS);
   ASSERT(FIELD_OFFSET(KV86M_TRAP_FRAME, orig_ebp) == TF_ORIG_EBP);
 
@@ -314,6 +316,12 @@
   ASSERT(FIELD_OFFSET(KPCR, Self) == KPCR_SELF);
   ASSERT(FIELD_OFFSET(KPCR, PrcbData) + FIELD_OFFSET(KPRCB, CurrentThread) == KPCR_CURRENT_THREAD);  
 
+  ASSERT(FIELD_OFFSET(KTSS, Esp0) == KTSS_ESP0);
+  ASSERT(FIELD_OFFSET(KTSS, Eflags) == KTSS_EFLAGS);
+  ASSERT(FIELD_OFFSET(KTSS, IoMapBase) == KTSS_IOMAPBASE);
+
+  ASSERT(sizeof(FX_SAVE_AREA) == SIZEOF_FX_SAVE_AREA);
+
   LdrInit1();
 
   KeLowerIrql(DISPATCH_LEVEL);
Index: ntoskrnl/ke/i386/bthread.S
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/bthread.S,v
retrieving revision 1.10
diff -u -r1.10 bthread.S
--- ntoskrnl/ke/i386/bthread.S	31 Oct 2004 19:46:10 -0000	1.10
+++ ntoskrnl/ke/i386/bthread.S	3 Nov 2004 19:05:17 -0000
@@ -141,18 +141,6 @@
 	addl	$24, %esp
 .L3:
 
-	/*
-	 * Load the floating point registers
-	 */
-	movl	(_HardwareMathSupport), %eax
-	testl	%eax,%eax
-	jz	.L2
-	testl	$(CONTEXT_FLOATING_POINT & ~CONTEXT_i386), %ebx
-	jz	.L2
-	frstor	(%esp)
-.L2:
-	addl	$112, %esp
-
 	/* Load the rest of the thread's user mode context. */
 	movl	$0, %eax
 	jmp	KeReturnFromSystemCallWithHook
Index: ntoskrnl/ke/i386/exp.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/exp.c,v
retrieving revision 1.93
diff -u -r1.93 exp.c
--- ntoskrnl/ke/i386/exp.c	22 Oct 2004 20:32:48 -0000	1.93
+++ ntoskrnl/ke/i386/exp.c	23 Oct 2004 18:26:36 -0000
@@ -40,6 +40,10 @@
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0]))
+#endif
+
 extern void interrupt_handler2e(void);
 extern void interrupt_handler2d(void);
 
@@ -84,6 +88,7 @@
     "Stack Segment Fault",
     "General Protection",
     "Page Fault",
+    "Reserved(15)",
     "Math Fault",
     "Alignment Check",
     "Machine Check"
@@ -106,6 +111,7 @@
     STATUS_STACK_OVERFLOW,
     STATUS_ACCESS_VIOLATION,
     STATUS_ACCESS_VIOLATION,
+    STATUS_ACCESS_VIOLATION, /* RESERVED */
     STATUS_ACCESS_VIOLATION, /* STATUS_FLT_INVALID_OPERATION */
     STATUS_DATATYPE_MISALIGNMENT,
     STATUS_ACCESS_VIOLATION
@@ -167,7 +173,7 @@
     }
   else
     {
-      if (ExceptionNr < 16)
+      if (ExceptionNr < ARRAY_SIZE(ExceptionToNtStatus))
 	{
 	  Er.ExceptionCode = ExceptionToNtStatus[ExceptionNr];
 	}
@@ -190,7 +196,7 @@
 ULONG
 KiDoubleFaultHandler(VOID)
 {
-  unsigned int cr2_;
+  unsigned int cr2;
   ULONG StackLimit;
   ULONG StackBase;
   ULONG Esp0;
@@ -211,15 +217,7 @@
   Esp0 = OldTss->Esp;
 
   /* Get CR2 */
-#if defined(__GNUC__)
-  __asm__("movl %%cr2,%0\n\t" : "=d" (cr2_));
-#elif defined(_MSC_VER)
-  __asm mov eax, cr2;
-  __asm mov cr2_, eax;
-#else
-#error Unknown compiler for inline assembler
-#endif
-
+  cr2 = Ke386GetCr2();
   if (PsGetCurrentThread() != NULL &&
       PsGetCurrentThread()->ThreadsProcess != NULL)
     {
@@ -245,7 +243,7 @@
    /*
     * Print out the CPU registers
     */
-   if (ExceptionNr < 19)
+   if (ExceptionNr < ARRAY_SIZE(ExceptionTypeStrings))
      {
        DbgPrint("%s Exception: %d(%x)\n", ExceptionTypeStrings[ExceptionNr],
 		ExceptionNr, 0);
@@ -257,7 +255,7 @@
    DbgPrint("CS:EIP %x:%x ", OldTss->Cs, OldTss->Eip);
    KeRosPrintAddress((PVOID)OldTss->Eip);
    DbgPrint("\n");
-   DbgPrint("cr2 %x cr3 %x ", cr2_, OldCr3);
+   DbgPrint("cr2 %x cr3 %x ", cr2, OldCr3);
    DbgPrint("Proc: %x ",PsGetCurrentProcess());
    if (PsGetCurrentProcess() != NULL)
      {
@@ -420,7 +418,7 @@
    /*
     * Print out the CPU registers
     */
-   if (ExceptionNr < 19)
+   if (ExceptionNr < ARRAY_SIZE(ExceptionTypeStrings))
      {
 	DbgPrint("%s Exception: %d(%x)\n", ExceptionTypeStrings[ExceptionNr],
 		 ExceptionNr, Tf->ErrorCode&0xffff);
@@ -529,7 +527,7 @@
  *        Complete CPU context
  */
 {
-   unsigned int cr2_;
+   unsigned int cr2;
    NTSTATUS Status;
    ULONG Esp0;
 
@@ -540,22 +538,15 @@
    Esp0 = (ULONG)&Tf->Eip;
 
    /* Get CR2 */
-#if defined(__GNUC__)
-   __asm__("movl %%cr2,%0\n\t" : "=d" (cr2_));
-#elif defined(_MSC_VER)
-  __asm mov eax, cr2;
-  __asm mov cr2_, eax;
-#else
-#error Unknown compiler for inline assembler
-#endif
-   Tf->DebugPointer = (PVOID)cr2_;
+   cr2 = Ke386GetCr2();
+   Tf->DebugPointer = (PVOID)cr2;
 
    /*
     * If this was a V86 mode exception then handle it specially
     */
    if (Tf->Eflags & (1 << 17))
      {
-       return(KeV86Exception(ExceptionNr, Tf, cr2_));
+       return(KeV86Exception(ExceptionNr, Tf, cr2));
      }
 
    /*
@@ -574,7 +565,7 @@
     */
    if (ExceptionNr == 14)
      {
-        if (Ke386NoExecute && Tf->ErrorCode & 0x10 && cr2_ >= KERNEL_BASE)
+        if (Ke386NoExecute && Tf->ErrorCode & 0x10 && cr2 >= KERNEL_BASE)
 	{
            KEBUGCHECKWITHTF(ATTEMPTED_EXECUTE_OF_NOEXECUTE_MEMORY, 0, 0, 0, 0, Tf);
 	}
@@ -585,7 +576,7 @@
 	Status = MmPageFault(Tf->Cs&0xffff,
 			     &Tf->Eip,
 			     &Tf->Eax,
-			     cr2_,
+			     cr2,
 			     Tf->ErrorCode);
 	if (NT_SUCCESS(Status))
 	  {
@@ -606,15 +597,27 @@
      }
 
    /*
+    * Try to handle device-not-present and math-fault exceptions.
+    */
+   if (ExceptionNr == 7 || ExceptionNr == 16)
+     {
+       Status = KiHandleFpuFault(Tf, ExceptionNr);
+       if (NT_SUCCESS(Status))
+         {
+           return(0);
+         }
+     }
+
+   /*
     * Handle user exceptions differently
     */
    if ((Tf->Cs & 0xFFFF) == USER_CS)
      {
-       return(KiUserTrapHandler(Tf, ExceptionNr, (PVOID)cr2_));
+       return(KiUserTrapHandler(Tf, ExceptionNr, (PVOID)cr2));
      }
    else
     {
-      return(KiKernelTrapHandler(Tf, ExceptionNr, (PVOID)cr2_));
+      return(KiKernelTrapHandler(Tf, ExceptionNr, (PVOID)cr2));
     }
 }
 
Index: ntoskrnl/ke/i386/fpu.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/fpu.c,v
retrieving revision 1.13
diff -u -r1.13 fpu.c
--- ntoskrnl/ke/i386/fpu.c	15 Aug 2004 16:39:05 -0000	1.13
+++ ntoskrnl/ke/i386/fpu.c	4 Nov 2004 02:48:39 -0000
@@ -32,6 +32,33 @@
 #define NDEBUG
 #include <internal/debug.h>
 
+/* DEFINES *******************************************************************/
+#define EXCEPTION_FLT_DENORMAL_OPERAND  	(0xc000008dL)
+#define EXCEPTION_FLT_DIVIDE_BY_ZERO    	(0xc000008eL)
+#define EXCEPTION_FLT_INEXACT_RESULT    	(0xc000008fL)
+#define EXCEPTION_FLT_INVALID_OPERATION 	(0xc0000090L)
+#define EXCEPTION_FLT_OVERFLOW            	(0xc0000091L)
+#define EXCEPTION_FLT_STACK_CHECK       	(0xc0000092L)
+#define EXCEPTION_FLT_UNDERFLOW          	(0xc0000093L)
+
+/* x87 Status Word exception flags */
+#define X87_SW_IE       (1<<0)   /* Invalid Operation */
+#define X87_SW_DE       (1<<1)   /* Denormalized Operand */
+#define X87_SW_ZE       (1<<2)   /* Zero Devide */
+#define X87_SW_OE       (1<<3)   /* Overflow */
+#define X87_SW_UE       (1<<4)   /* Underflow */
+#define X87_SW_PE       (1<<5)   /* Precision */
+#define X87_SW_SE       (1<<6)   /* Stack Fault */
+
+#define X87_SW_ES       (1<<7)   /* Error Summary */
+
+#if 0
+#define X87_SW_C0       (1<<8)   /* Condition code */
+#define X87_SW_C1       (1<<9)   /* Condition code */
+#define X87_SW_C2       (1<<10)  /* Condition code */
+#define X87_SW_C3       (1<<14)  /* Condition code */
+#endif
+
 /* GLOBALS *******************************************************************/
 
 ULONG HardwareMathSupport;
@@ -42,56 +69,41 @@
 KiCheckFPU(VOID)
 {
    unsigned short int status;
-   int cr0_;
+   int cr0;
    
    HardwareMathSupport = 0;
-   
-#if defined(__GNUC__)
-   __asm__("movl %%cr0, %0\n\t" : "=a" (cr0_));
-   /* Set NE and MP. */
-   cr0_ = cr0_ | 0x22;
-   /* Clear EM */
-   cr0_ = cr0_ & (~0x4);
-   __asm__("movl %0, %%cr0\n\t" : : "a" (cr0_));
+   cr0 = Ke386GetCr0();
+   cr0 |= X86_CR0_NE | X86_CR0_MP;
+   cr0 &= ~X86_CR0_EM;
+   Ke386SetCr0(cr0);
 
+#if defined(__GNUC__)
    __asm__("clts\n\t");
    __asm__("fninit\n\t");
    __asm__("fstsw %0\n\t" : "=a" (status));
-   if (status != 0)
-     {
-	__asm__("movl %%cr0, %0\n\t" : "=a" (cr0_));
-	/* Set the EM flag in CR0 so any FPU instructions cause a trap. */
-	cr0_ = cr0_ | 0x4;
-	__asm__("movl %0, %%cr0\n\t" :
-		: "a" (cr0_));
-	return;
-     }
-   /* fsetpm for i287, ignored by i387 */
-   __asm__(".byte 0xDB, 0xE4\n\t");
 #elif defined(_MSC_VER)
-   __asm mov eax, cr0;
-   __asm mov cr0_, eax;
-   cr0_ |= 0x22;	/* Set NE and MP. */
-   cr0_ &= ~0x4;	/* Clear EM */
    __asm
    {
-	   mov eax, cr0_;
-	   mov cr0, eax;
 	   clts;
 	   fninit;
 	   fstsw status
    }
+#else
+#error Unknown compiler for inline assembler
+#endif
+
    if (status != 0)
      {
-	__asm mov eax, cr0_;
-	__asm or eax, 4; /* Set the EM flag in CR0 so any FPU instructions cause a trap. */
-	__asm mov cr0, eax;
+	/* Set the EM flag in CR0 so any FPU instructions cause a trap. */
+	Ke386SetCr0(Ke386GetCr0() | X86_CR0_EM);
 	return;
      }
+
    /* fsetpm for i287, ignored by i387 */
+#if defined(__GNUC__)
+   __asm__(".byte 0xDB, 0xE4\n\t");
+#elif defined(_MSC_VER)
    __asm _emit 0xDB __asm _emit 0xe4
-//   __asm fsetpm;
-//   __asm__(".byte 0xDB, 0xE4\n\t");
 #else
 #error Unknown compiler for inline assembler
 #endif
@@ -109,6 +121,12 @@
 {
   char *FpState;
 
+  /* check if we are doing software emulation */
+  if (!HardwareMathSupport)
+    {
+      return STATUS_ILLEGAL_FLOAT_CONTEXT;
+    }
+
   FpState = ExAllocatePool(PagedPool, FPU_STATE_SIZE);
   if (NULL == FpState)
     {
@@ -146,3 +164,104 @@
 
   return STATUS_SUCCESS;
 }
+
+NTSTATUS
+KiHandleFpuFault(PKTRAP_FRAME Tf, ULONG ExceptionNr)
+{
+  if (ExceptionNr == 7) /* device not present */
+    {
+      unsigned int cr0 = Ke386GetCr0();
+      PFX_SAVE_AREA FxSaveArea = (PFX_SAVE_AREA)((char *)KeGetCurrentThread()->InitialStack - sizeof (FX_SAVE_AREA));
+      
+      ASSERT((cr0 & X86_CR0_TS) == X86_CR0_TS);
+      ASSERT((Tf->Eflags & X86_EFLAGS_VM) == 0);
+      ASSERT((cr0 & X86_CR0_EM) == 0);
+
+      Ke386DisableInterrupts();
+
+      cr0 &= ~X86_CR0_TS;
+      Ke386SetCr0(cr0);
+
+      DPRINT("Device not present exception happened! (Cr0 = 0x%x, NpxState = 0x%x)\n", Ke386GetCr0(), KeGetCurrentThread()->NpxState);
+      if (KeGetCurrentThread()->NpxState & NPX_STATE_VALID)
+        {
+#if 1
+          DPRINT("Restoring previous FPU state\n");
+          DPRINT("ControlWord   = 0x%08x\n", FxSaveArea->U.FnArea.ControlWord);
+          DPRINT("StatusWord    = 0x%08x\n", FxSaveArea->U.FnArea.StatusWord);
+          DPRINT("TagWord       = 0x%08x\n", FxSaveArea->U.FnArea.TagWord);
+          DPRINT("ErrorOffset   = 0x%08x\n", FxSaveArea->U.FnArea.ErrorOffset);
+          DPRINT("ErrorSelector = 0x%08x\n", FxSaveArea->U.FnArea.ErrorSelector);
+          DPRINT("DataOffset    = 0x%08x\n", FxSaveArea->U.FnArea.DataOffset);
+          DPRINT("DataSelector  = 0x%08x\n", FxSaveArea->U.FnArea.DataSelector);
+          DPRINT("Thread->InitialStack = 0x%08x\n", (ULONG)KeGetCurrentThread()->InitialStack);
+          DPRINT("Thread->KernelStack  = 0x%08x\n", (ULONG)KeGetCurrentThread()->KernelStack);
+#endif
+          __asm__ __volatile__("frstor %0" : : "m"(*FxSaveArea));
+        }
+      else /* NpxState & NPX_STATE_INVALID */
+        {
+          DPRINT("Setting up clean FPU state\n");
+          __asm__ __volatile__("finit");
+        }
+      KeGetCurrentThread()->NpxState |= NPX_STATE_DIRTY;
+      DPRINT("Device not present exception handled!\n");
+      Ke386EnableInterrupts();
+      return STATUS_SUCCESS;
+    }
+  else
+    {
+      WORD FpuStatusWord;
+      EXCEPTION_RECORD Er;
+      CONTEXT Context;
+      KPROCESSOR_MODE PreviousMode;
+
+      ASSERT(ExceptionNr == 16); /* math fault */
+
+      PreviousMode = ((Tf->Cs & 0xffff) == USER_CS) ? (UserMode) : (KernelMode);
+      DPRINT("Math-fault happened! (PreviousMode = %s)\n",
+              (PreviousMode == UserMode) ? ("UserMode") : ("KernelMode"));
+      /* Get fpu status and clear exception flags */
+      __asm__("fnstenv %0" : "=m"(Context.FloatSave));
+      __asm__("fnclex"); /* FIXME: clear only handled exceptions? */
+      FpuStatusWord = Context.FloatSave.StatusWord & 0xffff;
+
+      Ke386EnableInterrupts();
+
+      DPRINT("FpuStatusWord = 0x%04x\n", FpuStatusWord);
+
+      /* Dispatch exception */
+      if (FpuStatusWord & X87_SW_IE)
+        Er.ExceptionCode = EXCEPTION_FLT_INVALID_OPERATION;
+      else if (FpuStatusWord & X87_SW_DE)
+        Er.ExceptionCode = EXCEPTION_FLT_DENORMAL_OPERAND;
+      else if (FpuStatusWord & X87_SW_ZE)
+        Er.ExceptionCode = EXCEPTION_FLT_DIVIDE_BY_ZERO;
+      else if (FpuStatusWord & X87_SW_OE)
+        Er.ExceptionCode = EXCEPTION_FLT_OVERFLOW;
+      else if (FpuStatusWord & X87_SW_UE)
+        Er.ExceptionCode = EXCEPTION_FLT_UNDERFLOW;
+      else if (FpuStatusWord & X87_SW_PE)
+        Er.ExceptionCode = EXCEPTION_FLT_INEXACT_RESULT;
+      else if (FpuStatusWord & X87_SW_SE)
+        Er.ExceptionCode = EXCEPTION_FLT_STACK_CHECK;
+      else
+        ASSERT(0); /* not reached */
+
+      Er.ExceptionFlags = 0;
+      Er.ExceptionRecord = NULL;
+      /* FIXME: is this the right way to get the correct EIP of the faulting instruction? */
+      Er.ExceptionAddress = (PVOID)Context.FloatSave.ErrorOffset;
+      Er.NumberParameters = 0;
+      
+      /* FIXME: we should pass in a context with floating save area to KiDispatchException */
+      DPRINT("Dispatching exception\n");
+      KiDispatchException(&Er, NULL, Tf, PreviousMode, TRUE);
+      
+      DPRINT("Math-fault handled!\n");
+      return STATUS_SUCCESS;
+    }
+
+  return STATUS_UNSUCCESSFUL;
+}
+
Index: ntoskrnl/ke/i386/multiboot.S
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/multiboot.S,v
retrieving revision 1.28
diff -u -r1.28 multiboot.S
--- ntoskrnl/ke/i386/multiboot.S	31 Oct 2004 19:46:10 -0000	1.28
+++ ntoskrnl/ke/i386/multiboot.S	3 Nov 2004 19:02:53 -0000
@@ -3,6 +3,7 @@
 #include <internal/i386/segment.h>
 #include <internal/ps.h>
 #include <internal/i386/mm.h>
+#include <internal/i386/fpu.h>
 
 #define MULTIBOOT_HEADER_MAGIC (0x1BADB002)
 
@@ -279,7 +280,7 @@
 	/*
 	 * Load the initial kernel stack
 	 */
-	movl	$_init_stack_top, %esp
+	movl	$(_init_stack_top - SIZEOF_FX_SAVE_AREA), %esp
 
 	/*
 	 * Initialize EFLAGS
Index: ntoskrnl/ke/i386/thread.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/thread.c,v
retrieving revision 1.33
diff -u -r1.33 thread.c
--- ntoskrnl/ke/i386/thread.c	15 Aug 2004 16:39:05 -0000	1.33
+++ ntoskrnl/ke/i386/thread.c	4 Nov 2004 00:18:55 -0000
@@ -31,13 +31,6 @@
 #define NDEBUG
 #include <internal/debug.h>
 
-/* GLOBALS *******************************************************************/
-
-#define FLAG_NT (1<<14)
-#define FLAG_VM (1<<17)
-#define FLAG_IF (1<<9)
-#define FLAG_IOPL ((1<<12)+(1<<13))
-
 /* FUNCTIONS *****************************************************************/
 
 NTSTATUS 
@@ -77,10 +70,10 @@
      {
 	return(STATUS_UNSUCCESSFUL);
      }
-   if ((Context->EFlags & FLAG_IOPL) != 0 ||
-       (Context->EFlags & FLAG_NT) ||
-       (Context->EFlags & FLAG_VM) ||
-       (!(Context->EFlags & FLAG_IF)))
+   if ((Context->EFlags & X86_EFLAGS_IOPL) != 0 ||
+       (Context->EFlags & X86_EFLAGS_NT) ||
+       (Context->EFlags & X86_EFLAGS_VM) ||
+       (!(Context->EFlags & X86_EFLAGS_IF)))
      {
         return(STATUS_UNSUCCESSFUL);
      }
@@ -93,40 +86,37 @@
   PULONG KernelStack;
   ULONG InitSize;
   PKTRAP_FRAME TrapFrame;
+  PFX_SAVE_AREA FxSaveArea;
 
   /*
    * Setup a stack frame for exit from the task switching routine
    */
   
-  InitSize = 5 * sizeof(DWORD) + sizeof(DWORD) + 6 * sizeof(DWORD) + 
-    sizeof(FLOATING_SAVE_AREA) + sizeof(KTRAP_FRAME);
+  InitSize = 6 * sizeof(DWORD) + sizeof(DWORD) + 6 * sizeof(DWORD) +
+           + sizeof(KTRAP_FRAME) + sizeof (FX_SAVE_AREA);
   KernelStack = (PULONG)((char*)Thread->KernelStack - InitSize);
 
   /* Set up the initial frame for the return from the dispatcher. */
-  KernelStack[0] = 0;      /* EDI */
-  KernelStack[1] = 0;      /* ESI */
-  KernelStack[2] = 0;      /* EBX */
-  KernelStack[3] = 0;      /* EBP */
-  KernelStack[4] = (ULONG)&PsBeginThreadWithContextInternal;   /* EIP */
+  KernelStack[0] = (ULONG)Thread->InitialStack - sizeof(FX_SAVE_AREA);  /* TSS->Esp0 */
+  KernelStack[1] = 0;      /* EDI */
+  KernelStack[2] = 0;      /* ESI */
+  KernelStack[3] = 0;      /* EBX */
+  KernelStack[4] = 0;      /* EBP */
+  KernelStack[5] = (ULONG)&PsBeginThreadWithContextInternal;   /* EIP */
 
   /* Save the context flags. */
-  KernelStack[5] = Context->ContextFlags;
+  KernelStack[6] = Context->ContextFlags;
 
   /* Set up the initial values of the debugging registers. */
-  KernelStack[6] = Context->Dr0;
-  KernelStack[7] = Context->Dr1;
-  KernelStack[8] = Context->Dr2;
-  KernelStack[9] = Context->Dr3;
-  KernelStack[10] = Context->Dr6;
-  KernelStack[11] = Context->Dr7;
-
-  /* Set up the initial floating point state. */
-  memcpy((PVOID)&KernelStack[12], (PVOID)&Context->FloatSave,
-	 sizeof(FLOATING_SAVE_AREA));
+  KernelStack[7] = Context->Dr0;
+  KernelStack[8] = Context->Dr1;
+  KernelStack[9] = Context->Dr2;
+  KernelStack[10] = Context->Dr3;
+  KernelStack[11] = Context->Dr6;
+  KernelStack[12] = Context->Dr7;
 
   /* Set up a trap frame from the context. */
-  TrapFrame = (PKTRAP_FRAME)
-    ((char*)KernelStack + 12 * sizeof(DWORD) + sizeof(FLOATING_SAVE_AREA));
+  TrapFrame = (PKTRAP_FRAME)(&KernelStack[13]);
   TrapFrame->DebugEbp = (PVOID)Context->Ebp;
   TrapFrame->DebugEip = (PVOID)Context->Eip;
   TrapFrame->DebugArgMark = 0;
@@ -149,12 +139,39 @@
   TrapFrame->ErrorCode = 0;
   TrapFrame->Cs = Context->SegCs;
   TrapFrame->Eip = Context->Eip;
-  TrapFrame->Eflags = Context->EFlags | FLAG_IF;
-  TrapFrame->Eflags &= ~(FLAG_VM | FLAG_NT | FLAG_IOPL);
+  TrapFrame->Eflags = Context->EFlags | X86_EFLAGS_IF;
+  TrapFrame->Eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_NT | X86_EFLAGS_IOPL);
   TrapFrame->Esp = Context->Esp;
   TrapFrame->Ss = (USHORT)Context->SegSs;
   /* FIXME: Should check for a v86 mode context here. */
 
+  /* Set up the initial floating point state. */
+  if (Context->ContextFlags & CONTEXT_FLOATING_POINT)
+    {
+      FxSaveArea = (PFX_SAVE_AREA)((ULONG_PTR)KernelStack + InitSize - sizeof(FX_SAVE_AREA));
+      memcpy((PVOID)&FxSaveArea->U.FnArea, (PVOID)&Context->FloatSave, sizeof(FxSaveArea->U.FnArea));
+      FxSaveArea->Cr0NpxState = Context->FloatSave.Cr0NpxState;
+      /* FIXME: for SSE we have to convert FLOATING_SAVE_AREA to FX_SAVE_AREA */
+
+#ifndef NDEBUG
+      DPRINT1("Thread's initial FPU status (0x%x):\n", (ULONG)FxSaveArea);
+      DPRINT1("ControlWord   = 0x%08x\n", FxSaveArea->U.FnArea.ControlWord);
+      DPRINT1("StatusWord    = 0x%08x\n", FxSaveArea->U.FnArea.StatusWord);
+      DPRINT1("TagWord       = 0x%08x\n", FxSaveArea->U.FnArea.TagWord);
+      DPRINT1("ErrorOffset   = 0x%08x\n", FxSaveArea->U.FnArea.ErrorOffset);
+      DPRINT1("ErrorSelector = 0x%08x\n", FxSaveArea->U.FnArea.ErrorSelector);
+      DPRINT1("DataOffset    = 0x%08x\n", FxSaveArea->U.FnArea.DataOffset);
+      DPRINT1("DataSelector  = 0x%08x\n", FxSaveArea->U.FnArea.DataSelector);
+      DPRINT1("Thread->InitialStack = 0x%08x\n", (ULONG)Thread->InitialStack);
+      DPRINT1("Thread->KernelStack  = 0x%08x\n", (ULONG)Thread->KernelStack);
+#endif
+      Thread->NpxState = NPX_STATE_VALID;
+    }
+  else
+    {
+      Thread->NpxState = NPX_STATE_INVALID;
+    }
+
   /* Save back the new value of the kernel stack. */
   Thread->KernelStack = (PVOID)KernelStack;
 
@@ -175,21 +192,25 @@
    * Setup a stack frame for exit from the task switching routine
    */
 
-  KernelStack = (PULONG)((char*)Thread->KernelStack - (8*4));
-  KernelStack[0] = 0;      /* EDI */
-  KernelStack[1] = 0;      /* ESI */
-  KernelStack[2] = 0;      /* EBX */
-  KernelStack[3] = 0;      /* EBP */
-  KernelStack[4] = (ULONG)&PsBeginThread;   /* EIP */
-  KernelStack[5] = 0;     /* Return EIP */
-  KernelStack[6] = (ULONG)StartRoutine; /* First argument to PsBeginThread */
-  KernelStack[7] = (ULONG)StartContext; /* Second argument to PsBeginThread */
+  KernelStack = (PULONG)((char*)Thread->KernelStack - (9 * sizeof(DWORD)) - sizeof(FX_SAVE_AREA));
+  KernelStack[0] = (ULONG)Thread->InitialStack - sizeof(FX_SAVE_AREA);  /* TSS->Esp0 */
+  KernelStack[1] = 0;      /* EDI */
+  KernelStack[2] = 0;      /* ESI */
+  KernelStack[3] = 0;      /* EBX */
+  KernelStack[4] = 0;      /* EBP */
+  KernelStack[5] = (ULONG)&PsBeginThread;   /* EIP */
+  KernelStack[6] = 0;     /* Return EIP */
+  KernelStack[7] = (ULONG)StartRoutine; /* First argument to PsBeginThread */
+  KernelStack[8] = (ULONG)StartContext; /* Second argument to PsBeginThread */
   Thread->KernelStack = (VOID*)KernelStack;
 
+  /*
+   * Setup FPU state
+   */
+  Thread->NpxState = NPX_STATE_INVALID;
+
   return(STATUS_SUCCESS);
 }
 
-
-
-
 /* EOF */
+
Index: ntoskrnl/ke/i386/tskswitch.S
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/tskswitch.S,v
retrieving revision 1.19
diff -u -r1.19 tskswitch.S
--- ntoskrnl/ke/i386/tskswitch.S	31 Oct 2004 19:46:10 -0000	1.19
+++ ntoskrnl/ke/i386/tskswitch.S	4 Nov 2004 00:32:57 -0000
@@ -17,16 +17,18 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * FILE:            ntoskrnl/ke/i386/tskswitch.S
- * PURPOSE:         Microkernel thread support
+ * FILE:	    ntoskrnl/ke/i386/tskswitch.S
+ * PURPOSE:	 Microkernel thread support
  * PROGRAMMER:      David Welch (welch at cwcom.net)
  * UPDATE HISTORY:
- *                  Created 09/10/00
+ *		  Created 09/10/00
  */
 
 /* INCLUDES ******************************************************************/
 
 #include <internal/i386/segment.h>
+#include <internal/i386/ke.h>
+#include <internal/i386/fpu.h>
 #include <internal/ps.h>
 #include <ntos/tss.h>
 #include <internal/ntoskrnl.h>
@@ -34,12 +36,13 @@
 /* FUNCTIONS ****************************************************************/
 
 .globl _Ki386ContextSwitch
+.func Ki386ContextSwitch
 _Ki386ContextSwitch:
-/* 
+/*
  * FUNCTIONS: Switches to another thread's context
  * ARGUMENTS:
- *        Thread = Thread to switch to
- *        OldThread = Thread to switch from
+ *	Thread = Thread to switch to
+ *	OldThread = Thread to switch from
  */
 	pushl	%ebp
 	movl	%esp, %ebp
@@ -60,7 +63,7 @@
 	 * Get the pointer to the new thread.
 	 */
 	movl	8(%ebp), %ebx
-		
+
 	/*
 	 * Set the base of the TEB selector to the base of the TEB for
 	 * this thread.
@@ -89,7 +92,7 @@
 	xorl	%eax, %eax
 	movl	KTHREAD_APCSTATE_PROCESS(%ebx), %edi
 	testw	$0xFFFF, KPROCESS_LDT_DESCRIPTOR0(%edi)
-	jz	.L4
+	jz	0f
 
 	pushl	KPROCESS_LDT_DESCRIPTOR1(%edi)
 	pushl	KPROCESS_LDT_DESCRIPTOR0(%edi)
@@ -99,35 +102,46 @@
 
 	movl	$LDT_SELECTOR, %eax
 
-.L4:
+0:
 	lldtw	%ax
 
+	movl	12(%ebp), %ebx
+
 	/*
-	 * Load up the iomap offset for this thread in
-	 * preparation for setting it below.
+	 * FIXME: Save debugging state.
 	 */
-	movl	KPROCESS_IOPM_OFFSET(%edi), %eax
 
 	/*
-	 * FIXME: Save debugging state.
+	 * Save floating point state if TS was unset.
 	 */
+	movl	%cr0, %eax
+	testl	$X86_CR0_TS, %eax
+	jnz	1f
+	movl	KTHREAD_INITIAL_STACK(%ebx), %eax
+	fnsave	-SIZEOF_FX_SAVE_AREA(%eax)
+	movb	$NPX_STATE_VALID, KTHREAD_NPX_STATE(%ebx)
+1:
 
 	/*
-	 * FIXME: Save floating point state.
+	 * Load up the iomap offset for this thread in
+	 * preparation for setting it below.
 	 */
+	movl	KPROCESS_IOPM_OFFSET(%edi), %eax
+
+	/*
+	 * Save the stack pointer in this processor's TSS
+	 */
+	movl	%fs:KPCR_TSS, %esi
+	pushl	KTSS_ESP0(%esi)
 
 	/*
 	 * Switch stacks
 	 */
-	movl	12(%ebp), %ebx
 	movl	%esp, KTHREAD_KERNEL_STACK(%ebx)
 	movl	8(%ebp), %ebx
 	movl	KTHREAD_KERNEL_STACK(%ebx), %esp
 	movl	KTHREAD_STACK_LIMIT(%ebx), %edi
 
-	/*
-	 * Set the stack pointer in this processors TSS
-	 */
 	movl	%fs:KPCR_TSS, %esi
 
 	/*
@@ -135,9 +149,6 @@
 	 */
 	movw	%ax, KTSS_IOMAPBASE(%esi)
 
-	movl	KTHREAD_INITIAL_STACK(%ebx), %eax
-	movl	%eax, KTSS_ESP0(%esi)
-
 	/*
 	 * Change the address space
 	 */
@@ -146,8 +157,16 @@
 	movl	%eax, %cr3
 
 	/*
-	 * FIXME: Restore floating point state
+	 * Restore the stack pointer in this processor's TSS
+	 */
+	popl	KTSS_ESP0(%esi)
+
+	/*
+	 * Set TS in cr0 to catch FPU code and load the FPU state when needed
 	 */
+	movl	%cr0, %eax
+	orl	$X86_CR0_TS, %eax
+	movl	%eax, %cr0
 
 	/*
 	 * FIXME: Restore debugging state
@@ -162,9 +181,9 @@
 	call	_KeReleaseSpinLockFromDpcLevel at 4
 
 	cmpl	$0, _PiNrThreadsAwaitingReaping
-	je	.L3
+	je	4f
 	call	_PiWakeupReaperThread at 0
-.L3:
+4:
 
 	/*
 	 * Restore the saved register and exit
@@ -175,3 +194,5 @@
 
 	popl	%ebp
 	ret
+.endfunc
+
Index: ntoskrnl/ke/i386/tss.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/tss.c,v
retrieving revision 1.9
diff -u -r1.9 tss.c
--- ntoskrnl/ke/i386/tss.c	15 Aug 2004 16:39:05 -0000	1.9
+++ ntoskrnl/ke/i386/tss.c	27 Oct 2004 19:53:02 -0000
@@ -134,7 +134,7 @@
   KeGetCurrentKPCR()->TSS = Tss;
 
   /* Initialize the boot TSS. */
-  Tss->Esp0 = (ULONG)Ki386InitialStackArray[Id];
+  Tss->Esp0 = (ULONG)Ki386InitialStackArray[Id] + MM_STACK_SIZE; /* FIXME: - sizeof(FX_SAVE_AREA)? */
   Tss->Ss0 = KERNEL_DS;
   Tss->IoMapBase = 0xFFFF; /* No i/o bitmap */
   Tss->IoBitmap[8192] = 0xFF;   
@@ -154,9 +154,9 @@
 
   /* Initialize the TSS used for handling double faults. */
   TrapTss->Eflags = 0;
-  TrapTss->Esp0 = ((ULONG)TrapStack + MM_STACK_SIZE);
+  TrapTss->Esp0 = ((ULONG)TrapStack + MM_STACK_SIZE); /* FIXME: - sizeof(FX_SAVE_AREA)? */
   TrapTss->Ss0 = KERNEL_DS;
-  TrapTss->Esp = ((ULONG)TrapStack + MM_STACK_SIZE);
+  TrapTss->Esp = ((ULONG)TrapStack + MM_STACK_SIZE); /* FIXME: - sizeof(FX_SAVE_AREA)? */
   TrapTss->Cs = KERNEL_CS;
   TrapTss->Eip = (ULONG)KiTrap8;
   TrapTss->Ss = KERNEL_DS;
@@ -211,7 +211,7 @@
   Ki386InitialStackArray[0] = (PVOID)&init_stack;
 
   /* Initialize the boot TSS. */
-  KiBootTss.Esp0 = (ULONG)&init_stack_top;
+  KiBootTss.Esp0 = (ULONG)&init_stack_top - sizeof(FX_SAVE_AREA);
   KiBootTss.Ss0 = KERNEL_DS;
   //   KiBootTss.IoMapBase = FIELD_OFFSET(KTSS, IoBitmap);
   KiBootTss.IoMapBase = 0xFFFF; /* No i/o bitmap */
@@ -232,9 +232,9 @@
 
   /* Initialize the TSS used for handling double faults. */
   KiBootTrapTss.Eflags = 0;
-  KiBootTrapTss.Esp0 = (ULONG)&trap_stack_top;
+  KiBootTrapTss.Esp0 = (ULONG)&trap_stack_top; /* FIXME: - sizeof(FX_SAVE_AREA)? */
   KiBootTrapTss.Ss0 = KERNEL_DS;
-  KiBootTrapTss.Esp = (ULONG)&trap_stack_top;
+  KiBootTrapTss.Esp = (ULONG)&trap_stack_top; /* FIXME: - sizeof(FX_SAVE_AREA)? */
   KiBootTrapTss.Cs = KERNEL_CS;
   KiBootTrapTss.Eip = (ULONG)KiTrap8;
   KiBootTrapTss.Ss = KERNEL_DS;
Index: ntoskrnl/ke/i386/v86m_sup.S
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ke/i386/v86m_sup.S,v
retrieving revision 1.11
diff -u -r1.11 v86m_sup.S
--- ntoskrnl/ke/i386/v86m_sup.S	31 Oct 2004 19:46:10 -0000	1.11
+++ ntoskrnl/ke/i386/v86m_sup.S	3 Nov 2004 19:12:48 -0000
@@ -71,27 +71,24 @@
 	 * Since we are going to fiddle with the stack pointer this must be
 	 * a critical section for this processor
 	 */
-
-	/*
-	 * Save the old initial stack
-	 */
-	movl	%fs:KPCR_CURRENT_THREAD, %esi
-	movl	KTHREAD_INITIAL_STACK(%esi), %edi
-	pushl	%edi
-
-	/*
-	 * We also need to set the stack in the kthread structure
-	 */
-	movl	%esp, KTHREAD_INITIAL_STACK(%esi)
+	cli
+	
+        /*
+         * Save the exception handler stack from the TSS
+         */
+	movl	%fs:KPCR_TSS, %esi
+	pushl	KTSS_ESP0(%esi)
 
 	/*
 	 * The stack used for handling exceptions from v86 mode in this thread
 	 * will be the current stack adjusted so we don't overwrite the 
 	 * existing stack frames
 	 */
-	movl	%fs:KPCR_TSS, %esi
 	movl	%esp, KTSS_ESP0(%esi)
 
+	/* Exit the critical section */
+        sti
+
 	/*
 	 * Create the stack frame for an iret to v86 mode
 	 */
@@ -180,18 +177,10 @@
 	cli
 
 	/*
-	 * Restore the initial stack
+	 * Restore the exception handler stack in the TSS
 	 */
-	popl	%eax
 	movl	%fs:KPCR_TSS, %esi
-	movl	%eax, KTSS_ESP0(%esi)
-
-	/*
-	 * We also need to set the stack in the kthread structure
-	 */
-	movl	%fs:KPCR_CURRENT_THREAD, %esi
-	movl	KTHREAD_INITIAL_STACK(%esi), %edi
-	movl	%eax, KTHREAD_INITIAL_STACK(%esi)	
+	popl	KTSS_ESP0(%esi)
 
 	/* Exit the critical section */
 	sti
@@ -207,3 +196,4 @@
 	movl	%ebp, %esp
 	popl	%ebp
 	ret
+
Index: ntoskrnl/ps/w32call.c
===================================================================
RCS file: /CVS/ReactOS/reactos/ntoskrnl/ps/w32call.c,v
retrieving revision 1.17
diff -u -r1.17 w32call.c
--- ntoskrnl/ps/w32call.c	30 Oct 2004 23:48:57 -0000	1.17
+++ ntoskrnl/ps/w32call.c	3 Nov 2004 19:13:38 -0000
@@ -44,6 +44,8 @@
   PNTSTATUS CallbackStatus;
   PKTRAP_FRAME SavedTrapFrame;
   PVOID SavedCallbackStack;
+  PVOID SavedExceptionStack;
+  UCHAR SavedNpxState;
 } NTW32CALL_SAVED_STATE, *PNTW32CALL_SAVED_STATE;
 
 typedef struct
@@ -81,6 +83,8 @@
   PNTW32CALL_SAVED_STATE State;
   PKTRAP_FRAME SavedTrapFrame;
   PVOID SavedCallbackStack;
+  PVOID SavedExceptionStack;
+  UCHAR SavedNpxState;
 
   Thread = PsGetCurrentThread();
   if (Thread->Tcb.CallbackStack == NULL)
@@ -102,6 +106,8 @@
   StackLimit = State->SavedStackLimit;
   SavedTrapFrame = State->SavedTrapFrame;
   SavedCallbackStack = State->SavedCallbackStack;
+  SavedExceptionStack = State->SavedExceptionStack;
+  SavedNpxState = State->SavedNpxState;
   
   /*
    * Copy the callback status and the callback result to NtW32Call
@@ -129,7 +135,9 @@
   Thread->Tcb.StackLimit = StackLimit;
   Thread->Tcb.TrapFrame = SavedTrapFrame;
   Thread->Tcb.CallbackStack = SavedCallbackStack;
-  KeGetCurrentKPCR()->TSS->Esp0 = (ULONG)Thread->Tcb.InitialStack;
+  Thread->Tcb.NpxState = SavedNpxState;
+  KeGetCurrentKPCR()->TSS->Esp0 = (ULONG)SavedExceptionStack;
+  Ke386SetCr0(Ke386GetCr0() | X86_CR0_TS); /* set TS */
   KeStackSwitchAndRet((PVOID)(OldStack + 1));
 
   /* Should never return. */
@@ -282,9 +290,9 @@
 
     }
   /* FIXME: Need to check whether we were interrupted from v86 mode. */
-  memcpy((char*)NewStack + StackSize - sizeof(KTRAP_FRAME), Thread->Tcb.TrapFrame,
-	 sizeof(KTRAP_FRAME) - (4 * sizeof(DWORD)));
-  NewFrame = (PKTRAP_FRAME)((char*)NewStack + StackSize - sizeof(KTRAP_FRAME));
+  memcpy((char*)NewStack + StackSize - sizeof(KTRAP_FRAME) - sizeof(FX_SAVE_AREA),
+         Thread->Tcb.TrapFrame, sizeof(KTRAP_FRAME) - (4 * sizeof(DWORD)));
+  NewFrame = (PKTRAP_FRAME)((char*)NewStack + StackSize - sizeof(KTRAP_FRAME) - sizeof(FX_SAVE_AREA));
   NewFrame->Esp -= (ArgumentLength + (4 * sizeof(ULONG))); 
   NewFrame->Eip = (ULONG)LdrpGetSystemDllCallbackDispatcher();
   UserEsp = (PULONG)NewFrame->Esp;
@@ -304,10 +312,19 @@
   SavedState.CallbackStatus = &CallbackStatus;
   SavedState.SavedTrapFrame = Thread->Tcb.TrapFrame;
   SavedState.SavedCallbackStack = Thread->Tcb.CallbackStack;
+  SavedState.SavedExceptionStack = (PVOID)KeGetCurrentKPCR()->TSS->Esp0;
+  SavedState.SavedNpxState = Thread->Tcb.NpxState;
+  if (Thread->Tcb.NpxState & NPX_STATE_DIRTY) /* thread has used FPU, save context */
+    {
+      __asm__("fsave %0" : : "m"(*((char *)Thread->Tcb.InitialStack - sizeof(FX_SAVE_AREA))));
+      SavedState.SavedNpxState = NPX_STATE_VALID;
+      Ke386SetCr0(Ke386GetCr0() | X86_CR0_TS);
+    }
   Thread->Tcb.InitialStack = Thread->Tcb.StackBase = (char*)NewStack + StackSize;
-  Thread->Tcb.StackLimit = (ULONG_PTR)NewStack;
-  Thread->Tcb.KernelStack = (char*)NewStack + StackSize - sizeof(KTRAP_FRAME);
-  KeGetCurrentKPCR()->TSS->Esp0 = (ULONG)Thread->Tcb.InitialStack;
+  Thread->Tcb.StackLimit = (ULONG)NewStack;
+  Thread->Tcb.KernelStack = (char*)NewStack + StackSize - sizeof(KTRAP_FRAME) - sizeof(FX_SAVE_AREA);
+  Thread->Tcb.NpxState = NPX_STATE_INVALID;
+  KeGetCurrentKPCR()->TSS->Esp0 = (ULONG)Thread->Tcb.InitialStack - sizeof(FX_SAVE_AREA);
   KePushAndStackSwitchAndSysRet((ULONG)&SavedState, Thread->Tcb.KernelStack);
 
   /* 


More information about the Ros-dev mailing list