From f086aa018b58f23bc15fbee7b2c648e35bb7dc1c Mon Sep 17 00:00:00 2001
From: John Hodge <tpg@mutabah.net>
Date: Tue, 11 Oct 2011 16:59:12 +0800
Subject: [PATCH] Kernel/x86_64 - Implemented COW, fixed PMM bugs

- Also made kernel stack smaller
---
 Kernel/arch/x86_64/desctab.asm            |   4 +-
 Kernel/arch/x86_64/errors.c               |   2 +-
 Kernel/arch/x86_64/include/arch.h         |   1 +
 Kernel/arch/x86_64/include/common.inc.asm |   2 +
 Kernel/arch/x86_64/include/mm_virt.h      |   1 -
 Kernel/arch/x86_64/include/proc.h         |   3 +-
 Kernel/arch/x86_64/mm_phys.c              |  72 ++++++++---
 Kernel/arch/x86_64/mm_virt.c              | 141 ++++++++++++++++------
 Kernel/arch/x86_64/proc.asm               |   7 +-
 Kernel/arch/x86_64/proc.c                 |  22 ++--
 Kernel/arch/x86_64/start32.asm            |  13 +-
 Kernel/arch/x86_64/start64.asm            |  55 +++------
 12 files changed, 213 insertions(+), 110 deletions(-)

diff --git a/Kernel/arch/x86_64/desctab.asm b/Kernel/arch/x86_64/desctab.asm
index a69e06ad..c9c217c4 100644
--- a/Kernel/arch/x86_64/desctab.asm
+++ b/Kernel/arch/x86_64/desctab.asm
@@ -255,7 +255,7 @@ ErrorCommon:
 	;PUSH_XMM
 	
 	mov rdi, rsp
-	xchg bx, bx
+;	xchg bx, bx
 	call Error_Handler
 	
 	;POP_XMM
@@ -440,3 +440,5 @@ gaIRQ_Handlers:
 	times	16*NUM_IRQ_CALLBACKS	dq	0
 gaIRQ_DataPtrs:
 	times	16*NUM_IRQ_CALLBACKS	dq	0
+
+; vim: ft=nasm
diff --git a/Kernel/arch/x86_64/errors.c b/Kernel/arch/x86_64/errors.c
index c48174ef..d1856db1 100644
--- a/Kernel/arch/x86_64/errors.c
+++ b/Kernel/arch/x86_64/errors.c
@@ -101,7 +101,7 @@ void Error_Backtrace(Uint IP, Uint BP)
 	//	return;
 	//}
 	
-	if( IP > MM_USER_MAX && IP < MM_KERNEL_CODE
+	if( IP > USER_MAX && IP < MM_KERNEL_CODE
 	 && (MM_MODULE_MIN > IP || IP > MM_MODULE_MAX)
 		)
 	{
diff --git a/Kernel/arch/x86_64/include/arch.h b/Kernel/arch/x86_64/include/arch.h
index a0b3ba99..57441b69 100644
--- a/Kernel/arch/x86_64/include/arch.h
+++ b/Kernel/arch/x86_64/include/arch.h
@@ -6,6 +6,7 @@
 #define _ARCH_H_
 
 //#include <stdint.h>
+#define	USER_MAX 	0x00007FFF##FFFFF000
 #define KERNEL_BASE	0xFFFFFFFF##80000000
 #define BITS	64
 #define PAGE_SIZE	0x1000
diff --git a/Kernel/arch/x86_64/include/common.inc.asm b/Kernel/arch/x86_64/include/common.inc.asm
index c3a362a4..d2301777 100644
--- a/Kernel/arch/x86_64/include/common.inc.asm
+++ b/Kernel/arch/x86_64/include/common.inc.asm
@@ -1,4 +1,6 @@
 
+%define INITIAL_KSTACK_SIZE	8
+
 %macro SAVE_GPR 1
 	mov [%1-0x08], r15
 	mov [%1-0x10], r14
diff --git a/Kernel/arch/x86_64/include/mm_virt.h b/Kernel/arch/x86_64/include/mm_virt.h
index 3c7babc6..df75361c 100644
--- a/Kernel/arch/x86_64/include/mm_virt.h
+++ b/Kernel/arch/x86_64/include/mm_virt.h
@@ -50,7 +50,6 @@
 #define USER_LIB_MAX	0x00007000##00000000
 #define USER_STACK_SZ	0x00000000##00020000	// 64 KiB
 #define USER_STACK_TOP	0x00007FFF##FFFFF000
-#define	MM_USER_MAX 	0x00007FFF##FFFFF000
 #define	MM_KERNEL_RANGE	0xFFFF8000##00000000
 #define MM_KHEAP_BASE	(MM_KERNEL_RANGE|(0x8000##00000000))
 #define MM_KHEAP_MAX	(MM_KERNEL_RANGE|(0x9000##00000000))
diff --git a/Kernel/arch/x86_64/include/proc.h b/Kernel/arch/x86_64/include/proc.h
index a23aa37a..2173c34c 100644
--- a/Kernel/arch/x86_64/include/proc.h
+++ b/Kernel/arch/x86_64/include/proc.h
@@ -43,7 +43,8 @@ typedef struct sTaskState
 }	tTaskState;
 
 // === CONSTANTS ===
-#define KERNEL_STACK_SIZE	0x10000	// 64 KiB
+#define KERNEL_STACK_SIZE	0x8000	// 32 KiB
+//#define KERNEL_STACK_SIZE	0x10000	// 64 KiB
 
 #endif
 
diff --git a/Kernel/arch/x86_64/mm_phys.c b/Kernel/arch/x86_64/mm_phys.c
index 893fb5b1..064622f8 100644
--- a/Kernel/arch/x86_64/mm_phys.c
+++ b/Kernel/arch/x86_64/mm_phys.c
@@ -8,6 +8,8 @@
 #include <mboot.h>
 #include <mm_virt.h>
 
+#define TRACE_REF	0
+
 enum eMMPhys_Ranges
 {
 	MM_PHYS_16BIT,	// Does anything need this?
@@ -30,6 +32,14 @@ void	MM_InitPhys_Multiboot(tMBoot_Info *MBoot);
 //void	MM_DerefPhys(tPAddr PAddr);
  int	MM_int_GetRangeID( tPAddr Addr );
 
+// === MACROS ===
+#define PAGE_ALLOC_TEST(__page) 	(gaMainBitmap[(__page)>>6] & (1ULL << ((__page)&63)))
+#define PAGE_ALLOC_SET(__page)  	do{gaMainBitmap[(__page)>>6] |= (1ULL << ((__page)&63));}while(0)
+#define PAGE_ALLOC_CLEAR(__page)	do{gaMainBitmap[(__page)>>6] &= ~(1ULL << ((__page)&63));}while(0)
+//#define PAGE_MULTIREF_TEST(__page)	(gaMultiBitmap[(__page)>>6] & (1ULL << ((__page)&63)))
+//#define PAGE_MULTIREF_SET(__page)	do{gaMultiBitmap[(__page)>>6] |= 1ULL << ((__page)&63);}while(0)
+//#define PAGE_MULTIREF_CLEAR(__page)	do{gaMultiBitmap[(__page)>>6] &= ~(1ULL << ((__page)&63));}while(0)
+
 // === GLOBALS ===
 tMutex	glPhysicalPages;
 Uint64	*gaSuperBitmap = (void*)MM_PAGE_SUPBMP;	// 1 bit = 64 Pages, 16 MiB per Word
@@ -425,12 +435,16 @@ tPAddr MM_AllocPhysRange(int Pages, int MaxBits)
 	for( i = 0; i < Pages; i++, addr++ )
 	{
 		gaMainBitmap[addr >> 6] |= 1LL << (addr & 63);
+		if( MM_GetPhysAddr( (tVAddr)&gaiPageReferences[addr] ) )
+			gaiPageReferences[addr] = 1;
+//		Log("page %P refcount = %i", MM_GetRefCount(addr<<12)); 
 		rangeID = MM_int_GetRangeID(addr << 12);
 		giPhysRangeFree[ rangeID ] --;
 		LOG("%x == %x", addr, giPhysRangeFirst[ rangeID ]);
 		if(addr == giPhysRangeFirst[ rangeID ])
 			giPhysRangeFirst[ rangeID ] += 1;
 	}
+	addr -= Pages;
 	ret = addr;	// Save the return address
 	
 	// Update super bitmap
@@ -444,6 +458,9 @@ tPAddr MM_AllocPhysRange(int Pages, int MaxBits)
 	}
 	
 	Mutex_Release(&glPhysicalPages);
+	#if TRACE_REF
+	Log("MM_AllocPhysRange: ret = %P (Ref %i)", ret << 12, MM_GetRefCount(ret<<12));
+	#endif
 	LEAVE('x', ret << 12);
 	return ret << 12;
 }
@@ -476,27 +493,43 @@ void MM_RefPhys(tPAddr PAddr)
 {
 	Uint64	page = PAddr >> 12;
 	
-	if( PAddr >> 12 > giMaxPhysPage )	return ;
+	if( page > giMaxPhysPage )	return ;
 	
-	if( gaMainBitmap[ page >> 6 ] & (1LL << (page&63)) )
+	if( PAGE_ALLOC_TEST(page) )
 	{
-		// Reference again
-		gaMultiBitmap[ page >> 6 ] |= 1LL << (page&63);
-		if( !MM_GetPhysAddr( ((tVAddr)&gaiPageReferences[ page ]) & ~0xFFF ) ) {
-			if( !MM_Allocate( ((tVAddr)&gaiPageReferences[ page ]) & ~0xFFF ) ) {
+		tVAddr	ref_base = ((tVAddr)&gaiPageReferences[ page ]) & ~0xFFF;
+		// Allocate reference page
+		if( !MM_GetPhysAddr(ref_base) )
+		{
+			const int	pages_per_refpage = PAGE_SIZE/sizeof(gaiPageReferences[0]);
+			 int	i;
+			 int	page_base = page / pages_per_refpage * pages_per_refpage;
+			if( !MM_Allocate( ref_base ) ) {
 				Log_Error("Arch", "Out of memory when allocating reference count page");
 				return ;
 			}
+			// Fill block
+			Log("Allocated references for %P-%P", page_base << 12, (page_base+pages_per_refpage-1)<<12);
+			for( i = 0; i < pages_per_refpage; i ++ ) {
+				 int	pg = page_base + i;
+				gaiPageReferences[pg] = !!PAGE_ALLOC_TEST(pg);
+			}
 		}
-		gaiPageReferences[ page ] ++;
+		gaiPageReferences[page] ++;
 	}
 	else
 	{
 		// Allocate
-		gaMainBitmap[page >> 6] |= 1LL << (page&63);
-		if( gaMainBitmap[page >> 6 ] + 1 == 0 )
+		PAGE_ALLOC_SET(page);
+		if( gaMainBitmap[page >> 6] + 1 == 0 )
 			gaSuperBitmap[page>> 12] |= 1LL << ((page >> 6) & 63);
+		if( MM_GetPhysAddr( (tVAddr)&gaiPageReferences[page] ) )
+			gaiPageReferences[page] = 1;
 	}
+
+	#if TRACE_REF
+	Log("MM_RefPhys: %P referenced (%i)", page << 12, MM_GetRefCount(page << 12));
+	#endif
 }
 
 /**
@@ -508,18 +541,17 @@ void MM_DerefPhys(tPAddr PAddr)
 	
 	if( PAddr >> 12 > giMaxPhysPage )	return ;
 	
-	if( gaMultiBitmap[ page >> 6 ] & (1LL << (page&63)) ) {
+	if( MM_GetPhysAddr( (tVAddr) &gaiPageReferences[page] ) )
+	{
 		gaiPageReferences[ page ] --;
-		if( gaiPageReferences[ page ] == 1 )
-			gaMultiBitmap[ page >> 6 ] &= ~(1LL << (page&63));
 		if( gaiPageReferences[ page ] == 0 )
-			gaMainBitmap[ page >> 6 ] &= ~(1LL << (page&63));
+			PAGE_ALLOC_CLEAR(page);
 	}
 	else
-		gaMainBitmap[ page >> 6 ] &= ~(1LL << (page&63));
+		PAGE_ALLOC_CLEAR(page);
 	
 	// Update the free counts if the page was freed
-	if( !(gaMainBitmap[ page >> 6 ] & (1LL << (page&63))) )
+	if( !PAGE_ALLOC_TEST(page) )
 	{
 		 int	rangeID;
 		rangeID = MM_int_GetRangeID( PAddr );
@@ -534,19 +566,23 @@ void MM_DerefPhys(tPAddr PAddr)
 	if(gaMainBitmap[ page >> 6 ] + 1 != 0 ) {
 		gaSuperBitmap[page >> 12] &= ~(1LL << ((page >> 6) & 63));
 	}
+	
+	#if TRACE_REF
+	Log("Page %P dereferenced (%i)", page << 12, MM_GetRefCount(page << 12));
+	#endif
 }
 
 int MM_GetRefCount( tPAddr PAddr )
 {
 	PAddr >>= 12;
 	
-	if( PAddr >> 12 > giMaxPhysPage )	return 0;
+	if( PAddr > giMaxPhysPage )	return 0;
 
-	if( gaMultiBitmap[ PAddr >> 6 ] & (1LL << (PAddr&63)) ) {
+	if( MM_GetPhysAddr( (tVAddr)&gaiPageReferences[PAddr] ) ) {
 		return gaiPageReferences[PAddr];
 	}
 
-	if( gaMainBitmap[ PAddr >> 6 ] & (1LL << (PAddr&63)) )
+	if( PAGE_ALLOC_TEST(PAddr) )
 	{
 		return 1;
 	}
diff --git a/Kernel/arch/x86_64/mm_virt.c b/Kernel/arch/x86_64/mm_virt.c
index d9ae34cf..a8d890a1 100644
--- a/Kernel/arch/x86_64/mm_virt.c
+++ b/Kernel/arch/x86_64/mm_virt.c
@@ -56,10 +56,12 @@
 extern void	Error_Backtrace(Uint IP, Uint BP);
 extern tPAddr	gInitialPML4[512];
 extern void	Threads_SegFault(tVAddr Addr);
+extern char	_UsertextBase[];
 
 // === PROTOTYPES ===
 void	MM_InitVirt(void);
 //void	MM_FinishVirtualInit(void);
+void	MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable );
  int	MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs);
 void	MM_DumpTables(tVAddr Start, tVAddr End);
  int	MM_GetPageEntryPtr(tVAddr Addr, BOOL bTemp, BOOL bAllocate, BOOL bLargePage, tPAddr **Pointer);
@@ -75,6 +77,7 @@ tMutex	glMM_TempFractalLock;
 // === CODE ===
 void MM_InitVirt(void)
 {
+	Log_Debug("MMVirt", "&PAGEMAPLVL4(0) = %p", &PAGEMAPLVL4(0));	// Debug aid: report the virtual address through which PML4 entry 0 is accessed
 //	MM_DumpTables(0, -1L);
 }
 
@@ -84,6 +87,67 @@ void MM_FinishVirtualInit(void)
 }
 
 /**
+ * \brief Resolve a copy-on-write entry: reuse the page when it has a single reference, otherwise copy it
+ * \param Ent	Pointer to the entry in the PML4/PDP/PD/PT
+ * \param NextLevel	Pointer to contents of the entry (read as the copy source, then flushed with INVLPG)
+ * \param Addr	Address handed to Threads_SegFault if no physical page can be allocated (callers pass the faulting address)
+ * \param bTable	Non-zero when NextLevel is a table: its present entries are referenced and writable ones marked COW
+ */
+void MM_int_ClonePageEnt( Uint64 *Ent, void *NextLevel, tVAddr Addr, int bTable )
+{
+	tPAddr	curpage = *Ent & PADDR_MASK; 	// Physical page the entry currently points at
+	if( MM_GetRefCount( curpage ) <= 0 ) {
+		Log_KernelPanic("MMVirt", "Page %P still marked COW, but unreferenced", curpage);
+	}
+//	Log_Debug("MM_Virt", "%P refcount %i", curpage, MM_GetRefCount( curpage ));
+	if( MM_GetRefCount( curpage ) == 1 )	// Only one reference left: no copy needed
+	{
+		*Ent &= ~PF_COW;	// Clear the COW marker ...
+		*Ent |= PF_PRESENT|PF_WRITE;	// ... and make the existing page writable in place
+//		Log_Debug("MMVirt", "COW ent at %p (%p), last (%P)", Ent, NextLevel, curpage);
+	}
+	else
+	{
+		void	*tmp;
+		tPAddr	paddr;
+		
+		if( !(paddr = MM_AllocPhys()) ) {	// No physical page available for the copy
+			Threads_SegFault(Addr);
+			return ;
+		}
+
+		ASSERT(paddr != curpage);
+			
+		tmp = (void*)MM_MapTemp(paddr);	// NOTE(review): result is not checked before the memcpy; MM_MapTemp appears able to return 0 - confirm
+		memcpy( tmp, NextLevel, 0x1000 );	// Copy the full 4 KiB of contents into the new page
+		MM_FreeTemp( (tVAddr)tmp );
+		
+//		Log_Debug("MMVirt", "COW ent at %p (%p) from %P to %P", Ent, NextLevel, curpage, paddr);
+
+		MM_DerefPhys( curpage );	// This entry no longer uses the shared page
+		*Ent &= PF_USER;	// Keep only the PF_USER bit; address and other flags are rebuilt on the next line
+		*Ent |= paddr|PF_PRESENT|PF_WRITE;
+	}
+	INVLPG( (tVAddr)NextLevel );	// Drop the cached translation for the contents just remapped
+	
+	// Mark COW on pages
+	// NOTE(review): this also runs after the refcount == 1 branch, where no copy was made - confirm the extra child references are intended
+	if(bTable) 
+	{
+		Uint64	*dp = NextLevel;
+		 int	i;
+		for( i = 0; i < 512; i ++ )	// 512 eight-byte entries fill the 0x1000-byte table
+		{
+			if( !(dp[i] & PF_PRESENT) )	continue;
+			MM_RefPhys( dp[i] & PADDR_MASK );	// Take a reference on every present child
+			if( dp[i] & PF_WRITE ) {	// Writable children become read-only and COW
+				dp[i] &= ~PF_WRITE;
+				dp[i] |= PF_COW;
+			}
+		}
+	}
+}
+
+/**
  * \brief Called on a page fault
  */
 int MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
@@ -93,34 +157,36 @@ int MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 	if( PAGEMAPLVL4(Addr>>39) & PF_PRESENT
 	 && PAGEDIRPTR (Addr>>30) & PF_PRESENT
 	 && PAGEDIR    (Addr>>21) & PF_PRESENT
-	 && PAGETABLE  (Addr>>12) & PF_PRESENT
-	 && PAGETABLE  (Addr>>12) & PF_COW )
+	 && PAGETABLE  (Addr>>12) & PF_PRESENT )
 	{
-		tPAddr	paddr;
-		if(MM_GetRefCount( PAGETABLE(Addr>>12) & PADDR_MASK ) == 1)
+		// PML4 Entry
+		if( PAGEMAPLVL4(Addr>>39) & PF_COW )
 		{
-			PAGETABLE(Addr>>12) &= ~PF_COW;
-			PAGETABLE(Addr>>12) |= PF_PRESENT|PF_WRITE;
+			tPAddr	*dp = &PAGEDIRPTR((Addr>>39)*512);
+			MM_int_ClonePageEnt( &PAGEMAPLVL4(Addr>>39), dp, Addr, 1 );
+//			MM_DumpTables(Addr>>39 << 39, (((Addr>>39) + 1) << 39) - 1);
 		}
-		else
+		// PDP Entry
+		if( PAGEDIRPTR(Addr>>30) & PF_COW )
 		{
-			void	*tmp;
-			//Log("MM_PageFault: COW - MM_DuplicatePage(0x%x)", Addr);
-			paddr = MM_AllocPhys();
-			if( !paddr ) {
-				Threads_SegFault(Addr);
-				return 0;
-			}
-			tmp = (void*)MM_MapTemp(paddr);
-			memcpy( tmp, (void*)(Addr & ~0xFFF), 0x1000 );
-			MM_FreeTemp( (tVAddr)tmp );
-			MM_DerefPhys( PAGETABLE(Addr>>12) & PADDR_MASK );
-			PAGETABLE(Addr>>12) &= PF_USER;
-			PAGETABLE(Addr>>12) |= paddr|PF_PRESENT|PF_WRITE;
+			tPAddr	*dp = &PAGEDIR( (Addr>>30)*512 );
+			MM_int_ClonePageEnt( &PAGEDIRPTR(Addr>>30), dp, Addr, 1 );
+//			MM_DumpTables(Addr>>30 << 30, (((Addr>>30) + 1) << 30) - 1);
+		}
+		// PD Entry
+		if( PAGEDIR(Addr>>21) & PF_COW )
+		{
+			tPAddr	*dp = &PAGETABLE( (Addr>>21)*512 );
+			MM_int_ClonePageEnt( &PAGEDIR(Addr>>21), dp, Addr, 1 );
+//			MM_DumpTables(Addr>>21 << 21, (((Addr>>21) + 1) << 21) - 1);
+		}
+		// PT Entry
+		if( PAGETABLE(Addr>>12) & PF_COW )
+		{
+			MM_int_ClonePageEnt( &PAGETABLE(Addr>>12), (void*)(Addr & ~0xFFF), Addr, 0 );
+			INVLPG( Addr & ~0xFFF );
+			return 0;
 		}
-		
-		INVLPG( Addr & ~0xFFF );
-		return 0;
 	}
 	#endif
 	
@@ -215,19 +281,16 @@ void MM_DumpTables(tVAddr Start, tVAddr End)
 			if( !(PAGEMAPLVL4(page>>27) & PF_PRESENT) ) {
 				page += (1 << 27) - 1;
 				curPos += (1L << 39) - 0x1000;
-				//Debug("pml4 ent unset (page = 0x%x now)", page);
 				continue;
 			}
 			if( !(PAGEDIRPTR(page>>18) & PF_PRESENT) ) {
 				page += (1 << 18) - 1;
 				curPos += (1L << 30) - 0x1000;
-				//Debug("pdp ent unset (page = 0x%x now)", page);
 				continue;
 			}
 			if( !(PAGEDIR(page>>9) & PF_PRESENT) ) {
 				page += (1 << 9) - 1;
 				curPos += (1L << 21) - 0x1000;
-				//Debug("pd ent unset (page = 0x%x now)", page);
 				continue;
 			}
 			if( !(PAGETABLE(page) & PF_PRESENT) )	continue;
@@ -382,7 +445,7 @@ void MM_Unmap(tVAddr VAddr)
 	if( !(PAGEDIRPTR(VAddr >> 30) & 1) )	return ;
 	// Check Page Dir
 	if( !(PAGEDIR(VAddr >> 21) & 1) )	return ;
-	
+
 	PAGETABLE(VAddr >> PTAB_SHIFT) = 0;
 	INVLPG( VAddr );
 }
@@ -581,6 +644,7 @@ tVAddr MM_MapHWPages(tPAddr PAddr, Uint Number)
 			ret -= 0x1000;
 			PAddr -= 0x1000;
 			MM_Map(ret, PAddr);
+			MM_RefPhys(PAddr);
 		}
 		
 		return ret;
@@ -598,6 +662,7 @@ void MM_UnmapHWPages(tVAddr VAddr, Uint Number)
 //	Log_KernelPanic("MM", "TODO: Implement MM_UnmapHWPages");
 	while( Number -- )
 	{
+		MM_DerefPhys( MM_GetPhysAddr(VAddr) );
 		MM_Unmap(VAddr);
 		VAddr += 0x1000;
 	}
@@ -626,10 +691,7 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr)
 		phys = MM_AllocPhys();
 		*PhysAddr = phys;
 		ret = MM_MapHWPages(phys, 1);
-		if(ret == 0) {
-			MM_DerefPhys(phys);
-			return 0;
-		}
+		MM_DerefPhys(phys);
 		return ret;
 	}
 	
@@ -640,10 +702,11 @@ tVAddr MM_AllocDMA(int Pages, int MaxBits, tPAddr *PhysAddr)
 	
 	// Allocated successfully, now map
 	ret = MM_MapHWPages(phys, Pages);
+	// MapHWPages references the pages, so deref them back down to 1
+	for(;Pages--;phys+=0x1000)
+		MM_DerefPhys(phys);
 	if( ret == 0 ) {
 		// If it didn't map, free then return 0
-		for(;Pages--;phys+=0x1000)
-			MM_DerefPhys(phys);
 		return 0;
 	}
 	
@@ -669,6 +732,8 @@ tVAddr MM_MapTemp(tPAddr PAddr)
 			continue ;
 
 		*ent = PAddr | 3;
+		MM_RefPhys(PAddr);
+		INVLPG(ret);
 		return ret;
 	}
 	return 0;
@@ -702,9 +767,11 @@ tPAddr MM_Clone(void)
 	{
 		TMPMAPLVL4(i) = PAGEMAPLVL4(i);
 //		Log_Debug("MM", "TMPMAPLVL4(%i) = 0x%016llx", i, TMPMAPLVL4(i));
-		if( TMPMAPLVL4(i) & 1 )
-		{
-			MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK );
+		if( !(TMPMAPLVL4(i) & PF_PRESENT) )	continue ;
+		
+		MM_RefPhys( TMPMAPLVL4(i) & PADDR_MASK );
+		
+		if( TMPMAPLVL4(i) & PF_WRITE ) {
 			TMPMAPLVL4(i) |= PF_COW;
 			TMPMAPLVL4(i) &= ~PF_WRITE;
 		}
@@ -735,7 +802,7 @@ tPAddr MM_Clone(void)
 	//  There is 1 guard page below the stack
 	kstackbase = Proc_GetCurThread()->KernelStack - KERNEL_STACK_SIZE;
 
-//	Log("MM_Clone: kstackbase = %p", kstackbase);
+	Log("MM_Clone: kstackbase = %p", kstackbase);
 	
 	TMPMAPLVL4(MM_KSTACK_BASE >> PML4_SHIFT) = 0;
 	for( i = 1; i < KERNEL_STACK_SIZE/0x1000; i ++ )
diff --git a/Kernel/arch/x86_64/proc.asm b/Kernel/arch/x86_64/proc.asm
index 90eb3dca..e6db3ce2 100644
--- a/Kernel/arch/x86_64/proc.asm
+++ b/Kernel/arch/x86_64/proc.asm
@@ -48,6 +48,7 @@ NewTaskHeader:
 	jmp .hlt
 
 [extern MM_Clone]
+[extern MM_DumpTables]
 [global Proc_CloneInt]
 Proc_CloneInt:
 	PUSH_GPR
@@ -55,13 +56,16 @@ Proc_CloneInt:
 	mov [rdi], rsp
 	call MM_Clone
 	; Save CR3
-	mov rsi, [rsp+0x30]
+	mov rsi, [rsp+0x30]	; Saved version of RSI
 	mov [rsi], rax
 	; Undo the PUSH_GPR
 	add rsp, 0x80
 	mov rax, .newTask
 	ret
 .newTask:
+;	mov rdi, 0
+;	mov rsi, 0x800000000000
+;	call MM_DumpTables
 	POP_GPR
 	xor eax, eax
 	ret
@@ -123,3 +127,4 @@ SwitchTasks:
 	xor eax, eax	; Return zero
 	ret
 
+; vim: ft=nasm
diff --git a/Kernel/arch/x86_64/proc.c b/Kernel/arch/x86_64/proc.c
index de25c450..7dff909b 100644
--- a/Kernel/arch/x86_64/proc.c
+++ b/Kernel/arch/x86_64/proc.c
@@ -16,7 +16,7 @@
 #include <hal_proc.h>
 
 // === FLAGS ===
-#define DEBUG_TRACE_SWITCH	0
+#define DEBUG_TRACE_SWITCH	1
 #define BREAK_ON_SWITCH 	0	// Break into bochs debugger on a task switch
 
 // === CONSTANTS ===
@@ -48,7 +48,7 @@ extern int	giTotalTickets;
 extern int	giNumActiveThreads;
 extern tThread	gThreadZero;
 extern void	Threads_Dump(void);
-extern void	Proc_ReturnToUser(void);
+extern void	Proc_ReturnToUser(tVAddr Handler, tVAddr KStackTop, int Argument);
 extern void	Time_UpdateTimestamp(void);
 extern void	SwitchTasks(Uint NewSP, Uint *OldSP, Uint NewIP, Uint *OldIO, Uint CR3);
 
@@ -486,16 +486,12 @@ int Proc_Clone(Uint Flags)
 	
 	// Save core machine state
 	rip = Proc_CloneInt(&newThread->SavedState.RSP, &newThread->MemState.CR3);
-	if(rip == 0) {
-		outb(0x20, 0x20);	// ACK Timer and return as child
-		__asm__ __volatile__ ("sti");
-		return 0;
-	}
+	if(rip == 0)	return 0;	// Child
 	newThread->KernelStack = cur->KernelStack;
 	newThread->SavedState.RIP = rip;
 
 	// DEBUG	
-//	Log("New (Clone) %p, rsp = %p, cr3 = %p", rip, newThread->SavedState.RSP, newThread->MemState.CR3);
+	Log("New (Clone) %p, rsp = %p, cr3 = %p", rip, newThread->SavedState.RSP, newThread->MemState.CR3);
 	{
 		Uint cr3;
 		__asm__ __volatile__ ("mov %%cr3, %0" : "=r" (cr3));
@@ -563,15 +559,19 @@ Uint Proc_MakeUserStack(void)
 	
 	// Check Prospective Space
 	for( i = USER_STACK_SZ >> 12; i--; )
+	{
 		if( MM_GetPhysAddr( base + (i<<12) ) != 0 )
 			break;
+	}
 	
 	if(i != -1)	return 0;
 	
 	// Allocate Stack - Allocate incrementally to clean up MM_Dump output
 	for( i = 0; i < USER_STACK_SZ/0x1000; i++ )
 	{
-		if( !MM_Allocate( base + (i<<12) ) )
+		tPAddr	alloc = MM_Allocate( base + (i<<12) );
+		Log_Debug("Proc", "Proc_MakeUserStack: alloc = %P", alloc);
+		if( !alloc )
 		{
 			// Error
 			Log_Error("Proc", "Unable to allocate user stack (%i pages requested)", USER_STACK_SZ/0x1000);
@@ -633,6 +633,7 @@ void Proc_StartProcess(Uint16 SS, Uint Stack, Uint Flags, Uint16 CS, Uint IP)
 	}
 	Log("Proc_StartProcess: (SS=%x, Stack=%p, Flags=%x, CS=%x, IP=%p)",
 		SS, Stack, Flags, CS, IP);
+	MM_DumpTables(0, USER_MAX);
 	if(CS == 0x1B)
 	{
 		// 32-bit return
@@ -691,9 +692,8 @@ int Proc_Demote(Uint *Err, int Dest, tRegs *Regs)
  */
 void Proc_CallFaultHandler(tThread *Thread)
 {
-	// Rewinds the stack and calls the user function
 	// Never returns
-	__asm__ __volatile__ ("mov %0, %%rbp;\n\tcall Proc_ReturnToUser" :: "r"(Thread->FaultHandler));
+	Proc_ReturnToUser(Thread->FaultHandler, Thread->KernelStack, Thread->CurFaultNum);	// Argument order follows the (Handler, KStackTop, Argument) prototype declared in this file
 	for(;;);
 }
 
diff --git a/Kernel/arch/x86_64/start32.asm b/Kernel/arch/x86_64/start32.asm
index 6de3a876..da0739b8 100644
--- a/Kernel/arch/x86_64/start32.asm
+++ b/Kernel/arch/x86_64/start32.asm
@@ -1,3 +1,8 @@
+;
+; Acess2 x86_64 port
+;
+
+%include "arch/x86_64/include/common.inc.asm"
 
 [BITS 32]
 
@@ -150,11 +155,11 @@ gKStackPT:	; Covers 2 MiB
 	; Initial stack - 64KiB
 	dq	0
 	%assign i 0
-	%rep 16-1
+	%rep INITIAL_KSTACK_SIZE-1
 	dd	gInitialKernelStack - KERNEL_BASE + i*0x1000 + 0x103, 0
 	%assign i i+1
 	%endrep
-	times 512-16	dq 0
+	times 512-INITIAL_KSTACK_SIZE	dq 0
 gInitialPT1:	; 2 MiB
 	%assign i 0
 	%rep 512
@@ -171,8 +176,10 @@ gInitialPT2:	; 2 MiB
 [section .padata]
 [global gInitialKernelStack]
 gInitialKernelStack:
-	times 0x1000*(16-1)	db 0	; 16 Pages
+	times 0x1000*(INITIAL_KSTACK_SIZE-1)	db 0	; INITIAL_KSTACK_SIZE-1 pages of backing; the first gKStackPT slot is left unmapped
 
 [section .rodata]
 csNot64BitCapable:
 	db "Not 64-bit Capable",0
+
+; vim: ft=nasm
diff --git a/Kernel/arch/x86_64/start64.asm b/Kernel/arch/x86_64/start64.asm
index 5027a1e8..b63b63ac 100644
--- a/Kernel/arch/x86_64/start64.asm
+++ b/Kernel/arch/x86_64/start64.asm
@@ -1,6 +1,7 @@
 ;
 ; Acess2 x86_64 Port
 ;
+%include "arch/x86_64/include/common.inc.asm"
 [bits 64]
 ;KERNEL_BASE	equ	0xFFFF800000000000
 KERNEL_BASE	equ	0xFFFFFFFF80000000
@@ -35,7 +36,7 @@ start64:
 	rep stosq
 	
 	; Set kernel stack
-	mov rsp, 0xFFFFA00000000000 + 0x10000
+	mov rsp, 0xFFFFA00000000000 + INITIAL_KSTACK_SIZE*0x1000
 	
 	; Call main
 	mov edi, [gMultibootMagic - KERNEL_BASE]
@@ -55,45 +56,35 @@ GetCPUNum:
 	shr ax, 4	; One 16-byte TSS per CPU
 	ret
 
-KSTACK_USERSTATE_SIZE	equ	(16+1+5)*8	; GPRegs, CPU, IRET
+KSTACK_USERSTATE_SIZE	equ	(5+2+16+2)*8	; IRET, ErrorNum, ErrorCode, GPRs, FS&GS
 [global Proc_ReturnToUser]
-[extern Proc_GetCurThread]
 Proc_ReturnToUser:
-	; RBP is the handler to use
-	
-	call Proc_GetCurThread
-	
-	; EAX is the current thread
-	mov rbx, rax
-	mov rax, [rbx+40]	; Get Kernel Stack
-	sub rax, KSTACK_USERSTATE_SIZE
+	; RDI - Handler (user-mode entry point, moved into RCX for SYSRET)
+	; RSI - Kernel Stack (the saved user SP is read relative to this)
+	; RDX - Signal num (handed to the handler in RDI)
 	
 	;
 	; NOTE: This can cause corruption if the signal happens while the user
 	;       has called a kernel operation.
 	; Good thing this can only be called on a user fault.
 	;
-	
+
+	xchg bx, bx	; NOTE(review): looks like the Bochs magic breakpoint; this patch comments the same instruction out in ErrorCommon - confirm it should stay
 	; Get and alter User SP
-	mov rcx, [rax+KSTACK_USERSTATE_SIZE-3*8]
-	mov rdx, [rbx+60]	; Get Signal Number
-	mov [rcx-8], rdx
-	mov rax, User_Syscall_RetAndExit
+	mov rcx, [rsi-0x20]	; Get user SP (NOTE(review): 0x20 assumes a fixed frame layout below the stack top - confirm)
+	xor eax, eax	; RAX = 0: the slot written below now holds 0 instead of User_Syscall_RetAndExit
 	mov [rcx-16], rax
 	sub rcx, 16
 	
-	; Restore Segment Registers
-	mov ax, 0x23
-	mov ds, ax
-	mov es, ax
-	
-	push 0x23	; SS
-	push rcx	; RSP
-	push 0x202	; RFLAGS (IF and Rsvd)
-	push 0x1B	; CS
-	push rbp	; RIP
+	; Drop down to user mode
+	cli	; Presumably so no interrupt arrives once RSP points at the user stack - confirm
+	mov rsp, rcx	; Set SP
+	mov rcx, rdi	; SYSRET IP
 	
-	iret
+	mov rdi, rdx	; Argument for handler
+	mov r11, 0x202	; RFlags for SYSRET (IF and Rsvd, same value the removed IRET frame pushed)
+	db 0x48	; REX.W prefix byte: selects the 64-bit form of the SYSRET that follows
+	sysret
 
 ; int CallWithArgArray(void *Ptr, int NArgs, Uint *Args)
 ; Call a function passing the array as arguments
@@ -152,12 +143,4 @@ CallWithArgArray:
 	pop rbp
 	ret
 
-[section .usertext]
-User_Syscall_RetAndExit:
-	mov rdi, rax
-	jmp User_Syscall_Exit
-User_Syscall_Exit:
-	xor rax, rax
-	; RDI: Return Value
-	int 0xAC
-
+; vim: ft=nasm
-- 
GitLab