diff --git a/Kernel/arch/x86/include/arch.h b/Kernel/arch/x86/include/arch.h
index 9307ada3ce4cc0f17fe6d835bf1400ff653d82ab..8a9870589b27b0010b2798f4696965729f34f70b 100644
--- a/Kernel/arch/x86/include/arch.h
+++ b/Kernel/arch/x86/include/arch.h
@@ -1,7 +1,7 @@
 /*
  * Acess2
  * - x86 Architecture
- * arch/i386/include/arch.h
+ * arch/x86/include/arch.h
  */
 #ifndef _ARCH_H_
 #define _ARCH_H_
diff --git a/Kernel/arch/x86/lib.c b/Kernel/arch/x86/lib.c
index a3050fe1e97443f7a71565eb2fa767d0bc42da07..175f9a5550f5cde85acfb9b070968b9bedfcab48 100644
--- a/Kernel/arch/x86/lib.c
+++ b/Kernel/arch/x86/lib.c
@@ -6,6 +6,8 @@
  */
 #include <acess.h>
 #include <threads_int.h>
+#include <arch_int.h>
+#include <hal_proc.h>	// GetCPUNum
 
 #define TRACE_LOCKS	0
 
@@ -22,8 +24,6 @@ extern tMutex	glPhysAlloc;
 //#define TRACE_LOCK_COND	(Lock != &glDebug_Lock && Lock != &glPhysAlloc.Protector)
 #endif
 
-extern int	GetCPUNum(void);
-
 // === PROTOTYPES ==
 Uint64	__divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
 Uint64	__udivdi3(Uint64 Num, Uint64 Den);
@@ -52,6 +52,17 @@ int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
 	return Lock->Lock == GetCPUNum() + 1;
 }
 
+/** \brief Spin until *Ptr reads zero, then atomically replace that zero with Value */
+void __AtomicTestSetLoop(Uint *Ptr, Uint Value)
+{
+	__ASM__(
+		"1:\n\t"
+		"xor %%eax, %%eax;\n\t"	// EAX = 0: the value *Ptr must hold to be claimed
+		"lock cmpxchgl %0, (%1);\n\t"	// if *Ptr == EAX then *Ptr = Value and ZF is set
+		"jnz 1b;\n\t"	// compare failed (slot busy), try again
+		:: "r"(Value), "r"(Ptr)
+		: "eax", "cc", "memory" );	// asm writes *Ptr: must also act as a compiler barrier
+}
 /**
  * \brief Acquire a Short Spinlock
  * \param Lock	Lock pointer
@@ -85,15 +96,7 @@ void SHORTLOCK(struct sShortSpinlock *Lock)
 	__ASM__("cli");
 	
 	// Wait for another CPU to release
-	__ASM__(
-		"1:\n\t"
-		"xor %%eax, %%eax;\n\t"
-		"lock cmpxchgl %0, (%1);\n\t"
-		"jnz 1b;\n\t"
-		:: "r"(cpu), "r"(&Lock->Lock)
-		: "eax" // EAX clobbered
-		);
-	
+	__AtomicTestSetLoop( (Uint*)&Lock->Lock, cpu );
 	Lock->IF = IF;
 	
 	#if TRACE_LOCKS
diff --git a/Kernel/arch/x86/mm_virt.c b/Kernel/arch/x86/mm_virt.c
index 137240c9cf214c84778ee7f00c9240f62f1a5d13..6a29c5648c9c41fcc08b92959911a28cffc9591b 100644
--- a/Kernel/arch/x86/mm_virt.c
+++ b/Kernel/arch/x86/mm_virt.c
@@ -16,6 +16,7 @@
 #include <mm_phys.h>
 #include <proc.h>
 #include <hal_proc.h>
+#include <arch_int.h>
 
 #define TAB	22
 
@@ -54,6 +55,15 @@
 
 #define INVLPG(addr)	__asm__ __volatile__ ("invlpg (%0)"::"r"(addr))
 
+/* Claim the temp fractal: IRQs off, spin until *gpTmpCR3 is 0, then store (cr3)|3 there */
+#define GET_TEMP_MAPPING(cr3) do { \
+	__ASM__("cli"); \
+	__AtomicTestSetLoop( (Uint *)gpTmpCR3, (cr3) | 3 ); \
+} while(0)
+/* Release the temp fractal (a zero entry marks it free) and re-enable IRQs.
+ * NOTE(review): `sti` is unconditional - IRQs end up on even if the caller had them off */
+#define REL_TEMP_MAPPING() do { *gpTmpCR3 = 0; __ASM__("sti"); } while(0)
+
 typedef Uint32	tTabEnt;
 
 // === IMPORTS ===
@@ -136,6 +146,8 @@ void MM_InstallVirtual(void)
 	for( i = ((tVAddr)&_UsertextEnd-(tVAddr)&_UsertextBase+0xFFF)/4096; i--; ) {
 		MM_SetFlags( (tVAddr)&_UsertextBase + i*4096, 0, MM_PFLAG_KERNEL );
 	}
+	
+	*gpTmpCR3 = 0;
 }
 
 /**
@@ -178,8 +190,9 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 		INVLPG( Addr & ~0xFFF );
 		return;
 	}
-	
-	__asm__ __volatile__ ("pushf; andw $0xFEFF, 0(%esp); popf");
+
+	// Disable instruction tracing	
+	__ASM__("pushf; andw $0xFEFF, 0(%esp); popf");
 	Proc_GetCurThread()->bInstrTrace = 0;
 
 	// If it was a user, tell the thread handler
@@ -190,7 +203,7 @@ void MM_PageFault(tVAddr Addr, Uint ErrorCode, tRegs *Regs)
 			(ErrorCode&16?" (Instruction Fetch)":"")
 			);
 		Log_Warning("MMVirt", "Instruction %04x:%08x accessed %p", Regs->cs, Regs->eip, Addr);
-		__asm__ __volatile__ ("sti");	// Restart IRQs
+		__ASM__("sti");	// Restart IRQs
 		#if 1
 		Error_Backtrace(Regs->eip, Regs->ebp);
 		#endif
@@ -329,7 +342,7 @@ tPAddr MM_Allocate(tVAddr VAddr)
 {
 	tPAddr	paddr;
 	//ENTER("xVAddr", VAddr);
-	//__asm__ __volatile__ ("xchg %bx,%bx");
+	//__ASM__("xchg %bx,%bx");
 	// Check if the directory is mapped
 	if( gaPageDir[ VAddr >> 22 ] == 0 )
 	{
@@ -414,7 +427,7 @@ tPAddr MM_GetPhysAddr(tVAddr Addr)
  */
 void MM_SetCR3(Uint CR3)
 {
-	__asm__ __volatile__ ("mov %0, %%cr3"::"r"(CR3));
+	__ASM__("mov %0, %%cr3"::"r"(CR3));	// write the argument into the CR3 control register
 }
 
 /**
@@ -426,7 +439,7 @@ int MM_Map(tVAddr VAddr, tPAddr PAddr)
 	//ENTER("xVAddr xPAddr", VAddr, PAddr);
 	// Sanity check
 	if( PAddr & 0xFFF || VAddr & 0xFFF ) {
-		Warning("MM_Map - Physical or Virtual Addresses are not aligned");
+		Log_Warning("MM_Virt", "MM_Map - Physical or Virtual Addresses are not aligned");
 		//LEAVE('i', 0);
 		return 0;
 	}
@@ -504,6 +517,58 @@ void MM_ClearUser(void)
 	INVLPG( gaPageDir );
 }
 
+/**
+ * \brief Deallocate an address space
+ * \param CR3	Physical address of the page directory to release
+ */
+void MM_ClearSpace(Uint32 CR3)
+{
+	 int	i, j;
+	
+	if(CR3 == (*gpPageCR3 & ~0xFFF)) {
+		Log_Error("MMVirt", "Can't clear current address space");
+		return ;
+	}
+
+	// Still shared: drop only this caller's reference, otherwise the count never falls to 1
+	if( MM_GetRefCount(CR3) > 1 ) {
+		Log_Log("MMVirt", "CR3 %P is still referenced, not clearing", CR3);
+		MM_DerefPhys( CR3 );
+		return ;
+	}
+
+	Log_Debug("MMVirt", "Clearing out address space 0x%x from 0x%x", CR3, *gpPageCR3);
+	
+	GET_TEMP_MAPPING(CR3);
+	INVLPG( gaTmpDir );
+
+	for( i = 0; i < 1024; i ++ )
+	{
+		Uint32	*table = &gaTmpTable[i*1024];
+		if( !(gaTmpDir[i] & PF_PRESENT) )
+			continue ;
+
+		INVLPG( table );
+		// Pages are released only for entries below 768 or strictly between the kernel stack bounds
+		if( i < 768 || (i > MM_KERNEL_STACKS >> 22 && i < MM_KERNEL_STACKS_END >> 22) )
+		{
+			for( j = 0; j < 1024; j ++ )
+			{
+				if( !(table[j] & 1) )
+					continue;
+				MM_DerefPhys( table[j] & ~0xFFF );
+			}
+		}
+
+		// Release the table, except the slot MM_Clone points back at the directory itself
+		if( i != (PAGE_TABLE_ADDR >> 22) )
+			MM_DerefPhys( gaTmpDir[i] & ~0xFFF );
+	}
+
+	MM_DerefPhys( CR3 );
+	REL_TEMP_MAPPING();
+}
+
 /**
  * \fn tPAddr MM_Clone(void)
  * \brief Clone the current address space
@@ -511,21 +576,20 @@ void MM_ClearUser(void)
 tPAddr MM_Clone(void)
 {
 	Uint	i, j;
-	tVAddr	ret;
+	tPAddr	ret;
 	Uint	page = 0;
 	tVAddr	kStackBase = Proc_GetCurThread()->KernelStack - MM_KERNEL_STACK_SIZE;
 	void	*tmp;
 	
-	Mutex_Acquire( &glTempFractal );
-	
 	// Create Directory Table
-	*gpTmpCR3 = MM_AllocPhys() | 3;
-	if( *gpTmpCR3 == 3 ) {
-		*gpTmpCR3 = 0;
+	ret = MM_AllocPhys();
+	if( ret == 0 ) {
 		return 0;
 	}
+	
+	// Map
+	GET_TEMP_MAPPING( ret );
 	INVLPG( gaTmpDir );
-	//LOG("Allocated Directory (%x)", *gpTmpCR3);
 	memsetd( gaTmpDir, 0, 1024 );
 	
 	if( Threads_GetPID() != 0 )
@@ -573,6 +637,10 @@ tPAddr MM_Clone(void)
 			gaTmpDir[ PAGE_TABLE_ADDR >> 22 ] = *gpTmpCR3;
 			continue;
 		}
+		if( i == (TMP_TABLE_ADDR >> 22) ) {
+			gaTmpDir[ TMP_TABLE_ADDR >> 22 ] = 0;
+			continue ;
+		}
 		
 		if( gaPageDir[i] == 0 ) {
 			gaTmpDir[i] = 0;
@@ -628,8 +696,7 @@ tPAddr MM_Clone(void)
 		}
 	}
 	
-	ret = *gpTmpCR3 & ~0xFFF;
-	Mutex_Release( &glTempFractal );
+	REL_TEMP_MAPPING();
 	
 	//LEAVE('x', ret);
 	return ret;
@@ -709,15 +776,10 @@ tVAddr MM_NewWorkerStack(Uint *StackContents, size_t ContentsSize)
 	base = WORKER_STACKS + base * WORKER_STACK_SIZE;
 	//Log(" MM_NewWorkerStack: base = 0x%x", base);
 	
-	// Acquire the lock for the temp fractal mappings
-	Mutex_Acquire(&glTempFractal);
-	
 	// Set the temp fractals to TID0's address space
-	*gpTmpCR3 = ((Uint)gaInitPageDir - KERNEL_BASE) | 3;
-	//Log(" MM_NewWorkerStack: *gpTmpCR3 = 0x%x", *gpTmpCR3);
+	GET_TEMP_MAPPING( ((Uint)gaInitPageDir - KERNEL_BASE) );
 	INVLPG( gaTmpDir );
 	
-	
 	// Check if the directory is mapped (we are assuming that the stacks
 	// will fit neatly in a directory)
 	//Log(" MM_NewWorkerStack: gaTmpDir[ 0x%x ] = 0x%x", base>>22, gaTmpDir[ base >> 22 ]);
@@ -732,9 +794,9 @@ tVAddr MM_NewWorkerStack(Uint *StackContents, size_t ContentsSize)
 		page = MM_AllocPhys();
 		gaTmpTable[ (base + addr) >> 12 ] = page | 3;
 	}
-	*gpTmpCR3 = 0;
-	// Release the temp mapping lock
-	Mutex_Release(&glTempFractal);
+
+	// Release temporary fractal
+	REL_TEMP_MAPPING();
 
 	// NOTE: Max of 1 page
 	// `page` is the last allocated page from the previious for loop
diff --git a/Kernel/arch/x86/proc.c b/Kernel/arch/x86/proc.c
index 30f318f38c511fc1f4c53d3e97e960a3ed976374..c804971cfddc97f54c129ce633baaac7069e4be4 100644
--- a/Kernel/arch/x86/proc.c
+++ b/Kernel/arch/x86/proc.c
@@ -12,6 +12,7 @@
 # include <mp.h>
 #endif
 #include <hal_proc.h>
+#include <arch_int.h>
 
 // === FLAGS ===
 #define DEBUG_TRACE_SWITCH	0
@@ -570,7 +571,7 @@ void Proc_ChangeStack(void)
 
 void Proc_ClearThread(tThread *Thread)
 {
-	Log_Warning("Proc", "TODO: Nuke address space etc");
+	MM_ClearSpace(Thread->MemState.CR3);
 	if(Thread->SavedState.SSE) {
 		free(Thread->SavedState.SSE);
 		Thread->SavedState.SSE = NULL;
@@ -587,6 +588,7 @@ int Proc_NewKThread(void (*Fcn)(void*), void *Data)
 	if(!newThread)	return -1;
 	
 	// Set CR3
+	MM_RefPhys( cur->MemState.CR3 );
 	newThread->MemState.CR3 = cur->MemState.CR3;
 
 	// Create new KStack