diff --git a/Kernel/Makefile.BuildNum.x86_64 b/Kernel/Makefile.BuildNum.x86_64
index ded600041c2a7485a6e2727ae6acfcbbea9ad4bc..bb5063bd23b934d7bfd48c579a04c3f4f86b872a 100644
--- a/Kernel/Makefile.BuildNum.x86_64
+++ b/Kernel/Makefile.BuildNum.x86_64
@@ -1 +1 @@
-BUILD_NUM = 208
+BUILD_NUM = 210
diff --git a/Kernel/arch/x86_64/Makefile b/Kernel/arch/x86_64/Makefile
index e30bf8d60fbb737b91feee07869f25b34b0196ee..c4260b515df24d61952449b066843d51733d8bfd 100644
--- a/Kernel/arch/x86_64/Makefile
+++ b/Kernel/arch/x86_64/Makefile
@@ -25,4 +25,4 @@ A_OBJ  = start32.ao start64.ao desctab.ao
 A_OBJ += main.o lib.o proc.o mm_virt.o mm_phys.o vm8086.o
 A_OBJ += kernelpanic.o errors.o
 
-POSTBUILD = objcopy $(BIN) -F elf32 $(BIN)
+POSTBUILD = objcopy $(BIN) -F elf32-i386 $(BIN)
diff --git a/Kernel/arch/x86_64/include/arch.h b/Kernel/arch/x86_64/include/arch.h
index 3993fe9c36258618d36eb59887732a4c1495f23e..c353089a53b9afea1b216d4f56817113a11be761 100644
--- a/Kernel/arch/x86_64/include/arch.h
+++ b/Kernel/arch/x86_64/include/arch.h
@@ -76,53 +76,25 @@ typedef struct sSyscallRegs
  * \brief Short Spinlock structure
  */
 struct sShortSpinlock {
+	#if STACKED_LOCKS == 2
+	void	* volatile Lock;	//!< Lock value: owning thread, NULL when free (the pointer itself is volatile)
+	#else
 	volatile int	Lock;	//!< Lock value
-	 int	IF;	//!< Interrupt state on call to SHORTLOCK
-};
-/**
- * \brief Determine if a short spinlock is locked
- * \param Lock	Lock pointer
- */
-static inline int IS_LOCKED(struct sShortSpinlock *Lock) {
-	return !!Lock->Lock;
-}
-/**
- * \brief Acquire a Short Spinlock
- * \param Lock	Lock pointer
- * 
- * This type of mutex should only be used for very short sections of code,
- * or in places where a Mutex_* would be overkill, such as appending
- * an element to linked list (usually two assignement lines in C)
- * 
- * \note This type of lock halts interrupts, so ensure that no timing
- * functions are called while it is held.
- */
-static inline void SHORTLOCK(struct sShortSpinlock *Lock) {
-	 int	v = 1;
-	
-	// Save interrupt state
-	__ASM__ ("pushf;\n\tpop %%rax" : "=a"(Lock->IF));
-	Lock->IF &= 0x200;
-	
-	// Stop interrupts
-	__ASM__ ("cli");
+	#endif
 	
-	// Wait for another CPU to release
-	while(v)
-		__ASM__("xchgl %%eax, (%%rdi)":"=a"(v):"a"(1),"D"(&Lock->Lock));
-}
-/**
- * \brief Release a short lock
- * \param Lock	Lock pointer
- */
-static inline void SHORTREL(struct sShortSpinlock *Lock) {
-	Lock->Lock = 0;
-	#if 0	// Which is faster?, meh the test is simpler
-	__ASM__ ("pushf;\n\tor %0, (%%rsp);\n\tpopf" : : "a"(Lock->IF));
-	#else
-	if(Lock->IF)	__ASM__ ("sti");
+	#if LOCK_DISABLE_INTS
+	 int	IF;	//!< Interrupt flag (EFLAGS & 0x200) saved by SHORTLOCK, tested by SHORTREL
+	#endif
+	#if STACKED_LOCKS
+	 int	Depth;	//!< Number of extra nested acquisitions by the current owner
 	#endif
-}
+};
+
+// === FUNCTIONS ===
+extern int	IS_LOCKED(struct sShortSpinlock *Lock);	// Non-zero while the lock is held
+extern int	CPU_HAS_LOCK(struct sShortSpinlock *Lock);	// Non-zero if the caller owns it (always 0 without STACKED_LOCKS)
+extern void	SHORTLOCK(struct sShortSpinlock *Lock);	// Spin until the lock is acquired
+extern void	SHORTREL(struct sShortSpinlock *Lock);	// Release the lock (or one nesting level)
 
 #endif
 
diff --git a/Kernel/arch/x86_64/lib.c b/Kernel/arch/x86_64/lib.c
index ee32d815f336caaa43f30f12d193af91099aea8f..dcd6ec3a4b7badfe99cec25cbfa3e60c00ea429c 100644
--- a/Kernel/arch/x86_64/lib.c
+++ b/Kernel/arch/x86_64/lib.c
@@ -3,7 +3,127 @@
 #include <acess.h>
 #include <arch.h>
 
+// === IMPORTS ===
+extern int	GetCPUNum(void);
+
 // === CODE ===
+/**
+ * \brief Report whether a short spinlock is currently held
+ * \param Lock	Spinlock to inspect
+ */
+int IS_LOCKED(struct sShortSpinlock *Lock)
+{
+	return (Lock->Lock != 0);
+}
+
+/**
+ * \brief Test whether the caller already owns a short spinlock
+ * \param Lock	Spinlock to test (owner is a thread or a CPU, per STACKED_LOCKS)
+ */
+int CPU_HAS_LOCK(struct sShortSpinlock *Lock)
+{
+	#if STACKED_LOCKS == 2
+	return Lock->Lock == Proc_GetCurThread();
+	#elif STACKED_LOCKS == 1
+	return Lock->Lock == GetCPUNum() + 1;
+	#else
+	return 0;
+	#endif
+}
+
+/**
+ * \brief Acquire a Short Spinlock
+ * \param Lock	Lock pointer
+ * 
+ * This type of mutex should only be used for very short sections of code,
+ * or in places where a Mutex_* would be overkill, such as appending
+ * an element to a linked list (usually two assignment lines in C)
+ * 
+ * \note This type of lock halts interrupts, so ensure that no timing
+ * functions are called while it is held. As a matter of fact, spend as
+ * little time as possible with this lock held
+ * \note If \a STACKED_LOCKS is set, this type of spinlock can be nested
+ */
+void SHORTLOCK(struct sShortSpinlock *Lock)
+{
+	long	v = 1;	// Previous lock value; register-wide so a thread pointer fits
+	#if LOCK_DISABLE_INTS
+	 int	IF;
+	#endif
+	#if STACKED_LOCKS == 1
+	 int	cpu = GetCPUNum() + 1;	// +1 presumably so CPU 0 never equals the unlocked value 0
+	#elif STACKED_LOCKS == 2
+	void	*thread = Proc_GetCurThread();
+	#endif
+	
+	#if LOCK_DISABLE_INTS
+	// Save interrupt state and clear interrupts (64-bit mode has no 32-bit POP)
+	__ASM__ ("pushf;\n\tpop %%rax\n\tcli" : "=a"(IF));
+	IF &= 0x200;	// AND out all but the interrupt flag
+	#endif
+	
+	#if STACKED_LOCKS == 1
+	if( Lock->Lock == cpu ) {	// Already held by this CPU: just nest
+		Lock->Depth ++;
+		return ;
+	}
+	#elif STACKED_LOCKS == 2
+	if( Lock->Lock == thread ) {	// Already held by this thread: just nest
+		Lock->Depth ++;
+		return ;
+	}
+	#endif
+	
+	// Wait for another CPU to release
+	while(v) {
+		// CMPXCHG:
+		//  If r/m == rAX, set ZF and set r/m = reg
+		//  Else, clear ZF and set rAX = r/m
+		#if STACKED_LOCKS == 1
+		__ASM__("lock cmpxchgl %2, (%3)"
+			: "=a"(v)
+			: "a"(0L), "r"(cpu), "r"(&Lock->Lock)
+			: "memory");
+		#elif STACKED_LOCKS == 2
+		__ASM__("lock cmpxchgq %2, (%3)"	// Pointer-sized compare-and-swap
+			: "=a"(v)
+			: "a"(0L), "r"(thread), "r"(&Lock->Lock)
+			: "memory");
+		#else
+		__ASM__("xchgl %%eax, (%%rdi)":"=a"(v):"a"(1),"D"(&Lock->Lock):"memory");
+		#endif
+	}
+	
+	#if LOCK_DISABLE_INTS
+	Lock->IF = IF;	// Only written once the lock is owned
+	#endif
+}
+/**
+ * \brief Release a short lock (or one nesting level of it if STACKED_LOCKS is set)
+ * \param Lock	Lock pointer
+ */
+void SHORTREL(struct sShortSpinlock *Lock)
+{
+	#if STACKED_LOCKS
+	if( Lock->Depth ) {	// Nested acquisition: unwind one level and keep holding the lock
+		Lock->Depth --;
+		return ;
+	}
+	#endif
+	
+	#if LOCK_DISABLE_INTS
+	// Test Lock->IF before releasing: it can change anytime once Lock->Lock is zeroed
+	if(Lock->IF) {	// Non-zero: interrupts were enabled when SHORTLOCK was called
+		Lock->Lock = 0;
+		__ASM__ ("sti");	// Re-enable interrupts only after the lock is free
+	}
+	else {
+		Lock->Lock = 0;
+	}
+	#else
+	Lock->Lock = 0;
+	#endif
+}
 
 void outb(Uint16 Port, Uint8 Data)
 {
diff --git a/Kernel/arch/x86_64/link.ld b/Kernel/arch/x86_64/link.ld
index 8f75ddb6d9c179dd9b5f35c2b400f9b36350724d..cafbc1e198ea2f7f66723eb9c16d1d5c777a27ff 100644
--- a/Kernel/arch/x86_64/link.ld
+++ b/Kernel/arch/x86_64/link.ld
@@ -11,7 +11,7 @@ _kernel_base = 0xFFFFFFFF80000000;
 OUTPUT_FORMAT(elf32-i386)
 OUTPUT_ARCH(i386:x86-64)
 */
-OUTPUT_FORMAT(elf64)
+OUTPUT_FORMAT(elf64-x86-64)
 ENTRY(start)
 
 SECTIONS {
diff --git a/Usermode/Applications/MultibootCheck_src/MultibootCheck.c b/Usermode/Applications/MultibootCheck_src/MultibootCheck.c
index ebb5b68b7430e8d8a4cb215014a4d52f28ccd192..f0e1afe425a13dfe7487f657a154b4dfea99ac06 100644
--- a/Usermode/Applications/MultibootCheck_src/MultibootCheck.c
+++ b/Usermode/Applications/MultibootCheck_src/MultibootCheck.c
@@ -3,6 +3,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 
 // === CONSTANTS ===
 #define	SCAN_SPACE	8192
@@ -10,10 +11,10 @@
 
 // === TYPES ===
 typedef struct {
-	unsigned long	Magic;
-	unsigned long	Flags;
-	unsigned long	Checksum;
-} tMBootImg;
+	uint32_t	Magic;	// Fixed 32-bit field (no longer depends on the host's `long` size)
+	uint32_t	Flags;	// Fixed 32-bit field
+	uint32_t	Checksum;	// Fixed 32-bit field
+} __attribute__((packed)) tMBootImg;	// Packed: three consecutive 32-bit words, presumably the on-disk Multiboot header layout (confirm against CheckMultiboot)
 
 // === PROTOTYPES ===
 void	CheckMultiboot(char *file);