diff --git a/Kernel/arch/armv7/lib.S b/Kernel/arch/armv7/lib.S
new file mode 100644
index 0000000000000000000000000000000000000000..d37c4a18b772a87ebd88d588225eebc7fa638d2a
--- /dev/null
+++ b/Kernel/arch/armv7/lib.S
@@ -0,0 +1,44 @@
+/*
+ * Acess2 ARM
+ * - By John Hodge (thePowersGang)
+ *
+ * arch/armv7/lib.S
+ * - Assembly implementations of library functions
+ */
+#include "include/assembly.h"
+
+.globl __memcpy_byte
+__memcpy_byte:	@ Byte-wise copy: r0 = destination, r1 = source, r2 = byte count (r3 is scratch)
+1:	@ Loop head: one byte is copied per iteration
+	tst r2, r2	@ Check counter (Z is set when no bytes remain)
+	moveq pc, lr	@ Return if zero
+	ldrb r3, [r1],#1	@ Read one byte, then post-increment the source pointer
+	strb r3, [r0],#1	@ Write that byte, then post-increment the destination pointer
+	sub r2, #1	@ One fewer byte remaining
+	b 1b	@ Back to the counter check
+
+@ Pre-aligned memcpy: copies r2 bytes from [r1] to [r0] in 32-bit blocks,
+@ then copies any remaining bytes one at a time. r3 and r4 are scratch.
+@ NOTE(review): presumably r0 and r1 must be 4-byte aligned on entry - confirm against callers.
+.globl __memcpy_align4
+__memcpy_align4:
+	push {r4}	@ r4 holds the data in flight; preserve the incoming value
+	mvn r3, #3	@ Mask for checking length (r3 = ~3, nonzero AND means 4 or more bytes remain)
+	
+	@ 4 byte chunk copies
+1:	tst r2, r3	@ Z is clear while at least one whole word remains
+	ldrne r4, [r1],#4	@ Load a word, then advance the source by 4
+	strne r4, [r0],#4	@ Store the word, then advance the destination by 4
+	subne r2, #4	@ No S suffix, so the flags from tst are left intact
+	bne 1b	@ Loop on the tst result from the top of this iteration
+
+	@ single byte copies to finish off
+2:	tst r2, #3	@ Fewer than 4 bytes remain here; check for leftovers
+	beq 3f	@ Nothing left: go to the exit path
+	ldrb r4, [r1],#1	@ Load one byte, then advance the source
+	strb r4, [r0],#1	@ Store one byte, then advance the destination
+	sub r2, #1	@ One fewer byte remaining
+	b 2b	@ Re-check the remaining count
+
+3:	pop {r4}	@ Restore the saved r4
+	mov pc, lr	@ Return to the caller
diff --git a/Kernel/arch/armv7/lib.c b/Kernel/arch/armv7/lib.c
index a59c06256f4e539ac2a00212698cdc939a9469c7..c0feab6c21a10169f6dc5af5f1cb8676d265cfb1 100644
--- a/Kernel/arch/armv7/lib.c
+++ b/Kernel/arch/armv7/lib.c
@@ -5,6 +5,10 @@
  */
 #include <acess.h>
 
+// === IMPORTS ===
+extern void	__memcpy_align4(void *_dest, const void *_src, size_t _length);
+extern void	__memcpy_byte(void *_dest, const void *_src, size_t _length);
+
 // === PROTOTYPES ===
 Uint64	__divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
 Uint32	__divmod32(Uint32 Num, Uint32 Den, Uint32 *Rem);
@@ -18,31 +22,26 @@ Sint32	__modsi3(Sint32 Num, Sint32 Den);
 // === CODE ===
 void *memcpy(void *_dest, const void *_src, size_t _length)
 {
-	Uint32	*dst;
-	const Uint32	*src;
 	Uint8	*dst8 = _dest;
 	const Uint8	*src8 = _src;
 
+	if( ((tVAddr)_dest & 3) == 0 && ((tVAddr)_src & 3) == 0 )	// Fast path: both pointers already 4-byte aligned
+	{
+		__memcpy_align4(_dest, _src, _length);	// Word copies plus trailing bytes, done in assembly
+		return _dest;
+	}
+
 	// Handle small copies / Non-aligned
 	if( _length < 4 || ((tVAddr)_dest & 3) != ((tVAddr)_src & 3) )
 	{
-		for( ; _length--; dst8++,src8++ )
-			*dst8 = *src8;
+		__memcpy_byte(_dest, _src, _length);	// Pointers can never be co-aligned (or copy is tiny): go byte by byte
 		return _dest;
 	}
 
 	// Force alignment
-	while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++;
-	dst = (void *)dst8;	src = (void *)src8;
+	while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --;	// At most 3 bytes; _length >= 4 here so it cannot underflow
 
-	// DWORD copies
-	for( ; _length > 3; _length -= 4)
-		*dst++ = *src++;
-
-	// Trailing bytes
-	dst8 = (void*)dst;	src8 = (void*)src;
-	for( ; _length; _length -- )
-		*dst8 ++ = *src8 ++;
+	__memcpy_align4(dst8, src8, _length);	// Both pointers are now aligned; copy the remainder (was the undeclared __memcpy_align32)
 	
 	return _dest;
 }
@@ -86,7 +85,7 @@ void *memset(void *_dest, int _value, size_t _length)
 
 	_value = (Uint8)_value;
 
-	// Handle small copies / Non-aligned
+	// Handle small copies
 	if( _length < 4 )
 	{
 		for( ; _length--; dst8++ )