Merge branch 'master'

author: Jeff Garzik <jgarzik@pobox.com> 2005-11-05 15:39:24 -0500
committer: Jeff Garzik <jgarzik@pobox.com> 2005-11-05 15:39:24 -0500
commit: 7211bb9b64f17b23834d91fc3d0c1d78671ee9a8 (patch)
tree: 541909f86c31fee97cd70d28ec2fe5c23e1ceb02 /arch/arm/lib
parent: f1e691a24955ea987f021f378324fb5003b1b208 (diff)
parent: 70d9d825e0a5a78ec1dacaaaf5c72ff5b0206fab (diff)
21 files changed, 1218 insertions, 844 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 8725d63e4219..391f3ab3ff32 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -7,13 +7,27 @@
 lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
 		   copy_page.o delay.o findbit.o memchr.o memcpy.o    \
-		   memset.o memzero.o setbit.o strncpy_from_user.o    \
-		   strnlen_user.o strchr.o strrchr.o testchangebit.o  \
-		   testclearbit.o testsetbit.o uaccess.o getuser.o    \
-		   putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o   \
-		   ucmpdi2.o lib1funcs.o div64.o	              \
+		   memmove.o memset.o memzero.o setbit.o              \
+		   strncpy_from_user.o strnlen_user.o                 \
+		   strchr.o strrchr.o                                 \
+		   testchangebit.o testclearbit.o testsetbit.o        \
+		   getuser.o putuser.o clear_user.o                   \
+		   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
+		   ucmpdi2.o lib1funcs.o div64.o sha1.o               \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o
 
+# the code in uaccess.S is not preemption safe and
+# probably faster on ARMv3 only
+ifeq ($CONFIG_PREEMPT,y)
+  lib-y	+= copy_from_user.o copy_to_user.o
+else
+ifneq ($(CONFIG_CPU_32v3),y)
+  lib-y	+= copy_from_user.o copy_to_user.o
+else
+  lib-y	+= uaccess.o
+endif
+endif
+
 ifeq ($(CONFIG_CPU_32v3),y)
   lib-y	+= io-readsw-armv3.o io-writesw-armv3.o
 else
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
new file mode 100644
index 000000000000..561e20717b30
--- /dev/null
+++ b/arch/arm/lib/ashldi3.S
@@ -0,0 +1,48 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__ashldi3)
+
+	subs	r3, r2, #32
+	rsb	ip, r2, #32
+	movmi	ah, ah, lsl r2
+	movpl	ah, al, lsl r3
+	orrmi	ah, ah, al, lsr ip
+	mov	al, al, lsl r2
+	mov	pc, lr
+
diff --git a/arch/arm/lib/ashldi3.c b/arch/arm/lib/ashldi3.c
deleted file mode 100644
index b62875cfd8f8..000000000000
--- a/arch/arm/lib/ashldi3.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/* More subroutines needed by GCC output code on some machines.  */
-/* Compile this one with gcc.  */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.
- */
-/* support functions required by the kernel. based on code from gcc-2.95.3 */
-/* I Molton	29/07/01 */
-
-#include "gcclib.h"
-
-s64 __ashldi3(s64 u, int b)
-{
-	DIunion w;
-	int bm;
-	DIunion uu;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-
-	bm = (sizeof(s32) * BITS_PER_UNIT) - b;
-	if (bm <= 0) {
-		w.s.low = 0;
-		w.s.high = (u32) uu.s.low << -bm;
-	} else {
-		u32 carries = (u32) uu.s.low >> bm;
-		w.s.low = (u32) uu.s.low << b;
-		w.s.high = ((u32) uu.s.high << b) | carries;
-	}
-
-	return w.ll;
-}
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
new file mode 100644
index 000000000000..86fb2a90c301
--- /dev/null
+++ b/arch/arm/lib/ashrdi3.S
@@ -0,0 +1,48 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__ashrdi3)
+
+	subs	r3, r2, #32
+	rsb	ip, r2, #32
+	movmi	al, al, lsr r2
+	movpl	al, ah, asr r3
+	orrmi	al, al, ah, lsl ip
+	mov	ah, ah, asr r2
+	mov	pc, lr
+
diff --git a/arch/arm/lib/ashrdi3.c b/arch/arm/lib/ashrdi3.c
deleted file mode 100644
index 9a8600a7543f..000000000000
--- a/arch/arm/lib/ashrdi3.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/* More subroutines needed by GCC output code on some machines.  */
-/* Compile this one with gcc.  */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.
- */
-/* support functions required by the kernel. based on code from gcc-2.95.3 */
-/* I Molton     29/07/01 */
-
-#include "gcclib.h"
-
-s64 __ashrdi3(s64 u, int b)
-{
-	DIunion w;
-	int bm;
-	DIunion uu;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-
-	bm = (sizeof(s32) * BITS_PER_UNIT) - b;
-	if (bm <= 0) {
-		/* w.s.high = 1..1 or 0..0 */
-		w.s.high = uu.s.high >> (sizeof(s32) * BITS_PER_UNIT - 1);
-		w.s.low = uu.s.high >> -bm;
-	} else {
-		u32 carries = (u32) uu.s.high << bm;
-		w.s.high = uu.s.high >> b;
-		w.s.low = ((u32) uu.s.low >> b) | carries;
-	}
-
-	return w.ll;
-}
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 64a988c1ad44..f35d91fbe117 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,6 +1,6 @@
 #include <linux/config.h>
 
-#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_MPCORE)
+#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K)
 	.macro	bitop, instr
 	mov	r2, #1
 	and	r3, r0, #7		@ Get bit offset
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
new file mode 100644
index 000000000000..7ff9f831b3f9
--- /dev/null
+++ b/arch/arm/lib/clear_user.S
@@ -0,0 +1,52 @@
+/*
+ *  linux/arch/arm/lib/clear_user.S
+ *
+ *  Copyright (C) 1995, 1996,1997,1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+/* Prototype: int __arch_clear_user(void *addr, size_t sz)
+ * Purpose  : clear some user memory
+ * Params   : addr - user memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ */
+ENTRY(__arch_clear_user)
+		stmfd	sp!, {r1, lr}
+		mov	r2, #0
+		cmp	r1, #4
+		blt	2f
+		ands	ip, r0, #3
+		beq	1f
+		cmp	ip, #2
+USER(		strbt	r2, [r0], #1)
+USER(		strlebt	r2, [r0], #1)
+USER(		strltbt	r2, [r0], #1)
+		rsb	ip, ip, #4
+		sub	r1, r1, ip		@  7  6  5  4  3  2  1
+1:		subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
+USER(		strplt	r2, [r0], #4)
+USER(		strplt	r2, [r0], #4)
+		bpl	1b
+		adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
+USER(		strplt	r2, [r0], #4)
+2:		tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
+USER(		strnebt	r2, [r0], #1)
+USER(		strnebt	r2, [r0], #1)
+		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
+USER(		strnebt	r2, [r0], #1)
+		mov	r0, #0
+		LOADREGS(fd,sp!, {r1, pc})
+
+		.section .fixup,"ax"
+		.align	0
+9001:		LOADREGS(fd,sp!, {r0, pc})
+		.previous
+
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
new file mode 100644
index 000000000000..7497393a0e81
--- /dev/null
+++ b/arch/arm/lib/copy_from_user.S
@@ -0,0 +1,101 @@
+/*
+ *  linux/arch/arm/lib/copy_from_user.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Sep 29, 2005
+ *  Copyright:	MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Prototype:
+ *
+ *	size_t __arch_copy_from_user(void *to, const void *from, size_t n)
+ *
+ * Purpose:
+ *
+ *	copy a block to kernel memory from user memory
+ *
+ * Params:
+ *
+ *	to = kernel memory
+ *	from = user memory
+ *	n = number of bytes to copy
+ *
+ * Return value:
+ *
+ *	Number of bytes NOT copied.
+ */
+
+	.macro ldr1w ptr reg abort
+100:	ldrt \reg, [\ptr], #4
+	.section __ex_table, "a"
+	.long 100b, \abort
+	.previous
+	.endm
+
+	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+	ldr1w \ptr, \reg1, \abort
+	ldr1w \ptr, \reg2, \abort
+	ldr1w \ptr, \reg3, \abort
+	ldr1w \ptr, \reg4, \abort
+	.endm
+
+	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort
+	ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
+	.endm
+
+	.macro ldr1b ptr reg cond=al abort
+100:	ldr\cond\()bt \reg, [\ptr], #1
+	.section __ex_table, "a"
+	.long 100b, \abort
+	.previous
+	.endm
+
+	.macro str1w ptr reg abort
+	str \reg, [\ptr], #4
+	.endm
+
+	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+	.endm
+
+	.macro str1b ptr reg cond=al abort
+	str\cond\()b \reg, [\ptr], #1
+	.endm
+
+	.macro enter reg1 reg2
+	mov	r3, #0
+	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
+	.endm
+
+	.macro exit reg1 reg2
+	add	sp, sp, #8
+	ldmfd	sp!, {r0, \reg1, \reg2}
+	.endm
+
+	.text
+
+ENTRY(__arch_copy_from_user)
+
+#include "copy_template.S"
+
+	.section .fixup,"ax"
+	.align 0
+	copy_abort_preamble
+	ldmfd	sp!, {r1, r2}
+	sub	r3, r0, r1
+	rsb	r1, r3, r2
+	str	r1, [sp]
+	bl	__memzero
+	ldr	r0, [sp], #4
+	copy_abort_end
+	.previous
+
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
new file mode 100644
index 000000000000..838e435e4922
--- /dev/null
+++ b/arch/arm/lib/copy_template.S
@@ -0,0 +1,255 @@
+/*
+ *  linux/arch/arm/lib/copy_template.s
+ *
+ *  Code template for optimized memory copy functions
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Sep 28, 2005
+ *  Copyright:	MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do.  That might be different in the future.
+ */
+//#define CALGN(code...)	code
+#define CALGN(code...)
+
+/*
+ * Theory of operation
+ * -------------------
+ *
+ * This file provides the core code for a forward memory copy used in
+ * the implementation of memcopy(), copy_to_user() and copy_from_user().
+ *
+ * The including file must define the following accessor macros
+ * according to the need of the given function:
+ *
+ * ldr1w ptr reg abort
+ *
+ *	This loads one word from 'ptr', stores it in 'reg' and increments
+ *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
+ *
+ * ldr4w ptr reg1 reg2 reg3 reg4 abort
+ * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ *
+ *	This loads four or eight words starting from 'ptr', stores them
+ *	in provided registers and increments 'ptr' past those words.
+ *	The'abort' argument is used for fixup tables.
+ *
+ * ldr1b ptr reg cond abort
+ *
+ *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
+ *	It also must apply the condition code if provided, otherwise the
+ *	"al" condition is assumed by default.
+ *
+ * str1w ptr reg abort
+ * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ * str1b ptr reg cond abort
+ *
+ *	Same as their ldr* counterparts, but data is stored to 'ptr' location
+ *	rather than being loaded.
+ *
+ * enter reg1 reg2
+ *
+ *	Preserve the provided registers on the stack plus any additional
+ *	data as needed by the implementation including this code. Called
+ *	upon code entry.
+ *
+ * exit reg1 reg2
+ *
+ *	Restore registers with the values previously saved with the
+ *	'preserv' macro. Called upon code termination.
+ */
+
+
+		enter	r4, lr
+
+		subs	r2, r2, #4
+		blt	8f
+		ands	ip, r0, #3
+	PLD(	pld	[r1, #0]		)
+		bne	9f
+		ands	ip, r1, #3
+		bne	10f
+
+1:		subs	r2, r2, #(28)
+		stmfd	sp!, {r5 - r8}
+		blt	5f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	r3, ip, #32		)
+	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
+	CALGN(	bcs	2f			)
+	CALGN(	adr	r4, 6f			)
+	CALGN(	subs	r2, r2, r3		)  @ C gets set
+	CALGN(	add	pc, r4, ip		)
+
+	PLD(	pld	[r1, #0]		)
+2:	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #28]		)
+	PLD(	blt	4f			)
+	PLD(	pld	[r1, #60]		)
+	PLD(	pld	[r1, #92]		)
+
+3:	PLD(	pld	[r1, #124]		)
+4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+		subs	r2, r2, #32
+		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+		bge	3b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	4b			)
+
+5:		ands	ip, r2, #28
+		rsb	ip, ip, #32
+		addne	pc, pc, ip		@ C is always clear here
+		b	7f
+6:		nop
+		ldr1w	r1, r3, abort=20f
+		ldr1w	r1, r4, abort=20f
+		ldr1w	r1, r5, abort=20f
+		ldr1w	r1, r6, abort=20f
+		ldr1w	r1, r7, abort=20f
+		ldr1w	r1, r8, abort=20f
+		ldr1w	r1, lr, abort=20f
+
+		add	pc, pc, ip
+		nop
+		nop
+		str1w	r0, r3, abort=20f
+		str1w	r0, r4, abort=20f
+		str1w	r0, r5, abort=20f
+		str1w	r0, r6, abort=20f
+		str1w	r0, r7, abort=20f
+		str1w	r0, r8, abort=20f
+		str1w	r0, lr, abort=20f
+
+	CALGN(	bcs	2b			)
+
+7:		ldmfd	sp!, {r5 - r8}
+
+8:		movs	r2, r2, lsl #31
+		ldr1b	r1, r3, ne, abort=21f
+		ldr1b	r1, r4, cs, abort=21f
+		ldr1b	r1, ip, cs, abort=21f
+		str1b	r0, r3, ne, abort=21f
+		str1b	r0, r4, cs, abort=21f
+		str1b	r0, ip, cs, abort=21f
+
+		exit	r4, pc
+
+9:		rsb	ip, ip, #4
+		cmp	ip, #2
+		ldr1b	r1, r3, gt, abort=21f
+		ldr1b	r1, r4, ge, abort=21f
+		ldr1b	r1, lr, abort=21f
+		str1b	r0, r3, gt, abort=21f
+		str1b	r0, r4, ge, abort=21f
+		subs	r2, r2, ip
+		str1b	r0, lr, abort=21f
+		blt	8b
+		ands	ip, r1, #3
+		beq	1b
+
+10:		bic	r1, r1, #3
+		cmp	ip, #2
+		ldr1w	r1, lr, abort=21f
+		beq	17f
+		bgt	18f
+
+
+		.macro	forward_copy_shift pull push
+
+		subs	r2, r2, #28
+		blt	14f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	subcc	r2, r2, ip		)
+	CALGN(	bcc	15f			)
+
+11:		stmfd	sp!, {r5 - r9}
+
+	PLD(	pld	[r1, #0]		)
+	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #28]		)
+	PLD(	blt	13f			)
+	PLD(	pld	[r1, #60]		)
+	PLD(	pld	[r1, #92]		)
+
+12:	PLD(	pld	[r1, #124]		)
+13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
+		mov	r3, lr, pull #\pull
+		subs	r2, r2, #32
+		ldr4w	r1, r8, r9, ip, lr, abort=19f
+		orr	r3, r3, r4, push #\push
+		mov	r4, r4, pull #\pull
+		orr	r4, r4, r5, push #\push
+		mov	r5, r5, pull #\pull
+		orr	r5, r5, r6, push #\push
+		mov	r6, r6, pull #\pull
+		orr	r6, r6, r7, push #\push
+		mov	r7, r7, pull #\pull
+		orr	r7, r7, r8, push #\push
+		mov	r8, r8, pull #\pull
+		orr	r8, r8, r9, push #\push
+		mov	r9, r9, pull #\pull
+		orr	r9, r9, ip, push #\push
+		mov	ip, ip, pull #\pull
+		orr	ip, ip, lr, push #\push
+		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
+		bge	12b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	13b			)
+
+		ldmfd	sp!, {r5 - r9}
+
+14:		ands	ip, r2, #28
+		beq	16f
+
+15:		mov	r3, lr, pull #\pull
+		ldr1w	r1, lr, abort=21f
+		subs	ip, ip, #4
+		orr	r3, r3, lr, push #\push
+		str1w	r0, r3, abort=21f
+		bgt	15b
+	CALGN(	cmp	r2, #0			)
+	CALGN(	bge	11b			)
+
+16:		sub	r1, r1, #(\push / 8)
+		b	8b
+
+		.endm
+
+
+		forward_copy_shift	pull=8	push=24
+
+17:		forward_copy_shift	pull=16	push=16
+
+18:		forward_copy_shift	pull=24	push=8
+
+
+/*
+ * Abort preanble and completion macros.
+ * If a fixup handler is required then those macros must surround it.
+ * It is assumed that the fixup code will handle the private part of
+ * the exit macro.
+ */
+
+	.macro	copy_abort_preamble
+19:	ldmfd	sp!, {r5 - r9}
+	b	21f
+20:	ldmfd	sp!, {r5 - r8}
+21:
+	.endm
+
+	.macro	copy_abort_end
+	ldmfd	sp!, {r4, pc}
+	.endm
+
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
new file mode 100644
index 000000000000..4a6d8ea14022
--- /dev/null
+++ b/arch/arm/lib/copy_to_user.S
@@ -0,0 +1,101 @@
+/*
+ *  linux/arch/arm/lib/copy_to_user.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Sep 29, 2005
+ *  Copyright:	MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Prototype:
+ *
+ *	size_t __arch_copy_to_user(void *to, const void *from, size_t n)
+ *
+ * Purpose:
+ *
+ *	copy a block to user memory from kernel memory
+ *
+ * Params:
+ *
+ *	to = user memory
+ *	from = kernel memory
+ *	n = number of bytes to copy
+ *
+ * Return value:
+ *
+ *	Number of bytes NOT copied.
+ */
+
+	.macro ldr1w ptr reg abort
+	ldr \reg, [\ptr], #4
+	.endm
+
+	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+	.endm
+
+	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+	.endm
+
+	.macro ldr1b ptr reg cond=al abort
+	ldr\cond\()b \reg, [\ptr], #1
+	.endm
+
+	.macro str1w ptr reg abort
+100:	strt \reg, [\ptr], #4
+	.section __ex_table, "a"
+	.long 100b, \abort
+	.previous
+	.endm
+
+	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	str1w \ptr, \reg1, \abort
+	str1w \ptr, \reg2, \abort
+	str1w \ptr, \reg3, \abort
+	str1w \ptr, \reg4, \abort
+	str1w \ptr, \reg5, \abort
+	str1w \ptr, \reg6, \abort
+	str1w \ptr, \reg7, \abort
+	str1w \ptr, \reg8, \abort
+	.endm
+
+	.macro str1b ptr reg cond=al abort
+100:	str\cond\()bt \reg, [\ptr], #1
+	.section __ex_table, "a"
+	.long 100b, \abort
+	.previous
+	.endm
+
+	.macro enter reg1 reg2
+	mov	r3, #0
+	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
+	.endm
+
+	.macro exit reg1 reg2
+	add	sp, sp, #8
+	ldmfd	sp!, {r0, \reg1, \reg2}
+	.endm
+
+	.text
+
+ENTRY(__arch_copy_to_user)
+
+#include "copy_template.S"
+
+	.section .fixup,"ax"
+	.align 0
+	copy_abort_preamble
+	ldmfd	sp!, {r1, r2, r3}
+	sub	r0, r0, r1
+	rsb	r0, r0, r2
+	copy_abort_end
+	.previous
+
diff --git a/arch/arm/lib/gcclib.h b/arch/arm/lib/gcclib.h
deleted file mode 100644
index 8b6dcc656de7..000000000000
--- a/arch/arm/lib/gcclib.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */
-/* I Molton     29/07/01 */
-
-#include <linux/types.h>
-
-#define BITS_PER_UNIT	8
-#define SI_TYPE_SIZE	(sizeof(s32) * BITS_PER_UNIT)
-
-#ifdef __ARMEB__
-struct DIstruct {
-	s32 high, low;
-};
-#else
-struct DIstruct {
-	s32 low, high;
-};
-#endif
-
-typedef union {
-	struct DIstruct s;
-	s64 ll;
-} DIunion;
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
new file mode 100644
index 000000000000..46c2ed19ec95
--- /dev/null
+++ b/arch/arm/lib/lshrdi3.S
@@ -0,0 +1,48 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__lshrdi3)
+
+	subs	r3, r2, #32
+	rsb	ip, r2, #32
+	movmi	al, al, lsr r2
+	movpl	al, ah, lsr r3
+	orrmi	al, al, ah, lsl ip
+	mov	ah, ah, lsr r2
+	mov	pc, lr
+
diff --git a/arch/arm/lib/lshrdi3.c b/arch/arm/lib/lshrdi3.c
deleted file mode 100644
index 3681f49d2b6e..000000000000
--- a/arch/arm/lib/lshrdi3.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/* More subroutines needed by GCC output code on some machines.  */
-/* Compile this one with gcc.  */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.
- */
-/* support functions required by the kernel. based on code from gcc-2.95.3 */
-/* I Molton     29/07/01 */
-
-#include "gcclib.h"
-
-s64 __lshrdi3(s64 u, int b)
-{
-	DIunion w;
-	int bm;
-	DIunion uu;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-
-	bm = (sizeof(s32) * BITS_PER_UNIT) - b;
-	if (bm <= 0) {
-		w.s.high = 0;
-		w.s.low = (u32) uu.s.high >> -bm;
-	} else {
-		u32 carries = (u32) uu.s.high << bm;
-		w.s.high = (u32) uu.s.high >> b;
-		w.s.low = ((u32) uu.s.low >> b) | carries;
-	}
-
-	return w.ll;
-}
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index f5a593ceb8cc..7e71d6708a8d 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -1,393 +1,59 @@
 /*
  *  linux/arch/arm/lib/memcpy.S
  *
- *  Copyright (C) 1995-1999 Russell King
+ *  Author:	Nicolas Pitre
+ *  Created:	Sep 28, 2005
+ *  Copyright:	MontaVista Software, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  ASM optimised string functions
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
  */
+
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-		.text
-
-#define ENTER	\
-		mov	ip,sp	;\
-		stmfd	sp!,{r0,r4-r9,fp,ip,lr,pc}	;\
-		sub	fp,ip,#4
-
-#define EXIT	\
-		LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
-
-#define EXITEQ	\
-		LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
-
-/*
- * Prototype: void memcpy(void *to,const void *from,unsigned long n);
- */
-ENTRY(memcpy)
-ENTRY(memmove)
-		ENTER
-		cmp	r1, r0
-		bcc	23f
-		subs	r2, r2, #4
-		blt	6f
-	PLD(	pld	[r1, #0]		)
-		ands	ip, r0, #3
-		bne	7f
-		ands	ip, r1, #3
-		bne	8f
+	.macro ldr1w ptr reg abort
+	ldr \reg, [\ptr], #4
+	.endm
 
-1:		subs	r2, r2, #8
-		blt	5f
-		subs	r2, r2, #20
-		blt	4f
-	PLD(	pld	[r1, #28]		)
-	PLD(	subs	r2, r2, #64		)
-	PLD(	blt	3f			)
-2:	PLD(	pld	[r1, #60]		)
-	PLD(	pld	[r1, #92]		)
-		ldmia	r1!, {r3 - r9, ip}
-		subs	r2, r2, #32
-		stmgeia	r0!, {r3 - r9, ip}
-		ldmgeia	r1!, {r3 - r9, ip}
-		subges	r2, r2, #32
-		stmia	r0!, {r3 - r9, ip}
-		bge	2b
-3:	PLD(	ldmia	r1!, {r3 - r9, ip}	)
-	PLD(	adds	r2, r2, #32		)
-	PLD(	stmgeia	r0!, {r3 - r9, ip}	)
-	PLD(	ldmgeia	r1!, {r3 - r9, ip}	)
-	PLD(	subges	r2, r2, #32		)
-	PLD(	stmia	r0!, {r3 - r9, ip}	)
-4:		cmn	r2, #16
-		ldmgeia	r1!, {r3 - r6}
-		subge	r2, r2, #16
-		stmgeia	r0!, {r3 - r6}
-		adds	r2, r2, #20
-		ldmgeia	r1!, {r3 - r5}
-		subge	r2, r2, #12
-		stmgeia	r0!, {r3 - r5}
-5:		adds	r2, r2, #8
-		blt	6f
-		subs	r2, r2, #4
-		ldrlt	r3, [r1], #4
-		ldmgeia	r1!, {r4, r5}
-		subge	r2, r2, #4
-		strlt	r3, [r0], #4
-		stmgeia	r0!, {r4, r5}
+	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+	.endm
 
-6:		adds	r2, r2, #4
-		EXITEQ
-		cmp	r2, #2
-		ldrb	r3, [r1], #1
-		ldrgeb	r4, [r1], #1
-		ldrgtb	r5, [r1], #1
-		strb	r3, [r0], #1
-		strgeb	r4, [r0], #1
-		strgtb	r5, [r0], #1
-		EXIT
+	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+	.endm
 
-7:		rsb	ip, ip, #4
-		cmp	ip, #2
-		ldrb	r3, [r1], #1
-		ldrgeb	r4, [r1], #1
-		ldrgtb	r5, [r1], #1
-		strb	r3, [r0], #1
-		strgeb	r4, [r0], #1
-		strgtb	r5, [r0], #1
-		subs	r2, r2, ip
-		blt	6b
-		ands	ip, r1, #3
-		beq	1b
+	.macro ldr1b ptr reg cond=al abort
+	ldr\cond\()b \reg, [\ptr], #1
+	.endm
 
-8:		bic	r1, r1, #3
-		ldr	r7, [r1], #4
-		cmp	ip, #2
-		bgt	18f
-		beq	13f
-		cmp	r2, #12
-		blt	11f
-	PLD(	pld	[r1, #12]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	10f			)
-	PLD(	pld	[r1, #28]		)
-9:	PLD(	pld	[r1, #44]		)
-10:		mov	r3, r7, pull #8
-		ldmia	r1!, {r4 - r7}
-		subs	r2, r2, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		stmia	r0!, {r3 - r6}
-		bge	9b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	10b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	12f
-11:		mov	r3, r7, pull #8
-		ldr	r7, [r1], #4
-		subs	r2, r2, #4
-		orr	r3, r3, r7, push #24
-		str	r3, [r0], #4
-		bge	11b
-12:		sub	r1, r1, #3
-		b	6b
+	.macro str1w ptr reg abort
+	str \reg, [\ptr], #4
+	.endm
 
-13:		cmp	r2, #12
-		blt	16f
-	PLD(	pld	[r1, #12]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	15f			)
-	PLD(	pld	[r1, #28]		)
-14:	PLD(	pld	[r1, #44]		)
-15:		mov	r3, r7, pull #16
-		ldmia	r1!, {r4 - r7}
-		subs	r2, r2, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		stmia	r0!, {r3 - r6}
-		bge	14b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	15b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	17f
-16:		mov	r3, r7, pull #16
-		ldr	r7, [r1], #4
-		subs	r2, r2, #4
-		orr	r3, r3, r7, push #16
-		str	r3, [r0], #4
-		bge	16b
-17:		sub	r1, r1, #2
-		b	6b
+	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+	.endm
 
-18:		cmp	r2, #12
-		blt	21f
-	PLD(	pld	[r1, #12]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	20f			)
-	PLD(	pld	[r1, #28]		)
-19:	PLD(	pld	[r1, #44]		)
-20:		mov	r3, r7, pull #24
-		ldmia	r1!, {r4 - r7}
-		subs	r2, r2, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		stmia	r0!, {r3 - r6}
-		bge	19b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	20b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	22f
-21:		mov	r3, r7, pull #24
-		ldr	r7, [r1], #4
-		subs	r2, r2, #4
-		orr	r3, r3, r7, push #8
-		str	r3, [r0], #4
-		bge	21b
-22:		sub	r1, r1, #1
-		b	6b
+	.macro str1b ptr reg cond=al abort
+	str\cond\()b \reg, [\ptr], #1
+	.endm
 
+	.macro enter reg1 reg2
+	stmdb sp!, {r0, \reg1, \reg2}
+	.endm
 
-23:		add	r1, r1, r2
-		add	r0, r0, r2
-		subs	r2, r2, #4
-		blt	29f
-	PLD(	pld	[r1, #-4]		)
-		ands	ip, r0, #3
-		bne	30f
-		ands	ip, r1, #3
-		bne	31f
+	.macro exit reg1 reg2
+	ldmfd sp!, {r0, \reg1, \reg2}
+	.endm
 
-24:		subs	r2, r2, #8
-		blt	28f
-		subs	r2, r2, #20
-		blt	27f
-	PLD(	pld	[r1, #-32]		)
-	PLD(	subs	r2, r2, #64		)
-	PLD(	blt	26f			)
-25:	PLD(	pld	[r1, #-64]		)
-	PLD(	pld	[r1, #-96]		)
-		ldmdb	r1!, {r3 - r9, ip}
-		subs	r2, r2, #32
-		stmgedb	r0!, {r3 - r9, ip}
-		ldmgedb	r1!, {r3 - r9, ip}
-		subges	r2, r2, #32
-		stmdb	r0!, {r3 - r9, ip}
-		bge	25b
-26:	PLD(	ldmdb	r1!, {r3 - r9, ip}	)
-	PLD(	adds	r2, r2, #32		)
-	PLD(	stmgedb	r0!, {r3 - r9, ip}	)
-	PLD(	ldmgedb	r1!, {r3 - r9, ip}	)
-	PLD(	subges	r2, r2, #32		)
-	PLD(	stmdb	r0!, {r3 - r9, ip}	)
-27:		cmn	r2, #16
-		ldmgedb	r1!, {r3 - r6}
-		subge	r2, r2, #16
-		stmgedb	r0!, {r3 - r6}
-		adds	r2, r2, #20
-		ldmgedb	r1!, {r3 - r5}
-		subge	r2, r2, #12
-		stmgedb	r0!, {r3 - r5}
-28:		adds	r2, r2, #8
-		blt	29f
-		subs	r2, r2, #4
-		ldrlt	r3, [r1, #-4]!
-		ldmgedb	r1!, {r4, r5}
-		subge	r2, r2, #4
-		strlt	r3, [r0, #-4]!
-		stmgedb	r0!, {r4, r5}
+	.text
 
-29:		adds	r2, r2, #4
-		EXITEQ
-		cmp	r2, #2
-		ldrb	r3, [r1, #-1]!
-		ldrgeb	r4, [r1, #-1]!
-		ldrgtb	r5, [r1, #-1]!
-		strb	r3, [r0, #-1]!
-		strgeb	r4, [r0, #-1]!
-		strgtb	r5, [r0, #-1]!
-		EXIT
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
-30:		cmp	ip, #2
-		ldrb	r3, [r1, #-1]!
-		ldrgeb	r4, [r1, #-1]!
-		ldrgtb	r5, [r1, #-1]!
-		strb	r3, [r0, #-1]!
-		strgeb	r4, [r0, #-1]!
-		strgtb	r5, [r0, #-1]!
-		subs	r2, r2, ip
-		blt	29b
-		ands	ip, r1, #3
-		beq	24b
-
-31:		bic	r1, r1, #3
-		ldr	r3, [r1], #0
-		cmp	ip, #2
-		blt	41f
-		beq	36f
-		cmp	r2, #12
-		blt	34f
-	PLD(	pld	[r1, #-16]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	33f			)
-	PLD(	pld	[r1, #-32]		)
-32:	PLD(	pld	[r1, #-48]		)
-33:		mov	r7, r3, push #8
-		ldmdb	r1!, {r3, r4, r5, r6}
-		subs	r2, r2, #16
-		orr	r7, r7, r6, pull #24
-		mov	r6, r6, push #8
-		orr	r6, r6, r5, pull #24
-		mov	r5, r5, push #8
-		orr	r5, r5, r4, pull #24
-		mov	r4, r4, push #8
-		orr	r4, r4, r3, pull #24
-		stmdb	r0!, {r4, r5, r6, r7}
-		bge	32b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	33b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	35f
-34:		mov	ip, r3, push #8
-		ldr	r3, [r1, #-4]!
-		subs	r2, r2, #4
-		orr	ip, ip, r3, pull #24
-		str	ip, [r0, #-4]!
-		bge	34b
-35:		add	r1, r1, #3
-		b	29b
-
-36:		cmp	r2, #12
-		blt	39f
-	PLD(	pld	[r1, #-16]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	38f			)
-	PLD(	pld	[r1, #-32]		)
-37:	PLD(	pld	[r1, #-48]		)
-38:		mov	r7, r3, push #16
-		ldmdb	r1!, {r3, r4, r5, r6}
-		subs	r2, r2, #16
-		orr	r7, r7, r6, pull #16
-		mov	r6, r6, push #16
-		orr	r6, r6, r5, pull #16
-		mov	r5, r5, push #16
-		orr	r5, r5, r4, pull #16
-		mov	r4, r4, push #16
-		orr	r4, r4, r3, pull #16
-		stmdb	r0!, {r4, r5, r6, r7}
-		bge	37b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	38b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	40f
-39:		mov	ip, r3, push #16
-		ldr	r3, [r1, #-4]!
-		subs	r2, r2, #4
-		orr	ip, ip, r3, pull #16
-		str	ip, [r0, #-4]!
-		bge	39b
-40:		add	r1, r1, #2
-		b	29b
+ENTRY(memcpy)
 
-41:		cmp	r2, #12
-		blt	44f
-	PLD(	pld	[r1, #-16]		)
-		sub	r2, r2, #12
-	PLD(	subs	r2, r2, #32		)
-	PLD(	blt	43f			)
-	PLD(	pld	[r1, #-32]		)
-42:	PLD(	pld	[r1, #-48]		)
-43:		mov	r7, r3, push #24
-		ldmdb	r1!, {r3, r4, r5, r6}
-		subs	r2, r2, #16
-		orr	r7, r7, r6, pull #8
-		mov	r6, r6, push #24
-		orr	r6, r6, r5, pull #8
-		mov	r5, r5, push #24
-		orr	r5, r5, r4, pull #8
-		mov	r4, r4, push #24
-		orr	r4, r4, r3, pull #8
-		stmdb	r0!, {r4, r5, r6, r7}
-		bge	42b
-	PLD(	cmn	r2, #32			)
-	PLD(	bge	43b			)
-	PLD(	add	r2, r2, #32		)
-		adds	r2, r2, #12
-		blt	45f
-44:		mov	ip, r3, push #24
-		ldr	r3, [r1, #-4]!
-		subs	r2, r2, #4
-		orr	ip, ip, r3, pull #8
-		str	ip, [r0, #-4]!
-		bge	44b
-45:		add	r1, r1, #1
-		b	29b
+#include "copy_template.S"
 
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
new file mode 100644
index 000000000000..ef7fddc14ac9
--- /dev/null
+++ b/arch/arm/lib/memmove.S
@@ -0,0 +1,206 @@
+/*
+ *  linux/arch/arm/lib/memmove.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Sep 28, 2005
+ *  Copyright:	(C) MontaVista Software Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * This can be used to enable code to cacheline align the source pointer.
+ * Experiments on tested architectures (StrongARM and XScale) didn't show
+ * this a worthwhile thing to do.  That might be different in the future.
+ */
+//#define CALGN(code...)        code
+#define CALGN(code...)
+
+		.text
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy which is
+ * normally a bit faster. Otherwise the copy is done going downwards.  This
+ * is a transposition of the code from copy_template.S but with the copy
+ * occurring in the opposite direction.
+ */
+
+ENTRY(memmove)
+
+		subs	ip, r0, r1
+		cmphi	r2, ip
+		bls	memcpy
+
+		stmfd	sp!, {r0, r4, lr}
+		add	r1, r1, r2
+		add	r0, r0, r2
+		subs	r2, r2, #4
+		blt	8f
+		ands	ip, r0, #3
+	PLD(	pld	[r1, #-4]		)
+		bne	9f
+		ands	ip, r1, #3
+		bne	10f
+
+1:		subs	r2, r2, #(28)
+		stmfd	sp!, {r5 - r8}
+		blt	5f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	bcs	2f			)
+	CALGN(	adr	r4, 6f			)
+	CALGN(	subs	r2, r2, ip		)  @ C is set here
+	CALGN(	add	pc, r4, ip		)
+
+	PLD(	pld	[r1, #-4]		)
+2:	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #-32]		)
+	PLD(	blt	4f			)
+	PLD(	pld	[r1, #-64]		)
+	PLD(	pld	[r1, #-96]		)
+
+3:	PLD(	pld	[r1, #-128]		)
+4:		ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		subs	r2, r2, #32
+		stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		bge	3b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	4b			)
+
+5:		ands	ip, r2, #28
+		rsb	ip, ip, #32
+		addne	pc, pc, ip		@ C is always clear here
+		b	7f
+6:		nop
+		ldr	r3, [r1, #-4]!
+		ldr	r4, [r1, #-4]!
+		ldr	r5, [r1, #-4]!
+		ldr	r6, [r1, #-4]!
+		ldr	r7, [r1, #-4]!
+		ldr	r8, [r1, #-4]!
+		ldr	lr, [r1, #-4]!
+
+		add	pc, pc, ip
+		nop
+		nop
+		str	r3, [r0, #-4]!
+		str	r4, [r0, #-4]!
+		str	r5, [r0, #-4]!
+		str	r6, [r0, #-4]!
+		str	r7, [r0, #-4]!
+		str	r8, [r0, #-4]!
+		str	lr, [r0, #-4]!
+
+	CALGN(	bcs	2b			)
+
+7:		ldmfd	sp!, {r5 - r8}
+
+8:		movs	r2, r2, lsl #31
+		ldrneb	r3, [r1, #-1]!
+		ldrcsb	r4, [r1, #-1]!
+		ldrcsb	ip, [r1, #-1]
+		strneb	r3, [r0, #-1]!
+		strcsb	r4, [r0, #-1]!
+		strcsb	ip, [r0, #-1]
+		ldmfd	sp!, {r0, r4, pc}
+
+9:		cmp	ip, #2
+		ldrgtb	r3, [r1, #-1]!
+		ldrgeb	r4, [r1, #-1]!
+		ldrb	lr, [r1, #-1]!
+		strgtb	r3, [r0, #-1]!
+		strgeb	r4, [r0, #-1]!
+		subs	r2, r2, ip
+		strb	lr, [r0, #-1]!
+		blt	8b
+		ands	ip, r1, #3
+		beq	1b
+
+10:		bic	r1, r1, #3
+		cmp	ip, #2
+		ldr	r3, [r1, #0]
+		beq	17f
+		blt	18f
+
+
+		.macro	backward_copy_shift push pull
+
+		subs	r2, r2, #28
+		blt	14f
+
+	CALGN(	ands	ip, r1, #31		)
+	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	subcc	r2, r2, ip		)
+	CALGN(	bcc	15f			)
+
+11:		stmfd	sp!, {r5 - r9}
+
+	PLD(	pld	[r1, #-4]		)
+	PLD(	subs	r2, r2, #96		)
+	PLD(	pld	[r1, #-32]		)
+	PLD(	blt	13f			)
+	PLD(	pld	[r1, #-64]		)
+	PLD(	pld	[r1, #-96]		)
+
+12:	PLD(	pld	[r1, #-128]		)
+13:		ldmdb   r1!, {r7, r8, r9, ip}
+		mov     lr, r3, push #\push
+		subs    r2, r2, #32
+		ldmdb   r1!, {r3, r4, r5, r6}
+		orr     lr, lr, ip, pull #\pull
+		mov     ip, ip, push #\push
+		orr     ip, ip, r9, pull #\pull
+		mov     r9, r9, push #\push
+		orr     r9, r9, r8, pull #\pull
+		mov     r8, r8, push #\push
+		orr     r8, r8, r7, pull #\pull
+		mov     r7, r7, push #\push
+		orr     r7, r7, r6, pull #\pull
+		mov     r6, r6, push #\push
+		orr     r6, r6, r5, pull #\pull
+		mov     r5, r5, push #\push
+		orr     r5, r5, r4, pull #\pull
+		mov     r4, r4, push #\push
+		orr     r4, r4, r3, pull #\pull
+		stmdb   r0!, {r4 - r9, ip, lr}
+		bge	12b
+	PLD(	cmn	r2, #96			)
+	PLD(	bge	13b			)
+
+		ldmfd	sp!, {r5 - r9}
+
+14:		ands	ip, r2, #28
+		beq	16f
+
+15:		mov     lr, r3, push #\push
+		ldr	r3, [r1, #-4]!
+		subs	ip, ip, #4
+		orr	lr, lr, r3, pull #\pull
+		str	lr, [r0, #-4]!
+		bgt	15b
+	CALGN(	cmp	r2, #0			)
+	CALGN(	bge	11b			)
+
+16:		add	r1, r1, #(\pull / 8)
+		b	8b
+
+		.endm
+
+
+		backward_copy_shift	push=8	pull=24
+
+17:		backward_copy_shift	push=16	pull=16
+
+18:		backward_copy_shift	push=24	pull=8
+
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
new file mode 100644
index 000000000000..c7fbdf005319
--- /dev/null
+++ b/arch/arm/lib/muldi3.S
@@ -0,0 +1,44 @@
+/*
+ *  linux/arch/arm/lib/muldi3.S
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    Oct 19, 2005
+ *  Copyright:  Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__muldi3)
+
+	mul	xh, yl, xh
+	mla	xh, xl, yh, xh
+	mov	ip, xl, asr #16
+	mov	yh, yl, asr #16
+	bic	xl, xl, ip, lsl #16
+	bic	yl, yl, yh, lsl #16
+	mla	xh, yh, ip, xh
+	mul	yh, xl, yh
+	mul	xl, yl, xl
+	mul	ip, yl, ip
+	adds	xl, xl, yh, lsl #16
+	adc	xh, xh, yh, lsr #16
+	adds	xl, xl, ip, lsl #16
+	adc	xh, xh, ip, lsr #16
+	mov	pc, lr
+
diff --git a/arch/arm/lib/muldi3.c b/arch/arm/lib/muldi3.c
deleted file mode 100644
index 0a3b93313f18..000000000000
--- a/arch/arm/lib/muldi3.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/* More subroutines needed by GCC output code on some machines.  */
-/* Compile this one with gcc.  */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.
- */
-/* support functions required by the kernel. based on code from gcc-2.95.3 */
-/* I Molton     29/07/01 */
-
-#include "gcclib.h"
-
-#define umul_ppmm(xh, xl, a, b) \
-{register u32 __t0, __t1, __t2;                                     \
-  __asm__ ("%@ Inlined umul_ppmm					\n\
-        mov     %2, %5, lsr #16						\n\
-        mov     %0, %6, lsr #16						\n\
-        bic     %3, %5, %2, lsl #16					\n\
-        bic     %4, %6, %0, lsl #16					\n\
-        mul     %1, %3, %4						\n\
-        mul     %4, %2, %4						\n\
-        mul     %3, %0, %3						\n\
-        mul     %0, %2, %0						\n\
-        adds    %3, %4, %3						\n\
-        addcs   %0, %0, #65536						\n\
-        adds    %1, %1, %3, lsl #16					\n\
-        adc     %0, %0, %3, lsr #16"                                    \
-           : "=&r" ((u32) (xh)),                                    \
-             "=r" ((u32) (xl)),                                     \
-             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
-           : "r" ((u32) (a)),                                       \
-             "r" ((u32) (b)));}
-
-#define __umulsidi3(u, v) \
-  ({DIunion __w;                                                        \
-    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
-    __w.ll; })
-
-s64 __muldi3(s64 u, s64 v)
-{
-	DIunion w;
-	DIunion uu, vv;
-
-	uu.ll = u, vv.ll = v;
-
-	w.ll = __umulsidi3(uu.s.low, vv.s.low);
-	w.s.high += ((u32) uu.s.low * (u32) vv.s.high
-		     + (u32) uu.s.high * (u32) vv.s.low);
-
-	return w.ll;
-}
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
new file mode 100644
index 000000000000..ff6ece487ffc
--- /dev/null
+++ b/arch/arm/lib/sha1.S
@@ -0,0 +1,206 @@
+/*
+ *  linux/arch/arm/lib/sha1.S
+ *
+ *  SHA transform optimized for ARM
+ *
+ *  Copyright:	(C) 2005 by Nicolas Pitre <nico@cam.org>
+ *  Created:	September 17, 2005
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  The reference implementation for this code is linux/lib/sha1.c
+ */
+
+#include <linux/linkage.h>
+
+	.text
+
+
+/*
+ * void sha_transform(__u32 *digest, const char *in, __u32 *W)
+ *
+ * Note: the "in" ptr may be unaligned.
+ */
+
+ENTRY(sha_transform)
+
+	stmfd	sp!, {r4 - r8, lr}
+
+	@ for (i = 0; i < 16; i++)
+	@         W[i] = be32_to_cpu(in[i]); */
+
+#ifdef __ARMEB__
+	mov	r4, r0
+	mov	r0, r2
+	mov	r2, #64
+	bl	memcpy
+	mov	r2, r0
+	mov	r0, r4
+#else
+	mov	r3, r2
+	mov	lr, #16
+1:	ldrb	r4, [r1], #1
+	ldrb	r5, [r1], #1
+	ldrb	r6, [r1], #1
+	ldrb	r7, [r1], #1
+	subs	lr, lr, #1
+	orr	r5, r5, r4, lsl #8
+	orr	r6, r6, r5, lsl #8
+	orr	r7, r7, r6, lsl #8
+	str	r7, [r3], #4
+	bne	1b
+#endif
+
+	@ for (i = 0; i < 64; i++)
+	@         W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
+
+	sub	r3, r2, #4
+	mov	lr, #64
+2:	ldr	r4, [r3, #4]!
+	subs	lr, lr, #1
+	ldr	r5, [r3, #8]
+	ldr	r6, [r3, #32]
+	ldr	r7, [r3, #52]
+	eor	r4, r4, r5
+	eor	r4, r4, r6
+	eor	r4, r4, r7
+	mov	r4, r4, ror #31
+	str	r4, [r3, #64]
+	bne	2b
+
+	/*
+	 * The SHA functions are:
+	 *
+	 * f1(B,C,D) = (D ^ (B & (C ^ D)))
+	 * f2(B,C,D) = (B ^ C ^ D)
+	 * f3(B,C,D) = ((B & C) | (D & (B | C)))
+	 *
+	 * Then the sub-blocks are processed as follows:
+	 *
+	 * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
+	 * B' = A
+	 * C' = ror(B, 2)
+	 * D' = C
+	 * E' = D
+	 *
+	 * We therefore unroll each loop 5 times to avoid register shuffling.
+	 * Also the ror for C (and also D and E which are successivelyderived
+	 * from it) is applied in place to cut on an additional mov insn for
+	 * each round.
+	 */
+
+	.macro	sha_f1, A, B, C, D, E
+	ldr	r3, [r2], #4
+	eor	ip, \C, \D
+	add	\E, r1, \E, ror #2
+	and	ip, \B, ip, ror #2
+	add	\E, \E, \A, ror #27
+	eor	ip, ip, \D, ror #2
+	add	\E, \E, r3
+	add	\E, \E, ip
+	.endm
+
+	.macro	sha_f2, A, B, C, D, E
+	ldr	r3, [r2], #4
+	add	\E, r1, \E, ror #2
+	eor	ip, \B, \C, ror #2
+	add	\E, \E, \A, ror #27
+	eor	ip, ip, \D, ror #2
+	add	\E, \E, r3
+	add	\E, \E, ip
+	.endm
+
+	.macro	sha_f3, A, B, C, D, E
+	ldr	r3, [r2], #4
+	add	\E, r1, \E, ror #2
+	orr	ip, \B, \C, ror #2
+	add	\E, \E, \A, ror #27
+	and	ip, ip, \D, ror #2
+	add	\E, \E, r3
+	and	r3, \B, \C, ror #2
+	orr	ip, ip, r3
+	add	\E, \E, ip
+	.endm
+
+	ldmia	r0, {r4 - r8}
+
+	mov	lr, #4
+	ldr	r1, .L_sha_K + 0
+
+	/* adjust initial values */
+	mov	r6, r6, ror #30
+	mov	r7, r7, ror #30
+	mov	r8, r8, ror #30
+
+3:	subs	lr, lr, #1
+	sha_f1	r4, r5, r6, r7, r8
+	sha_f1	r8, r4, r5, r6, r7
+	sha_f1	r7, r8, r4, r5, r6
+	sha_f1	r6, r7, r8, r4, r5
+	sha_f1	r5, r6, r7, r8, r4
+	bne	3b
+
+	ldr	r1, .L_sha_K + 4
+	mov	lr, #4
+
+4:	subs	lr, lr, #1
+	sha_f2	r4, r5, r6, r7, r8
+	sha_f2	r8, r4, r5, r6, r7
+	sha_f2	r7, r8, r4, r5, r6
+	sha_f2	r6, r7, r8, r4, r5
+	sha_f2	r5, r6, r7, r8, r4
+	bne	4b
+
+	ldr	r1, .L_sha_K + 8
+	mov	lr, #4
+
+5:	subs	lr, lr, #1
+	sha_f3	r4, r5, r6, r7, r8
+	sha_f3	r8, r4, r5, r6, r7
+	sha_f3	r7, r8, r4, r5, r6
+	sha_f3	r6, r7, r8, r4, r5
+	sha_f3	r5, r6, r7, r8, r4
+	bne	5b
+
+	ldr	r1, .L_sha_K + 12
+	mov	lr, #4
+
+6:	subs	lr, lr, #1
+	sha_f2	r4, r5, r6, r7, r8
+	sha_f2	r8, r4, r5, r6, r7
+	sha_f2	r7, r8, r4, r5, r6
+	sha_f2	r6, r7, r8, r4, r5
+	sha_f2	r5, r6, r7, r8, r4
+	bne	6b
+
+	ldmia	r0, {r1, r2, r3, ip, lr}
+	add	r4, r1, r4
+	add	r5, r2, r5
+	add	r6, r3, r6, ror #2
+	add	r7, ip, r7, ror #2
+	add	r8, lr, r8, ror #2
+	stmia	r0, {r4 - r8}
+
+	ldmfd	sp!, {r4 - r8, pc}
+
+.L_sha_K:
+	.word	0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+
+/*
+ * void sha_init(__u32 *buf)
+ */
+
+.L_sha_initial_digest:
+	.word	0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
+
+ENTRY(sha_init)
+
+	str	lr, [sp, #-4]!
+	adr	r1, .L_sha_initial_digest
+	ldmia	r1, {r1, r2, r3, ip, lr}
+	stmia	r0, {r1, r2, r3, ip, lr}
+	ldr	pc, [sp], #4
+
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index d3ed0636c008..6f1b5b49fe4c 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -43,8 +43,6 @@ ENTRY(__arch_copy_to_user)
 		stmfd	sp!, {r2, r4 - r7, lr}
 		cmp	r2, #4
 		blt	.c2u_not_enough
-	PLD(	pld	[r1, #0]		)
-	PLD(	pld	[r0, #0]		)
 		ands	ip, r0, #3
 		bne	.c2u_dest_not_aligned
 .c2u_dest_aligned:
@@ -73,25 +71,13 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #32
 		blt	.c2u_0rem8lp
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-	PLD(	subs	ip, ip, #64			)
-	PLD(	blt	.c2u_0cpynopld		)
-	PLD(	pld	[r1, #60]		)
-	PLD(	pld	[r0, #60]		)
-
-.c2u_0cpy8lp:
-	PLD(	pld	[r1, #92]		)
-	PLD(	pld	[r0, #92]		)
-.c2u_0cpynopld:	ldmia	r1!, {r3 - r6}
+
+.c2u_0cpy8lp:	ldmia	r1!, {r3 - r6}
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		ldmia	r1!, {r3 - r6}
 		subs	ip, ip, #32
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.c2u_0cpy8lp
-	PLD(	cmn	ip, #64			)
-	PLD(	bge	.c2u_0cpynopld		)
-	PLD(	add	ip, ip, #64		)
 
 .c2u_0rem8lp:	cmn	ip, #16
 		ldmgeia	r1!, {r3 - r6}
@@ -143,17 +129,8 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.c2u_1rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.c2u_1cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.c2u_1cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.c2u_1cpynopld:	mov	r3, r7, pull #8
+
+.c2u_1cpy8lp:	mov	r3, r7, pull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
 		orr	r3, r3, r4, push #24
@@ -165,9 +142,6 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		orr	r6, r6, r7, push #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.c2u_1cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.c2u_1cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .c2u_1rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #8
@@ -210,17 +184,8 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.c2u_2rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.c2u_2cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.c2u_2cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.c2u_2cpynopld:	mov	r3, r7, pull #16
+
+.c2u_2cpy8lp:	mov	r3, r7, pull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
 		orr	r3, r3, r4, push #16
@@ -232,9 +197,6 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		orr	r6, r6, r7, push #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.c2u_2cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.c2u_2cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .c2u_2rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #16
@@ -277,17 +239,8 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.c2u_3rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.c2u_3cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.c2u_3cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.c2u_3cpynopld:	mov	r3, r7, pull #24
+
+.c2u_3cpy8lp:	mov	r3, r7, pull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
 		orr	r3, r3, r4, push #8
@@ -299,9 +252,6 @@ USER(		strt	r3, [r0], #4)			@ May fault
 		orr	r6, r6, r7, push #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.c2u_3cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.c2u_3cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .c2u_3rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #24
@@ -356,8 +306,6 @@ ENTRY(__arch_copy_from_user)
 		stmfd	sp!, {r0, r2, r4 - r7, lr}
 		cmp	r2, #4
 		blt	.cfu_not_enough
-	PLD(	pld	[r1, #0]		)
-	PLD(	pld	[r0, #0]		)
 		ands	ip, r0, #3
 		bne	.cfu_dest_not_aligned
 .cfu_dest_aligned:
@@ -385,25 +333,13 @@ USER(		ldrt	r3, [r1], #4)
 		sub	r2, r2, ip
 		subs	ip, ip, #32
 		blt	.cfu_0rem8lp
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-	PLD(	subs	ip, ip, #64			)
-	PLD(	blt	.cfu_0cpynopld		)
-	PLD(	pld	[r1, #60]		)
-	PLD(	pld	[r0, #60]		)
-
-.cfu_0cpy8lp:
-	PLD(	pld	[r1, #92]		)
-	PLD(	pld	[r0, #92]		)
-.cfu_0cpynopld:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
+
+.cfu_0cpy8lp:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
 		stmia	r0!, {r3 - r6}
 		ldmia	r1!, {r3 - r6}			@ Shouldnt fault
 		subs	ip, ip, #32
 		stmia	r0!, {r3 - r6}
 		bpl	.cfu_0cpy8lp
-	PLD(	cmn	ip, #64			)
-	PLD(	bge	.cfu_0cpynopld		)
-	PLD(	add	ip, ip, #64		)
 
 .cfu_0rem8lp:	cmn	ip, #16
 		ldmgeia	r1!, {r3 - r6}			@ Shouldnt fault
@@ -456,17 +392,8 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.cfu_1rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.cfu_1cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.cfu_1cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.cfu_1cpynopld:	mov	r3, r7, pull #8
+
+.cfu_1cpy8lp:	mov	r3, r7, pull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
 		orr	r3, r3, r4, push #24
@@ -478,9 +405,6 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		orr	r6, r6, r7, push #24
 		stmia	r0!, {r3 - r6}
 		bpl	.cfu_1cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.cfu_1cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .cfu_1rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #8
@@ -523,17 +447,8 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.cfu_2rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.cfu_2cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.cfu_2cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.cfu_2cpynopld:	mov	r3, r7, pull #16
+
+.cfu_2cpy8lp:	mov	r3, r7, pull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
 		orr	r3, r3, r4, push #16
@@ -545,9 +460,6 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		orr	r6, r6, r7, push #16
 		stmia	r0!, {r3 - r6}
 		bpl	.cfu_2cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.cfu_2cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .cfu_2rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #16
@@ -590,17 +502,8 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		sub	r2, r2, ip
 		subs	ip, ip, #16
 		blt	.cfu_3rem8lp
-	PLD(	pld	[r1, #12]		)
-	PLD(	pld	[r0, #12]		)
-	PLD(	subs	ip, ip, #32		)
-	PLD(	blt	.cfu_3cpynopld		)
-	PLD(	pld	[r1, #28]		)
-	PLD(	pld	[r0, #28]		)
-
-.cfu_3cpy8lp:
-	PLD(	pld	[r1, #44]		)
-	PLD(	pld	[r0, #44]		)
-.cfu_3cpynopld:	mov	r3, r7, pull #24
+
+.cfu_3cpy8lp:	mov	r3, r7, pull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		orr	r3, r3, r4, push #8
 		mov	r4, r4, pull #24
@@ -612,9 +515,6 @@ USER(		ldrt	r7, [r1], #4)			@ May fault
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.cfu_3cpy8lp
-	PLD(	cmn	ip, #32			)
-	PLD(	bge	.cfu_3cpynopld		)
-	PLD(	add	ip, ip, #32		)
 
 .cfu_3rem8lp:	tst	ip, #8
 		movne	r3, r7, pull #24
@@ -657,41 +557,3 @@ USER(		ldrgtbt	r3, [r1], #1)			@ May fault
 		LOADREGS(fd,sp!, {r4 - r7, pc})
 		.previous
 
-/* Prototype: int __arch_clear_user(void *addr, size_t sz)
- * Purpose  : clear some user memory
- * Params   : addr - user memory address to clear
- *          : sz   - number of bytes to clear
- * Returns  : number of bytes NOT cleared
- */
-ENTRY(__arch_clear_user)
-		stmfd	sp!, {r1, lr}
-		mov	r2, #0
-		cmp	r1, #4
-		blt	2f
-		ands	ip, r0, #3
-		beq	1f
-		cmp	ip, #2
-USER(		strbt	r2, [r0], #1)
-USER(		strlebt	r2, [r0], #1)
-USER(		strltbt	r2, [r0], #1)
-		rsb	ip, ip, #4
-		sub	r1, r1, ip		@  7  6  5  4  3  2  1
-1:		subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
-USER(		strplt	r2, [r0], #4)
-USER(		strplt	r2, [r0], #4)
-		bpl	1b
-		adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
-USER(		strplt	r2, [r0], #4)
-2:		tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
-USER(		strnebt	r2, [r0], #1)
-USER(		strnebt	r2, [r0], #1)
-		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
-USER(		strnebt	r2, [r0], #1)
-		mov	r0, #0
-		LOADREGS(fd,sp!, {r1, pc})
-
-		.section .fixup,"ax"
-		.align	0
-9001:		LOADREGS(fd,sp!, {r0, pc})
-		.previous
-
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
new file mode 100644
index 000000000000..112630f93e5d
--- /dev/null
+++ b/arch/arm/lib/ucmpdi2.S
@@ -0,0 +1,35 @@
+/*
+ *  linux/arch/arm/lib/ucmpdi2.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Oct 19, 2005
+ *  Copyright:	Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__ucmpdi2)
+
+	cmp	xh, yh
+	cmpeq	xl, yl
+	movlo	r0, #0
+	moveq	r0, #1
+	movhi	r0, #2
+	mov	pc, lr
+
diff --git a/arch/arm/lib/ucmpdi2.c b/arch/arm/lib/ucmpdi2.c
deleted file mode 100644
index 57f3f2df3850..000000000000
--- a/arch/arm/lib/ucmpdi2.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* More subroutines needed by GCC output code on some machines.  */
-/* Compile this one with gcc.  */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.
- */
-/* support functions required by the kernel. based on code from gcc-2.95.3 */
-/* I Molton     29/07/01 */
-
-#include "gcclib.h"
-
-int __ucmpdi2(s64 a, s64 b)
-{
-	DIunion au, bu;
-
-	au.ll = a, bu.ll = b;
-
-	if ((u32) au.s.high < (u32) bu.s.high)
-		return 0;
-	else if ((u32) au.s.high > (u32) bu.s.high)
-		return 2;
-	if ((u32) au.s.low < (u32) bu.s.low)
-		return 0;
-	else if ((u32) au.s.low > (u32) bu.s.low)
-		return 2;
-	return 1;
-}
author	Jeff Garzik <jgarzik@pobox.com>	2005-11-05 15:39:24 -0500
committer	Jeff Garzik <jgarzik@pobox.com>	2005-11-05 15:39:24 -0500
commit	7211bb9b64f17b23834d91fc3d0c1d78671ee9a8 (patch)
tree	541909f86c31fee97cd70d28ec2fe5c23e1ceb02 /arch/arm/lib
parent	f1e691a24955ea987f021f378324fb5003b1b208 (diff)
parent	70d9d825e0a5a78ec1dacaaaf5c72ff5b0206fab (diff)