summaryrefslogtreecommitdiff
path: root/src/arch/arm/libgcc/lib1funcs.S
diff options
context:
space:
mode:
authorGabe Black <gabeblack@google.com>2013-10-08 18:24:10 -0700
committerMarc Jones <marc.jones@se-eng.com>2014-11-05 01:49:06 +0100
commit169c0df6b8f07268e3bc49f35520df692705f5d8 (patch)
treef9cef8cf389db035e6ea54baf150fdcdfe7dd819 /src/arch/arm/libgcc/lib1funcs.S
parent44bc11c3ebf9f6e9227b6c79c866c898f068ca5c (diff)
downloadcoreboot-169c0df6b8f07268e3bc49f35520df692705f5d8.tar.xz
ARM: Use local versions of libgcc functions instead of linking against libgcc.
The flags used to compile libgcc may make it incompatible with the code it's linked against, and/or the hardware it's going to run on. Rather than try to tease the right libgcc from the compiler, lets just leave it out and use our own implementations of the necessary functions. Most of these implementations were taken from the Linux kernel, except for uldivmod.S which was taken from a CL originally written for U-Boot by Che-Liang Chiou in December of 2010. It was modified to not use the CLZ instruction on machines that don't have it, anything earlier than ARMv5. The top block was taken from an earlier version of the same CL which didn't use CLZ in that spot. The later block was written from scratch. BUG=None TEST=Built and booted into the bootblock on nyan. Ran a series of tests which divided and modded a 64 bit value by various 32 bit values which were powers of 2. Confirmed that this function was used and that the returned value was correct. Printed decimal and hex versions of some values and verified that they equaled each other. Built and booted on pit with serial enabled. BRANCH=None Original-Change-Id: I7527e28af411b7aa7f94579be95a6b352a91a224 Original-Signed-off-by: Gabe Black <gabeblack@google.com> Original-Reviewed-on: https://chromium-review.googlesource.com/172401 Original-Reviewed-by: David Hendricks <dhendrix@chromium.org> Original-Commit-Queue: Gabe Black <gabeblack@chromium.org> Original-Tested-by: Gabe Black <gabeblack@chromium.org> (cherry picked from commit be8c7a8f3292a7d7651b7c6dafc9a2c53afbd402) *** This second patch is cherry-picked and squashed again to *** pick up the libgcc changes that were skipped previously. arm: Move libgcc assembly macros to arch/asm.h libgcc/macros.h contains some useful assembly macros that are common in Linux kernel code and facilitate things such as unified ARM/THUMB assembly. This patch moves it to a more general place where it can be used by other code as well. BUG=None TEST=Snow still boots. Original-Change-Id: If68e8930aaafa706c54cf9a156fac826b31bb193 Original-Signed-off-by: Julius Werner <jwerner@chromium.org> Original-Reviewed-on: https://chromium-review.googlesource.com/182178 Original-Reviewed-by: Vincent Palatin <vpalatin@chromium.org> (cherry picked from commit a780670def94a969829811fa8cf257f12b88f085) *** Additional changes for stage specific builds Signed-off-by: Marc Jones <marc.jones@se-eng.com> Change-Id: Ie3e48f34ebf6fbe20c3dd76ecbcbea7844e9466e Reviewed-on: http://review.coreboot.org/7322 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
Diffstat (limited to 'src/arch/arm/libgcc/lib1funcs.S')
-rw-r--r--src/arch/arm/libgcc/lib1funcs.S344
1 files changed, 344 insertions, 0 deletions
diff --git a/src/arch/arm/libgcc/lib1funcs.S b/src/arch/arm/libgcc/lib1funcs.S
new file mode 100644
index 0000000000..533c369d6e
--- /dev/null
+++ b/src/arch/arm/libgcc/lib1funcs.S
@@ -0,0 +1,344 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@fluxnic.net>
+ * - contributed to gcc-3.4 on Sep 30, 2003
+ * - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+
+#if defined __GNUC__
+
+#include <arch/asm.h>
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4 bit nibbles in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __COREBOOT_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed substractions to keep only the reminder.
+ @ Do comparisons in batch of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparison/substractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+
+ subs r2, r1, #1
+ moveq pc, lr
+ bcc Ldiv0
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
+
+ ARM_DIV_BODY r0, r1, r2, r3
+
+ mov r0, r2
+ mov pc, lr
+
+11: moveq r0, #1
+ movne r0, #0
+ mov pc, lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ mov pc, lr
+
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc Ldiv0
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ movls pc, lr
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+ mov pc, lr
+
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ mov pc, lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+
+ cmp r1, #0
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+ENDPROC(__modsi3)
+
+ENTRY(__aeabi_uidivmod)
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+
+ENDPROC(__aeabi_idivmod)
+
+
+Ldiv0:
+ str lr, [sp, #-8]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #8
+ENDPROC(Ldiv0)
+
+#endif