2 arch/arm/boot/compressed/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++
3 1 file changed, 348 insertions(+)
6 +++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S
9 + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
11 + * Author: Nicolas Pitre <nico@fluxnic.net>
12 + * - contributed to gcc-3.4 on Sep 30, 2003
13 + * - adapted for the Linux kernel on Oct 2, 2003
16 +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
18 +This file is free software; you can redistribute it and/or modify it
19 +under the terms of the GNU General Public License as published by the
20 +Free Software Foundation; either version 2, or (at your option) any later version.
23 +In addition to the permissions in the GNU General Public License, the
24 +Free Software Foundation gives you unlimited permission to link the
25 +compiled version of this file into combinations with other programs,
26 +and to distribute those combinations without any restriction coming
27 +from the use of this file. (The General Public License restrictions
28 +do apply in other respects; for example, they cover modification of
29 +the file, and distribution when not linked into a combine executable.)
32 +This file is distributed in the hope that it will be useful, but
33 +WITHOUT ANY WARRANTY; without even the implied warranty of
34 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 +General Public License for more details.
37 +You should have received a copy of the GNU General Public License
38 +along with this program; see the file COPYING. If not, write to
39 +the Free Software Foundation, 59 Temple Place - Suite 330,
40 +Boston, MA 02111-1307, USA. */
43 +#include <linux/linkage.h>
44 +#include <asm/assembler.h>
47 +.macro ARM_DIV_BODY dividend, divisor, result, curbit /* unsigned shift-and-subtract divide: quotient bits are ORed into result, what is left of dividend is the remainder. NOTE(review): result has to start at 0 and curbit at 1 for this to work; those loads, the loop branches and the closing .endm are not visible in this excerpt - confirm against the full file */
49 +#if __LINUX_ARM_ARCH__ >= 5
51 + clz \curbit, \divisor @ curbit = number of leading zero bits in divisor
52 + clz \result, \dividend @ result = number of leading zero bits in dividend
53 + sub \result, \curbit, \result @ result = shift that lines up the top set bits of divisor and dividend
55 + mov \divisor, \divisor, lsl \result @ align divisor with dividend
56 + mov \curbit, \curbit, lsl \result @ shift curbit by the same amount
61 + @ Initially shift the divisor left 3 bits if possible,
62 + @ set curbit accordingly. This allows for curbit to be located
63 + @ at the left end of each 4-bit nibble in the division loop
64 + @ to save one loop iteration in most cases.
65 + tst \divisor, #0xe0000000 @ Z is set when the top three bits of divisor are clear
66 + moveq \divisor, \divisor, lsl #3 @ so the 3-bit pre-shift cannot lose bits
70 + @ Unless the divisor is very big, shift it up in multiples of
71 + @ four bits, since this is the amount of unwinding in the main
72 + @ division loop. Continue shifting until the divisor is
73 + @ larger than the dividend.
74 +1: cmp \divisor, #0x10000000 @ LO while a further 4-bit shift still fits in 32 bits
75 + cmplo \divisor, \dividend @ ... and LO while divisor is still below dividend
76 + movlo \divisor, \divisor, lsl #4 @ both hold: scale divisor by 16
77 + movlo \curbit, \curbit, lsl #4 @ and keep curbit in step
80 + @ For very big divisors, we must shift it a bit at a time, or
81 + @ we will be in danger of overflowing.
82 +1: cmp \divisor, #0x80000000 @ LO while bit 31 of divisor is clear
83 + cmplo \divisor, \dividend @ ... and LO while divisor is still below dividend
84 + movlo \divisor, \divisor, lsl #1 @ both hold: double divisor
85 + movlo \curbit, \curbit, lsl #1 @ and keep curbit in step
93 +1: cmp \dividend, \divisor @ division loop: up to four quotient bits per pass
94 + subhs \dividend, \dividend, \divisor @ divisor fits: take it out of dividend
95 + orrhs \result, \result, \curbit @ and set the matching quotient bit
96 + cmp \dividend, \divisor, lsr #1 @ same step with divisor / 2 and curbit / 2
97 + subhs \dividend, \dividend, \divisor, lsr #1
98 + orrhs \result, \result, \curbit, lsr #1
99 + cmp \dividend, \divisor, lsr #2 @ same step with divisor / 4 and curbit / 4
100 + subhs \dividend, \dividend, \divisor, lsr #2
101 + orrhs \result, \result, \curbit, lsr #2
102 + cmp \dividend, \divisor, lsr #3 @ same step with divisor / 8 and curbit / 8
103 + subhs \dividend, \dividend, \divisor, lsr #3
104 + orrhs \result, \result, \curbit, lsr #3
105 + cmp \dividend, #0 @ Early termination?
106 + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
107 + movne \divisor, \divisor, lsr #4 @ dividend and curbit both non-zero: move divisor down one nibble as well
113 +.macro ARM_DIV2_ORDER divisor, order /* computes order = log2(divisor); the final compare-and-add step is only exact when divisor is a power of two. NOTE(review): the #else/#endif split between the clz path and the compare-and-shift path, and the closing .endm, are not visible in this excerpt - confirm against the full file */
115 +#if __LINUX_ARM_ARCH__ >= 5
117 + clz \order, \divisor @ order = number of leading zero bits in divisor
118 + rsb \order, \order, #31 @ order = 31 - order, the index of the top set bit
122 + cmp \divisor, #(1 << 16) @ HS when divisor has a bit set at position 16 or above
123 + movhs \divisor, \divisor, lsr #16 @ then keep only its upper half. NOTE(review): unlike the 8-bit and 4-bit steps below, no update of order is visible for this step - confirm
127 + cmp \divisor, #(1 << 8) @ HS when what is left is 256 or more
128 + movhs \divisor, \divisor, lsr #8 @ then drop the low byte
129 + addhs \order, \order, #8 @ and account for those 8 positions
131 + cmp \divisor, #(1 << 4) @ HS when what is left is 16 or more
132 + movhs \divisor, \divisor, lsr #4 @ then drop the low nibble
133 + addhs \order, \order, #4 @ and account for those 4 positions
135 + cmp \divisor, #(1 << 2) @ compare the remaining value with 4
136 + addhi \order, \order, #3 @ above 4: add 3 (exact when the value is 8)
137 + addls \order, \order, \divisor, lsr #1 @ 4 or below: add divisor >> 1 (0, 1, 2 for 1, 2, 4)
144 +.macro ARM_MOD_BODY dividend, divisor, order, spare /* unsigned shift-and-subtract modulo: dividend is reduced in place, order counts how many bit positions divisor was shifted up. NOTE(review): the branches between the steps, the initial load of order on the non-clz path and the closing .endm are not visible in this excerpt - confirm against the full file */
146 +#if __LINUX_ARM_ARCH__ >= 5
148 + clz \order, \divisor @ order = number of leading zero bits in divisor
149 + clz \spare, \dividend @ spare = number of leading zero bits in dividend
150 + sub \order, \order, \spare @ order = shift that lines up their top set bits
151 + mov \divisor, \divisor, lsl \order @ align divisor with dividend
157 + @ Unless the divisor is very big, shift it up in multiples of
158 + @ four bits, since this is the amount of unwinding in the main
159 + @ division loop. Continue shifting until the divisor is
160 + @ larger than the dividend.
161 +1: cmp \divisor, #0x10000000 @ LO while a further 4-bit shift still fits in 32 bits
162 + cmplo \divisor, \dividend @ ... and LO while divisor is still below dividend
163 + movlo \divisor, \divisor, lsl #4 @ both hold: scale divisor by 16
164 + addlo \order, \order, #4 @ and count the four positions
167 + @ For very big divisors, we must shift it a bit at a time, or
168 + @ we will be in danger of overflowing.
169 +1: cmp \divisor, #0x80000000 @ LO while bit 31 of divisor is clear
170 + cmplo \divisor, \dividend @ ... and LO while divisor is still below dividend
171 + movlo \divisor, \divisor, lsl #1 @ both hold: double divisor
172 + addlo \order, \order, #1 @ and count the position
177 + @ Perform all needed subtractions to keep only the remainder.
178 + @ Do comparisons in batches of 4 first.
179 + subs \order, \order, #3 @ yes, 3 is intended here
182 +1: cmp \dividend, \divisor @ batch of four compare/subtract steps
183 + subhs \dividend, \dividend, \divisor
184 + cmp \dividend, \divisor, lsr #1 @ same step with divisor / 2
185 + subhs \dividend, \dividend, \divisor, lsr #1
186 + cmp \dividend, \divisor, lsr #2 @ same step with divisor / 4
187 + subhs \dividend, \dividend, \divisor, lsr #2
188 + cmp \dividend, \divisor, lsr #3 @ same step with divisor / 8
189 + subhs \dividend, \dividend, \divisor, lsr #3
191 + mov \divisor, \divisor, lsr #4 @ move divisor down one nibble (flags are left untouched)
192 + subges \order, \order, #4 @ on GE: four more positions consumed, flags updated from the new order. NOTE(review): the compare feeding this GE is presumably on a line not visible here - confirm
196 + teqne \dividend, #0 @ on NE: Z is set when dividend has already reached zero
199 + @ Either 1, 2 or 3 comparison/subtractions are left.
203 + cmp \dividend, \divisor @ first of the leftover single-bit steps
204 + subhs \dividend, \dividend, \divisor
205 + mov \divisor, \divisor, lsr #1 @ next bit position
206 +3: cmp \dividend, \divisor @ second leftover step
207 + subhs \dividend, \dividend, \divisor
208 + mov \divisor, \divisor, lsr #1 @ next bit position
209 +4: cmp \dividend, \divisor @ last leftover step
210 + subhs \dividend, \dividend, \divisor @ dividend now holds the remainder
216 +ENTRY(__aeabi_uidiv) /* unsigned 32-bit divide entry point; NOTE(review): the special-case tests and branches around the two macro calls below are not visible in this excerpt */
226 + ARM_DIV_BODY r0, r1, r2, r3 @ dividend = r0, divisor = r1, quotient bits collect in r2, r3 = curbit
235 +12: ARM_DIV2_ORDER r1, r2 @ r2 = order of the divisor in r1
241 +ENDPROC(__aeabi_uidiv)
245 + subs r2, r1, #1 @ unsigned modulo path: compare divisor with 1 (r2 = r1 - 1, Z set when r1 is 1)
247 + cmpne r0, r1 @ compare dividend with divisor
249 + tsthi r1, r2 @ see if divisor is power of 2 (Z set when r1 & (r1 - 1) is 0)
253 + ARM_MOD_BODY r0, r1, r2, r3 @ reduce r0 modulo r1 in place; r2 = order, r3 = spare scratch
263 + eor ip, r0, r1 @ signed divide: save the sign of the result (bit 31 of ip = sign of r0 xor sign of r1).
265 + rsbmi r1, r1, #0 @ loops below use unsigned, so r1 is negated when N is set.
266 + subs r2, r1, #1 @ division by 1 or -1 ? (r2 = r1 - 1)
269 + rsbmi r3, r0, #0 @ positive dividend value
272 + tst r1, r2 @ divisor is power of 2 ? (Z set when r1 & (r1 - 1) is 0)
275 + ARM_DIV_BODY r3, r1, r0, r2 @ unsigned divide of r3 by r1; quotient bits collect in r0, r2 = curbit
281 +10: teq ip, r0 @ same sign ?
286 + moveq r0, ip, asr #31 @ on EQ: r0 = 0 or -1, the sign bit of ip replicated
290 +12: ARM_DIV2_ORDER r1, r2 @ r2 = order of the divisor in r1
298 +ENDPROC(__aeabi_idiv)
304 + rsbmi r1, r1, #0 @ signed modulo path: loops below use unsigned, so r1 is negated when N is set.
305 + movs ip, r0 @ preserve sign of dividend
306 + rsbmi r0, r0, #0 @ if negative make positive
307 + subs r2, r1, #1 @ compare divisor with 1 (r2 = r1 - 1)
308 + cmpne r0, r1 @ compare dividend with divisor
310 + tsthi r1, r2 @ see if divisor is power of 2 (Z set when r1 & (r1 - 1) is 0)
314 + ARM_MOD_BODY r0, r1, r2, r3 @ reduce r0 modulo r1 in place; r2 = order, r3 = spare scratch
324 +ENTRY(__aeabi_uidivmod) /* presumably the unsigned divide-with-remainder entry point; only the register save and restore are visible in this excerpt - confirm the work done in between against the full file */
326 + stmfd sp!, {r0, r1, ip, lr} @ push r0, r1, ip and lr
328 + ldmfd sp!, {r1, r2, ip, lr} @ pop the saved r0 into r1 and the saved r1 into r2, restore ip and lr
333 +ENDPROC(__aeabi_uidivmod)
335 +ENTRY(__aeabi_idivmod) /* presumably the signed divide-with-remainder entry point; only the register save and restore are visible in this excerpt - confirm the work done in between against the full file */
337 + stmfd sp!, {r0, r1, ip, lr} @ push r0, r1, ip and lr
339 + ldmfd sp!, {r1, r2, ip, lr} @ pop the saved r0 into r1 and the saved r1 into r2, restore ip and lr
344 +ENDPROC(__aeabi_idivmod)
352 + mov r0, #0 @ About as wrong as it could be.