92b456dacc5959a0e92c0cd7106b333d7b4fd970
[openwrt.git] / target / linux / omap24xx / patches-2.6.37 / 100-optimized-arm-div.patch
1 ---
2 arch/arm/boot/compressed/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++
3 1 file changed, 348 insertions(+)
4
5 --- /dev/null
6 +++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S
7 @@ -0,0 +1,348 @@
8 +/*
9 + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
10 + *
11 + * Author: Nicolas Pitre <nico@fluxnic.net>
12 + * - contributed to gcc-3.4 on Sep 30, 2003
13 + * - adapted for the Linux kernel on Oct 2, 2003
14 + */
15 +
16 +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
17 +
18 +This file is free software; you can redistribute it and/or modify it
19 +under the terms of the GNU General Public License as published by the
20 +Free Software Foundation; either version 2, or (at your option) any
21 +later version.
22 +
23 +In addition to the permissions in the GNU General Public License, the
24 +Free Software Foundation gives you unlimited permission to link the
25 +compiled version of this file into combinations with other programs,
26 +and to distribute those combinations without any restriction coming
27 +from the use of this file. (The General Public License restrictions
28 +do apply in other respects; for example, they cover modification of
29 +the file, and distribution when not linked into a combine
30 +executable.)
31 +
32 +This file is distributed in the hope that it will be useful, but
33 +WITHOUT ANY WARRANTY; without even the implied warranty of
34 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 +General Public License for more details.
36 +
37 +You should have received a copy of the GNU General Public License
38 +along with this program; see the file COPYING. If not, write to
39 +the Free Software Foundation, 59 Temple Place - Suite 330,
40 +Boston, MA 02111-1307, USA. */
41 +
42 +
43 +#include <linux/linkage.h>
44 +#include <asm/assembler.h>
45 +
46 +
47 +.macro ARM_DIV_BODY dividend, divisor, result, curbit
48 + @ Unsigned shift-and-subtract divide: builds the quotient in result while reducing dividend in place; divisor and curbit are clobbered.
49 +#if __LINUX_ARM_ARCH__ >= 5
50 +
51 + clz \curbit, \divisor @ leading zero count of the divisor
52 + clz \result, \dividend @ leading zero count of the dividend
53 + sub \result, \curbit, \result @ shift that lines the top bit of the divisor up with the dividend
54 + mov \curbit, #1
55 + mov \divisor, \divisor, lsl \result @ pre-shifted divisor
56 + mov \curbit, \curbit, lsl \result @ quotient bit that matches this divisor position
57 + mov \result, #0 @ quotient starts empty
58 +
59 +#else
60 +
61 + @ Initially shift the divisor left 3 bits if possible, and
62 + @ set curbit accordingly. This allows curbit to be located
63 + @ at the left end of each 4-bit nibble in the division loop,
64 + @ which saves one loop iteration in most cases.
65 + tst \divisor, #0xe0000000 @ top three bits clear?
66 + moveq \divisor, \divisor, lsl #3
67 + moveq \curbit, #8 @ yes: curbit matches the 3 bit pre-shift
68 + movne \curbit, #1 @ no: divisor is left as it is
69 +
70 + @ Unless the divisor is very big, shift it up in multiples of
71 + @ four bits, since this is the amount of unwinding in the main
72 + @ division loop. Continue shifting until the divisor is
73 + @ larger than the dividend.
74 +1: cmp \divisor, #0x10000000
75 + cmplo \divisor, \dividend
76 + movlo \divisor, \divisor, lsl #4
77 + movlo \curbit, \curbit, lsl #4
78 + blo 1b @ repeat while divisor < 0x10000000 and divisor < dividend
79 +
80 + @ For very big divisors, we must shift it a bit at a time, or
81 + @ we will be in danger of overflowing.
82 +1: cmp \divisor, #0x80000000
83 + cmplo \divisor, \dividend
84 + movlo \divisor, \divisor, lsl #1
85 + movlo \curbit, \curbit, lsl #1
86 + blo 1b @ repeat while divisor < 0x80000000 and divisor < dividend
87 +
88 + mov \result, #0 @ quotient starts empty
89 +
90 +#endif
91 +
92 + @ Division loop: four compare/subtract steps per iteration
93 +1: cmp \dividend, \divisor @ try divisor
94 + subhs \dividend, \dividend, \divisor
95 + orrhs \result, \result, \curbit
96 + cmp \dividend, \divisor, lsr #1 @ try divisor / 2
97 + subhs \dividend, \dividend, \divisor, lsr #1
98 + orrhs \result, \result, \curbit, lsr #1
99 + cmp \dividend, \divisor, lsr #2 @ try divisor / 4
100 + subhs \dividend, \dividend, \divisor, lsr #2
101 + orrhs \result, \result, \curbit, lsr #2
102 + cmp \dividend, \divisor, lsr #3 @ try divisor / 8
103 + subhs \dividend, \dividend, \divisor, lsr #3
104 + orrhs \result, \result, \curbit, lsr #3
105 + cmp \dividend, #0 @ Early termination?
106 + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
107 + movne \divisor, \divisor, lsr #4 @ curbit still non-zero: move divisor down a nibble as well
108 + bne 1b
109 +
110 +.endm
111 +
112 +
113 +.macro ARM_DIV2_ORDER divisor, order
114 + @ Sets order to the bit index of the set bit of a power-of-two divisor (callers in this file only pass powers of two); the pre-ARMv5 path also shifts divisor.
115 +#if __LINUX_ARM_ARCH__ >= 5
116 +
117 + clz \order, \divisor
118 + rsb \order, \order, #31 @ order = 31 - clz(divisor)
119 +
120 +#else
121 +
122 + cmp \divisor, #(1 << 16) @ set bit at position 16 or above?
123 + movhs \divisor, \divisor, lsr #16 @ yes: bring it down into the low half
124 + movhs \order, #16
125 + movlo \order, #0
126 +
127 + cmp \divisor, #(1 << 8) @ same test on the next 8 bits
128 + movhs \divisor, \divisor, lsr #8
129 + addhs \order, \order, #8
130 +
131 + cmp \divisor, #(1 << 4) @ and on the next 4 bits
132 + movhs \divisor, \divisor, lsr #4
133 + addhs \order, \order, #4
134 +
135 + cmp \divisor, #(1 << 2) @ divisor is below 16 at this point
136 + addhi \order, \order, #3 @ above 4 (8 for a power of two): bit 3
137 + addls \order, \order, \divisor, lsr #1 @ 1, 2 or 4 adds 0, 1 or 2
138 +
139 +#endif
140 +
141 +.endm
142 +
143 +
144 +.macro ARM_MOD_BODY dividend, divisor, order, spare
145 + @ Unsigned modulo: reduces dividend in place to dividend mod divisor; divisor and order (and spare on ARMv5+) are clobbered.
146 +#if __LINUX_ARM_ARCH__ >= 5
147 +
148 + clz \order, \divisor
149 + clz \spare, \dividend
150 + sub \order, \order, \spare @ order = number of bits the divisor is shifted up
151 + mov \divisor, \divisor, lsl \order
152 +
153 +#else
154 +
155 + mov \order, #0 @ order counts the total left shift applied below
156 +
157 + @ Unless the divisor is very big, shift it up in multiples of
158 + @ four bits, since this is the amount of unwinding in the main
159 + @ division loop. Continue shifting until the divisor is
160 + @ larger than the dividend.
161 +1: cmp \divisor, #0x10000000
162 + cmplo \divisor, \dividend
163 + movlo \divisor, \divisor, lsl #4
164 + addlo \order, \order, #4
165 + blo 1b
166 +
167 + @ For very big divisors, we must shift it a bit at a time, or
168 + @ we will be in danger of overflowing.
169 +1: cmp \divisor, #0x80000000
170 + cmplo \divisor, \dividend
171 + movlo \divisor, \divisor, lsl #1
172 + addlo \order, \order, #1
173 + blo 1b
174 +
175 +#endif
176 +
177 + @ Perform all needed subtractions to keep only the remainder.
178 + @ Do comparisons in batches of 4 first.
179 + subs \order, \order, #3 @ yes, 3 is intended here
180 + blt 2f @ order was below 3: no full batch of 4, go to the tail
181 +
182 +1: cmp \dividend, \divisor @ try divisor
183 + subhs \dividend, \dividend, \divisor
184 + cmp \dividend, \divisor, lsr #1 @ try divisor / 2
185 + subhs \dividend, \dividend, \divisor, lsr #1
186 + cmp \dividend, \divisor, lsr #2 @ try divisor / 4
187 + subhs \dividend, \dividend, \divisor, lsr #2
188 + cmp \dividend, \divisor, lsr #3 @ try divisor / 8
189 + subhs \dividend, \dividend, \divisor, lsr #3
190 + cmp \dividend, #1 @ GE (signed) when something is left in dividend
191 + mov \divisor, \divisor, lsr #4 @ flags untouched: step divisor down a nibble
192 + subges \order, \order, #4 @ account for the four steps just done
193 + bge 1b @ loop while order has not gone negative
194 +
195 + tst \order, #3 @ (order & 3) == 0: no partial batch left
196 + teqne \dividend, #0 @ otherwise: is dividend already zero?
197 + beq 5f @ either way there is nothing more to do
198 +
199 + @ Either 1, 2 or 3 comparisons/subtractions are left.
200 +2: cmn \order, #2 @ compare order with -2
201 + blt 4f @ order < -2: one step left
202 + beq 3f @ order == -2: two steps left
203 + cmp \dividend, \divisor @ otherwise three steps left
204 + subhs \dividend, \dividend, \divisor
205 + mov \divisor, \divisor, lsr #1
206 +3: cmp \dividend, \divisor
207 + subhs \dividend, \dividend, \divisor
208 + mov \divisor, \divisor, lsr #1
209 +4: cmp \dividend, \divisor
210 + subhs \dividend, \dividend, \divisor
211 +5:
212 +.endm
213 +
214 +
215 +ENTRY(__udivsi3)
216 +ENTRY(__aeabi_uidiv)
217 + @ Unsigned 32-bit divide: r0 = r0 / r1. Uses r2, r3 and the flags as scratch; a zero divisor branches to Ldiv0.
218 + subs r2, r1, #1 @ r2 = divisor - 1
219 + moveq pc, lr @ divisor == 1: r0 already holds the quotient
220 + bcc Ldiv0 @ borrow from the subs: divisor == 0
221 + cmp r0, r1
222 + bls 11f @ dividend <= divisor: quotient is 0 or 1
223 + tst r1, r2 @ divisor & (divisor - 1)
224 + beq 12f @ zero: divisor is a power of 2, a shift is enough
225 +
226 + ARM_DIV_BODY r0, r1, r2, r3
227 +
228 + mov r0, r2 @ quotient was built in r2
229 + mov pc, lr
230 +
231 +11: moveq r0, #1 @ dividend == divisor
232 + movne r0, #0 @ dividend < divisor
233 + mov pc, lr
234 +
235 +12: ARM_DIV2_ORDER r1, r2
236 +
237 + mov r0, r0, lsr r2 @ quotient = dividend >> log2(divisor)
238 + mov pc, lr
239 +
240 +ENDPROC(__udivsi3)
241 +ENDPROC(__aeabi_uidiv)
242 +
243 +ENTRY(__umodsi3)
244 + @ Unsigned 32-bit modulo: r0 = r0 % r1. Uses r2, r3 and the flags as scratch; a zero divisor branches to Ldiv0.
245 + subs r2, r1, #1 @ compare divisor with 1
246 + bcc Ldiv0 @ borrow from the subs: divisor == 0
247 + cmpne r0, r1 @ compare dividend with divisor
248 + moveq r0, #0 @ divisor == 1 or dividend == divisor: remainder is 0
249 + tsthi r1, r2 @ see if divisor is power of 2
250 + andeq r0, r0, r2 @ power of 2: keep the low bits (no effect when r0 is already 0)
251 + movls pc, lr @ return unless dividend > divisor and divisor is not a power of 2
252 +
253 + ARM_MOD_BODY r0, r1, r2, r3
254 +
255 + mov pc, lr
256 +
257 +ENDPROC(__umodsi3)
258 +
259 +ENTRY(__divsi3)
260 +ENTRY(__aeabi_idiv)
261 + @ Signed 32-bit divide: r0 = r0 / r1, computed on magnitudes and negated when the operand signs differ. Uses r2, r3, ip and the flags; a zero divisor branches to Ldiv0.
262 + cmp r1, #0
263 + eor ip, r0, r1 @ save the sign of the result.
264 + beq Ldiv0 @ divisor == 0 (the eor leaves the flags alone)
265 + rsbmi r1, r1, #0 @ loops below use unsigned.
266 + subs r2, r1, #1 @ division by 1 or -1 ?
267 + beq 10f
268 + movs r3, r0 @ r3 = dividend, flags set from its sign
269 + rsbmi r3, r0, #0 @ positive dividend value
270 + cmp r3, r1
271 + bls 11f @ |dividend| <= |divisor|: quotient is 0 or +/-1
272 + tst r1, r2 @ divisor is power of 2 ?
273 + beq 12f
274 +
275 + ARM_DIV_BODY r3, r1, r0, r2
276 +
277 + cmp ip, #0
278 + rsbmi r0, r0, #0 @ operand signs differed: negate the quotient
279 + mov pc, lr
280 +
281 +10: teq ip, r0 @ ip ^ r0 has the sign of the original divisor: was it -1 ?
282 + rsbmi r0, r0, #0 @ dividing by -1: negate the dividend
283 + mov pc, lr
284 +
285 +11: movlo r0, #0 @ |dividend| < |divisor|
286 + moveq r0, ip, asr #31 @ equal magnitudes: 0 or -1 from the result sign
287 + orreq r0, r0, #1 @ which becomes +1 or -1
288 + mov pc, lr
289 +
290 +12: ARM_DIV2_ORDER r1, r2
291 +
292 + cmp ip, #0
293 + mov r0, r3, lsr r2 @ |dividend| >> log2(|divisor|), flags from the cmp are kept
294 + rsbmi r0, r0, #0 @ operand signs differed: negate the quotient
295 + mov pc, lr
296 +
297 +ENDPROC(__divsi3)
298 +ENDPROC(__aeabi_idiv)
299 +
300 +ENTRY(__modsi3)
301 + @ Signed 32-bit modulo: r0 = r0 % r1, computed on magnitudes with the result taking the sign of the dividend. Uses r2, r3, ip and the flags; a zero divisor branches to Ldiv0.
302 + cmp r1, #0
303 + beq Ldiv0 @ divisor == 0
304 + rsbmi r1, r1, #0 @ loops below use unsigned.
305 + movs ip, r0 @ preserve sign of dividend
306 + rsbmi r0, r0, #0 @ if negative make positive
307 + subs r2, r1, #1 @ compare divisor with 1
308 + cmpne r0, r1 @ compare dividend with divisor
309 + moveq r0, #0 @ |divisor| == 1 or equal magnitudes: remainder is 0
310 + tsthi r1, r2 @ see if divisor is power of 2
311 + andeq r0, r0, r2 @ power of 2: keep the low bits (no effect when r0 is already 0)
312 + bls 10f @ skip the loop unless |dividend| > |divisor| and divisor is not a power of 2
313 +
314 + ARM_MOD_BODY r0, r1, r2, r3
315 +
316 +10: cmp ip, #0
317 + rsbmi r0, r0, #0 @ dividend was negative: negate the remainder
318 + mov pc, lr
319 +
320 +ENDPROC(__modsi3)
321 +
322 +#ifdef CONFIG_AEABI
323 +
324 +ENTRY(__aeabi_uidivmod)
325 + @ Unsigned divide with remainder: leaves the quotient in r0 and the remainder in r1; r2 and r3 are overwritten.
326 + stmfd sp!, {r0, r1, ip, lr} @ save dividend, divisor, ip and the return address
327 + bl __aeabi_uidiv @ r0 = quotient
328 + ldmfd sp!, {r1, r2, ip, lr} @ reload as r1 = dividend, r2 = divisor
329 + mul r3, r0, r2 @ r3 = quotient * divisor
330 + sub r1, r1, r3 @ r1 = dividend - quotient * divisor
331 + mov pc, lr
332 +
333 +ENDPROC(__aeabi_uidivmod)
334 +
335 +ENTRY(__aeabi_idivmod)
336 + @ Signed divide with remainder: leaves the quotient in r0 and the remainder in r1; r2 and r3 are overwritten.
337 + stmfd sp!, {r0, r1, ip, lr} @ save dividend, divisor, ip and the return address
338 + bl __aeabi_idiv @ r0 = quotient
339 + ldmfd sp!, {r1, r2, ip, lr} @ reload as r1 = dividend, r2 = divisor
340 + mul r3, r0, r2 @ r3 = quotient * divisor
341 + sub r1, r1, r3 @ r1 = dividend - quotient * divisor
342 + mov pc, lr
343 +
344 +ENDPROC(__aeabi_idivmod)
345 +
346 +#endif
347 +
348 +Ldiv0:
349 + @ Division-by-zero path used by the routines in this file: calls __div0, then returns 0 in r0 to the original caller.
350 + str lr, [sp, #-8]! @ push lr with an 8 byte slot (presumably to keep sp 8 byte aligned - confirm)
351 + bl __div0
352 + mov r0, #0 @ About as wrong as it could be.
353 + ldr pc, [sp], #8 @ pop the saved lr straight into pc and release the slot
354 +
355 +
This page took 0.058898 seconds and 3 git commands to generate.