1 From 248d9a5b63bba72bfc316b8a48c6163fce5acc22 Mon Sep 17 00:00:00 2001
2 From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
3 Date: Thu, 18 Feb 2010 21:53:01 +0200
4 Subject: [PATCH] ARM: Use cache alignment from asm/cache.h
6 Replace hard-coded cache line alignment (.align 5) with L1_CACHE_SHIFT
7 from asm/cache.h so that code is aligned correctly on ARM variants with different cache line sizes.
9 Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
11 arch/arm/boot/compressed/head.S | 11 ++++++-----
12 arch/arm/include/asm/dma-mapping.h | 2 +-
13 arch/arm/kernel/entry-armv.S | 31 ++++++++++++++++---------------
14 arch/arm/kernel/entry-common.S | 7 ++++---
15 arch/arm/kernel/head.S | 3 ++-
16 arch/arm/kernel/vmlinux.lds.S | 5 +++--
17 arch/arm/lib/copy_page.S | 2 +-
18 arch/arm/lib/memchr.S | 3 ++-
19 arch/arm/lib/memset.S | 3 ++-
20 arch/arm/lib/memzero.S | 3 ++-
21 arch/arm/lib/strchr.S | 3 ++-
22 arch/arm/lib/strncpy_from_user.S | 3 ++-
23 arch/arm/lib/strnlen_user.S | 3 ++-
24 arch/arm/lib/strrchr.S | 3 ++-
25 arch/arm/mm/abort-ev4.S | 3 ++-
26 arch/arm/mm/abort-nommu.S | 3 ++-
27 16 files changed, 51 insertions(+), 37 deletions(-)
29 --- a/arch/arm/boot/compressed/head.S
30 +++ b/arch/arm/boot/compressed/head.S
32 * published by the Free Software Foundation.
34 #include <linux/linkage.h>
35 +#include <asm/cache.h>
39 @@ -349,7 +350,7 @@ params: ldr r0, =0x10000100 @ params_p
40 * This routine must preserve:
44 + .align L1_CACHE_SHIFT
45 cache_on: mov r3, #8 @ cache_on function
48 @@ -537,7 +538,7 @@ __common_mmu_cache_on:
49 mcr p15, 0, r3, c2, c0, 0 @ load page table pointer
50 mcr p15, 0, r1, c3, c0, 0 @ load domain access control
52 - .align 5 @ cache line aligned
53 + .align L1_CACHE_SHIFT @ cache line aligned
54 1: mcr p15, 0, r0, c1, c0, 0 @ load control register
55 mrc p15, 0, r0, c1, c0, 0 @ and read it back to
56 sub pc, lr, r0, lsr #32 @ properly flush pipeline
57 @@ -556,7 +557,7 @@ __common_mmu_cache_on:
59 * r9-r12,r14 = corrupted
62 + .align L1_CACHE_SHIFT
63 reloc_start: add r9, r5, r0
64 sub r9, r9, #128 @ do not copy the stack
66 @@ -786,7 +787,7 @@ proc_types:
67 * This routine must preserve:
71 + .align L1_CACHE_SHIFT
72 cache_off: mov r3, #12 @ cache_off function
75 @@ -861,7 +862,7 @@ __armv3_mmu_cache_off:
76 * This routine must preserve:
80 + .align L1_CACHE_SHIFT
84 --- a/arch/arm/kernel/entry-armv.S
85 +++ b/arch/arm/kernel/entry-armv.S
87 #include <asm/unwind.h>
88 #include <asm/unistd.h>
90 +#include <asm/cache.h>
92 #include "entry-header.S"
94 @@ -165,7 +166,7 @@ ENDPROC(__und_invalid)
99 + .align L1_CACHE_SHIFT
103 @@ -213,7 +214,7 @@ __dabt_svc:
108 + .align L1_CACHE_SHIFT
112 @@ -257,7 +258,7 @@ svc_preempt:
117 + .align L1_CACHE_SHIFT
119 #ifdef CONFIG_KPROBES
120 @ If a kprobe is about to simulate a "stmdb sp..." instruction,
121 @@ -303,7 +304,7 @@ __und_svc:
126 + .align L1_CACHE_SHIFT
130 @@ -339,7 +340,7 @@ __pabt_svc:
135 + .align L1_CACHE_SHIFT
139 @@ -412,7 +413,7 @@ ENDPROC(__pabt_svc)
144 + .align L1_CACHE_SHIFT
148 @@ -444,7 +445,7 @@ __dabt_usr:
153 + .align L1_CACHE_SHIFT
157 @@ -473,7 +474,7 @@ ENDPROC(__irq_usr)
162 + .align L1_CACHE_SHIFT
166 @@ -689,7 +690,7 @@ __und_usr_unknown:
168 ENDPROC(__und_usr_unknown)
171 + .align L1_CACHE_SHIFT
175 @@ -803,7 +804,7 @@ ENDPROC(__switch_to)
180 + .align L1_CACHE_SHIFT
181 .globl __kuser_helper_start
182 __kuser_helper_start:
184 @@ -843,7 +844,7 @@ __kuser_memory_barrier: @ 0xffff0fa0
189 + .align L1_CACHE_SHIFT
192 * Reference prototype:
193 @@ -973,7 +974,7 @@ kuser_cmpxchg_fixup:
198 + .align L1_CACHE_SHIFT
201 * Reference prototype:
202 @@ -1051,7 +1052,7 @@ __kuser_helper_end:
203 * of which is copied into r0 for the mode specific abort handler.
205 .macro vector_stub, name, mode, correction=0
207 + .align L1_CACHE_SHIFT
211 @@ -1182,7 +1183,7 @@ __stubs_start:
212 .long __und_invalid @ e
213 .long __und_invalid @ f
216 + .align L1_CACHE_SHIFT
218 /*=============================================================================
220 @@ -1212,7 +1213,7 @@ vector_addrexcptn:
221 * We group all the following data together to optimise
222 * for CPUs with separate I & D caches.
225 + .align L1_CACHE_SHIFT
229 --- a/arch/arm/kernel/entry-common.S
230 +++ b/arch/arm/kernel/entry-common.S
233 #include <asm/unistd.h>
234 #include <asm/ftrace.h>
235 +#include <asm/cache.h>
236 #include <mach/entry-macro.S>
237 #include <asm/unwind.h>
239 #include "entry-header.S"
243 + .align L1_CACHE_SHIFT
245 * This is the fast syscall return path. We do as little as
246 * possible here, and this includes saving r0 back into the SVC
247 @@ -221,7 +222,7 @@ ftrace_stub:
248 #define A710(code...)
252 + .align L1_CACHE_SHIFT
254 sub sp, sp, #S_FRAME_SIZE
255 stmia sp, {r0 - r12} @ Calling r0 - r12
256 @@ -354,7 +355,7 @@ __sys_trace_return:
261 + .align L1_CACHE_SHIFT
262 #ifdef CONFIG_ALIGNMENT_TRAP
263 .type __cr_alignment, #object
265 --- a/arch/arm/kernel/head.S
266 +++ b/arch/arm/kernel/head.S
268 #include <asm/memory.h>
269 #include <asm/thread_info.h>
270 #include <asm/system.h>
271 +#include <asm/cache.h>
273 #if (PHYS_OFFSET & 0x001fffff)
274 #error "PHYS_OFFSET must be at an even 2MiB boundary!"
275 @@ -192,7 +193,7 @@ ENDPROC(__enable_mmu)
277 * other registers depend on the function called upon completion
280 + .align L1_CACHE_SHIFT
283 mcr p15, 0, r0, c1, c0, 0 @ write control reg
284 --- a/arch/arm/kernel/vmlinux.lds.S
285 +++ b/arch/arm/kernel/vmlinux.lds.S
287 #include <asm/thread_info.h>
288 #include <asm/memory.h>
289 #include <asm/page.h>
290 +#include <asm/cache.h>
294 --- a/arch/arm/lib/copy_page.S
295 +++ b/arch/arm/lib/copy_page.S
297 #define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
301 + .align L1_CACHE_SHIFT
303 * StrongARM optimised copy_page routine
304 * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s)
305 --- a/arch/arm/lib/memchr.S
306 +++ b/arch/arm/lib/memchr.S
309 #include <linux/linkage.h>
310 #include <asm/assembler.h>
311 +#include <asm/cache.h>
315 + .align L1_CACHE_SHIFT
319 --- a/arch/arm/lib/memset.S
320 +++ b/arch/arm/lib/memset.S
323 #include <linux/linkage.h>
324 #include <asm/assembler.h>
325 +#include <asm/cache.h>
329 + .align L1_CACHE_SHIFT
332 1: subs r2, r2, #4 @ 1 do we have enough
333 --- a/arch/arm/lib/memzero.S
334 +++ b/arch/arm/lib/memzero.S
337 #include <linux/linkage.h>
338 #include <asm/assembler.h>
339 +#include <asm/cache.h>
343 + .align L1_CACHE_SHIFT
346 * Align the pointer in r0. r3 contains the number of bytes that we are
347 --- a/arch/arm/lib/strchr.S
348 +++ b/arch/arm/lib/strchr.S
351 #include <linux/linkage.h>
352 #include <asm/assembler.h>
353 +#include <asm/cache.h>
357 + .align L1_CACHE_SHIFT
361 --- a/arch/arm/lib/strncpy_from_user.S
362 +++ b/arch/arm/lib/strncpy_from_user.S
364 #include <linux/linkage.h>
365 #include <asm/assembler.h>
366 #include <asm/errno.h>
367 +#include <asm/cache.h>
371 + .align L1_CACHE_SHIFT
374 * Copy a string from user space to kernel space.
375 --- a/arch/arm/lib/strnlen_user.S
376 +++ b/arch/arm/lib/strnlen_user.S
378 #include <linux/linkage.h>
379 #include <asm/assembler.h>
380 #include <asm/errno.h>
381 +#include <asm/cache.h>
385 + .align L1_CACHE_SHIFT
387 /* Prototype: unsigned long __strnlen_user(const char *str, long n)
388 * Purpose : get length of a string in user memory
389 --- a/arch/arm/lib/strrchr.S
390 +++ b/arch/arm/lib/strrchr.S
393 #include <linux/linkage.h>
394 #include <asm/assembler.h>
395 +#include <asm/cache.h>
399 + .align L1_CACHE_SHIFT
403 --- a/arch/arm/mm/abort-ev4.S
404 +++ b/arch/arm/mm/abort-ev4.S
406 #include <linux/linkage.h>
407 #include <asm/assembler.h>
408 +#include <asm/cache.h>
410 * Function: v4_early_abort
413 * abort here if the I-TLB and D-TLB aren't seeing the same
414 * picture. Unfortunately, this does happen. We live with it.
417 + .align L1_CACHE_SHIFT
418 ENTRY(v4_early_abort)
419 mrc p15, 0, r1, c5, c0, 0 @ get FSR
420 mrc p15, 0, r0, c6, c0, 0 @ get FAR
421 --- a/arch/arm/mm/abort-nommu.S
422 +++ b/arch/arm/mm/abort-nommu.S
424 #include <linux/linkage.h>
425 #include <asm/assembler.h>
426 +#include <asm/cache.h>
428 * Function: nommu_early_abort
431 * Note: There is no FSR/FAR on !CPU_CP15_MMU cores.
432 * Just fill zero into the registers.
435 + .align L1_CACHE_SHIFT
436 ENTRY(nommu_early_abort)
437 mov r0, #0 @ clear r0, r1 (no FSR/FAR)