Large improvement for parallel builds. Builds now work without V=99 and without warnings...
[openwrt.git] / target / linux / brcm-2.4 / patches / 003-bcm47xx_cache_fixes.patch
1 Index: linux-2.4.35.4/arch/mips/kernel/entry.S
2 ===================================================================
3 --- linux-2.4.35.4.orig/arch/mips/kernel/entry.S
4 +++ linux-2.4.35.4/arch/mips/kernel/entry.S
5 @@ -100,6 +100,10 @@ END(except_vec1_generic)
6 * and R4400 SC and MC versions.
7 */
8 NESTED(except_vec3_generic, 0, sp)
9 +#ifdef CONFIG_BCM4710
10 + nop
11 + nop
12 +#endif
13 #if R5432_CP0_INTERRUPT_WAR
14 mfc0 k0, CP0_INDEX
15 #endif
16 Index: linux-2.4.35.4/arch/mips/mm/c-r4k.c
17 ===================================================================
18 --- linux-2.4.35.4.orig/arch/mips/mm/c-r4k.c
19 +++ linux-2.4.35.4/arch/mips/mm/c-r4k.c
20 @@ -14,6 +14,12 @@
21 #include <linux/mm.h>
22 #include <linux/bitops.h>
23
24 +#ifdef CONFIG_BCM4710
25 +#include "../bcm947xx/include/typedefs.h"
26 +#include "../bcm947xx/include/sbconfig.h"
27 +#include <asm/paccess.h>
28 +#endif
29 +
30 #include <asm/bcache.h>
31 #include <asm/bootinfo.h>
32 #include <asm/cacheops.h>
33 @@ -40,6 +46,7 @@ static struct bcache_ops no_sc_ops = {
34 .bc_inv = (void *)no_sc_noop
35 };
36
37 +int bcm4710 = 0;
38 struct bcache_ops *bcops = &no_sc_ops;
39
40 #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x2010)
41 @@ -64,8 +71,10 @@ static inline void r4k_blast_dcache_page
42 static inline void r4k_blast_dcache_page_setup(void)
43 {
44 unsigned long dc_lsize = current_cpu_data.dcache.linesz;
45 -
46 - if (dc_lsize == 16)
47 +
48 + if (bcm4710)
49 + r4k_blast_dcache_page = blast_dcache_page;
50 + else if (dc_lsize == 16)
51 r4k_blast_dcache_page = blast_dcache16_page;
52 else if (dc_lsize == 32)
53 r4k_blast_dcache_page = r4k_blast_dcache_page_dc32;
54 @@ -77,7 +86,9 @@ static void r4k_blast_dcache_page_indexe
55 {
56 unsigned long dc_lsize = current_cpu_data.dcache.linesz;
57
58 - if (dc_lsize == 16)
59 + if (bcm4710)
60 + r4k_blast_dcache_page_indexed = blast_dcache_page_indexed;
61 + else if (dc_lsize == 16)
62 r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed;
63 else if (dc_lsize == 32)
64 r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed;
65 @@ -89,7 +100,9 @@ static inline void r4k_blast_dcache_setu
66 {
67 unsigned long dc_lsize = current_cpu_data.dcache.linesz;
68
69 - if (dc_lsize == 16)
70 + if (bcm4710)
71 + r4k_blast_dcache = blast_dcache;
72 + else if (dc_lsize == 16)
73 r4k_blast_dcache = blast_dcache16;
74 else if (dc_lsize == 32)
75 r4k_blast_dcache = blast_dcache32;
76 @@ -266,6 +279,7 @@ static void r4k___flush_cache_all(void)
77 r4k_blast_dcache();
78 r4k_blast_icache();
79
80 + if (!bcm4710)
81 switch (current_cpu_data.cputype) {
82 case CPU_R4000SC:
83 case CPU_R4000MC:
84 @@ -304,10 +318,10 @@ static void r4k_flush_cache_mm(struct mm
85 * Kludge alert. For obscure reasons R4000SC and R4400SC go nuts if we
86 * only flush the primary caches but R10000 and R12000 behave sane ...
87 */
88 - if (current_cpu_data.cputype == CPU_R4000SC ||
89 + if (!bcm4710 && (current_cpu_data.cputype == CPU_R4000SC ||
90 current_cpu_data.cputype == CPU_R4000MC ||
91 current_cpu_data.cputype == CPU_R4400SC ||
92 - current_cpu_data.cputype == CPU_R4400MC)
93 + current_cpu_data.cputype == CPU_R4400MC))
94 r4k_blast_scache();
95 }
96
97 @@ -383,12 +397,15 @@ static void r4k_flush_icache_range(unsig
98 unsigned long ic_lsize = current_cpu_data.icache.linesz;
99 unsigned long addr, aend;
100
101 + addr = start & ~(dc_lsize - 1);
102 + aend = (end - 1) & ~(dc_lsize - 1);
103 +
104 if (!cpu_has_ic_fills_f_dc) {
105 if (end - start > dcache_size)
106 r4k_blast_dcache();
107 else {
108 - addr = start & ~(dc_lsize - 1);
109 - aend = (end - 1) & ~(dc_lsize - 1);
110 + BCM4710_PROTECTED_FILL_TLB(addr);
111 + BCM4710_PROTECTED_FILL_TLB(aend);
112
113 while (1) {
114 /* Hit_Writeback_Inv_D */
115 @@ -403,8 +420,6 @@ static void r4k_flush_icache_range(unsig
116 if (end - start > icache_size)
117 r4k_blast_icache();
118 else {
119 - addr = start & ~(ic_lsize - 1);
120 - aend = (end - 1) & ~(ic_lsize - 1);
121 while (1) {
122 /* Hit_Invalidate_I */
123 protected_flush_icache_line(addr);
124 @@ -413,6 +428,9 @@ static void r4k_flush_icache_range(unsig
125 addr += ic_lsize;
126 }
127 }
128 +
129 + if (bcm4710)
130 + flush_cache_all();
131 }
132
133 /*
134 @@ -443,7 +461,8 @@ static void r4k_flush_icache_page(struct
135 if (cpu_has_subset_pcaches) {
136 unsigned long addr = (unsigned long) page_address(page);
137
138 - r4k_blast_scache_page(addr);
139 + if (!bcm4710)
140 + r4k_blast_scache_page(addr);
141 ClearPageDcacheDirty(page);
142
143 return;
144 @@ -451,6 +470,7 @@ static void r4k_flush_icache_page(struct
145
146 if (!cpu_has_ic_fills_f_dc) {
147 unsigned long addr = (unsigned long) page_address(page);
148 +
149 r4k_blast_dcache_page(addr);
150 ClearPageDcacheDirty(page);
151 }
152 @@ -477,7 +497,7 @@ static void r4k_dma_cache_wback_inv(unsi
153 /* Catch bad driver code */
154 BUG_ON(size == 0);
155
156 - if (cpu_has_subset_pcaches) {
157 + if (!bcm4710 && cpu_has_subset_pcaches) {
158 unsigned long sc_lsize = current_cpu_data.scache.linesz;
159
160 if (size >= scache_size) {
161 @@ -509,6 +529,8 @@ static void r4k_dma_cache_wback_inv(unsi
162 R4600_HIT_CACHEOP_WAR_IMPL;
163 a = addr & ~(dc_lsize - 1);
164 end = (addr + size - 1) & ~(dc_lsize - 1);
165 + BCM4710_FILL_TLB(a);
166 + BCM4710_FILL_TLB(end);
167 while (1) {
168 flush_dcache_line(a); /* Hit_Writeback_Inv_D */
169 if (a == end)
170 @@ -527,7 +549,7 @@ static void r4k_dma_cache_inv(unsigned l
171 /* Catch bad driver code */
172 BUG_ON(size == 0);
173
174 - if (cpu_has_subset_pcaches) {
175 + if (!bcm4710 && (cpu_has_subset_pcaches)) {
176 unsigned long sc_lsize = current_cpu_data.scache.linesz;
177
178 if (size >= scache_size) {
179 @@ -554,6 +576,8 @@ static void r4k_dma_cache_inv(unsigned l
180 R4600_HIT_CACHEOP_WAR_IMPL;
181 a = addr & ~(dc_lsize - 1);
182 end = (addr + size - 1) & ~(dc_lsize - 1);
183 + BCM4710_FILL_TLB(a);
184 + BCM4710_FILL_TLB(end);
185 while (1) {
186 flush_dcache_line(a); /* Hit_Writeback_Inv_D */
187 if (a == end)
188 @@ -577,6 +601,8 @@ static void r4k_flush_cache_sigtramp(uns
189 unsigned long dc_lsize = current_cpu_data.dcache.linesz;
190
191 R4600_HIT_CACHEOP_WAR_IMPL;
192 + BCM4710_PROTECTED_FILL_TLB(addr);
193 + BCM4710_PROTECTED_FILL_TLB(addr + 4);
194 protected_writeback_dcache_line(addr & ~(dc_lsize - 1));
195 protected_flush_icache_line(addr & ~(ic_lsize - 1));
196 if (MIPS4K_ICACHE_REFILL_WAR) {
197 @@ -986,10 +1012,12 @@ static void __init setup_scache(void)
198 case CPU_R4000MC:
199 case CPU_R4400SC:
200 case CPU_R4400MC:
201 - probe_scache_kseg1 = (probe_func_t) (KSEG1ADDR(&probe_scache));
202 - sc_present = probe_scache_kseg1(config);
203 - if (sc_present)
204 - c->options |= MIPS_CPU_CACHE_CDEX_S;
205 + if (!bcm4710) {
206 + probe_scache_kseg1 = (probe_func_t) (KSEG1ADDR(&probe_scache));
207 + sc_present = probe_scache_kseg1(config);
208 + if (sc_present)
209 + c->options |= MIPS_CPU_CACHE_CDEX_S;
210 + }
211 break;
212
213 case CPU_R10000:
214 @@ -1041,6 +1069,19 @@ static void __init setup_scache(void)
215 static inline void coherency_setup(void)
216 {
217 change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT);
218 +
219 +#if defined(CONFIG_BCM4310) || defined(CONFIG_BCM4704) || defined(CONFIG_BCM5365)
220 + if (BCM330X(current_cpu_data.processor_id)) {
221 + uint32 cm;
222 +
223 + cm = read_c0_diag();
224 + /* Enable icache */
225 + cm |= (1 << 31);
226 + /* Enable dcache */
227 + cm |= (1 << 30);
228 + write_c0_diag(cm);
229 + }
230 +#endif
231
232 /*
233 * c0_status.cu=0 specifies that updates by the sc instruction use
234 @@ -1073,6 +1114,12 @@ void __init ld_mmu_r4xx0(void)
235 memcpy((void *)(KSEG0 + 0x100), &except_vec2_generic, 0x80);
236 memcpy((void *)(KSEG1 + 0x100), &except_vec2_generic, 0x80);
237
238 + if (current_cpu_data.cputype == CPU_BCM4710 && (current_cpu_data.processor_id & PRID_REV_MASK) == 0) {
239 + printk("Enabling BCM4710A0 cache workarounds.\n");
240 + bcm4710 = 1;
241 + } else
242 + bcm4710 = 0;
243 +
244 probe_pcache();
245 setup_scache();
246
247 Index: linux-2.4.35.4/arch/mips/mm/tlbex-mips32.S
248 ===================================================================
249 --- linux-2.4.35.4.orig/arch/mips/mm/tlbex-mips32.S
250 +++ linux-2.4.35.4/arch/mips/mm/tlbex-mips32.S
251 @@ -90,6 +90,9 @@
252 .set noat
253 LEAF(except_vec0_r4000)
254 .set mips3
255 +#ifdef CONFIG_BCM4704
256 + nop
257 +#endif
258 #ifdef CONFIG_SMP
259 mfc0 k1, CP0_CONTEXT
260 la k0, pgd_current
261 Index: linux-2.4.35.4/include/asm-mips/r4kcache.h
262 ===================================================================
263 --- linux-2.4.35.4.orig/include/asm-mips/r4kcache.h
264 +++ linux-2.4.35.4/include/asm-mips/r4kcache.h
265 @@ -15,6 +15,18 @@
266 #include <asm/asm.h>
267 #include <asm/cacheops.h>
268
269 +#ifdef CONFIG_BCM4710
270 +#define BCM4710_DUMMY_RREG() (((sbconfig_t *)(KSEG1ADDR(SB_ENUM_BASE + SBCONFIGOFF)))->sbimstate)
271 +
272 +#define BCM4710_FILL_TLB(addr) (*(volatile unsigned long *)(addr))
273 +#define BCM4710_PROTECTED_FILL_TLB(addr) ({ unsigned long x; get_dbe(x, (volatile unsigned long *)(addr)); })
274 +#else
275 +#define BCM4710_DUMMY_RREG()
276 +
277 +#define BCM4710_FILL_TLB(addr)
278 +#define BCM4710_PROTECTED_FILL_TLB(addr)
279 +#endif
280 +
281 #define cache_op(op,addr) \
282 __asm__ __volatile__( \
283 " .set noreorder \n" \
284 @@ -27,12 +39,25 @@
285
286 static inline void flush_icache_line_indexed(unsigned long addr)
287 {
288 - cache_op(Index_Invalidate_I, addr);
289 + unsigned int way;
290 + unsigned long ws_inc = 1UL << current_cpu_data.dcache.waybit;
291 +
292 + for (way = 0; way < current_cpu_data.dcache.ways; way++) {
293 + cache_op(Index_Invalidate_I, addr);
294 + addr += ws_inc;
295 + }
296 }
297
298 static inline void flush_dcache_line_indexed(unsigned long addr)
299 {
300 - cache_op(Index_Writeback_Inv_D, addr);
301 + unsigned int way;
302 + unsigned long ws_inc = 1UL << current_cpu_data.dcache.waybit;
303 +
304 + for (way = 0; way < current_cpu_data.dcache.ways; way++) {
305 + BCM4710_DUMMY_RREG();
306 + cache_op(Index_Writeback_Inv_D, addr);
307 + addr += ws_inc;
308 + }
309 }
310
311 static inline void flush_scache_line_indexed(unsigned long addr)
312 @@ -47,6 +72,7 @@ static inline void flush_icache_line(uns
313
314 static inline void flush_dcache_line(unsigned long addr)
315 {
316 + BCM4710_DUMMY_RREG();
317 cache_op(Hit_Writeback_Inv_D, addr);
318 }
319
320 @@ -91,6 +117,7 @@ static inline void protected_flush_icach
321 */
322 static inline void protected_writeback_dcache_line(unsigned long addr)
323 {
324 + BCM4710_DUMMY_RREG();
325 __asm__ __volatile__(
326 ".set noreorder\n\t"
327 ".set mips3\n"
328 @@ -138,6 +165,62 @@ static inline void invalidate_tcache_pag
329 : "r" (base), \
330 "i" (op));
331
332 +#define cache_unroll(base,op) \
333 + __asm__ __volatile__(" \
334 + .set noreorder; \
335 + .set mips3; \
336 + cache %1, (%0); \
337 + .set mips0; \
338 + .set reorder" \
339 + : \
340 + : "r" (base), \
341 + "i" (op));
342 +
343 +
344 +static inline void blast_dcache(void)
345 +{
346 + unsigned long start = KSEG0;
347 + unsigned long dcache_size = current_cpu_data.dcache.waysize * current_cpu_data.dcache.ways;
348 + unsigned long end = (start + dcache_size);
349 +
350 + while(start < end) {
351 + BCM4710_DUMMY_RREG();
352 + cache_unroll(start,Index_Writeback_Inv_D);
353 + start += current_cpu_data.dcache.linesz;
354 + }
355 +}
356 +
357 +static inline void blast_dcache_page(unsigned long page)
358 +{
359 + unsigned long start = page;
360 + unsigned long end = start + PAGE_SIZE;
361 +
362 + BCM4710_FILL_TLB(start);
363 + do {
364 + BCM4710_DUMMY_RREG();
365 + cache_unroll(start,Hit_Writeback_Inv_D);
366 + start += current_cpu_data.dcache.linesz;
367 + } while (start < end);
368 +}
369 +
370 +static inline void blast_dcache_page_indexed(unsigned long page)
371 +{
372 + unsigned long start = page;
373 + unsigned long end = start + PAGE_SIZE;
374 + unsigned long ws_inc = 1UL << current_cpu_data.dcache.waybit;
375 + unsigned long ws_end = current_cpu_data.dcache.ways <<
376 + current_cpu_data.dcache.waybit;
377 + unsigned long ws, addr;
378 +
379 + for (ws = 0; ws < ws_end; ws += ws_inc) {
380 + start = page + ws;
381 + for (addr = start; addr < end; addr += current_cpu_data.dcache.linesz) {
382 + BCM4710_DUMMY_RREG();
383 + cache_unroll(addr,Index_Writeback_Inv_D);
384 + }
385 + }
386 +}
387 +
388 static inline void blast_dcache16(void)
389 {
390 unsigned long start = KSEG0;
391 @@ -148,8 +231,9 @@ static inline void blast_dcache16(void)
392 unsigned long ws, addr;
393
394 for (ws = 0; ws < ws_end; ws += ws_inc)
395 - for (addr = start; addr < end; addr += 0x200)
396 + for (addr = start; addr < end; addr += 0x200) {
397 cache16_unroll32(addr|ws,Index_Writeback_Inv_D);
398 + }
399 }
400
401 static inline void blast_dcache16_page(unsigned long page)
402 @@ -173,8 +257,9 @@ static inline void blast_dcache16_page_i
403 unsigned long ws, addr;
404
405 for (ws = 0; ws < ws_end; ws += ws_inc)
406 - for (addr = start; addr < end; addr += 0x200)
407 + for (addr = start; addr < end; addr += 0x200) {
408 cache16_unroll32(addr|ws,Index_Writeback_Inv_D);
409 + }
410 }
411
412 static inline void blast_icache16(void)
413 @@ -196,6 +281,7 @@ static inline void blast_icache16_page(u
414 unsigned long start = page;
415 unsigned long end = start + PAGE_SIZE;
416
417 + BCM4710_FILL_TLB(start);
418 do {
419 cache16_unroll32(start,Hit_Invalidate_I);
420 start += 0x200;
421 @@ -281,6 +367,7 @@ static inline void blast_scache16_page_i
422 : "r" (base), \
423 "i" (op));
424
425 +
426 static inline void blast_dcache32(void)
427 {
428 unsigned long start = KSEG0;
429 @@ -291,8 +378,9 @@ static inline void blast_dcache32(void)
430 unsigned long ws, addr;
431
432 for (ws = 0; ws < ws_end; ws += ws_inc)
433 - for (addr = start; addr < end; addr += 0x400)
434 + for (addr = start; addr < end; addr += 0x400) {
435 cache32_unroll32(addr|ws,Index_Writeback_Inv_D);
436 + }
437 }
438
439 static inline void blast_dcache32_page(unsigned long page)
440 @@ -316,8 +404,9 @@ static inline void blast_dcache32_page_i
441 unsigned long ws, addr;
442
443 for (ws = 0; ws < ws_end; ws += ws_inc)
444 - for (addr = start; addr < end; addr += 0x400)
445 + for (addr = start; addr < end; addr += 0x400) {
446 cache32_unroll32(addr|ws,Index_Writeback_Inv_D);
447 + }
448 }
449
450 static inline void blast_icache32(void)
451 @@ -339,6 +428,7 @@ static inline void blast_icache32_page(u
452 unsigned long start = page;
453 unsigned long end = start + PAGE_SIZE;
454
455 + BCM4710_FILL_TLB(start);
456 do {
457 cache32_unroll32(start,Hit_Invalidate_I);
458 start += 0x400;
459 @@ -443,6 +533,7 @@ static inline void blast_icache64_page(u
460 unsigned long start = page;
461 unsigned long end = start + PAGE_SIZE;
462
463 + BCM4710_FILL_TLB(start);
464 do {
465 cache64_unroll32(start,Hit_Invalidate_I);
466 start += 0x800;
467 Index: linux-2.4.35.4/include/asm-mips/stackframe.h
468 ===================================================================
469 --- linux-2.4.35.4.orig/include/asm-mips/stackframe.h
470 +++ linux-2.4.35.4/include/asm-mips/stackframe.h
471 @@ -209,6 +209,20 @@
472
473 #endif
474
475 +#if defined(CONFIG_BCM4710) || defined(CONFIG_BCM4704)
476 +
477 +#undef RESTORE_SP_AND_RET
478 +#define RESTORE_SP_AND_RET \
479 + lw sp, PT_R29(sp); \
480 + .set mips3; \
481 + nop; \
482 + nop; \
483 + eret; \
484 + .set mips0
485 +
486 +#endif
487 +
488 +
489 #define RESTORE_SP \
490 lw sp, PT_R29(sp); \
491
492 Index: linux-2.4.35.4/mm/memory.c
493 ===================================================================
494 --- linux-2.4.35.4.orig/mm/memory.c
495 +++ linux-2.4.35.4/mm/memory.c
496 @@ -927,6 +927,7 @@ static inline void break_cow(struct vm_a
497 flush_page_to_ram(new_page);
498 flush_cache_page(vma, address);
499 establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
500 + flush_icache_page(vma, new_page);
501 }
502
503 /*
This page took 0.063098 seconds and 5 git commands to generate.