1 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c
2 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c 2007-05-20 01:46:35.000000000 +0200
3 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c 2007-05-20 01:51:47.000000000 +0200
6 #include <linux/types.h>
8 -/* No, there's no macro saying 12*4, since it is "hard" to get it into
9 - the asm in a good way. Thus better to expose the problem everywhere.
12 -/* Assuming 1 cycle per dword written or read (ok, not really true), and
13 - one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
14 - so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
16 -#define ZERO_BLOCK_SIZE (1*12*4)
18 -void *memset(void *pdst,
22 + * memset - Fill a region of memory with the given value
23 + * @s: Pointer to the start of the area.
24 + * @c: The byte to fill the area with
25 + * @count: The size of the area.
27 + * Do not use memset() to access IO space; use memset_io() instead.
29 +void *memset(void *s, int c, size_t count)
31 - /* Ok. Now we want the parameters put in special registers.
32 - Make sure the compiler is able to make something useful of this. */
34 - register char *return_dst __asm__ ("r10") = pdst;
35 - register int n __asm__ ("r12") = plen;
36 - register int lc __asm__ ("r11") = c;
38 - /* Most apps use memset sanely. Only those memsetting about 3..4
39 - bytes or less get penalized compared to the generic implementation
40 - - and that's not really sane use. */
42 - /* Ugh. This is fragile at best. Check with newer GCC releases, if
43 - they compile cascaded "x |= x << 8" sanely! */
44 - __asm__("movu.b %0,$r13\n\t"
46 - "move.b %0,$r13\n\t"
47 - "move.d $r13,%0\n\t"
50 - : "=r" (lc) : "0" (lc) : "r13");
53 - register char *dst __asm__ ("r13") = pdst;
55 - /* This is NONPORTABLE, but since this whole routine is */
56 - /* grossly nonportable that doesn't matter. */
58 - if (((unsigned long) pdst & 3) != 0
59 - /* Oops! n=0 must be a legal call, regardless of alignment. */
62 - if ((unsigned long)dst & 1)
69 - if ((unsigned long)dst & 2)
77 - /* Now the fun part. For the threshold value of this, check the equation
79 - /* Decide which copying method to use. */
80 - if (n >= ZERO_BLOCK_SIZE)
82 - /* For large copies we use 'movem' */
84 - /* It is not optimal to tell the compiler about clobbering any
85 - registers; that will move the saving/restoring of those registers
86 - to the function prologue/epilogue, and make non-movem sizes
89 - This method is not foolproof; it assumes that the "asm reg"
90 - declarations at the beginning of the function really are used
91 - here (beware: they may be moved to temporary registers).
92 - This way, we do not have to save/move the registers around into
93 - temporaries; we can safely use them straight away.
95 - If you want to check that the allocation was right; then
96 - check the equalities in the first comment. It should say
97 - "r13=r13, r12=r12, r11=r11" */
99 - ;; Check that the following is true (same register names on
100 - ;; both sides of equal sign, as in r8=r8):
101 - ;; %0=r13, %1=r12, %4=r11
103 - ;; Save the registers we'll clobber in the movem process
104 - ;; on the stack. Don't mention them to gcc, it will only be
121 - ;; Now we've got this:
125 - ;; Update n for the first loop
132 - addq 12*4,$r12 ;; compensate for last loop underflowing n
134 - ;; Restore registers from stack
137 - /* Outputs */ : "=r" (dst), "=r" (n)
138 - /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
142 - /* Either we directly starts copying, using dword copying
143 - in a loop, or we copy as much as possible with 'movem'
144 - and then the last block (<44 bytes) is copied here.
145 - This will work since 'movem' will have updated src,dst,n. */
149 - *((long*)dst)++ = lc;
150 - *((long*)dst)++ = lc;
151 - *((long*)dst)++ = lc;
152 - *((long*)dst)++ = lc;
157 - /* A switch() is definitely the fastest although it takes a LOT of code.
158 - * Particularly if you inline code this.
165 - *(char*)dst = (char) lc;
168 - *(short*)dst = (short) lc;
171 - *((short*)dst)++ = (short) lc;
172 - *(char*)dst = (char) lc;
175 - *((long*)dst)++ = lc;
178 - *((long*)dst)++ = lc;
179 - *(char*)dst = (char) lc;
182 - *((long*)dst)++ = lc;
183 - *(short*)dst = (short) lc;
186 - *((long*)dst)++ = lc;
187 - *((short*)dst)++ = (short) lc;
188 - *(char*)dst = (char) lc;
191 - *((long*)dst)++ = lc;
192 - *((long*)dst)++ = lc;
195 - *((long*)dst)++ = lc;
196 - *((long*)dst)++ = lc;
197 - *(char*)dst = (char) lc;
200 - *((long*)dst)++ = lc;
201 - *((long*)dst)++ = lc;
202 - *(short*)dst = (short) lc;
205 - *((long*)dst)++ = lc;
206 - *((long*)dst)++ = lc;
207 - *((short*)dst)++ = (short) lc;
208 - *(char*)dst = (char) lc;
211 - *((long*)dst)++ = lc;
212 - *((long*)dst)++ = lc;
213 - *((long*)dst)++ = lc;
216 - *((long*)dst)++ = lc;
217 - *((long*)dst)++ = lc;
218 - *((long*)dst)++ = lc;
219 - *(char*)dst = (char) lc;
222 - *((long*)dst)++ = lc;
223 - *((long*)dst)++ = lc;
224 - *((long*)dst)++ = lc;
225 - *(short*)dst = (short) lc;
228 - *((long*)dst)++ = lc;
229 - *((long*)dst)++ = lc;
230 - *((long*)dst)++ = lc;
231 - *((short*)dst)++ = (short) lc;
232 - *(char*)dst = (char) lc;
241 - return return_dst; /* destination pointer. */
243 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c linux-2.6.19.2/arch/cris/arch-v10/lib/string.c
244 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c 2007-05-20 01:46:35.000000000 +0200
245 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/string.c 2007-05-20 01:51:19.000000000 +0200
248 #include <linux/types.h>
250 -void *memcpy(void *pdst,
254 + * memcpy - Copy one area of memory to another
255 + * @dest: Where to copy to
256 + * @src: Where to copy from
257 + * @count: The size of the area.
259 + * You should not use this function to access IO space; use memcpy_toio()
260 + * or memcpy_fromio() instead.
262 +void *memcpy(void *dest, const void *src, size_t count)
264 - /* Ok. Now we want the parameters put in special registers.
265 - Make sure the compiler is able to make something useful of this.
266 - As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
268 + const char *s = src;
270 - If gcc was allright, it really would need no temporaries, and no
271 - stack space to save stuff on. */
273 - register void *return_dst __asm__ ("r10") = pdst;
274 - register char *dst __asm__ ("r13") = pdst;
275 - register const char *src __asm__ ("r11") = psrc;
276 - register int n __asm__ ("r12") = pn;
279 - /* When src is aligned but not dst, this makes a few extra needless
280 - cycles. I believe it would take as many to check that the
281 - re-alignment was unnecessary. */
282 - if (((unsigned long) dst & 3) != 0
283 - /* Don't align if we wouldn't copy more than a few bytes; so we
284 - don't have to check further for overflows. */
287 - if ((unsigned long) dst & 1)
290 - *(char*)dst = *(char*)src;
295 - if ((unsigned long) dst & 2)
298 - *(short*)dst = *(short*)src;
304 - /* Decide which copying method to use. */
305 - if (n >= 44*2) /* Break even between movem and
306 - move16 is at 38.7*2, but modulo 44. */
308 - /* For large copies we use 'movem' */
310 - /* It is not optimal to tell the compiler about clobbering any
311 - registers; that will move the saving/restoring of those registers
312 - to the function prologue/epilogue, and make non-movem sizes
315 - This method is not foolproof; it assumes that the "asm reg"
316 - declarations at the beginning of the function really are used
317 - here (beware: they may be moved to temporary registers).
318 - This way, we do not have to save/move the registers around into
319 - temporaries; we can safely use them straight away.
321 - If you want to check that the allocation was right; then
322 - check the equalities in the first comment. It should say
323 - "r13=r13, r11=r11, r12=r12" */
324 - __asm__ volatile ("
325 - ;; Check that the following is true (same register names on
326 - ;; both sides of equal sign, as in r8=r8):
327 - ;; %0=r13, %1=r11, %2=r12
329 - ;; Save the registers we'll use in the movem process
334 - ;; Now we've got this:
339 - ;; Update n for the first loop
347 - addq 44,$r12 ;; compensate for last loop underflowing n
349 - ;; Restore registers from stack
352 - /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
353 - /* Inputs */ : "0" (dst), "1" (src), "2" (n));
357 - /* Either we directly starts copying, using dword copying
358 - in a loop, or we copy as much as possible with 'movem'
359 - and then the last block (<44 bytes) is copied here.
360 - This will work since 'movem' will have updated src,dst,n. */
364 - *((long*)dst)++ = *((long*)src)++;
365 - *((long*)dst)++ = *((long*)src)++;
366 - *((long*)dst)++ = *((long*)src)++;
367 - *((long*)dst)++ = *((long*)src)++;
371 - /* A switch() is definitely the fastest although it takes a LOT of code.
372 - * Particularly if you inline code this.
379 - *(char*)dst = *(char*)src;
382 - *(short*)dst = *(short*)src;
385 - *((short*)dst)++ = *((short*)src)++;
386 - *(char*)dst = *(char*)src;
389 - *((long*)dst)++ = *((long*)src)++;
392 - *((long*)dst)++ = *((long*)src)++;
393 - *(char*)dst = *(char*)src;
396 - *((long*)dst)++ = *((long*)src)++;
397 - *(short*)dst = *(short*)src;
400 - *((long*)dst)++ = *((long*)src)++;
401 - *((short*)dst)++ = *((short*)src)++;
402 - *(char*)dst = *(char*)src;
405 - *((long*)dst)++ = *((long*)src)++;
406 - *((long*)dst)++ = *((long*)src)++;
409 - *((long*)dst)++ = *((long*)src)++;
410 - *((long*)dst)++ = *((long*)src)++;
411 - *(char*)dst = *(char*)src;
414 - *((long*)dst)++ = *((long*)src)++;
415 - *((long*)dst)++ = *((long*)src)++;
416 - *(short*)dst = *(short*)src;
419 - *((long*)dst)++ = *((long*)src)++;
420 - *((long*)dst)++ = *((long*)src)++;
421 - *((short*)dst)++ = *((short*)src)++;
422 - *(char*)dst = *(char*)src;
425 - *((long*)dst)++ = *((long*)src)++;
426 - *((long*)dst)++ = *((long*)src)++;
427 - *((long*)dst)++ = *((long*)src)++;
430 - *((long*)dst)++ = *((long*)src)++;
431 - *((long*)dst)++ = *((long*)src)++;
432 - *((long*)dst)++ = *((long*)src)++;
433 - *(char*)dst = *(char*)src;
436 - *((long*)dst)++ = *((long*)src)++;
437 - *((long*)dst)++ = *((long*)src)++;
438 - *((long*)dst)++ = *((long*)src)++;
439 - *(short*)dst = *(short*)src;
442 - *((long*)dst)++ = *((long*)src)++;
443 - *((long*)dst)++ = *((long*)src)++;
444 - *((long*)dst)++ = *((long*)src)++;
445 - *((short*)dst)++ = *((short*)src)++;
446 - *(char*)dst = *(char*)src;
450 - return return_dst; /* destination pointer. */
456 diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c
457 --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c 2007-05-16 22:11:26.000000000 +0200
458 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c 2007-05-16 23:17:41.000000000 +0200
460 If you want to check that the allocation was right; then
461 check the equalities in the first comment. It should say
462 "r13=r13, r11=r11, r12=r12". */
463 - __asm__ volatile ("\
464 - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
468 - ;; Save the registers we'll use in the movem process
473 - ;; Now we've got this:
478 - ;; Update n for the first loop
481 -; Since the noted PC of a faulting instruction in a delay-slot of a taken
482 -; branch, is that of the branch target, we actually point at the from-movem
483 -; for this case. There is no ambiguity here; if there was a fault in that
484 -; instruction (meaning a kernel oops), the faulted PC would be the address
485 -; after *that* movem.
493 - addq 44,$r12 ;; compensate for last loop underflowing n
495 - ;; Restore registers from stack
498 - .section .fixup,\"ax\"
500 -; To provide a correct count in r10 of bytes that failed to be copied,
501 -; we jump back into the loop if the loop-branch was taken. There is no
502 -; performance penalty for sany use; the program will segfault soon enough.
516 - .section __ex_table,\"a\"
521 + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t"
524 + "subq 11*4,$sp\n\t"
525 + "movem $r10,[$sp]\n\t"
528 + "movem [$r11+],$r10\n\t"
531 + "movem $r10,[$r13+]\n\t"
533 + "addq 44,$r12 \n\t"
534 + "movem [$sp+],$r10\n\t"
536 + ".section .fixup,\"ax\"\n\t"
538 + "move.d [$sp],$r10\n\t"
540 + "move.d $r10,[$sp]\n\t"
543 + "movem [$sp+],$r10\n\t"
548 + ".section __ex_table,\"a\"\n\t"
553 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
554 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
555 @@ -253,60 +228,32 @@
556 If you want to check that the allocation was right; then
557 check the equalities in the first comment. It should say
558 "r13=r13, r11=r11, r12=r12" */
559 - __asm__ volatile ("
560 - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
564 - ;; Save the registers we'll use in the movem process
569 - ;; Now we've got this:
574 - ;; Update n for the first loop
583 - addq 44,$r12 ;; compensate for last loop underflowing n
585 - ;; Restore registers from stack
588 - .section .fixup,\"ax\"
590 -;; Do not jump back into the loop if we fail. For some uses, we get a
591 -;; page fault somewhere on the line. Without checking for page limits,
592 -;; we don't know where, but we need to copy accurately and keep an
593 -;; accurate count; not just clear the whole line. To do that, we fall
594 -;; down in the code below, proceeding with smaller amounts. It should
595 -;; be kept in mind that we have to cater to code like what at one time
596 -;; was in fs/super.c:
597 -;; i = size - copy_from_user((void *)page, data, size);
598 -;; which would cause repeated faults while clearing the remainder of
599 -;; the SIZE bytes at PAGE after the first fault.
600 -;; A caveat here is that we must not fall through from a failing page
605 - addq 44,$r12 ;; Get back count before faulting point.
606 - subq 44,$r11 ;; Get back pointer to faulting movem-line.
607 - jump 4b ;; Fall through, pretending the fault didn't happen.
610 - .section __ex_table,\"a\"
614 + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t"
617 + "subq 11*4,$sp\n\t"
618 + "movem $r10,[$sp]\n\t"
621 + "movem [$r11+],$r10\n\t"
625 + "movem $r10,[$r13+]\n\t"
626 + "addq 44,$r12 \n\t"
627 + "movem [$sp+],$r10\n\t"
629 + ".section .fixup,\"ax\"\n\t"
631 + "movem [$sp+],$r10\n\t"
636 + ".section __ex_table,\"a\"\n\t"
640 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
641 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
642 @@ -425,66 +372,50 @@
643 If you want to check that the allocation was right; then
644 check the equalities in the first comment. It should say
645 something like "r13=r13, r11=r11, r12=r12". */
646 - __asm__ volatile ("
647 - .ifnc %0%1%2,$r13$r12$r10 \n\
651 - ;; Save the registers we'll clobber in the movem process
652 - ;; on the stack. Don't mention them to gcc, it will only be
670 - ;; Now we've got this:
674 - ;; Update n for the first loop
681 - addq 12*4,$r12 ;; compensate for last loop underflowing n
683 - ;; Restore registers from stack
686 - .section .fixup,\"ax\"
701 - .section __ex_table,\"a\"
707 + ".ifnc %0%1%2,$r13$r12$r10\n\t"
710 + "subq 11*4,$sp\n\t"
711 + "movem $r10,[$sp]\n\t"
724 + "subq 12*4,$r12\n\t"
726 + "subq 12*4,$r12\n\t"
728 + "movem $r11,[$r13+]\n\t"
730 + "addq 12*4,$r12 \n\t"
731 + "movem [$sp+],$r10\n\t"
733 + ".section .fixup,\"ax\"\n\t"
735 + "move.d [$sp],$r10\n\t"
736 + "addq 12*4,$r10\n\t"
737 + "move.d $r10,[$sp]\n\t"
741 + "movem [$sp+],$r10\n\t"
742 + "addq 12*4,$r10\n\t"
743 + "addq 12*4,$r12\n\t"
746 + ".section __ex_table,\"a\"\n\t"
750 /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
751 /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
752 /* Clobber */ : "r11");