1 diff -Nurb src/linux/linux.orig/Documentation/netswap.txt src/linux/linux/Documentation/netswap.txt
2 --- src/linux/linux.orig/Documentation/netswap.txt 1969-12-31 19:00:00.000000000 -0500
3 +++ src/linux/linux/Documentation/netswap.txt 2004-05-31 02:18:03.000000000 -0400
5 + Swapping over network
7 +Support for this is enabled via the CONFIG_NETSWAP option, which is
8 +automatically enabled when enabling swap files located on NFS volumes
9 +(CONFIG_SWAP_VIA_NFS).
11 +When swapping to files located on a network file system like NFS or
12 +CODA or others or to nbd (network block device, see `nbd.txt')
13 +partitions there is the problem that this requires additional memory,
14 +besides the page which is currently swapped in or out, probably at
15 +least two more pages for each page in question.
17 +This means that not only does there need to be free space left in the swap
18 +file or the swap partition, but in addition there must be enough free
19 +memory left in the system to perform the swap out of pages.
21 +This is particularly painful as receiving data over the network itself
22 +consumes memory, and this memory is allocated from an interrupt
23 +context (i.e. in the interrupt handler of the network card). That
24 +means that on a congested network there are chances that the machine
25 +runs out of memory, simply because the network device's interrupt
26 +routines allocate memory faster than it is freed by swapping via
29 +To cope with this problem, there is a new socket option `SO_SWAPPING'
30 +which has to be set on the `SOL_SOCKET' level with setsockopt() (see
31 +setsockopt(2)). When this option is set on any network socket, then
32 +the system will start to drop network packets it receives on any other
33 +socket when the number of free pages falls below a certain threshold.
35 +This threshold initially is 4 pages less than `freepages.min' (see
36 +`Documentation/sysctl/vm.txt') but can be tuned using the sysctl
37 +interface by writing to the file `/proc/sys/net/swapping/threshold'
39 +There are two other files:
41 +`/proc/sys/net/swapping/dropped':
42 + How many network packets have been dropped so far. This file is
43 + writable, writing to it simply sets the counter to the given value
44 + (useful for resetting the counter).
46 +`/proc/sys/net/swapping/sock_count':
47 + How many network sockets have the `SO_SWAPPING' option set (read
50 +When using swap-files on NFS volumes, then the `SO_SWAPPING' option is
51 +set or cleared by swapon/swapoff system calls, so the user need not
54 +Swapping over the network is insecure unless the data is
55 +encrypted, which is not the case with NFS. It is also very slow.
56 diff -Nurb src/linux/linux.orig/Documentation/nfsswap.txt src/linux/linux/Documentation/nfsswap.txt
57 --- src/linux/linux.orig/Documentation/nfsswap.txt 1969-12-31 19:00:00.000000000 -0500
58 +++ src/linux/linux/Documentation/nfsswap.txt 2004-05-31 02:18:03.000000000 -0400
60 + Swapping to files on NFS volumes
62 +To do this you have to say `Y' or `M' to the CONFIG_SWAP_VIA_NFS
63 +configuration option. When compiling support for this as a module you
64 +should read `Documentation/modules.txt'. For auto-loading of the
65 +module during the `swapon' system call you have to place a line like
67 +alias swapfile-mod nfsswap
69 +in `/etc/modules.conf' (or `/etc/conf.modules', depending on your
70 +setup). NFS volumes holding swapfiles should be mounted with `rsize'
71 +and `wsize' set to something less than the size of a page, otherwise
72 +deadlocks caused by memory fragmentation can happen, i.e. mount the
73 +volume which is to hold the swapfiles with
75 +mount -t nfs -o rsize=2048,wsize=2048 NFS_SERVER_IP:/server_volume /mount_point
77 +or set the option in `/etc/fstab'. Read `Documentation/nfsroot.txt' to
78 +learn how to set mount options for the root file system, if your swap
79 +files are to be located on the root file system.
81 +Setting the `rsize' and `wsize' to anything less than PAGE_SIZE is a
82 +performance hit, so you probably want to have at least two volumes
83 +mounted, one for the swapfiles, one for the rest.
85 +You may want to read `Documentation/netswap.txt' as well.
87 +Swapfiles on NFS volumes can be treated like any other swapfile,
90 +dd if=/dev/zero of=/swapfiles/SWAPFILE bs=1k count=20480
91 +mkswap /swapfiles/SWAPFILE
92 +swapon /swapfiles/SWAPFILE
94 +will create a 20M swapfile and tell the system to use it. Actually,
95 +one could use lseek(2) to create an empty swapfile. This is different
96 +from swapfiles located on local harddisk.
98 +Swapping over the network is insecure unless the data is
99 +encrypted, which is not the case with NFS. It is also very slow.
101 diff -Nurb src/linux/linux.orig/drivers/block/blkpg.c src/linux/linux/drivers/block/blkpg.c
102 --- src/linux/linux.orig/drivers/block/blkpg.c 2003-07-04 04:11:31.000000000 -0400
103 +++ src/linux/linux/drivers/block/blkpg.c 2004-05-31 02:18:03.000000000 -0400
105 #include <linux/blk.h> /* for set_device_ro() */
106 #include <linux/blkpg.h>
107 #include <linux/genhd.h>
108 -#include <linux/swap.h> /* for is_swap_partition() */
109 +#include <linux/swap.h> /* for swap_run_test() */
110 #include <linux/module.h> /* for EXPORT_SYMBOL */
112 #include <asm/uaccess.h>
117 +/* swap_run_test() applies this hook to all swapfiles until it returns
118 + * "1". If it never returns "1", the result of swap_run_test() is "0",
121 +static int is_swap_partition_hook(unsigned int flags, struct file *swap_file,
124 + kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode)
125 + ? swap_file->f_dentry->d_inode->i_rdev : 0;
126 + kdev_t dev = *((kdev_t *)testdata);
128 + if (flags & SWP_USED && dev == swap_dev) {
135 +static inline int is_swap_partition(kdev_t dev)
137 + return swap_run_test(is_swap_partition_hook, &dev);
141 * Delete a partition given by partition number
143 diff -Nurb src/linux/linux.orig/fs/Config.in src/linux/linux/fs/Config.in
144 --- src/linux/linux.orig/fs/Config.in 2004-05-31 02:02:43.000000000 -0400
145 +++ src/linux/linux/fs/Config.in 2004-05-31 02:18:03.000000000 -0400
147 mainmenu_option next_comment
148 comment 'File systems'
150 +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
151 + tristate 'Swapping to block devices' CONFIG_BLKDEV_SWAP
153 + define_bool CONFIG_BLKDEV_SWAP y
156 bool 'Quota support' CONFIG_QUOTA
157 tristate 'Kernel automounter support' CONFIG_AUTOFS_FS
158 tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS
160 dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
161 dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
162 dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
163 + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
164 + dep_tristate ' Swapping via NFS (EXPERIMENTAL)' CONFIG_SWAP_VIA_NFS $CONFIG_NFS_FS
165 + if [ "$CONFIG_SWAP_VIA_NFS" = "y" -o "$CONFIG_SWAP_VIA_NFS" = "m" ]; then
166 + define_bool CONFIG_NETSWAP y
170 dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
171 dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD
172 diff -Nurb src/linux/linux.orig/fs/Makefile src/linux/linux/fs/Makefile
173 --- src/linux/linux.orig/fs/Makefile 2004-05-31 02:02:42.000000000 -0400
174 +++ src/linux/linux/fs/Makefile 2004-05-31 02:18:03.000000000 -0400
178 export-objs := filesystems.o open.o dcache.o buffer.o
180 +mod-subdirs := nls nfs
182 obj-y := open.o read_write.o devices.o file_table.o buffer.o \
183 super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
185 subdir-$(CONFIG_JFS_FS) += jfs
186 subdir-$(CONFIG_SQUASHFS) += squashfs
188 +obj-$(CONFIG_BLKDEV_SWAP) += blkdev_swap.o
190 obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
191 obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
192 diff -Nurb src/linux/linux.orig/fs/blkdev_swap.c src/linux/linux/fs/blkdev_swap.c
193 --- src/linux/linux.orig/fs/blkdev_swap.c 1969-12-31 19:00:00.000000000 -0500
194 +++ src/linux/linux/fs/blkdev_swap.c 2004-05-31 02:18:03.000000000 -0400
197 + * Swapping to partitions or files located on partitions.
200 +#include <linux/config.h>
201 +#include <linux/module.h>
202 +#include <linux/init.h>
203 +#include <linux/slab.h>
204 +#include <linux/locks.h>
205 +#include <linux/blkdev.h>
206 +#include <linux/pagemap.h>
207 +#include <linux/swap.h>
208 +#include <linux/fs.h>
210 +#ifdef DEBUG_BLKDEV_SWAP
211 +# define dprintk(fmt...) printk(##fmt)
213 +# define dprintk(fmt...) do { /* */ } while (0)
216 +#define BLKDEV_SWAP_ID "blkdev"
217 +#define BLKDEV_FILE_SWAP_ID "blkdev file"
220 + * Helper function, copied here from buffer.c
224 + * Start I/O on a page.
225 + * This function expects the page to be locked and may return
226 + * before I/O is complete. You then have to check page->locked
227 + * and page->uptodate.
229 + * brw_page() is SMP-safe, although it's being called with the
230 + * kernel lock held - but the code is ready.
232 + * FIXME: we need a swapper_inode->get_block function to remove
233 + * some of the bmap kludges and interface ugliness here.
235 +int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
237 + struct buffer_head *head, *bh;
239 + if (!PageLocked(page))
240 + panic("brw_page: page not locked for I/O");
242 + if (!page->buffers)
243 + create_empty_buffers(page, dev, size);
244 + head = bh = page->buffers;
246 + /* Stage 1: lock all the buffers */
249 + bh->b_blocknr = *(b++);
250 + set_bit(BH_Mapped, &bh->b_state);
251 + set_buffer_async_io(bh);
252 + bh = bh->b_this_page;
253 + } while (bh != head);
255 + /* Stage 2: start the IO */
257 + struct buffer_head *next = bh->b_this_page;
260 + } while (bh != head);
265 + * We implement two methods: swapping to partitions, and swapping to files
266 + * located on partitions.
269 +struct blkdev_swap_data {
274 + struct file * filp;
278 +static int is_blkdev_swapping(unsigned int flags,
279 + struct file * swapf,
282 + struct test_data *testdata = (struct test_data *) data;
283 + struct file * filp = testdata->filp;
284 + kdev_t dev = testdata->dev;
286 + /* Only check filp's that don't match the one already opened
287 + * for us by sys_swapon(). Otherwise, we will always flag a
291 + if (swapf != filp) {
292 + if (dev == swapf->f_dentry->d_inode->i_rdev)
298 +static int blkdev_swap_open(struct file * filp, void **dptr)
302 + struct blkdev_swap_data *data;
304 + struct test_data testdata;
308 + if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) {
309 + dprintk(__FUNCTION__": can't handle this swap file: %s\n",
310 + swapf->d_name.name);
311 + error = 0; /* not for us */
315 + dev = filp->f_dentry->d_inode->i_rdev;
316 + set_blocksize(dev, PAGE_SIZE);
319 + (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) {
320 + printk("blkdev_swap_open: blkdev weirdness for %s\n",
321 + filp->f_dentry->d_name.name);
325 + /* Check to make sure that we aren't already swapping. */
327 + testdata.filp = filp;
328 + testdata.dev = dev;
329 + if (swap_run_test(is_blkdev_swapping, &testdata)) {
330 + printk("blkdev_swap_open: already swapping to %s\n",
331 + filp->f_dentry->d_name.name);
336 + if (blk_size[MAJOR(dev)])
337 + swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
338 + >> (PAGE_SHIFT - 10);
340 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
341 + printk("blkdev_swap_open: can't allocate data for %s\n",
342 + filp->f_dentry->d_name.name);
349 + dprintk("blkdev_swap_open: returning %d\n", swapfilesize);
350 + return swapfilesize;
354 + return error; /* this swap thing is not for us */
357 +static int blkdev_swap_release(struct file * filp, void *data)
359 + dprintk("blkdev_swap_release: releasing swap device %s\n",
360 + filp->f_dentry->d_name.name);
366 +static int blkdev_rw_page(int rw, struct page *page, unsigned long offset,
369 + struct blkdev_swap_data *data = (struct blkdev_swap_data *)ptr;
370 + brw_page(rw, page, data->dev, (int *)&offset, PAGE_SIZE);
374 +static struct swap_ops blkdev_swap_ops = {
376 + blkdev_swap_release,
380 +struct blkdevfile_swap_data {
381 + struct inode *swapf;
384 +static int is_blkdevfile_swapping(unsigned int flags,
385 + struct file * swapf,
388 + struct file * filp = (struct file *) data;
390 + /* Only check filp's that don't match the one already opened
391 + * for us by sys_swapon(). Otherwise, we will always flag a
395 + if (swapf != filp) {
396 + if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
402 +static int blkdevfile_swap_open(struct file *swapf, void **dptr)
406 + struct blkdevfile_swap_data *data;
410 + /* first check whether this is a regular file located on a local
413 + if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) {
414 + dprintk("blkdevfile_swap_open: "
415 + "can't handle this swap file: %s\n",
416 + swapf->d_name.name);
417 + error = 0; /* not for us */
420 + if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) {
421 + dprintk("blkdevfile_swap_open: no bmap for file: %s\n",
422 + swapf->d_name.name);
423 + error = 0; /* not for us */
427 + if (swap_run_test(is_blkdevfile_swapping, swapf)) {
428 + dprintk("blkdevfile_swap_open: already swapping to %s\n",
429 + swapf->d_name.name);
433 + swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT;
434 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
438 + data->swapf = swapf->f_dentry->d_inode;
440 + return swapfilesize;
447 +static int blkdevfile_swap_release(struct file *swapf, void *data)
454 +static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset,
457 + struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr;
458 + struct inode * swapf = data->swapf;
460 + unsigned int block = offset
461 + << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
462 + kdev_t dev = swapf->i_dev;
464 + int zones[PAGE_SIZE/512];
467 + block_size = swapf->i_sb->s_blocksize;
468 + for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
469 + if (!(zones[i] = bmap(swapf,block++))) {
470 + printk("blkdevfile_rw_page: bad swap file\n");
475 + /* block_size == PAGE_SIZE/zones_used */
476 + brw_page(rw, page, dev, zones, block_size);
480 +static struct swap_ops blkdevfile_swap_ops = {
481 + blkdevfile_swap_open,
482 + blkdevfile_swap_release,
486 +int __init blkdev_swap_init(void)
488 + (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops);
489 + (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops);
493 +void __exit blkdev_swap_exit(void)
495 + unregister_swap_method(BLKDEV_SWAP_ID);
496 + unregister_swap_method(BLKDEV_FILE_SWAP_ID);
499 +module_init(blkdev_swap_init)
500 +module_exit(blkdev_swap_exit)
502 +MODULE_LICENSE("GPL");
503 +MODULE_AUTHOR("Many. Stuffed into a module by cH (Claus-Justus Heine)");
504 +MODULE_DESCRIPTION("Swapping to partitions and files on local hard-disks");
505 diff -Nurb src/linux/linux.orig/fs/buffer.c src/linux/linux/fs/buffer.c
506 --- src/linux/linux.orig/fs/buffer.c 2003-07-04 04:12:05.000000000 -0400
507 +++ src/linux/linux/fs/buffer.c 2004-05-31 02:21:05.000000000 -0400
509 bh->b_private = private;
512 -static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
513 +void end_buffer_io_async(struct buffer_head * bh, int uptodate)
515 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
517 @@ -2344,35 +2344,6 @@
521 -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
523 - struct buffer_head *head, *bh;
525 - if (!PageLocked(page))
526 - panic("brw_page: page not locked for I/O");
528 - if (!page->buffers)
529 - create_empty_buffers(page, dev, size);
530 - head = bh = page->buffers;
532 - /* Stage 1: lock all the buffers */
535 - bh->b_blocknr = *(b++);
536 - set_bit(BH_Mapped, &bh->b_state);
537 - set_buffer_async_io(bh);
538 - bh = bh->b_this_page;
539 - } while (bh != head);
541 - /* Stage 2: start the IO */
543 - struct buffer_head *next = bh->b_this_page;
546 - } while (bh != head);
550 int block_symlink(struct inode *inode, const char *symname, int len)
552 struct address_space *mapping = inode->i_mapping;
553 diff -Nurb src/linux/linux.orig/fs/nfs/Makefile src/linux/linux/fs/nfs/Makefile
554 --- src/linux/linux.orig/fs/nfs/Makefile 2003-07-04 04:12:07.000000000 -0400
555 +++ src/linux/linux/fs/nfs/Makefile 2004-05-31 02:18:03.000000000 -0400
557 obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
558 obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
560 -obj-m := $(O_TARGET)
561 +obj-$(CONFIG_SWAP_VIA_NFS) += nfsswap.o
562 +ifeq ($(CONFIG_SWAP_VIA_NFS),m)
563 +export-objs := nfs_syms.o
567 +ifeq ($(CONFIG_NFS_FS),m)
568 +obj-m += $(O_TARGET)
571 include $(TOPDIR)/Rules.make
572 diff -Nurb src/linux/linux.orig/fs/nfs/file.c src/linux/linux/fs/nfs/file.c
573 --- src/linux/linux.orig/fs/nfs/file.c 2003-07-04 04:12:07.000000000 -0400
574 +++ src/linux/linux/fs/nfs/file.c 2004-05-31 02:18:03.000000000 -0400
576 setattr: nfs_notify_change,
579 -/* Hack for future NFS swap support */
581 -# define IS_SWAPFILE(inode) (0)
585 * Flush all dirty pages, and check for write errors.
588 inode->i_ino, (unsigned long) count, (unsigned long) *ppos);
591 - if (IS_SWAPFILE(inode))
593 result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
597 result = generic_file_write(file, buf, count, ppos);
602 - printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
607 diff -Nurb src/linux/linux.orig/fs/nfs/nfs_syms.c src/linux/linux/fs/nfs/nfs_syms.c
608 --- src/linux/linux.orig/fs/nfs/nfs_syms.c 1969-12-31 19:00:00.000000000 -0500
609 +++ src/linux/linux/fs/nfs/nfs_syms.c 2004-05-31 02:18:03.000000000 -0400
611 +#include <linux/config.h>
612 +#define __NO_VERSION__
613 +#include <linux/module.h>
614 +#include <linux/types.h>
615 +#include <linux/sunrpc/clnt.h>
616 +#include <linux/nfs_fs.h>
618 +EXPORT_SYMBOL(__nfs_refresh_inode);
619 +EXPORT_SYMBOL(nfs_write_attributes);
621 diff -Nurb src/linux/linux.orig/fs/nfs/nfsswap.c src/linux/linux/fs/nfs/nfsswap.c
622 --- src/linux/linux.orig/fs/nfs/nfsswap.c 1969-12-31 19:00:00.000000000 -0500
623 +++ src/linux/linux/fs/nfs/nfsswap.c 2004-05-31 02:18:03.000000000 -0400
626 + * Swapping to files located on NFS mounted volumes
627 + * Copyright (c) 2000 Claus-Justus Heine
631 +#include <linux/config.h>
632 +#include <linux/module.h>
633 +#include <linux/init.h>
634 +#include <linux/types.h>
635 +#include <linux/slab.h>
636 +#include <linux/swap.h>
637 +#include <linux/pagemap.h>
638 +#include <linux/file.h>
639 +#include <linux/fs.h>
640 +#include <linux/socket.h>
641 +#include <linux/smp_lock.h>
642 +#include <net/netswapping.h>
643 +#include <net/sock.h>
645 +#include <linux/sunrpc/clnt.h>
646 +#include <linux/nfs_fs.h>
647 +#include <linux/nfs_fs_sb.h>
648 +#include <asm/uaccess.h>
650 +#define NFSDBG_FACILITY NFSDBG_SWAP
652 +#define NFS_SWAP_ID "nfs file"
654 +/* we cache some values here. In principle, we only need the file.
656 +struct nfs_swap_data {
658 + struct inode *inode;
659 + struct nfs_server *server;
660 + struct socket *socket;
663 +/* Nearly a clone of nfs_readpage_sync() in read.c, but "struct page" does not
664 + * contain information about the file offset when swapping. So.
666 +static int nfs_read_swap_page(struct page *page,
667 + struct nfs_server *server,
668 + struct inode *inode,
671 + unsigned int rsize = server->rsize;
672 + unsigned int count = PAGE_SIZE;
673 + unsigned int offset = 0; /* always at start of page */
675 + struct rpc_cred *cred;
676 + struct nfs_fattr fattr;
678 + cred = nfs_file_cred(file);
685 + result = NFS_PROTO(inode)->read(inode, cred,
688 + offset, rsize, page, &eof);
689 + nfs_refresh_inode(inode, &fattr);
693 + * Even if we had a partial success we can't mark the page
697 + if (result == -EISDIR)
703 + if (result < rsize) /* NFSv2ism */
708 + char *kaddr = kmap(page);
709 + memset(kaddr + offset, 0, count);
712 + flush_dcache_page(page);
719 +/* Like nfs_writepage_sync(), but when swapping page->index does not encode
720 + * the offset in the swap file alone.
723 +static int nfs_write_swap_page(struct page *page,
724 + struct nfs_server *server,
725 + struct inode *inode,
728 + struct rpc_cred *cred;
729 + unsigned int wsize = server->wsize;
730 + unsigned int count = PAGE_SIZE;
731 + unsigned int offset = 0;
733 + struct nfs_writeverf verf;
734 + struct nfs_fattr fattr;
736 + cred = nfs_file_cred(file);
743 + result = NFS_PROTO(inode)->write(inode, cred, &fattr,
744 + NFS_RW_SWAP|NFS_RW_SYNC,
745 + offset, wsize, page, &verf);
746 + nfs_write_attributes(inode, &fattr);
752 + if (result != wsize)
753 + printk("NFS: short write, wsize=%u, result=%d\n",
758 + * If we've extended the file, update the inode
759 + * now so we don't invalidate the cache.
761 + if (offset > inode->i_size)
762 + inode->i_size = offset;
772 +/* Unluckily (for us), from 2.4.19 -> 2.4.20 the nfs-procs were
773 + * changed and now expect a proper file-mapping page, where index
774 + * encodes the offset alone.
776 + * What we do: we save the original value of page->index, initialize
777 + * page->index to what the NFS/sun-rpc subsystem expects and restore
780 +static int nfs_rw_swap_page(int rw, struct page *page,
781 + unsigned long offset, void *dptr)
784 + struct nfs_swap_data *data = dptr;
785 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
786 + unsigned long page_index;
788 + if (!PageLocked(page))
789 + panic("nfs_rw_swap_page: page not locked for I/O");
791 + /* prevent memory deadlocks */
792 + if (!(current->flags & PF_MEMALLOC)) {
793 + dprintk("nfs_rw_swap_page: Setting PF_MEMALLOC\n");
795 + current->flags |= PF_MEMALLOC;
797 + /* now tweak the page->index field ... */
798 + page_index = page->index;
799 + page->index = ((loff_t)offset*(loff_t)PAGE_SIZE) >> PAGE_CACHE_SHIFT;
802 + error = nfs_write_swap_page(page,
807 + error = nfs_read_swap_page(page,
814 + current->flags &= ~PF_MEMALLOC;
817 + /* now restore the page->index field ... */
818 + page->index = page_index;
821 + /* Must mark the page invalid after I/O error */
822 + SetPageError(page);
823 + ClearPageUptodate(page);
825 + ClearPageError(page);
826 + SetPageUptodate(page);
829 + if (!error) { /* in case of an error rw_swap_page() likes to unlock
835 + return error < 0 ? 0 : 1;
838 +static int is_nfsfile_swapping(unsigned int flags,
839 + struct file * swapf,
842 + struct file * filp = (struct file *) data;
844 + /* Only check filp's that don't match the one already opened
845 + * for us by sys_swapon(). Otherwise, we will always flag a
849 + if (swapf != filp) {
850 + if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
856 +static int nfs_swap_open(struct file *swapf, void **dptr)
860 + struct nfs_swap_data *data;
863 + struct inode *inode = swapf->f_dentry->d_inode;
867 + if (!S_ISREG(inode->i_mode)) {
868 + dprintk("nfs_swap_open: can't handle this swap file: %s\n",
869 + swapf->f_dentry->d_name.name);
870 + error = 0; /* not for us */
873 + /* determine whether this file really is located on an NFS mounted
876 + if (!inode->i_sb || inode->i_sb->s_magic != NFS_SUPER_MAGIC) {
877 + dprintk("nfs_swap_open: %s is not an NFS file.\n",
878 + swapf->f_dentry->d_name.name);
879 + error = 0; /* not for us */
883 + if (swap_run_test(is_nfsfile_swapping, swapf)) {
884 + dprintk("nfs_swap_open: already swapping to %s\n",
885 + swapf->f_dentry->d_name.name);
889 + swapfilesize = inode->i_size >> PAGE_SHIFT;
890 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
894 + data->file = swapf;
895 + data->inode = inode;
896 + data->server = NFS_SERVER(inode);
897 + data->socket = data->server->client->cl_xprt->sock;
899 + /* set socket option SO_SWAPPING */
902 + error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
903 + (char *)&on, sizeof(on));
906 + dprintk("nfs_swap_open: error setting SO_SWAPPING\n");
911 + return swapfilesize;
920 +static int nfs_swap_release(struct file *swapf, void *dptr)
922 + struct nfs_swap_data *data = (struct nfs_swap_data *)dptr;
928 + if (swapf != data->file ||
929 + swapf->f_dentry->d_inode != data->inode ||
930 + !swapf->f_dentry->d_inode->i_sb ||
931 + swapf->f_dentry->d_inode->i_sb->s_magic != NFS_SUPER_MAGIC ||
932 + NFS_SERVER(swapf->f_dentry->d_inode) != data->server ||
933 + data->socket != data->server->client->cl_xprt->sock) {
934 + panic("nfs_swap_release: nfs swap data messed up");
938 + /* remove socket option SO_SWAPPING */
941 + error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
942 + (char *)&off, sizeof(off));
945 + dprintk("nfs_swap_open: error clearing SO_SWAPPING\n");
952 +static struct swap_ops nfs_swap_ops = {
953 + open: nfs_swap_open,
954 + release: nfs_swap_release,
955 + rw_page: nfs_rw_swap_page
958 +int __init nfs_swap_init(void)
960 + (void)register_swap_method(NFS_SWAP_ID, &nfs_swap_ops);
964 +void __exit nfs_swap_exit(void)
966 + unregister_swap_method(NFS_SWAP_ID);
969 +module_init(nfs_swap_init)
970 +module_exit(nfs_swap_exit)
972 +MODULE_LICENSE("GPL");
973 +MODULE_AUTHOR("(c) 1996-2002 cH (Claus-Justus Heine)");
974 +MODULE_DESCRIPTION("Swapping to files located on volumes mounted via NFS");
975 diff -Nurb src/linux/linux.orig/fs/nfs/read.c src/linux/linux/fs/nfs/read.c
976 --- src/linux/linux.orig/fs/nfs/read.c 2003-07-04 04:12:08.000000000 -0400
977 +++ src/linux/linux/fs/nfs/read.c 2004-05-31 02:18:03.000000000 -0400
980 static void nfs_readpage_result(struct rpc_task *task);
982 -/* Hack for future NFS swap support */
984 -# define IS_SWAPFILE(inode) (0)
987 static kmem_cache_t *nfs_rdata_cachep;
989 static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
991 int rsize = NFS_SERVER(inode)->rsize;
993 int count = PAGE_CACHE_SIZE;
994 - int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
997 dprintk("NFS: nfs_readpage_sync(%p)\n", page);
999 offset, rsize, page);
1002 - result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
1003 + result = NFS_PROTO(inode)->read(inode, cred, &fattr, 0,
1004 offset, rsize, page, &eof);
1005 nfs_refresh_inode(inode, &fattr);
1010 /* N.B. Do we need to test? Never called for swapfile inode */
1011 - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
1012 + flags = RPC_TASK_ASYNC;
1014 nfs_read_rpcsetup(head, data);
1019 error = nfs_readpage_sync(file, inode, page);
1020 - if (error < 0 && IS_SWAPFILE(inode))
1021 - printk("Aiee.. nfs swap-in of page failed!\n");
1025 diff -Nurb src/linux/linux.orig/fs/nfs/write.c src/linux/linux/fs/nfs/write.c
1026 --- src/linux/linux.orig/fs/nfs/write.c 2003-07-04 04:12:08.000000000 -0400
1027 +++ src/linux/linux/fs/nfs/write.c 2004-05-31 02:20:47.000000000 -0400
1029 #include <linux/config.h>
1030 #include <linux/types.h>
1031 #include <linux/slab.h>
1032 -#include <linux/swap.h>
1033 #include <linux/pagemap.h>
1034 #include <linux/file.h>
1037 static void nfs_commit_done(struct rpc_task *);
1040 -/* Hack for future NFS swap support */
1041 -#ifndef IS_SWAPFILE
1042 -# define IS_SWAPFILE(inode) (0)
1045 static kmem_cache_t *nfs_wdata_cachep;
1047 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
1049 * For the moment, we just call nfs_refresh_inode().
1051 static __inline__ int
1052 -nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1053 +__nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1055 if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
1056 fattr->pre_size = NFS_CACHE_ISIZE(inode);
1058 return nfs_refresh_inode(inode, fattr);
1061 +int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1063 + return __nfs_write_attributes(inode, fattr);
1067 * Write a page synchronously.
1068 * Offset is the data offset within the page.
1070 struct rpc_cred *cred = NULL;
1072 unsigned int wsize = NFS_SERVER(inode)->wsize;
1073 - int result, refresh = 0, written = 0, flags;
1075 + int result, refresh = 0, written = 0;
1076 struct nfs_fattr fattr;
1077 struct nfs_writeverf verf;
1079 @@ -121,15 +119,14 @@
1081 base = page_offset(page) + offset;
1083 - flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
1086 - if (count < wsize && !IS_SWAPFILE(inode))
1087 + if (count < wsize)
1090 - result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
1091 + result = NFS_PROTO(inode)->write(inode, cred, &fattr,
1093 offset, wsize, page, &verf);
1094 - nfs_write_attributes(inode, &fattr);
1095 + __nfs_write_attributes(inode, &fattr);
1098 /* Must mark the page invalid after I/O error */
1100 printk("NFS: short write, wsize=%u, result=%d\n",
1111 - nfs_write_attributes(inode, resp->fattr);
1112 + __nfs_write_attributes(inode, resp->fattr);
1113 while (!list_empty(&data->pages)) {
1114 req = nfs_list_entry(data->pages.next);
1115 nfs_list_remove_request(req);
1116 @@ -1133,7 +1129,7 @@
1117 if (nfs_async_handle_jukebox(task))
1120 - nfs_write_attributes(inode, resp->fattr);
1121 + __nfs_write_attributes(inode, resp->fattr);
1122 while (!list_empty(&data->pages)) {
1123 req = nfs_list_entry(data->pages.next);
1124 nfs_list_remove_request(req);
1125 diff -Nurb src/linux/linux.orig/include/linux/fs.h src/linux/linux/include/linux/fs.h
1126 --- src/linux/linux.orig/include/linux/fs.h 2004-05-31 02:06:19.000000000 -0400
1127 +++ src/linux/linux/include/linux/fs.h 2004-05-31 02:18:03.000000000 -0400
1128 @@ -1500,6 +1500,10 @@
1129 extern int inode_change_ok(struct inode *, struct iattr *);
1130 extern int inode_setattr(struct inode *, struct iattr *);
1132 +/* for swapping to block devices */
1133 +void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize);
1134 +void end_buffer_io_async(struct buffer_head * bh, int uptodate);
1137 * Common dentry functions for inclusion in the VFS
1138 * or in other stackable file systems. Some of these
1139 diff -Nurb src/linux/linux.orig/include/linux/nfs_fs.h src/linux/linux/include/linux/nfs_fs.h
1140 --- src/linux/linux.orig/include/linux/nfs_fs.h 2004-05-31 02:06:28.000000000 -0400
1141 +++ src/linux/linux/include/linux/nfs_fs.h 2004-05-31 02:18:03.000000000 -0400
1144 #define NFS_MAX_DIRCACHE 16
1146 -#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768
1147 -#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096
1148 +#define NFS_MAX_FILE_IO_BUFFER_SIZE (8*PAGE_SIZE)
1149 +#define NFS_DEF_FILE_IO_BUFFER_SIZE PAGE_SIZE
1152 * The upper limit on timeouts for the exponential backoff algorithm.
1154 extern int nfs_writepage(struct page *);
1155 extern int nfs_flush_incompatible(struct file *file, struct page *page);
1156 extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
1157 +extern int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr);
1160 * Try to write back everything synchronously (but check the
1163 #define NFSDBG_XDR 0x0020
1164 #define NFSDBG_FILE 0x0040
1165 #define NFSDBG_ROOT 0x0080
1166 +#define NFSDBG_SWAP 0x0100
1167 #define NFSDBG_ALL 0xFFFF
1170 diff -Nurb src/linux/linux.orig/include/linux/slab.h src/linux/linux/include/linux/slab.h
1171 --- src/linux/linux.orig/include/linux/slab.h 2004-05-31 02:06:19.000000000 -0400
1172 +++ src/linux/linux/include/linux/slab.h 2004-05-31 02:18:03.000000000 -0400
1174 #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
1175 #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
1176 #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
1177 +#define SLAB_LOW_GFP_ORDER 0x00010000UL /* use as low a gfp order as possible */
1179 /* flags passed to a constructor func */
1180 #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
1181 diff -Nurb src/linux/linux.orig/include/linux/swap.h src/linux/linux/include/linux/swap.h
1182 --- src/linux/linux.orig/include/linux/swap.h 2004-05-31 02:06:19.000000000 -0400
1183 +++ src/linux/linux/include/linux/swap.h 2004-05-31 02:18:03.000000000 -0400
1185 #define SWAP_MAP_MAX 0x7fff
1186 #define SWAP_MAP_BAD 0x8000
1189 + int (*open)(struct file *swapf, void **data);
1190 + int (*release)(struct file *swapf, void *data);
1191 + int (*rw_page)(int rw,
1192 + struct page *page, unsigned long offset, void *data);
1195 +struct swap_method {
1196 + struct swap_method *next;
1198 + struct swap_ops *ops;
1203 * The in-memory structure used to track swap areas.
1205 struct swap_info_struct {
1207 - kdev_t swap_device;
1208 + struct file *swap_file;
1209 + struct swap_method *method;
1211 spinlock_t sdev_lock;
1212 - struct dentry * swap_file;
1213 - struct vfsmount *swap_vfsmnt;
1214 unsigned short * swap_map;
1215 unsigned int lowest_bit;
1216 unsigned int highest_bit;
1217 @@ -141,11 +155,15 @@
1218 extern int total_swap_pages;
1219 extern unsigned int nr_swapfiles;
1220 extern struct swap_info_struct swap_info[];
1221 -extern int is_swap_partition(kdev_t);
1222 +extern int register_swap_method(char *name, struct swap_ops *ops);
1223 +extern int unregister_swap_method(char *name);
1224 +extern int swap_run_test(int (*test_fct)(unsigned int flags,
1225 + struct file *swap_file,
1226 + void *testdata), void *testdata);
1227 extern void si_swapinfo(struct sysinfo *);
1228 extern swp_entry_t get_swap_page(void);
1229 -extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
1231 +struct swap_method *get_swaphandle_info(swp_entry_t entry,
1232 + unsigned long *offset, void **data);
1233 extern int swap_duplicate(swp_entry_t);
1234 extern int swap_count(struct page *);
1235 extern int valid_swaphandles(swp_entry_t, unsigned long *);
1236 diff -Nurb src/linux/linux.orig/include/net/netswapping.h src/linux/linux/include/net/netswapping.h
1237 --- src/linux/linux.orig/include/net/netswapping.h 1969-12-31 19:00:00.000000000 -0500
1238 +++ src/linux/linux/include/net/netswapping.h 2004-05-31 02:18:03.000000000 -0400
1240 +#ifndef _LINUX_NETSWAPPING_H
1241 +#define _LINUX_NETSWAPPING_H
1243 +#include <linux/swap.h>
1244 +#include <linux/init.h>
1246 +/* It is a mess. Socket options are defined in asm-ARCH/socket.h */
1248 +#define SO_SWAPPING 0x00100000 /* hopefully not used by anybody else */
1252 +#define CTL_NETSWAP 0x00100000
1255 + NET_SWAP_DROPPED = 1,
1256 + NET_SWAP_DROP_THRESHOLD = 2,
1257 + NET_SWAP_SOCK_COUNT = 3
1260 +extern unsigned int netswap_free_pages_min;
1261 +extern int netswap_sock_count;
1262 +extern unsigned int netswap_dropped;
1264 +/* this is "#defined" and not inline because sock.h includes us, but we need
1265 + * the "struct sock" definition.
1267 +#define netswap_low_memory(sk, skb) \
1271 + if (netswap_sock_count > 0 && /* anybody swapping via network? */ \
1272 + !(sk)->swapping && /* but we are not needed for swapping */ \
1273 + nr_free_pages() < netswap_free_pages_min) { /* so drop us */ \
1274 + printk("netswap_low_memory: " \
1275 + "dropping skb 0x%p@0x%p\n", skb, sk); \
1276 + netswap_dropped ++; \
1282 +extern int __init netswap_init(void);
1287 diff -Nurb src/linux/linux.orig/include/net/sock.h src/linux/linux/include/net/sock.h
1288 --- src/linux/linux.orig/include/net/sock.h 2004-05-31 02:07:17.000000000 -0400
1289 +++ src/linux/linux/include/net/sock.h 2004-05-31 02:18:03.000000000 -0400
1290 @@ -103,6 +103,10 @@
1291 #include <linux/filter.h>
1294 +#ifdef CONFIG_NETSWAP
1295 +#include <net/netswapping.h>
1298 #include <asm/atomic.h>
1299 #include <net/dst.h>
1301 @@ -536,6 +540,12 @@
1305 +#ifdef CONFIG_NETSWAP
1306 + /* Increased by SO_SWAPPING with arg != 0, decreased by
1307 + * SO_SWAPPING with arg 0
1311 unsigned char debug;
1312 unsigned char rcvtstamp;
1313 unsigned char use_write_queue;
1314 @@ -1165,6 +1175,11 @@
1315 return err; /* Toss packet */
1317 #endif /* CONFIG_FILTER */
1318 +#ifdef CONFIG_NETSWAP
1319 + /* netswap_low_memory() is a macro defined in net/netswapping.h */
1320 + if (netswap_low_memory(sk, skb))
1322 +#endif /* CONFIG_NETSWAP */
1325 skb_set_owner_r(skb, sk);
1326 diff -Nurb src/linux/linux.orig/kernel/ksyms.c src/linux/linux/kernel/ksyms.c
1327 --- src/linux/linux.orig/kernel/ksyms.c 2004-05-31 02:02:43.000000000 -0400
1328 +++ src/linux/linux/kernel/ksyms.c 2004-05-31 02:18:03.000000000 -0400
1330 #include <linux/mm.h>
1331 #include <linux/capability.h>
1332 #include <linux/highuid.h>
1333 +#include <linux/swapctl.h>
1334 #include <linux/brlock.h>
1335 #include <linux/fs.h>
1336 #include <linux/tty.h>
1337 @@ -127,6 +128,11 @@
1338 EXPORT_SYMBOL(kmap_prot);
1339 EXPORT_SYMBOL(kmap_pte);
1341 +EXPORT_SYMBOL(nr_free_pages);
1342 +/* EXPORT_SYMBOL(freepages); */
1343 +EXPORT_SYMBOL(register_swap_method);
1344 +EXPORT_SYMBOL(unregister_swap_method);
1345 +EXPORT_SYMBOL(swap_run_test);
1347 /* filesystem internal functions */
1348 EXPORT_SYMBOL(def_blk_fops);
1350 EXPORT_SYMBOL(make_bad_inode);
1351 EXPORT_SYMBOL(is_bad_inode);
1352 EXPORT_SYMBOL(event);
1353 -EXPORT_SYMBOL(brw_page);
1354 +EXPORT_SYMBOL(end_buffer_io_async);
1355 EXPORT_SYMBOL(__inode_dir_notify);
1358 diff -Nurb src/linux/linux.orig/mm/page_io.c src/linux/linux/mm/page_io.c
1359 --- src/linux/linux.orig/mm/page_io.c 2003-07-04 04:12:29.000000000 -0400
1360 +++ src/linux/linux/mm/page_io.c 2004-05-31 02:18:03.000000000 -0400
1362 static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
1364 unsigned long offset;
1365 - int zones[PAGE_SIZE/512];
1369 - struct inode *swapf = 0;
1370 + struct swap_method *method;
1374 ClearPageUptodate(page);
1379 - get_swaphandle_info(entry, &offset, &dev, &swapf);
1381 - zones[0] = offset;
1383 - block_size = PAGE_SIZE;
1384 - } else if (swapf) {
1386 - unsigned int block = offset
1387 - << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
1389 - block_size = swapf->i_sb->s_blocksize;
1390 - for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
1391 - if (!(zones[i] = bmap(swapf,block++))) {
1392 - printk("rw_swap_page: bad swap file\n");
1396 - dev = swapf->i_dev;
1398 + method = get_swaphandle_info(entry, &offset, &data);
1399 + if (!method || !method->ops->rw_page(rw, page, offset, data)) {
1403 - /* block_size == PAGE_SIZE/zones_used */
1404 - brw_page(rw, page, dev, zones, block_size);
1408 diff -Nurb src/linux/linux.orig/mm/slab.c src/linux/linux/mm/slab.c
1409 --- src/linux/linux.orig/mm/slab.c 2003-07-04 04:12:29.000000000 -0400
1410 +++ src/linux/linux/mm/slab.c 2004-05-31 02:18:03.000000000 -0400
1411 @@ -111,10 +111,12 @@
1412 # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
1413 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
1414 SLAB_NO_REAP | SLAB_CACHE_DMA | \
1415 - SLAB_MUST_HWCACHE_ALIGN)
1416 + SLAB_MUST_HWCACHE_ALIGN | \
1417 + SLAB_LOW_GFP_ORDER)
1419 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
1420 - SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
1421 + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
1422 + SLAB_LOW_GFP_ORDER)
1426 @@ -247,8 +249,13 @@
1429 /* internal c_flags */
1430 -#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
1431 -#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
1432 +#define CFLGS_OFF_SLAB 0x020000UL /* slab management in own cache */
1433 +#define CFLGS_OPTIMIZE 0x040000UL /* optimized slab lookup */
1434 +#define CFLGS_MASK (CFLGS_OFF_SLAB | CFLGS_OPTIMIZE)
1436 +#if (CFLGS_MASK & CREATE_MASK)
1437 +# error BUG: internal and external SLAB flags overlap
1440 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
1441 #define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
1442 @@ -452,7 +459,12 @@
1443 snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size);
1444 if (!(sizes->cs_cachep =
1445 kmem_cache_create(name, sizes->cs_size,
1446 - 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
1449 + SLAB_LOW_GFP_ORDER| /* sorry */
1451 + SLAB_HWCACHE_ALIGN,
1460 + if (cachep->gfporder == 0 && (flags & SLAB_LOW_GFP_ORDER))
1462 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
1463 /* Oops, this num of objs will cause problems. */
1465 diff -Nurb src/linux/linux.orig/mm/swapfile.c src/linux/linux/mm/swapfile.c
1466 --- src/linux/linux.orig/mm/swapfile.c 2003-07-04 04:12:29.000000000 -0400
1467 +++ src/linux/linux/mm/swapfile.c 2004-05-31 02:18:03.000000000 -0400
1469 #include <linux/swap.h>
1470 #include <linux/swapctl.h>
1471 #include <linux/blkdev.h> /* for blk_size */
1472 +#include <linux/file.h>
1473 #include <linux/vmalloc.h>
1474 #include <linux/pagemap.h>
1475 #include <linux/shm.h>
1477 #include <asm/pgtable.h>
1480 +#include <linux/kmod.h>
1483 spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
1484 unsigned int nr_swapfiles;
1485 int total_swap_pages;
1488 struct swap_info_struct swap_info[MAX_SWAPFILES];
1490 +static struct swap_method *swap_methods = NULL;
1492 #define SWAPFILE_CLUSTER 256
1494 +int register_swap_method(char *name, struct swap_ops *ops)
1496 + struct swap_method *pos;
1497 + struct swap_method *new;
1502 + for (pos = swap_methods; pos; pos = pos->next) {
1503 + if (strcmp(pos->name, name) == 0) {
1504 + printk(KERN_ERR "register_swap_method: "
1505 + "method %s already registered\n", name);
1511 + if (!(new = kmalloc(sizeof(*new), GFP_KERNEL))) {
1512 + printk(KERN_ERR "register_swap_method: "
1513 + "no memory for new method \"%s\"\n", name);
1520 + new->use_count = 0;
1522 + /* ok, insert at top of list */
1523 + printk("register_swap_method: method %s\n", name);
1524 + new->next = swap_methods;
1525 + swap_methods = new;
1531 +int unregister_swap_method(char *name)
1533 + struct swap_method **method, *next;
1538 + for (method = &swap_methods; *method; method = &(*method)->next) {
1539 + if (strcmp((*method)->name, name) == 0) {
1540 + if ((*method)->use_count > 0) {
1541 + printk(KERN_ERR "unregister_swap_method: "
1542 + "method \"%s\" is in use\n", name);
1547 + next = (*method)->next;
1550 + printk("unregister_swap_method: method %s\n", name);
1555 + printk("unregister_swap_method: no such method %s\n", name);
1562 static inline int scan_swap_map(struct swap_info_struct *si)
1564 unsigned long offset;
1565 @@ -711,13 +786,14 @@
1566 struct nameidata nd;
1569 + struct file *swap_file;
1571 if (!capable(CAP_SYS_ADMIN))
1574 err = user_path_walk(specialfile, &nd);
1581 @@ -725,15 +801,20 @@
1582 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1583 p = swap_info + type;
1584 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1585 - if (p->swap_file == nd.dentry)
1586 + if (p->swap_file &&
1587 + p->swap_file->f_dentry == nd.dentry)
1593 + /* p->swap_file contains all needed info, no need to keep nd, so
1596 + path_release(&nd);
1604 @@ -767,32 +848,30 @@
1605 total_swap_pages += p->pages;
1606 p->flags = SWP_WRITEOK;
1611 - if (p->swap_device)
1612 - blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
1613 - path_release(&nd);
1615 + if (p->method->ops->release)
1616 + p->method->ops->release(p->swap_file, p->data);
1618 swap_device_lock(p);
1619 - nd.mnt = p->swap_vfsmnt;
1620 - nd.dentry = p->swap_file;
1621 - p->swap_vfsmnt = NULL;
1622 + p->method->use_count --;
1625 + swap_file = p->swap_file;
1626 p->swap_file = NULL;
1627 - p->swap_device = 0;
1629 swap_map = p->swap_map;
1632 swap_device_unlock(p);
1634 + filp_close(swap_file, NULL);
1640 - path_release(&nd);
1646 @@ -805,18 +884,17 @@
1650 - len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
1651 + len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n",
1652 + "Filename", "Type", "Size", "Used");
1653 for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
1654 if ((ptr->flags & SWP_USED) && ptr->swap_map) {
1655 - char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
1656 + char * path = d_path(ptr->swap_file->f_dentry,
1657 + ptr->swap_file->f_vfsmnt,
1660 len += sprintf(buf + len, "%-31s ", path);
1662 - if (!ptr->swap_device)
1663 - len += sprintf(buf + len, "file\t\t");
1665 - len += sprintf(buf + len, "partition\t");
1666 + len += sprintf(buf + len, "%-15s ", ptr->method->name);
1669 for (j = 0; j < ptr->max; ++j)
1674 - len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
1675 + len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10),
1676 usedswap << (PAGE_SHIFT - 10), ptr->prio);
1679 @@ -835,18 +913,55 @@
1683 -int is_swap_partition(kdev_t dev) {
1684 +/* apply a test function to all active swap objects. E.g. for checking
1685 + * whether a partition is used for swapping
1687 +int swap_run_test(int (*test_fct)(unsigned int flags,
1688 + struct file * swap_file,
1689 + void *testdata), void *testdata)
1691 struct swap_info_struct *ptr = swap_info;
1694 for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
1695 - if (ptr->flags & SWP_USED)
1696 - if (ptr->swap_device == dev)
1697 + if (ptr->swap_file &&
1698 + test_fct(ptr->flags, ptr->swap_file, testdata))
1704 +/* Walk through the list of known swap methods until somebody wants to
1705 + * handle this file. Pick the first one which claims to be able to
1706 + * swap to this kind of file.
1708 + * return value: < 0: error, 0: not found, > 0: swapfilesize
1710 +int find_swap_method(struct file *swap_file,
1711 + struct swap_info_struct *p)
1713 + int swapfilesize = 0;
1714 + struct swap_method *method;
1717 + for (method = swap_methods; method; method = method->next) {
1718 + swapfilesize = method->ops->open(swap_file, &p->data);
1719 + if (swapfilesize == 0) {
1722 + if (swapfilesize > 0) {
1723 + p->method = method;
1724 + p->method->use_count ++;
1725 + p->swap_file = swap_file;
1728 + if (swapfilesize < 0) {
1732 + return swapfilesize;
1736 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1739 asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
1741 struct swap_info_struct * p;
1742 - struct nameidata nd;
1743 - struct inode * swap_inode;
1748 int nr_good_pages = 0;
1749 unsigned long maxpages = 1;
1751 - struct block_device *bdev = NULL;
1752 unsigned short *swap_map;
1753 + char * tmp_specialfile;
1754 + struct file *swap_file;
1756 if (!capable(CAP_SYS_ADMIN))
1758 @@ -886,8 +1000,7 @@
1759 nr_swapfiles = type+1;
1760 p->flags = SWP_USED;
1761 p->swap_file = NULL;
1762 - p->swap_vfsmnt = NULL;
1763 - p->swap_device = 0;
1768 @@ -901,53 +1014,56 @@
1769 p->prio = --least_priority;
1772 - error = user_path_walk(specialfile, &nd);
1775 + /* Open the swap using filp_open. Bail out on any errors. */
1776 + tmp_specialfile = getname(specialfile);
1777 + if (IS_ERR(tmp_specialfile)) {
1778 + error = PTR_ERR(tmp_specialfile);
1781 + p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600);
1782 + putname(tmp_specialfile);
1783 + if (IS_ERR(p->swap_file)) {
1784 + error = PTR_ERR(p->swap_file);
1788 - p->swap_file = nd.dentry;
1789 - p->swap_vfsmnt = nd.mnt;
1790 - swap_inode = nd.dentry->d_inode;
1793 - if (S_ISBLK(swap_inode->i_mode)) {
1794 - kdev_t dev = swap_inode->i_rdev;
1795 - struct block_device_operations *bdops;
1796 - devfs_handle_t de;
1798 - p->swap_device = dev;
1799 - set_blocksize(dev, PAGE_SIZE);
1801 - bd_acquire(swap_inode);
1802 - bdev = swap_inode->i_bdev;
1803 - de = devfs_get_handle_from_inode(swap_inode);
1804 - bdops = devfs_get_ops(de); /* Increments module use count */
1805 - if (bdops) bdev->bd_op = bdops;
1807 - error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
1808 - devfs_put_ops(de);/*Decrement module use count now we're safe*/
1811 - set_blocksize(dev, PAGE_SIZE);
1813 - if (!dev || (blk_size[MAJOR(dev)] &&
1814 - !blk_size[MAJOR(dev)][MINOR(dev)]))
1817 - if (blk_size[MAJOR(dev)])
1818 - swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
1819 - >> (PAGE_SHIFT - 10);
1820 - } else if (S_ISREG(swap_inode->i_mode))
1821 - swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
1824 + swapfilesize = find_swap_method(p->swap_file, p);
1825 + if (swapfilesize < 0) {
1826 + error = swapfilesize;
1830 + if (swapfilesize == 0) {
1831 + (void)request_module("swapfile-mod");
1833 + swapfilesize = find_swap_method(p->swap_file, p);
1834 + if (swapfilesize < 0) {
1835 + error = swapfilesize;
1840 + if (swapfilesize == 0) {
1841 + printk("Don't know how to swap to this kind of file\n");
1842 + goto bad_swap_1; /* free swap map */
1845 + /* After this point, the swap-file has been opened by the swap
1846 + * method. We must make sure to use the bad_swap label for any
1851 for (i = 0 ; i < nr_swapfiles ; i++) {
1852 struct swap_info_struct *q = &swap_info[i];
1853 if (i == type || !q->swap_file)
1855 - if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
1856 + if (p->swap_file->f_dentry->d_inode->i_mapping
1858 + q->swap_file->f_dentry->d_inode->i_mapping)
1862 @@ -1083,17 +1199,27 @@
1869 - blkdev_put(bdev, BDEV_SWAP);
1870 + if (p->method->ops->release)
1871 + p->method->ops->release(p->swap_file, p->data);
1873 + p->method->use_count --;
1876 + swap_list_unlock();
1880 + swap_file = p->swap_file;
1881 + p->swap_file = NULL;
1882 + swap_list_unlock();
1883 + filp_close(swap_file, NULL);
1888 swap_map = p->swap_map;
1889 - nd.mnt = p->swap_vfsmnt;
1890 - nd.dentry = p->swap_file;
1891 - p->swap_device = 0;
1892 - p->swap_file = NULL;
1893 - p->swap_vfsmnt = NULL;
1896 if (!(swap_flags & SWAP_FLAG_PREFER))
1897 @@ -1101,7 +1227,7 @@
1901 - path_release(&nd);
1905 free_page((long) swap_header);
1906 @@ -1217,8 +1343,8 @@
1908 * Prior swap_duplicate protects against swap device deletion.
1910 -void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
1911 - kdev_t *dev, struct inode **swapf)
1912 +struct swap_method *get_swaphandle_info(swp_entry_t entry,
1913 + unsigned long *offset, void **data)
1916 struct swap_info_struct *p;
1917 @@ -1226,32 +1352,26 @@
1918 type = SWP_TYPE(entry);
1919 if (type >= nr_swapfiles) {
1920 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
1925 p = &swap_info[type];
1926 *offset = SWP_OFFSET(entry);
1927 if (*offset >= p->max && *offset != 0) {
1928 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
1932 if (p->swap_map && !p->swap_map[*offset]) {
1933 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
1937 if (!(p->flags & SWP_USED)) {
1938 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
1943 - if (p->swap_device) {
1944 - *dev = p->swap_device;
1945 - } else if (p->swap_file) {
1946 - *swapf = p->swap_file->d_inode;
1948 - printk(KERN_ERR "rw_swap_page: no swap file or device\n");
1956 diff -Nurb src/linux/linux.orig/net/Config.in src/linux/linux/net/Config.in
1957 --- src/linux/linux.orig/net/Config.in 2003-07-04 04:12:29.000000000 -0400
1958 +++ src/linux/linux/net/Config.in 2004-05-31 02:18:03.000000000 -0400
1961 bool 'Socket Filtering' CONFIG_FILTER
1962 tristate 'Unix domain sockets' CONFIG_UNIX
1963 +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
1964 + bool 'Swapping via network sockets (EXPERIMENTAL)' CONFIG_NETSWAP
1966 bool 'TCP/IP networking' CONFIG_INET
1967 if [ "$CONFIG_INET" = "y" ]; then
1968 source net/ipv4/Config.in
1969 diff -Nurb src/linux/linux.orig/net/Makefile src/linux/linux/net/Makefile
1970 --- src/linux/linux.orig/net/Makefile 2003-07-04 04:12:29.000000000 -0400
1971 +++ src/linux/linux/net/Makefile 2004-05-31 02:18:03.000000000 -0400
1973 ifeq ($(CONFIG_NET),y)
1974 obj-$(CONFIG_MODULES) += netsyms.o
1975 obj-$(CONFIG_SYSCTL) += sysctl_net.o
1976 +obj-$(CONFIG_NETSWAP) += netswapping.o
1979 include $(TOPDIR)/Rules.make
1980 diff -Nurb src/linux/linux.orig/net/core/sock.c src/linux/linux/net/core/sock.c
1981 --- src/linux/linux.orig/net/core/sock.c 2003-10-14 04:09:32.000000000 -0400
1982 +++ src/linux/linux/net/core/sock.c 2004-05-31 02:18:03.000000000 -0400
1983 @@ -402,6 +402,21 @@
1987 +#ifdef CONFIG_NETSWAP
1990 + if (!sk->swapping) {
1991 + netswap_sock_count ++;
1994 + } else if (sk->swapping > 0) {
1996 + if (!sk->swapping) {
1997 + netswap_sock_count --;
2002 /* We implement the SO_SNDLOWAT etc to
2003 not be settable (1003.1g 5.3) */
2005 @@ -552,6 +567,12 @@
2009 +#ifdef CONFIG_NETSWAP
2011 + v.val = sk->swapping;
2015 /* Dubious BSD thing... Probably nobody even uses it, but
2016 * the UNIX standard wants it for whatever reason... -DaveM
2018 diff -Nurb src/linux/linux.orig/net/ipv4/tcp_ipv4.c src/linux/linux/net/ipv4/tcp_ipv4.c
2019 --- src/linux/linux.orig/net/ipv4/tcp_ipv4.c 2003-10-14 04:09:33.000000000 -0400
2020 +++ src/linux/linux/net/ipv4/tcp_ipv4.c 2004-05-31 02:18:03.000000000 -0400
2021 @@ -1657,6 +1657,12 @@
2022 if (filter && sk_filter(skb, filter))
2024 #endif /* CONFIG_FILTER */
2025 +#ifdef CONFIG_NETSWAP
2026 + /* tcp doesn't use sock_queue_rcv_skb() ... */
2027 + /* netswap_low_memory() is a macro defined in net/netswapping.h */
2028 + if (netswap_low_memory(sk, skb))
2030 +#endif /* CONFIG_NETSWAP */
2032 IP_INC_STATS_BH(IpInDelivers);
2034 diff -Nurb src/linux/linux.orig/net/ipv6/tcp_ipv6.c src/linux/linux/net/ipv6/tcp_ipv6.c
2035 --- src/linux/linux.orig/net/ipv6/tcp_ipv6.c 2003-10-14 04:09:34.000000000 -0400
2036 +++ src/linux/linux/net/ipv6/tcp_ipv6.c 2004-05-31 02:18:03.000000000 -0400
2037 @@ -1424,6 +1424,12 @@
2038 if (filter && sk_filter(skb, filter))
2040 #endif /* CONFIG_FILTER */
2041 +#ifdef CONFIG_NETSWAP
2042 + /* tcp doesn't use sock_queue_rcv_skb() ... */
2043 + /* an inline function defined in net/netswapping.h */
2044 + if (netswap_low_memory(sk, skb))
2046 +#endif /* CONFIG_NETSWAP */
2049 * socket locking is here for SMP purposes as backlog rcv
2050 diff -Nurb src/linux/linux.orig/net/netswapping.c src/linux/linux/net/netswapping.c
2051 --- src/linux/linux.orig/net/netswapping.c 1969-12-31 19:00:00.000000000 -0500
2052 +++ src/linux/linux/net/netswapping.c 2004-05-31 02:18:03.000000000 -0400
2055 + * linux/net/netswapping.c
2057 + * Support paging over network connections (inet only)
2059 + * (c) 2000 Claus-Justus Heine <heine@instmath.rwth-aachen.de>
2062 +#include <linux/slab.h>
2063 +#include <linux/swap.h>
2064 +#include <linux/swapctl.h>
2065 +#include <linux/skbuff.h>
2066 +#include <linux/module.h>
2067 +#include <linux/sysctl.h>
2068 +#include <linux/init.h>
2069 +#include <net/netswapping.h>
2070 +#include <net/sock.h>
2071 +#include <asm/uaccess.h>
2073 +unsigned int netswap_dropped; /* statistics */
2074 +unsigned int netswap_free_pages_min;
2075 +int netswap_sock_count; /* how many sockets have swapping option set */
2077 +#ifdef CONFIG_SYSCTL
2079 +static ctl_table netswap_table[] = {
2080 + {NET_SWAP_DROPPED, "dropped",
2081 + &netswap_dropped, sizeof(int), 0644, NULL, &proc_dointvec },
2082 + {NET_SWAP_DROP_THRESHOLD, "threshold",
2083 + &netswap_free_pages_min, sizeof(int), 0644, NULL, &proc_dointvec },
2084 + {NET_SWAP_SOCK_COUNT, "sock_count",
2085 + &netswap_sock_count, sizeof(int), 0444, NULL, &proc_dointvec },
2089 +static struct ctl_table_header *netswap_sysctl_header;
2091 +static ctl_table netswap_net_table[] = {
2092 + {CTL_NETSWAP, "swapping", NULL, 0, 0555, netswap_table},
2096 +static ctl_table netswap_root_table[] = {
2097 + {CTL_NET, "net", NULL, 0, 0555, netswap_net_table},
2103 +int __init netswap_init(void)
2105 + /* drop packets when below this threshold */
2106 + netswap_free_pages_min = 32 /* freepages.min */;
2107 +#ifdef CONFIG_SYSCTL
2108 + netswap_sysctl_header = register_sysctl_table(netswap_root_table, 0);
2113 +void __exit netswap_exit(void)
2115 +#ifdef CONFIG_SYSCTL
2116 + unregister_sysctl_table(netswap_sysctl_header);
2120 +/* linux/init.h -- VERY nice :-)
2122 + * On the other hand, we have no control over the order the initcalls
2123 + * are performed ...
2125 + * Actually, we are not compiled as module ...
2128 +module_init(netswap_init)
2129 +module_exit(netswap_exit)
2130 diff -Nurb src/linux/linux.orig/net/netsyms.c src/linux/linux/net/netsyms.c
2131 --- src/linux/linux.orig/net/netsyms.c 2004-05-31 02:02:49.000000000 -0400
2132 +++ src/linux/linux/net/netsyms.c 2004-05-31 02:18:03.000000000 -0400
2133 @@ -601,4 +601,10 @@
2134 EXPORT_SYMBOL(wireless_send_event);
2135 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
2137 +#ifdef CONFIG_NETSWAP
2138 +EXPORT_SYMBOL(netswap_sock_count);
2139 +EXPORT_SYMBOL(netswap_free_pages_min);
2140 +EXPORT_SYMBOL(netswap_dropped);
2143 #endif /* CONFIG_NET */
2144 diff -Nurb src/linux/linux.orig/net/packet/af_packet.c src/linux/linux/net/packet/af_packet.c
2145 --- src/linux/linux.orig/net/packet/af_packet.c 2003-10-14 04:09:35.000000000 -0400
2146 +++ src/linux/linux/net/packet/af_packet.c 2004-05-31 02:18:03.000000000 -0400
2147 @@ -449,6 +449,12 @@
2150 #endif /* CONFIG_FILTER */
2151 +#ifdef CONFIG_NETSWAP
2152 + /* packet doesn't use sock_queue_rcv_skb() ... */
2153 + /* netswap_low_memory() is a macro defined in net/netswapping.h */
2154 + if (netswap_low_memory(sk, skb))
2155 + goto drop_n_restore;
2156 +#endif /* CONFIG_NETSWAP */
2158 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
2161 po->stats.tp_drops++;
2162 spin_unlock(&sk->receive_queue.lock);
2164 -#ifdef CONFIG_FILTER
2165 +#if defined(CONFIG_FILTER) || defined(CONFIG_NETSWAP)
2168 if (skb_head != skb->data && skb_shared(skb)) {
2169 @@ -557,6 +563,12 @@
2173 +#ifdef CONFIG_NETSWAP
2174 + /* packet doesn't use sock_queue_rcv_skb() ... */
2175 + /* netswap_low_memory() is a macro defined in net/netswapping.h */
2176 + if (netswap_low_memory(sk, skb))
2177 + goto drop_n_restore;
2178 +#endif /* CONFIG_NETSWAP */
2180 if (sk->type == SOCK_DGRAM) {
2181 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
2182 diff -Nurb src/linux/linux.orig/net/sunrpc/sched.c src/linux/linux/net/sunrpc/sched.c
2183 --- src/linux/linux.orig/net/sunrpc/sched.c 2003-07-04 04:12:33.000000000 -0400
2184 +++ src/linux/linux/net/sunrpc/sched.c 2004-05-31 02:18:03.000000000 -0400
2187 static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
2189 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2191 * This is the last-ditch buffer for NFS swap requests
2193 -static u32 swap_buffer[PAGE_SIZE >> 2];
2194 +static u32 swap_buffer[2*PAGE_SIZE >> 2];
2195 static long swap_buffer_used;
2200 clear_bit(1, &swap_buffer_used);
2205 * Disable the timer for a given RPC task. Should be called with
2207 __rpc_execute(struct rpc_task *task)
2210 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
2212 dprintk("RPC: %4d rpc_execute flgs %x\n",
2213 task->tk_pid, task->tk_flags);
2214 @@ -510,6 +513,13 @@
2218 + if (task->tk_flags & RPC_TASK_SWAPPER) {
2219 + if (!(current->flags & PF_MEMALLOC)) { /* parenthesized: '!' binds tighter than '&' */
2220 + dprintk("__rpc_execute: Setting PF_MEMALLOC\n");
2222 + current->flags |= PF_MEMALLOC;
2229 rpc_set_sleeping(task);
2230 if (RPC_IS_ASYNC(task)) {
2231 spin_unlock_bh(&rpc_queue_lock);
2237 spin_unlock_bh(&rpc_queue_lock);
2238 @@ -563,7 +574,12 @@
2239 /* sync task: sleep here */
2240 dprintk("RPC: %4d sync task going to sleep\n",
2242 - if (current->pid == rpciod_pid)
2243 + /* it's ok to wait for rpciod when swapping,
2244 + * because this means it needed memory and is
2245 + * doing the swap-out itself.
2247 + if (current->pid == rpciod_pid &&
2248 + !(task->tk_flags & RPC_TASK_SWAPPER))
2249 printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
2251 __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
2252 @@ -608,6 +624,10 @@
2253 /* Release all resources associated with the task */
2254 rpc_release_task(task);
2257 + if (!alloc_flag) {
2258 + current->flags &= ~PF_MEMALLOC;
2263 @@ -699,10 +719,16 @@
2267 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
2270 - if (flags & RPC_TASK_SWAPPER)
2271 + if (flags & RPC_TASK_SWAPPER) {
2273 - else if (flags & RPC_TASK_ASYNC)
2274 + if (!(current->flags & PF_MEMALLOC)) {
2275 + dprintk("rpc_allocate: Setting PF_MEMALLOC\n");
2277 + current->flags |= PF_MEMALLOC;
2278 + } else if (flags & RPC_TASK_ASYNC)
2282 @@ -710,29 +736,44 @@
2284 if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
2285 dprintk("RPC: allocated buffer %p\n", buffer);
2290 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2291 if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
2292 && rpc_lock_swapbuf()) {
2293 dprintk("RPC: used last-ditch swap buffer\n");
2294 - return swap_buffer;
2295 + ret = swap_buffer;
2299 + if (flags & RPC_TASK_ASYNC) {
2303 - if (flags & RPC_TASK_ASYNC)
2306 } while (!signalled());
2310 + if (!alloc_flag) {
2311 + current->flags &= ~PF_MEMALLOC;
2317 rpc_free(void *buffer)
2319 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2320 if (buffer != swap_buffer) {
2324 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2326 rpc_unlock_swapbuf();
2327 + printk("RPC: Released swap buffer\n");
2332 diff -Nurb src/linux/linux.orig/net/sunrpc/xprt.c src/linux/linux/net/sunrpc/xprt.c
2333 --- src/linux/linux.orig/net/sunrpc/xprt.c 2003-07-04 04:12:33.000000000 -0400
2334 +++ src/linux/linux/net/sunrpc/xprt.c 2004-05-31 02:18:03.000000000 -0400
2336 __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
2338 if (!xprt->snd_task) {
2339 - if (xprt->nocong || __xprt_get_cong(xprt, task))
2340 + if (__xprt_get_cong(xprt, task))
2341 xprt->snd_task = task;
2343 if (xprt->snd_task != task) {
2348 - if (xprt->nocong || __xprt_get_cong(xprt, task))
2349 + if (__xprt_get_cong(xprt, task))
2350 xprt->snd_task = task;
2355 struct rpc_rqst *req = task->tk_rqstp;
2357 + if (xprt->nocong || RPC_IS_SWAPPER(task))
2362 dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",