diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/Makefile linux-2.4.13-tune/fs/nfs/Makefile --- linux-2.4.13-ping/fs/nfs/Makefile Fri Dec 29 23:07:23 2000 +++ linux-2.4.13-tune/fs/nfs/Makefile Thu Oct 25 11:55:26 2001 @@ -9,8 +9,8 @@ O_TARGET := nfs.o -obj-y := inode.o file.o read.o write.o dir.o symlink.o proc.o \ - nfs2xdr.o flushd.o unlink.o +obj-y := dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o proc.o \ + read.o symlink.o unlink.o write.o obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/flushd.c linux-2.4.13-tune/fs/nfs/flushd.c --- linux-2.4.13-ping/fs/nfs/flushd.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.13-tune/fs/nfs/flushd.c Thu Nov 1 13:39:56 2001 @@ -38,9 +38,9 @@ #include #include +#include #include #include -#include /* * Various constants @@ -111,13 +111,10 @@ dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", cache, cache->task); - while (cache->task || cache->inodes) { - if (!cache->task) { - nfs_reqlist_init(server); - } else { - cache->task->tk_status = -ENOMEM; - rpc_wake_up_task(cache->task); - } + while (cache->task) { + rpc_exit(cache->task, 0); + rpc_wake_up_task(cache->task); + interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ); } out: @@ -150,133 +147,47 @@ } } -void nfs_wake_flushd() -{ - rpc_wake_up_status(&flushd_queue, -ENOMEM); -} - -static void inode_append_flushd(struct inode *inode) -{ - struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); - struct inode **q; - - if (NFS_FLAGS(inode) & NFS_INO_FLUSH) - goto out; - inode->u.nfs_i.hash_next = NULL; - - q = &cache->inodes; - while (*q) - q = &(*q)->u.nfs_i.hash_next; - *q = inode; - - /* Note: we increase the inode i_count in order to prevent - * it from disappearing when on the flush list - */ - NFS_FLAGS(inode) |= NFS_INO_FLUSH; - atomic_inc(&inode->i_count); -out:; -} - -/* Protect me using the BKL */ -void inode_remove_flushd(struct inode *inode) -{ - struct 
nfs_reqlist *cache = NFS_REQUESTLIST(inode); - struct inode **q; - - if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH)) - return; - - q = &cache->inodes; - while (*q && *q != inode) - q = &(*q)->u.nfs_i.hash_next; - if (*q) { - *q = inode->u.nfs_i.hash_next; - NFS_FLAGS(inode) &= ~NFS_INO_FLUSH; - iput(inode); - } -} - -void inode_schedule_scan(struct inode *inode, unsigned long time) -{ - struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); - struct rpc_task *task; - unsigned long mintimeout; - - lock_kernel(); - if (time_after(NFS_NEXTSCAN(inode), time)) - NFS_NEXTSCAN(inode) = time; - mintimeout = jiffies + 1 * HZ; - if (time_before(mintimeout, NFS_NEXTSCAN(inode))) - mintimeout = NFS_NEXTSCAN(inode); - inode_append_flushd(inode); - - task = cache->task; - if (!task) { - nfs_reqlist_init(NFS_SERVER(inode)); - } else { - if (time_after(cache->runat, mintimeout)) - rpc_wake_up_task(task); - } - unlock_kernel(); -} - - +#define NFS_FLUSHD_TIMEOUT (30*HZ) static void nfs_flushd(struct rpc_task *task) { struct nfs_server *server; struct nfs_reqlist *cache; - struct inode *inode, *next; - unsigned long delay = jiffies + NFS_WRITEBACK_LOCKDELAY; - int flush = (task->tk_status == -ENOMEM); + LIST_HEAD(head); dprintk("NFS: %4d flushd starting\n", task->tk_pid); server = (struct nfs_server *) task->tk_calldata; cache = server->rw_requests; - next = cache->inodes; - cache->inodes = NULL; - - while ((inode = next) != NULL) { - next = next->u.nfs_i.hash_next; - inode->u.nfs_i.hash_next = NULL; - NFS_FLAGS(inode) &= ~NFS_INO_FLUSH; - - if (flush) { - nfs_pagein_inode(inode, 0, 0); - nfs_sync_file(inode, NULL, 0, 0, FLUSH_AGING); - } else if (time_after(jiffies, NFS_NEXTSCAN(inode))) { - NFS_NEXTSCAN(inode) = jiffies + NFS_WRITEBACK_LOCKDELAY; - nfs_pagein_timeout(inode); - nfs_flush_timeout(inode, FLUSH_AGING); -#ifdef CONFIG_NFS_V3 - nfs_commit_timeout(inode, FLUSH_AGING); -#endif + for(;;) { + spin_lock(&nfs_wreq_lock); + if (nfs_scan_lru_dirty_timeout(server, &head)) { + 
spin_unlock(&nfs_wreq_lock); + nfs_flush_list(&head, server->wpages, FLUSH_AGING); + continue; } - - if (nfs_have_writebacks(inode) || nfs_have_read(inode)) { - inode_append_flushd(inode); - if (time_after(delay, NFS_NEXTSCAN(inode))) - delay = NFS_NEXTSCAN(inode); + if (nfs_scan_lru_read_timeout(server, &head)) { + spin_unlock(&nfs_wreq_lock); + nfs_pagein_list(&head, server->rpages); + continue; } - iput(inode); +#ifdef CONFIG_NFS_V3 + if (nfs_scan_lru_commit_timeout(server, &head)) { + spin_unlock(&nfs_wreq_lock); + nfs_commit_list(&head, FLUSH_AGING); + continue; + } +#endif + spin_unlock(&nfs_wreq_lock); + break; } dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid); - if (time_after(jiffies + 1 * HZ, delay)) - delay = 1 * HZ; - else - delay = delay - jiffies; - task->tk_status = 0; - task->tk_action = nfs_flushd; - task->tk_timeout = delay; - cache->runat = jiffies + task->tk_timeout; - - if (!atomic_read(&cache->nr_requests) && !cache->inodes) { - cache->task = NULL; - task->tk_action = NULL; - } else + if (task->tk_action) { + task->tk_timeout = NFS_FLUSHD_TIMEOUT; + cache->runat = jiffies + task->tk_timeout; rpc_sleep_on(&flushd_queue, task, NULL, NULL); + } } static void diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/inode.c linux-2.4.13-tune/fs/nfs/inode.c --- linux-2.4.13-ping/fs/nfs/inode.c Thu Oct 25 11:54:35 2001 +++ linux-2.4.13-tune/fs/nfs/inode.c Thu Nov 1 13:39:56 2001 @@ -324,6 +324,10 @@ if (!server->hostname) goto out_unlock; strcpy(server->hostname, data->hostname); + INIT_LIST_HEAD(&server->lru_read); + INIT_LIST_HEAD(&server->lru_dirty); + INIT_LIST_HEAD(&server->lru_commit); + INIT_LIST_HEAD(&server->lru_busy); nfsv3_try_again: /* Check NFS protocol revision and initialize RPC op vector @@ -1078,6 +1082,8 @@ extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); extern int nfs_destroy_readpagecache(void); +extern int nfs_init_writepagecache(void); +extern int nfs_destroy_writepagecache(void); /* 
* Initialize NFS @@ -1094,6 +1100,10 @@ if (err) return err; + err = nfs_init_writepagecache(); + if (err) + return err; + #ifdef CONFIG_PROC_FS rpc_proc_register(&nfs_rpcstat); #endif @@ -1102,6 +1112,7 @@ static void __exit exit_nfs_fs(void) { + nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_nfspagecache(); #ifdef CONFIG_PROC_FS @@ -1113,6 +1124,7 @@ EXPORT_NO_SYMBOLS; /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch "); +MODULE_LICENSE("GPL"); module_init(init_nfs_fs) module_exit(exit_nfs_fs) diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/pagelist.c linux-2.4.13-tune/fs/nfs/pagelist.c --- linux-2.4.13-ping/fs/nfs/pagelist.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.13-tune/fs/nfs/pagelist.c Thu Nov 1 13:39:56 2001 @@ -0,0 +1,498 @@ +/* + * linux/fs/nfs/pagelist.c + * + * A set of helper functions for managing NFS read and write requests. + * The main purpose of these routines is to provide support for the + * coalescing of several requests into a single RPC call. + * + * Copyright 2000, 2001 (c) Trond Myklebust + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFS_PARANOIA 1 + +/* + * Spinlock + */ +spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED; + +static kmem_cache_t *nfs_page_cachep; + +static inline struct nfs_page * +nfs_page_alloc(void) +{ + struct nfs_page *p; + p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->wb_hash); + INIT_LIST_HEAD(&p->wb_list); + INIT_LIST_HEAD(&p->wb_lru); + init_waitqueue_head(&p->wb_wait); + } + return p; +} + +static inline void +nfs_page_free(struct nfs_page *p) +{ + kmem_cache_free(nfs_page_cachep, p); +} + +static int nfs_try_to_free_pages(struct nfs_server *); + +/** + * nfs_create_request - Create an NFS read/write request. 
+ * @file: file that owns this request + * @inode: inode to which the request is attached + * @page: page to write + * @offset: starting offset within the page for the write + * @count: number of bytes to read/write + * + * The page must be locked by the caller. This makes sure we never + * create two different requests for the same page, and avoids + * a possible deadlock when we reach the hard limit on the number + * of dirty pages. + * User should ensure it is safe to sleep in this function. + */ +struct nfs_page * +nfs_create_request(struct file *file, struct inode *inode, + struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct nfs_page *req; + + /* Deal with hard limits. */ + for (;;) { + /* Prevent races by incrementing *before* we test */ + atomic_inc(&cache->nr_requests); + + /* If we haven't reached the local hard limit yet, + * try to allocate the request struct */ + if (atomic_read(&cache->nr_requests) <= MAX_REQUEST_HARD) { + req = nfs_page_alloc(); + if (req != NULL) + break; + } + + atomic_dec(&cache->nr_requests); + + /* Try to free up at least one request in order to stay + * below the hard limit + */ + if (nfs_try_to_free_pages(server)) + continue; + if (signalled() && (server->flags & NFS_MOUNT_INTR)) + return ERR_PTR(-ERESTARTSYS); + current->policy = SCHED_YIELD; + schedule(); + } + + /* Initialize the request struct. Initially, we assume a + * long write-back delay. This will be adjusted in + * update_nfs_request below if the region is not locked. 
*/ + req->wb_page = page; + page_cache_get(page); + req->wb_offset = offset; + req->wb_bytes = count; + + /* If we have a struct file, use its cached credentials */ + if (file) { + req->wb_file = file; + get_file(file); + req->wb_cred = nfs_file_cred(file); + } + req->wb_inode = inode; + req->wb_count = 1; + + return req; +} + + +/** + * nfs_release_request - Release the count on an NFS read/write request + * @req: request to release + * + * Release all resources associated with a write request after it + * has been committed to stable storage + * + * Note: Should never be called with the spinlock held! + */ +void +nfs_release_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_inode; + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + + spin_lock(&nfs_wreq_lock); + if (--req->wb_count) { + spin_unlock(&nfs_wreq_lock); + return; + } + __nfs_del_lru(req); + spin_unlock(&nfs_wreq_lock); + atomic_dec(&cache->nr_requests); + +#ifdef NFS_PARANOIA + if (!list_empty(&req->wb_list)) + BUG(); + if (!list_empty(&req->wb_hash)) + BUG(); + if (NFS_WBACK_BUSY(req)) + BUG(); + if (atomic_read(&cache->nr_requests) < 0) + BUG(); +#endif + + /* Release struct file or cached credential */ + if (req->wb_file) + fput(req->wb_file); + else if (req->wb_cred) + put_rpccred(req->wb_cred); + page_cache_release(req->wb_page); + nfs_page_free(req); +} + +/** + * nfs_list_add_request - Insert a request into a sorted list + * @req: request + * @head: head of list into which to insert the request. + * + * Note that the wb_list is sorted by page index in order to facilitate + * coalescing of requests. + * We use an insertion sort that is optimized for the case of appended + * writes. 
+ */ +void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) +{ + struct list_head *pos; + unsigned long pg_idx = page_index(req->wb_page); + +#ifdef NFS_PARANOIA + if (!list_empty(&req->wb_list)) { + printk(KERN_ERR "NFS: Add to list failed!\n"); + BUG(); + } +#endif + for (pos = head->prev; pos != head; pos = pos->prev) { + struct nfs_page *p = nfs_list_entry(pos); + if (page_index(p->wb_page) < pg_idx) + break; + } + list_add(&req->wb_list, pos); + req->wb_list_head = head; +} + +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by signals only if mounted with intr flag. + * The user is responsible for holding a count on the request. + */ +int +nfs_wait_on_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_inode; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + + if (!NFS_WBACK_BUSY(req)) + return 0; + return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); +} + +/** + * nfs_coalesce_requests - Split coalesced requests out from a list. + * @head: source list + * @dst: destination list + * @nmax: maximum number of requests to coalesce + * + * Moves a maximum of 'nmax' elements from one list to another. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that they originated from the same file. 
+ */ +int +nfs_coalesce_requests(struct list_head *head, struct list_head *dst, + unsigned int nmax) +{ + struct nfs_page *req = NULL; + unsigned int npages = 0; + + while (!list_empty(head)) { + struct nfs_page *prev = req; + + req = nfs_list_entry(head->next); + if (prev) { + if (req->wb_file != prev->wb_file) + break; + if (page_index(req->wb_page) != page_index(prev->wb_page)+1) + break; + + if (req->wb_offset != 0) + break; + } + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + npages++; + if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE) + break; + if (npages >= nmax) + break; + } + return npages; +} + +/* + * nfs_scan_forward - Coalesce more requests + * @req: First request to add + * @dst: destination list + * @nmax: maximum number of requests to coalesce + * + * Tries to coalesce more requests by traversing the request's wb_list. + * Moves the resulting list into dst. Requests are guaranteed to be + * contiguous, and to originate from the same file. + */ +static int +nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax) +{ + struct nfs_server *server = NFS_SERVER(req->wb_inode); + struct list_head *pos, *head = req->wb_list_head; + struct file *file = req->wb_file; + unsigned long idx = page_index(req->wb_page) + 1; + int npages = 0; + + for (pos = req->wb_list.next; nfs_lock_request(req); pos = pos->next) { + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + __nfs_del_lru(req); + __nfs_add_lru(&server->lru_busy, req); + npages++; + if (npages == nmax) + break; + if (pos == head) + break; + if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE) + break; + req = nfs_list_entry(pos); + if (page_index(req->wb_page) != idx++) + break; + if (req->wb_offset != 0) + break; + if (req->wb_file != file) + break; + } + return npages; +} + +/** + * nfs_scan_lru - Scan one of the least recently used lists + * @head: One of the NFS superblock lru lists + * @dst: Destination list + * @nmax: maximum number of requests
to coalesce + * + * Scans one of the NFS superblock lru lists for up to nmax requests + * and returns them on a list. The requests are all guaranteed to be + * contiguous, originating from the same inode and the same file. + */ +int +nfs_scan_lru(struct list_head *head, struct list_head *dst, int nmax) +{ + struct list_head *pos; + struct nfs_page *req; + int npages = 0; + + list_for_each(pos, head) { + req = nfs_lru_entry(pos); + npages = nfs_scan_forward(req, dst, nmax); + if (npages) + break; + } + return npages; +} + +/** + * nfs_scan_lru_timeout - Scan one of the superblock lru lists for timed out requests + * @head: One of the NFS superblock lru lists + * @dst: Destination list + * @nmax: maximum number of requests to coalesce + * + * Scans one of the NFS superblock lru lists for up to nmax requests + * and returns them on a list. The requests are all guaranteed to be + * contiguous, originating from the same inode and the same file. + * The first request on the destination list will be timed out, the + * others are not guaranteed to be so. + */ +int +nfs_scan_lru_timeout(struct list_head *head, struct list_head *dst, int nmax) +{ + struct list_head *pos; + struct nfs_page *req; + int npages = 0; + + list_for_each(pos, head) { + req = nfs_lru_entry(pos); + if (time_after(req->wb_timeout, jiffies)) + break; + npages = nfs_scan_forward(req, dst, nmax); + if (npages) + break; + } + return npages; +} + +/** + * nfs_scan_list - Scan a list for matching requests + * @head: One of the NFS inode request lists + * @dst: Destination list + * @file: if set, ensure we match requests from this file + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the nfs_wreq_lock when calling this function + */ +int +nfs_scan_list(struct list_head *head, struct list_head *dst, + struct file *file, + unsigned long idx_start, unsigned int npages) +{ + struct list_head *pos, *tmp; + struct nfs_page *req; + unsigned long idx_end; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + list_for_each_safe(pos, tmp, head) { + unsigned long pg_idx; + + req = nfs_list_entry(pos); + + if (file && req->wb_file != file) + continue; + + pg_idx = page_index(req->wb_page); + if (pg_idx < idx_start) + continue; + if (pg_idx > idx_end) + break; + + if (!nfs_lock_request(req)) + continue; + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + __nfs_del_lru(req); + __nfs_add_lru(&NFS_SERVER(req->wb_inode)->lru_busy, req); + res++; + } + return res; +} + +/* + * nfs_try_to_free_pages - Free up NFS read/write requests + * @server: The NFS superblock + * + * This function attempts to flush out NFS reads and writes in order + * to keep the hard limit on the total number of pending requests + * on a given NFS partition. + * Note: we first try to commit unstable writes, then flush out pending + * reads, then finally the dirty pages. + * The assumption is that this reflects the ordering from the fastest + * to the slowest method for reclaiming requests. + */ +static int +nfs_try_to_free_pages(struct nfs_server *server) +{ + LIST_HEAD(head); + struct nfs_page *req = NULL; + int nreq; + + for (;;) { + if (req) { + int status = nfs_wait_on_request(req); + nfs_release_request(req); + if (status) + break; + req = NULL; + } + nreq = atomic_read(&server->rw_requests->nr_requests); + if (nreq < MAX_REQUEST_HARD) + return 1; + spin_lock(&nfs_wreq_lock); + /* Are there any busy RPC calls that might free up requests? 
*/ + if (!list_empty(&server->lru_busy)) { + req = nfs_lru_entry(server->lru_busy.next); + req->wb_count++; + __nfs_del_lru(req); + spin_unlock(&nfs_wreq_lock); + continue; + } + +#ifdef CONFIG_NFS_V3 + /* Let's try to free up some completed NFSv3 unstable writes */ + nfs_scan_lru_commit(server, &head); + if (!list_empty(&head)) { + spin_unlock(&nfs_wreq_lock); + nfs_commit_list(&head, 0); + continue; + } +#endif + /* OK, so we try to free up some pending readaheads */ + nfs_scan_lru_read(server, &head); + if (!list_empty(&head)) { + spin_unlock(&nfs_wreq_lock); + nfs_pagein_list(&head, server->rpages); + continue; + } + /* Last resort: we try to flush out single requests */ + nfs_scan_lru_dirty(server, &head); + if (!list_empty(&head)) { + spin_unlock(&nfs_wreq_lock); + nfs_flush_list(&head, server->wpages, FLUSH_STABLE); + continue; + } + spin_unlock(&nfs_wreq_lock); + break; + } + /* We failed to free up requests */ + return 0; +} + +int nfs_init_nfspagecache(void) +{ + nfs_page_cachep = kmem_cache_create("nfs_page", + sizeof(struct nfs_page), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_page_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_nfspagecache(void) +{ + if (kmem_cache_destroy(nfs_page_cachep)) + printk(KERN_INFO "nfs_page: not all structures were freed\n"); +} + diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/read.c linux-2.4.13-tune/fs/nfs/read.c --- linux-2.4.13-ping/fs/nfs/read.c Thu Oct 11 17:12:52 2001 +++ linux-2.4.13-tune/fs/nfs/read.c Thu Nov 1 13:39:56 2001 @@ -148,34 +148,6 @@ return result; } -static inline struct nfs_page * -_nfs_find_read(struct inode *inode, struct page *page) -{ - struct list_head *head, *next; - - head = &inode->u.nfs_i.read; - next = head->next; - while (next != head) { - struct nfs_page *req = nfs_list_entry(next); - next = next->next; - if (page_index(req->wb_page) != page_index(page)) - continue; - req->wb_count++; - return req; - } - return NULL; -} - -static struct nfs_page * 
-nfs_find_read(struct inode *inode, struct page *page) -{ - struct nfs_page *req; - spin_lock(&nfs_wreq_lock); - req = _nfs_find_read(inode, page); - spin_unlock(&nfs_wreq_lock); - return req; -} - /* * Add a request to the inode's asynchronous read list. */ @@ -185,61 +157,26 @@ struct inode *inode = req->wb_inode; spin_lock(&nfs_wreq_lock); - if (list_empty(&req->wb_list)) { - nfs_list_add_request(req, &inode->u.nfs_i.read); - inode->u.nfs_i.nread++; - } + nfs_list_add_request(req, &inode->u.nfs_i.read); + inode->u.nfs_i.nread++; + __nfs_add_lru(&NFS_SERVER(inode)->lru_read, req); spin_unlock(&nfs_wreq_lock); - /* - * NB: the call to inode_schedule_scan() must lie outside the - * spinlock since it can run flushd(). - */ - inode_schedule_scan(inode, req->wb_timeout); } static int nfs_readpage_async(struct file *file, struct inode *inode, struct page *page) { - struct nfs_page *req, *new = NULL; - int result; - - for (;;) { - result = 0; - if (Page_Uptodate(page)) - break; + struct nfs_page *new; - req = nfs_find_read(inode, page); - if (req) { - if (page != req->wb_page) { - nfs_release_request(req); - nfs_pagein_inode(inode, page_index(page), 0); - continue; - } - nfs_release_request(req); - break; - } - - if (new) { - nfs_lock_request(new); - new->wb_timeout = jiffies + NFS_READ_DELAY; - nfs_mark_request_read(new); - nfs_unlock_request(new); - new = NULL; - break; - } - - result = -ENOMEM; - new = nfs_create_request(file, inode, page, 0, PAGE_CACHE_SIZE); - if (!new) - break; - } + new = nfs_create_request(file, inode, page, 0, PAGE_CACHE_SIZE); + if (IS_ERR(new)) + return PTR_ERR(new); + nfs_mark_request_read(new); if (inode->u.nfs_i.nread >= NFS_SERVER(inode)->rpages || page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) nfs_pagein_inode(inode, 0, 0); - if (new) - nfs_release_request(new); - return result; + return 0; } /* @@ -345,14 +282,13 @@ return -ENOMEM; } -static int -nfs_pagein_list(struct inode *inode, struct list_head 
*head) +int +nfs_pagein_list(struct list_head *head, int rpages) { LIST_HEAD(one_request); struct nfs_page *req; int error = 0; - unsigned int pages = 0, - rpages = NFS_SERVER(inode)->rpages; + unsigned int pages = 0; while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, rpages); @@ -368,29 +304,70 @@ return error; } -static int -nfs_scan_read_timeout(struct inode *inode, struct list_head *dst) +/** + * nfs_scan_lru_read_timeout - Scan LRU list for timed out read requests + * @server: NFS superblock data + * @dst: destination list + * + * Moves a maximum of 'rpages' timed out requests from the NFS read LRU list. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that they originated from the same file. + */ +int +nfs_scan_lru_read_timeout(struct nfs_server *server, struct list_head *dst) { - int pages; - spin_lock(&nfs_wreq_lock); - pages = nfs_scan_list_timeout(&inode->u.nfs_i.read, dst, inode); - inode->u.nfs_i.nread -= pages; - if ((inode->u.nfs_i.nread == 0) != list_empty(&inode->u.nfs_i.read)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n"); - spin_unlock(&nfs_wreq_lock); - return pages; + struct inode *inode; + int npages; + + npages = nfs_scan_lru_timeout(&server->lru_read, dst, server->rpages); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + inode->u.nfs_i.nread -= npages; + } + return npages; } +/** + * nfs_scan_lru_read - Scan LRU list for read requests + * @server: NFS superblock data + * @dst: destination list + * + * Moves a maximum of 'rpages' requests from the NFS read LRU list. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that they originated from the same file. 
+ */ +int +nfs_scan_lru_read(struct nfs_server *server, struct list_head *dst) +{ + struct inode *inode; + int npages; + + npages = nfs_scan_lru(&server->lru_read, dst, server->rpages); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + inode->u.nfs_i.nread -= npages; + } + return npages; +} + +/* + * nfs_scan_read - Scan an inode for read requests + * @inode: NFS inode to scan + * @dst: destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan + * + * Moves requests from the inode's read list. + * The requests are *not* checked to ensure that they form a contiguous set. + */ static int nfs_scan_read(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { int res; - spin_lock(&nfs_wreq_lock); res = nfs_scan_list(&inode->u.nfs_i.read, dst, NULL, idx_start, npages); inode->u.nfs_i.nread -= res; if ((inode->u.nfs_i.nread == 0) != list_empty(&inode->u.nfs_i.read)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n"); - spin_unlock(&nfs_wreq_lock); return res; } @@ -401,28 +378,16 @@ int res, error = 0; + spin_lock(&nfs_wreq_lock); res = nfs_scan_read(inode, &head, idx_start, npages); + spin_unlock(&nfs_wreq_lock); if (res) - error = nfs_pagein_list(inode, &head); + error = nfs_pagein_list(&head, NFS_SERVER(inode)->rpages); if (error < 0) return error; return res; } -int nfs_pagein_timeout(struct inode *inode) -{ - LIST_HEAD(head); - int pages, - error = 0; - - pages = nfs_scan_read_timeout(inode, &head); - if (pages) - error = nfs_pagein_list(inode, &head); - if (error < 0) - return error; - return pages; -} - /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). 
@@ -457,8 +422,8 @@ (long long)NFS_FILEID(req->wb_inode), req->wb_bytes, (long long)(page_offset(page) + req->wb_offset)); - nfs_unlock_request(req); nfs_release_request(req); + nfs_unlock_request(req); } } @@ -500,11 +465,10 @@ if (error) goto out_error; - error = -1; - if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) + if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { error = nfs_readpage_async(file, inode, page); - if (error >= 0) goto out; + } error = nfs_readpage_sync(file, inode, page); if (error < 0 && IS_SWAPFILE(inode)) diff -u --recursive --new-file linux-2.4.13-ping/fs/nfs/write.c linux-2.4.13-tune/fs/nfs/write.c --- linux-2.4.13-ping/fs/nfs/write.c Thu Oct 11 17:12:52 2001 +++ linux-2.4.13-tune/fs/nfs/write.c Thu Nov 1 13:39:56 2001 @@ -61,16 +61,9 @@ #include #include -#define NFS_PARANOIA 1 #define NFSDBG_FACILITY NFSDBG_PAGECACHE /* - * Spinlock - */ -spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED; -static atomic_t nfs_nr_requests = ATOMIC_INIT(0); - -/* * Local structures * * This is the struct where the WRITE/COMMIT arguments go. 
@@ -103,27 +96,8 @@ # define IS_SWAPFILE(inode) (0) #endif -static kmem_cache_t *nfs_page_cachep; static kmem_cache_t *nfs_wdata_cachep; -static __inline__ struct nfs_page *nfs_page_alloc(void) -{ - struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->wb_hash); - INIT_LIST_HEAD(&p->wb_list); - init_waitqueue_head(&p->wb_wait); - } - return p; -} - -static __inline__ void nfs_page_free(struct nfs_page *p) -{ - kmem_cache_free(nfs_page_cachep, p); -} - static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) { struct nfs_write_data *p; @@ -248,7 +222,6 @@ if (!req->wb_cred) req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred); nfs_unlock_request(req); - nfs_release_request(req); nfs_strategy(inode); out: return status; @@ -367,11 +340,11 @@ inode->u.nfs_i.npages--; if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n"); - if (list_empty(&inode->u.nfs_i.writeback)) + if (list_empty(&inode->u.nfs_i.writeback)) { + spin_unlock(&nfs_wreq_lock); iput(inode); - if (!nfs_have_writebacks(inode) && !nfs_have_read(inode)) - inode_remove_flushd(inode); - spin_unlock(&nfs_wreq_lock); + } else + spin_unlock(&nfs_wreq_lock); nfs_release_request(req); } @@ -408,44 +381,6 @@ } /* - * Insert a write request into a sorted list - */ -void nfs_list_add_request(struct nfs_page *req, struct list_head *head) -{ - struct list_head *prev; - - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - return; - } - if (!NFS_WBACK_BUSY(req)) - printk(KERN_ERR "NFS: unlocked request attempted added to list!\n"); - prev = head->prev; - while (prev != head) { - struct nfs_page *p = nfs_list_entry(prev); - if (page_index(p->wb_page) < page_index(req->wb_page)) - break; - prev = prev->prev; - } - list_add(&req->wb_list, prev); - req->wb_list_head = head; -} - -/* - * Insert a write request into an inode - 
*/ -void nfs_list_remove_request(struct nfs_page *req) -{ - if (list_empty(&req->wb_list)) - return; - if (!NFS_WBACK_BUSY(req)) - printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n"); - list_del(&req->wb_list); - INIT_LIST_HEAD(&req->wb_list); - req->wb_list_head = NULL; -} - -/* * Add a request to the inode's dirty list. */ static inline void @@ -454,16 +389,11 @@ struct inode *inode = req->wb_inode; spin_lock(&nfs_wreq_lock); - if (list_empty(&req->wb_list)) { - nfs_list_add_request(req, &inode->u.nfs_i.dirty); - inode->u.nfs_i.ndirty++; - } + nfs_list_add_request(req, &inode->u.nfs_i.dirty); + inode->u.nfs_i.ndirty++; + __nfs_del_lru(req); + __nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req); spin_unlock(&nfs_wreq_lock); - /* - * NB: the call to inode_schedule_scan() must lie outside the - * spinlock since it can run flushd(). - */ - inode_schedule_scan(inode, req->wb_timeout); mark_inode_dirty(inode); } @@ -487,165 +417,16 @@ struct inode *inode = req->wb_inode; spin_lock(&nfs_wreq_lock); - if (list_empty(&req->wb_list)) { - nfs_list_add_request(req, &inode->u.nfs_i.commit); - inode->u.nfs_i.ncommit++; - } + nfs_list_add_request(req, &inode->u.nfs_i.commit); + inode->u.nfs_i.ncommit++; + __nfs_del_lru(req); + __nfs_add_lru(&NFS_SERVER(inode)->lru_commit, req); spin_unlock(&nfs_wreq_lock); - /* - * NB: the call to inode_schedule_scan() must lie outside the - * spinlock since it can run flushd(). - */ - inode_schedule_scan(inode, req->wb_timeout); mark_inode_dirty(inode); } #endif /* - * Create a write request. - * Page must be locked by the caller. This makes sure we never create - * two different requests for the same page, and avoids possible deadlock - * when we reach the hard limit on the number of dirty pages. - * It should be safe to sleep here. 
- */ -struct nfs_page *nfs_create_request(struct file *file, struct inode *inode, - struct page *page, - unsigned int offset, unsigned int count) -{ - struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); - struct nfs_page *req = NULL; - long timeout; - - /* Deal with hard/soft limits. - */ - do { - /* If we're over the global soft limit, wake up all requests */ - if (atomic_read(&nfs_nr_requests) >= MAX_REQUEST_SOFT) { - dprintk("NFS: hit soft limit (%d requests)\n", - atomic_read(&nfs_nr_requests)); - if (!cache->task) - nfs_reqlist_init(NFS_SERVER(inode)); - nfs_wake_flushd(); - } - - /* If we haven't reached the local hard limit yet, - * try to allocate the request struct */ - if (atomic_read(&cache->nr_requests) < MAX_REQUEST_HARD) { - req = nfs_page_alloc(); - if (req != NULL) - break; - } - - /* We're over the hard limit. Wait for better times */ - dprintk("NFS: create_request sleeping (total %d pid %d)\n", - atomic_read(&cache->nr_requests), current->pid); - - timeout = 1 * HZ; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) { - interruptible_sleep_on_timeout(&cache->request_wait, - timeout); - if (signalled()) - break; - } else - sleep_on_timeout(&cache->request_wait, timeout); - - dprintk("NFS: create_request waking up (tot %d pid %d)\n", - atomic_read(&cache->nr_requests), current->pid); - } while (!req); - if (!req) - return NULL; - - /* Initialize the request struct. Initially, we assume a - * long write-back delay. This will be adjusted in - * update_nfs_request below if the region is not locked. 
*/ - req->wb_page = page; - page_cache_get(page); - req->wb_offset = offset; - req->wb_bytes = count; - req->wb_file = file; - - /* If we have a struct file, use its cached credentials */ - if (file) { - get_file(file); - req->wb_cred = nfs_file_cred(file); - } - req->wb_inode = inode; - req->wb_count = 1; - - /* register request's existence */ - atomic_inc(&cache->nr_requests); - atomic_inc(&nfs_nr_requests); - return req; -} - - -/* - * Release all resources associated with a write request after it - * has been committed to stable storage - * - * Note: Should always be called with the spinlock held! - */ -void -nfs_release_request(struct nfs_page *req) -{ - struct inode *inode = req->wb_inode; - struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); - struct page *page = req->wb_page; - - spin_lock(&nfs_wreq_lock); - if (--req->wb_count) { - spin_unlock(&nfs_wreq_lock); - return; - } - spin_unlock(&nfs_wreq_lock); - - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Request released while still on a list!\n"); - nfs_list_remove_request(req); - } - if (!list_empty(&req->wb_hash)) { - printk(KERN_ERR "NFS: Request released while still hashed!\n"); - nfs_inode_remove_request(req); - } - if (NFS_WBACK_BUSY(req)) - printk(KERN_ERR "NFS: Request released while still locked!\n"); - - /* Release struct file or cached credential */ - if (req->wb_file) - fput(req->wb_file); - else if (req->wb_cred) - put_rpccred(req->wb_cred); - page_cache_release(page); - nfs_page_free(req); - /* wake up anyone waiting to allocate a request */ - atomic_dec(&cache->nr_requests); - atomic_dec(&nfs_nr_requests); - wake_up(&cache->request_wait); -#ifdef NFS_PARANOIA - if (atomic_read(&cache->nr_requests) < 0) - BUG(); - if (atomic_read(&nfs_nr_requests) < 0) - BUG(); -#endif -} - -/* - * Wait for a request to complete. - * - * Interruptible by signals only if mounted with intr flag. 
- */ -static int -nfs_wait_on_request(struct nfs_page *req) -{ - struct inode *inode = req->wb_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); -} - -/* * Wait for a request to complete. * * Interruptible by signals only if mounted with intr flag. @@ -695,155 +476,152 @@ return res; } -/* - * Scan cluster for dirty pages and send as many of them to the - * server as possible. +/** + * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests + * @server: NFS superblock data + * @dst: destination list + * + * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that they originated from the same file. */ -int nfs_scan_list_timeout(struct list_head *head, struct list_head *dst, struct inode *inode) +int +nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst) { - struct list_head *p; - struct nfs_page *req; - int pages = 0; + struct inode *inode; + int npages; - p = head->next; - while (p != head) { - req = nfs_list_entry(p); - p = p->next; - if (time_after(req->wb_timeout, jiffies)) { - if (time_after(NFS_NEXTSCAN(inode), req->wb_timeout)) - NFS_NEXTSCAN(inode) = req->wb_timeout; - continue; - } - if (!nfs_lock_request(req)) - continue; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - pages++; + npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + inode->u.nfs_i.ndirty -= npages; } - return pages; -} - -static int -nfs_scan_dirty_timeout(struct inode *inode, struct list_head *dst) -{ - int pages; - spin_lock(&nfs_wreq_lock); - pages = nfs_scan_list_timeout(&inode->u.nfs_i.dirty, dst, inode); - inode->u.nfs_i.ndirty -= pages; - if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty)) - printk(KERN_ERR 
"NFS: desynchronized value of nfs_i.ndirty.\n"); - spin_unlock(&nfs_wreq_lock); - return pages; + return npages; } -#ifdef CONFIG_NFS_V3 -static int -nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst) -{ - int pages; - spin_lock(&nfs_wreq_lock); - pages = nfs_scan_list_timeout(&inode->u.nfs_i.commit, dst, inode); - inode->u.nfs_i.ncommit -= pages; - if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); - spin_unlock(&nfs_wreq_lock); - return pages; -} -#endif - -int nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) +/** + * nfs_scan_lru_dirty - Scan LRU list for dirty requests + * @server: NFS superblock data + * @dst: destination list + * + * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list. + * The elements are checked to ensure that they form a contiguous set + * of pages, and that they originated from the same file. 
+ */ +int +nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst) { - struct list_head *p; - struct nfs_page *req; - unsigned long idx_end; - int res; - - res = 0; - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - p = src->next; - while (p != src) { - unsigned long pg_idx; - - req = nfs_list_entry(p); - p = p->next; - - if (file && req->wb_file != file) - continue; - - pg_idx = page_index(req->wb_page); - if (pg_idx < idx_start || pg_idx > idx_end) - continue; + struct inode *inode; + int npages; - if (!nfs_lock_request(req)) - continue; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; + npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + inode->u.nfs_i.ndirty -= npages; } - return res; + return npages; } +/* + * nfs_scan_dirty - Scan an inode for dirty requests + * @inode: NFS inode to scan + * @dst: destination list + * @file: if set, ensure we match requests from this file + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's dirty page list. + * The requests are *not* checked to ensure that they form a contiguous set. 
+ */ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) { int res; - spin_lock(&nfs_wreq_lock); res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, idx_start, npages); inode->u.nfs_i.ndirty -= res; if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); - spin_unlock(&nfs_wreq_lock); return res; } #ifdef CONFIG_NFS_V3 +/** + * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests + * @server: NFS superblock data + * @dst: destination list + * + * Finds the first timed out request in the NFS commit LRU list and moves it + * to the list dst. If such an element is found, we move all other commit + * requests that apply to the same inode. + * The assumption is that doing everything in a single commit-to-disk is + * the cheaper alternative. + */ +int +nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst) +{ + struct inode *inode; + int npages; + + npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0); + inode->u.nfs_i.ncommit -= npages; + } + return npages; +} + + +/** + * nfs_scan_lru_commit - Scan LRU list for commit requests + * @server: NFS superblock data + * @dst: destination list + * + * Finds the first request in the NFS commit LRU list and moves it + * to the list dst. If such an element is found, we move all other commit + * requests that apply to the same inode. + * The assumption is that doing everything in a single commit-to-disk is + * the cheaper alternative.
+ */ +int +nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst) +{ + struct inode *inode; + int npages; + + npages = nfs_scan_lru(&server->lru_commit, dst, 1); + if (npages) { + inode = nfs_list_entry(dst->next)->wb_inode; + npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0); + inode->u.nfs_i.ncommit -= npages; + } + return npages; +} + +/* + * nfs_scan_commit - Scan an inode for commit requests + * @inode: NFS inode to scan + * @dst: destination list + * @file: if set, ensure we collect requests from this file only. + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's 'commit' request list. + * The requests are *not* checked to ensure that they form a contiguous set. + */ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) { int res; - spin_lock(&nfs_wreq_lock); res = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, idx_start, npages); inode->u.nfs_i.ncommit -= res; if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); - spin_unlock(&nfs_wreq_lock); return res; } #endif -int nfs_coalesce_requests(struct list_head *src, struct list_head *dst, unsigned int maxpages) -{ - struct nfs_page *req = NULL; - unsigned int pages = 0; - - while (!list_empty(src)) { - struct nfs_page *prev = req; - - req = nfs_list_entry(src->next); - if (prev) { - if (req->wb_file != prev->wb_file) - break; - if (page_index(req->wb_page) != page_index(prev->wb_page)+1) - break; - - if (req->wb_offset != 0) - break; - } - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - pages++; - if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE) - break; - if (pages >= maxpages) - break; - } - return pages; -} - /* * Try to update any existing write request, or create one if there is none. 
* In order to match, the request's credentials must match those of @@ -867,7 +645,7 @@ spin_lock(&nfs_wreq_lock); req = _nfs_find_request(inode, page); if (req) { - if (!nfs_lock_request(req)) { + if (!nfs_lock_request_dontget(req)) { int error; spin_unlock(&nfs_wreq_lock); error = nfs_wait_on_request(req); @@ -882,24 +660,18 @@ break; } - req = new; - if (req) { - nfs_lock_request(req); - nfs_inode_add_request(inode, req); + if (new) { + nfs_lock_request_dontget(new); + nfs_inode_add_request(inode, new); spin_unlock(&nfs_wreq_lock); - nfs_mark_request_dirty(req); - break; + nfs_mark_request_dirty(new); + return new; } spin_unlock(&nfs_wreq_lock); - /* - * If we're over the soft limit, flush out old requests - */ - if (inode->u.nfs_i.npages >= MAX_REQUEST_SOFT) - nfs_wb_file(inode, file); new = nfs_create_request(file, inode, page, offset, bytes); - if (!new) - return ERR_PTR(-ENOMEM); + if (IS_ERR(new)) + return new; /* If the region is locked, adjust the timeout */ if (region_locked(inode, new)) new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY; @@ -919,7 +691,6 @@ || !nfs_dirty_request(req) || offset > rqend || end < req->wb_offset) { nfs_unlock_request(req); - nfs_release_request(req); return ERR_PTR(-EBUSY); } @@ -967,23 +738,12 @@ if (NFS_PROTO(inode)->version == 2) { if (dirty >= NFS_STRATEGY_PAGES * wpages) nfs_flush_file(inode, NULL, 0, 0, 0); - } else { - if (dirty >= wpages) - nfs_flush_file(inode, NULL, 0, 0, 0); - if (inode->u.nfs_i.ncommit > NFS_STRATEGY_PAGES * wpages && - atomic_read(&nfs_nr_requests) > MAX_REQUEST_SOFT) - nfs_commit_file(inode, NULL, 0, 0, 0); - } + } else if (dirty >= wpages) + nfs_flush_file(inode, NULL, 0, 0, 0); #else if (dirty >= NFS_STRATEGY_PAGES * wpages) nfs_flush_file(inode, NULL, 0, 0, 0); #endif - /* - * If we're running out of free requests, flush out everything - * in order to reduce memory useage... 
- */ - if (inode->u.nfs_i.npages > MAX_REQUEST_SOFT) - nfs_wb_all(inode); } int @@ -1052,16 +812,16 @@ goto done; status = 0; - nfs_unlock_request(req); /* If we wrote past the end of the page. * Call the strategy routine so it can send out a bunch * of requests. */ if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) { SetPageUptodate(page); + nfs_unlock_request(req); nfs_strategy(inode); - } - nfs_release_request(req); + } else + nfs_unlock_request(req); done: dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)inode->i_size); @@ -1123,6 +883,7 @@ struct rpc_task *task; struct rpc_message msg; int flags, + nfsvers = NFS_PROTO(inode)->version, async = !(how & FLUSH_SYNC), stable = (how & FLUSH_STABLE); sigset_t oldset; @@ -1138,7 +899,9 @@ /* Set up the argument struct */ nfs_write_rpcsetup(head, data); - if (stable) { + if (nfsvers < 3) + data->args.stable = NFS_FILE_SYNC; + else if (stable) { if (!inode->u.nfs_i.ncommit) data->args.stable = NFS_FILE_SYNC; else @@ -1153,7 +916,7 @@ task->tk_release = nfs_writedata_release; #ifdef CONFIG_NFS_V3 - msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE; + msg.rpc_proc = (nfsvers == 3) ? 
NFS3PROC_WRITE : NFSPROC_WRITE; #else msg.rpc_proc = NFSPROC_WRITE; #endif @@ -1184,14 +947,13 @@ return -ENOMEM; } -static int -nfs_flush_list(struct inode *inode, struct list_head *head, int how) +int +nfs_flush_list(struct list_head *head, int wpages, int how) { LIST_HEAD(one_request); struct nfs_page *req; int error = 0; - unsigned int pages = 0, - wpages = NFS_SERVER(inode)->wpages; + unsigned int pages = 0; while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, wpages); @@ -1294,7 +1056,7 @@ } #ifdef CONFIG_NFS_V3 - if (resp->verf->committed != NFS_UNSTABLE) { + if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) { nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; @@ -1355,7 +1117,7 @@ /* * Commit dirty pages */ -static int +int nfs_commit_list(struct list_head *head, int how) { struct rpc_message msg; @@ -1464,28 +1226,16 @@ int res, error = 0; + spin_lock(&nfs_wreq_lock); res = nfs_scan_dirty(inode, &head, file, idx_start, npages); + spin_unlock(&nfs_wreq_lock); if (res) - error = nfs_flush_list(inode, &head, how); + error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); if (error < 0) return error; return res; } -int nfs_flush_timeout(struct inode *inode, int how) -{ - LIST_HEAD(head); - int pages, - error = 0; - - pages = nfs_scan_dirty_timeout(inode, &head); - if (pages) - error = nfs_flush_list(inode, &head, how); - if (error < 0) - return error; - return pages; -} - #ifdef CONFIG_NFS_V3 int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages, int how) @@ -1494,29 +1244,15 @@ int res, error = 0; + spin_lock(&nfs_wreq_lock); res = nfs_scan_commit(inode, &head, file, idx_start, npages); + spin_unlock(&nfs_wreq_lock); if (res) error = nfs_commit_list(&head, how); if (error < 0) return error; return res; } - -int nfs_commit_timeout(struct inode *inode, int how) -{ - LIST_HEAD(head); - int pages, - error = 0; - - pages = 
nfs_scan_commit_timeout(inode, &head); - if (pages) { - pages += nfs_scan_commit(inode, &head, NULL, 0, 0); - error = nfs_commit_list(&head, how); - } - if (error < 0) - return error; - return pages; -} #endif int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start, @@ -1545,15 +1281,8 @@ return error; } -int nfs_init_nfspagecache(void) +int nfs_init_writepagecache(void) { - nfs_page_cachep = kmem_cache_create("nfs_page", - sizeof(struct nfs_page), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (nfs_page_cachep == NULL) - return -ENOMEM; - nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), 0, SLAB_HWCACHE_ALIGN, @@ -1564,10 +1293,8 @@ return 0; } -void nfs_destroy_nfspagecache(void) +void nfs_destroy_writepagecache(void) { - if (kmem_cache_destroy(nfs_page_cachep)) - printk(KERN_INFO "nfs_page: not all structures were freed\n"); if (kmem_cache_destroy(nfs_wdata_cachep)) printk(KERN_INFO "nfs_write_data: not all structures were freed\n"); } diff -u --recursive --new-file linux-2.4.13-ping/include/linux/nfs_flushd.h linux-2.4.13-tune/include/linux/nfs_flushd.h --- linux-2.4.13-ping/include/linux/nfs_flushd.h Wed Oct 24 07:00:40 2001 +++ linux-2.4.13-tune/include/linux/nfs_flushd.h Thu Oct 25 13:48:36 2001 @@ -9,11 +9,9 @@ /* * Counters of total number and pending number of requests. - * When the total number of requests exceeds the soft limit, we start - * flushing out requests. If it exceeds the hard limit, we stall until - * it drops again. + * When the total number of requests exceeds the hard limit, we stall + * until it drops again. 
*/ -#define MAX_REQUEST_SOFT 192 #define MAX_REQUEST_HARD 256 /* @@ -36,8 +34,6 @@ extern void nfs_reqlist_free(struct nfs_server *); extern int nfs_reqlist_init(struct nfs_server *); extern void nfs_reqlist_exit(struct nfs_server *); -extern void inode_schedule_scan(struct inode *, unsigned long); -extern void inode_remove_flushd(struct inode *); extern void nfs_wake_flushd(void); /* diff -u --recursive --new-file linux-2.4.13-ping/include/linux/nfs_fs.h linux-2.4.13-tune/include/linux/nfs_fs.h --- linux-2.4.13-ping/include/linux/nfs_fs.h Thu Oct 25 11:54:35 2001 +++ linux-2.4.13-tune/include/linux/nfs_fs.h Thu Oct 25 11:55:26 2001 @@ -215,10 +215,14 @@ */ extern int nfs_sync_file(struct inode *, struct file *, unsigned long, unsigned int, int); extern int nfs_flush_file(struct inode *, struct file *, unsigned long, unsigned int, int); -extern int nfs_flush_timeout(struct inode *, int); +extern int nfs_flush_list(struct list_head *, int, int); +extern int nfs_scan_lru_dirty(struct nfs_server *, struct list_head *); +extern int nfs_scan_lru_dirty_timeout(struct nfs_server *, struct list_head *); #ifdef CONFIG_NFS_V3 extern int nfs_commit_file(struct inode *, struct file *, unsigned long, unsigned int, int); -extern int nfs_commit_timeout(struct inode *, int); +extern int nfs_commit_list(struct list_head *, int); +extern int nfs_scan_lru_commit(struct nfs_server *, struct list_head *); +extern int nfs_scan_lru_commit_timeout(struct nfs_server *, struct list_head *); #endif static inline int @@ -265,7 +269,9 @@ */ extern int nfs_readpage(struct file *, struct page *); extern int nfs_pagein_inode(struct inode *, unsigned long, unsigned int); -extern int nfs_pagein_timeout(struct inode *); +extern int nfs_pagein_list(struct list_head *, int); +extern int nfs_scan_lru_read(struct nfs_server *, struct list_head *); +extern int nfs_scan_lru_read_timeout(struct nfs_server *, struct list_head *); /* * linux/fs/mount_clnt.c diff -u --recursive --new-file 
linux-2.4.13-ping/include/linux/nfs_fs_sb.h linux-2.4.13-tune/include/linux/nfs_fs_sb.h --- linux-2.4.13-ping/include/linux/nfs_fs_sb.h Thu Oct 25 11:54:35 2001 +++ linux-2.4.13-tune/include/linux/nfs_fs_sb.h Thu Oct 25 13:29:22 2001 @@ -1,6 +1,8 @@ #ifndef _NFS_FS_SB #define _NFS_FS_SB +#include + /* * NFS client parameters stored in the superblock. */ @@ -22,6 +24,10 @@ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_reqlist * rw_requests; /* async read/write requests */ + struct list_head lru_read, + lru_dirty, + lru_commit, + lru_busy; }; /* diff -u --recursive --new-file linux-2.4.13-ping/include/linux/nfs_page.h linux-2.4.13-tune/include/linux/nfs_page.h --- linux-2.4.13-ping/include/linux/nfs_page.h Wed Oct 24 07:00:45 2001 +++ linux-2.4.13-tune/include/linux/nfs_page.h Thu Nov 1 13:39:57 2001 @@ -23,6 +23,7 @@ struct nfs_page { struct list_head wb_hash, /* Inode */ + wb_lru, /* superblock lru list */ wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ struct file *wb_file; @@ -40,33 +41,39 @@ #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -extern struct nfs_page *nfs_create_request(struct file *file, - struct inode *inode, - struct page *page, - unsigned int offset, - unsigned int count); +extern struct nfs_page *nfs_create_request(struct file *, struct inode *, + struct page *, + unsigned int, unsigned int); extern void nfs_release_request(struct nfs_page *req); -extern void nfs_list_add_request(struct nfs_page *req, - struct list_head *head); -extern void nfs_list_remove_request(struct nfs_page *req); - -extern int nfs_scan_list_timeout(struct list_head *head, - struct list_head *dst, - struct inode *inode); -extern int nfs_scan_list(struct list_head *src, struct list_head *dst, - struct file *file, unsigned long idx_start, - unsigned int npages); -extern int nfs_coalesce_requests(struct list_head *src, struct list_head *dst, - unsigned int maxpages); +extern void 
nfs_list_add_request(struct nfs_page *, struct list_head *); + +extern int nfs_scan_lru(struct list_head *, struct list_head *, int); +extern int nfs_scan_lru_timeout(struct list_head *, struct list_head *, int); +extern int nfs_scan_list(struct list_head *, struct list_head *, + struct file *, unsigned long, unsigned int); +extern int nfs_coalesce_requests(struct list_head *, struct list_head *, + unsigned int); +extern int nfs_wait_on_request(struct nfs_page *); extern spinlock_t nfs_wreq_lock; /* + * Lock the page of an asynchronous request without incrementing the wb_count + */ +static inline int +nfs_lock_request_dontget(struct nfs_page *req) +{ + if (test_and_set_bit(PG_BUSY, &req->wb_flags)) + return 0; + return 1; +} + +/* * Lock the page of an asynchronous request */ -static __inline__ int +static inline int nfs_lock_request(struct nfs_page *req) { if (test_and_set_bit(PG_BUSY, &req->wb_flags)) @@ -75,7 +82,7 @@ return 1; } -static __inline__ void +static inline void nfs_unlock_request(struct nfs_page *req) { if (!NFS_WBACK_BUSY(req)) { @@ -86,20 +93,57 @@ clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); if (waitqueue_active(&req->wb_wait)) - wake_up(&req->wb_wait); + wake_up_all(&req->wb_wait); nfs_release_request(req); } -static __inline__ struct nfs_page * +/** + * nfs_list_remove_request - Remove a request from its wb_list + * @req: request + */ +static inline void +nfs_list_remove_request(struct nfs_page *req) +{ + if (list_empty(&req->wb_list)) + return; + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n"); + BUG(); + } + list_del_init(&req->wb_list); + req->wb_list_head = NULL; +} + +static inline struct nfs_page * nfs_list_entry(struct list_head *head) { return list_entry(head, struct nfs_page, wb_list); } -static __inline__ struct nfs_page * +static inline struct nfs_page * nfs_inode_wb_entry(struct list_head *head) { return list_entry(head, struct nfs_page, wb_hash); } +static 
inline void +__nfs_add_lru(struct list_head *head, struct nfs_page *req) +{ + list_add_tail(&req->wb_lru, head); +} + +static inline void +__nfs_del_lru(struct nfs_page *req) +{ + if (list_empty(&req->wb_lru)) + return; + list_del_init(&req->wb_lru); +} + +static inline struct nfs_page * +nfs_lru_entry(struct list_head *head) +{ + return list_entry(head, struct nfs_page, wb_lru); +} + #endif /* _LINUX_NFS_PAGE_H */