diff -u --recursive --new-file linux-2.4.15-pre9/fs/lockd/clntproc.c linux-2.4.15-jukebox/fs/lockd/clntproc.c --- linux-2.4.15-pre9/fs/lockd/clntproc.c Thu Oct 11 16:52:18 2001 +++ linux-2.4.15-jukebox/fs/lockd/clntproc.c Thu Nov 22 18:27:45 2001 @@ -569,11 +569,15 @@ printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status); die: + lock_kernel(); nlm_release_host(req->a_host); + unlock_kernel(); kfree(req); return; retry_rebind: + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); retry_unlock: rpc_restart_call(task); } @@ -650,12 +654,16 @@ } die: + lock_kernel(); nlm_release_host(req->a_host); + unlock_kernel(); kfree(req); return; retry_cancel: + lock_kernel(); nlm_rebind_host(req->a_host); + unlock_kernel(); rpc_restart_call(task); rpc_delay(task, 30 * HZ); } diff -u --recursive --new-file linux-2.4.15-pre9/fs/lockd/svc4proc.c linux-2.4.15-jukebox/fs/lockd/svc4proc.c --- linux-2.4.15-pre9/fs/lockd/svc4proc.c Mon Oct 1 22:45:47 2001 +++ linux-2.4.15-jukebox/fs/lockd/svc4proc.c Thu Nov 22 18:27:45 2001 @@ -17,6 +17,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT @@ -499,7 +500,9 @@ dprintk("lockd: %4d callback failed (errno = %d)\n", task->tk_pid, -task->tk_status); } + lock_kernel(); nlm_release_host(call->a_host); + unlock_kernel(); kfree(call); } diff -u --recursive --new-file linux-2.4.15-pre9/fs/lockd/svclock.c linux-2.4.15-jukebox/fs/lockd/svclock.c --- linux-2.4.15-pre9/fs/lockd/svclock.c Thu Oct 11 16:52:18 2001 +++ linux-2.4.15-jukebox/fs/lockd/svclock.c Thu Nov 22 18:27:45 2001 @@ -576,9 +576,10 @@ dprintk("lockd: GRANT_MSG RPC callback\n"); dprintk("callback: looking for cookie %x \n", *(unsigned int *)(call->a_args.cookie.data)); + lock_kernel(); if (!(block = nlmsvc_find_block(&call->a_args.cookie))) { dprintk("lockd: no block for cookie %x\n", *(u32 *)(call->a_args.cookie.data)); - return; + goto out; } /* Technically, we should down the file semaphore here. 
Since we @@ -599,6 +600,8 @@ block->b_incall = 0; nlm_release_host(call->a_host); + out: + unlock_kernel(); } /* diff -u --recursive --new-file linux-2.4.15-pre9/fs/lockd/svcproc.c linux-2.4.15-jukebox/fs/lockd/svcproc.c --- linux-2.4.15-pre9/fs/lockd/svcproc.c Thu Oct 11 16:52:18 2001 +++ linux-2.4.15-jukebox/fs/lockd/svcproc.c Thu Nov 22 18:27:45 2001 @@ -18,6 +18,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT @@ -527,7 +528,9 @@ dprintk("lockd: %4d callback failed (errno = %d)\n", task->tk_pid, -task->tk_status); } + lock_kernel(); nlm_release_host(call->a_host); + unlock_kernel(); kfree(call); } diff -u --recursive --new-file linux-2.4.15-pre9/fs/namei.c linux-2.4.15-jukebox/fs/namei.c --- linux-2.4.15-pre9/fs/namei.c Wed Oct 17 23:46:29 2001 +++ linux-2.4.15-jukebox/fs/namei.c Thu Nov 22 18:06:49 2001 @@ -454,7 +454,7 @@ while (*name=='/') name++; if (!*name) - goto return_base; + goto return_reval; inode = nd->dentry->d_inode; if (current->link_count) @@ -573,7 +573,7 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: - goto return_base; + goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); @@ -624,6 +624,17 @@ nd->last_type = LAST_DOT; else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; +return_reval: + /* + * We bypassed the ordinary revalidation routines, so + * NFS wants to check the cached inode for staleness. 
+ */ + inode = nd->dentry->d_inode; + if (inode && inode->i_op && inode->i_op->check_stale) { + err = inode->i_op->check_stale(inode); + if (err) + break; + } return_base: return 0; out_dput: diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/dir.c linux-2.4.15-jukebox/fs/nfs/dir.c --- linux-2.4.15-pre9/fs/nfs/dir.c Tue Jun 12 20:15:08 2001 +++ linux-2.4.15-jukebox/fs/nfs/dir.c Thu Nov 22 18:20:18 2001 @@ -34,8 +34,11 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static int nfs_cached_lookup(struct inode *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -47,6 +50,7 @@ struct inode *, struct dentry *); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, @@ -66,8 +70,28 @@ permission: nfs_permission, revalidate: nfs_revalidate, setattr: nfs_notify_change, + check_stale: nfs_check_stale, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 
0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -108,13 +132,15 @@ error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } SetPageUptodate(page); kunmap(page); /* Ensure consistent page alignment of the data. @@ -195,7 +221,6 @@ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); page = read_cache_page(&inode->i_data, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -247,6 +272,24 @@ return res; } +static unsigned int nfs_type2dtype[] = { + DT_UNKNOWN, + DT_REG, + DT_DIR, + DT_BLK, + DT_CHR, + DT_LNK, + DT_SOCK, + DT_UNKNOWN, + DT_FIFO +}; + +static inline +unsigned int nfs_type_to_d_type(enum nfs_ftype type) +{ + return nfs_type2dtype[type]; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... 
*/ @@ -263,11 +306,17 @@ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr->type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -334,7 +383,8 @@ /* Reset read descriptor so it searches the page cache from * the start upon the next call to readdir_search_pagecache() */ desc->page_index = 0; - memset(desc->entry, 0, sizeof(*desc->entry)); + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; out: dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); return status; @@ -353,9 +403,11 @@ nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) return res; @@ -366,12 +418,16 @@ * itself. 
*/ memset(desc, 0, sizeof(*desc)); - memset(&my_entry, 0, sizeof(my_entry)); - desc->file = filp; desc->target = filp->f_pos; - desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -401,6 +457,32 @@ return 0; } +static inline +void nfs_renew_verifier(struct inode *dir, struct dentry *dentry) +{ + u64 mtime = NFS_CACHE_MTIME(dir); + dentry->d_rtime_sec = mtime >> 32; + dentry->d_rtime_nsec = mtime & 0xffffffffUL; +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static inline +int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + u64 mtime; + if (IS_ROOT(dentry)) + return 1; + if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + return 0; + mtime = NFS_CACHE_MTIME(dir); + return (dentry->d_rtime_sec == (mtime >> 32)) && + (dentry->d_rtime_nsec == (mtime & 0xffffffffUL)); +} + /* * Whenever an NFS operation succeeds, we know that the dentry * is valid, so we update the revalidation timestamp. @@ -408,50 +490,34 @@ static inline void nfs_renew_times(struct dentry * dentry) { dentry->d_time = jiffies; + nfs_renew_verifier(dentry->d_parent->d_inode, dentry); } -static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags) +static inline +int nfs_lookup_verify_inode(struct inode *inode, int flags) { - struct inode *inode = dentry->d_inode; - unsigned long timeout = NFS_ATTRTIMEO(inode); - + struct nfs_server *server = NFS_SERVER(inode); /* - * If it's the last lookup in a series, we use a stricter - * cache consistency check by looking at the parent mtime. - * - * If it's been modified in the last hour, be really strict. 
- * (This still means that we can avoid doing unnecessary - * work on directories like /usr/share/bin etc which basically - * never change). + * If we're interested in close-to-open cache consistency, + * then we revalidate the inode upon lookup. */ - if (!(flags & LOOKUP_CONTINUE)) { - long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime; - - if (diff < 15*60) - timeout = 0; - } - - return time_after(jiffies,dentry->d_time + timeout); + if (!(server->flags & NFS_MOUNT_NOCTO) && !(flags & LOOKUP_CONTINUE)) + NFS_CACHEINV(inode); + return nfs_revalidate_inode(server, inode); } /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. * - * If mtime is close to present time, we revalidate - * more often. + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. */ -#define NFS_REVALIDATE_NEGATIVE (1 * HZ) -static inline int nfs_neg_need_reval(struct dentry *dentry) +static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry) { - struct inode *dir = dentry->d_parent->d_inode; - unsigned long timeout = NFS_ATTRTIMEO(dir); - long diff = CURRENT_TIME - dir->i_mtime; - - if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE) - timeout = NFS_REVALIDATE_NEGATIVE; - - return time_after(jiffies, dentry->d_time + timeout); + if (!nfs_check_verifier(dir, dentry)) + return 1; + return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); } /* @@ -462,9 +528,8 @@ * NOTE! The hit can be a negative hit too, don't assume * we have an inode! * - * If the dentry is older than the revalidation interval, - * we do a new lookup and verify that the dentry is still - * correct. + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. 
*/ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) { @@ -477,13 +542,9 @@ lock_kernel(); dir = dentry->d_parent->d_inode; inode = dentry->d_inode; - /* - * If we don't have an inode, let's look at the parent - * directory mtime to get a hint about how often we - * should validate things.. - */ + if (!inode) { - if (nfs_neg_need_reval(dentry)) + if (nfs_neg_need_reval(dir, dentry)) goto out_bad; goto out_valid; } @@ -494,48 +555,49 @@ goto out_bad; } - if (!nfs_dentry_force_reval(dentry, flags)) + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, flags)) + goto out_bad; goto out_valid; + } - if (IS_ROOT(dentry)) { - __nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if (nfs_lookup_verify_inode(inode, flags)) + goto out_bad; goto out_valid_renew; } - /* - * Do a new lookup and check the dentry attributes. - */ + if (NFS_STALE(inode)) + goto out_bad; + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; - - /* Inode number matches? */ - if (!(fattr.valid & NFS_ATTR_FATTR) || - NFS_FSID(inode) != fattr.fsid || - NFS_FILEID(inode) != fattr.fileid) + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) goto out_bad; - - /* Ok, remember that we successfully checked it.. */ - nfs_refresh_inode(inode, &fattr); - - if (nfs_inode_is_stale(inode, &fhandle, &fattr)) + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; out_valid_renew: nfs_renew_times(dentry); -out_valid: + out_valid: unlock_kernel(); return 1; -out_bad: - shrink_dcache_parent(dentry); - /* If we have submounts, don't unhash ! */ - if (have_submounts(dentry)) - goto out_valid; - d_drop(dentry); - /* Purge readdir caches. 
*/ - nfs_zap_caches(dir); - if (inode && S_ISDIR(inode->i_mode)) + out_bad: + NFS_CACHEINV(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ nfs_zap_caches(inode); + /* If we have submounts, don't unhash ! */ + if (have_submounts(dentry)) + goto out_valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); unlock_kernel(); return 0; } @@ -594,6 +656,20 @@ error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + error = -EACCES; + inode = nfs_fhget(dentry, &fhandle, &fattr); + if (inode) { + if (!(NFS_SERVER(dir)->flags & NFS_MOUNT_NOCTO)) + NFS_CACHEINV(inode); + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + goto out; + } + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); inode = NULL; if (error == -ENOENT) @@ -604,14 +680,85 @@ if (inode) { no_entry: d_add(dentry, inode); - nfs_renew_times(dentry); error = 0; } + nfs_renew_times(dentry); } out: return ERR_PTR(error); } +static inline +int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +{ + struct nfs_entry *entry = desc->entry; + int status; + + while((status = dir_decode(desc)) == 0) { + if (entry->len != dentry->d_name.len) + continue; + if (memcmp(entry->name, dentry->d_name.name, entry->len)) + continue; + if (!(entry->fattr->valid & NFS_ATTR_FATTR)) + continue; + break; + } + return status; +} + +/* + * Use the cached Readdirplus results in order to avoid a LOOKUP call + * whenever we believe that the parent directory has not changed. + * + * We assume that any file creation/rename changes the directory mtime. + * As this results in a page cache invalidation whenever it occurs, + * we don't require any other tests for cache coherency. 
+ */ +static +int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + nfs_readdir_descriptor_t desc; + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); + if (server->flags & NFS_MOUNT_NOAC) + return -ENOENT; + nfs_revalidate_inode(server, dir); + + entry.fh = fh; + entry.fattr = fattr; + + desc.decode = NFS_PROTO(dir)->decode_dirent; + desc.entry = &entry; + desc.page_index = 0; + desc.plus = 1; + + for(;(page = find_get_page(&dir->i_data, desc.page_index)); desc.page_index++) { + + res = -EIO; + if (Page_Uptodate(page)) { + desc.ptr = kmap(page); + res = find_dirent_name(&desc, page, dentry); + kunmap(page); + } + page_cache_release(page); + + if (res == 0) + goto out_found; + if (res != -EAGAIN) + break; + } + return -ENOENT; + out_found: + return 0; +} + /* * Code common to create, mkdir, and mknod. */ diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/file.c linux-2.4.15-jukebox/fs/nfs/file.c --- linux-2.4.15-pre9/fs/nfs/file.c Sun Sep 23 18:48:01 2001 +++ linux-2.4.15-jukebox/fs/nfs/file.c Thu Nov 22 18:27:46 2001 @@ -99,7 +99,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) *ppos); + lock_kernel(); result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + unlock_kernel(); if (!result) result = generic_file_read(file, buf, count, ppos); return result; @@ -115,7 +117,9 @@ dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + lock_kernel(); status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + unlock_kernel(); if (!status) status = generic_file_mmap(file, vma); return status; @@ -134,13 +138,11 @@ dfprintk(VFS, "nfs: fsync(%x/%ld)\n", inode->i_dev, inode->i_ino); - lock_kernel(); status = nfs_wb_file(inode, file); if (!status) { status = file->f_error; file->f_error = 0; } - unlock_kernel(); 
return status; } @@ -164,9 +166,7 @@ loff_t pos = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + to; struct inode *inode = page->mapping->host; - lock_kernel(); status = nfs_updatepage(file, page, offset, to-offset); - unlock_kernel(); /* most likely it's already done. CHECKME */ if (pos > inode->i_size) inode->i_size = pos; @@ -224,7 +224,9 @@ result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; + lock_kernel(); result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + unlock_kernel(); if (result) goto out; diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/flushd.c linux-2.4.15-jukebox/fs/nfs/flushd.c --- linux-2.4.15-pre9/fs/nfs/flushd.c Thu Nov 22 14:09:19 2001 +++ linux-2.4.15-jukebox/fs/nfs/flushd.c Thu Nov 22 18:27:46 2001 @@ -51,6 +51,19 @@ * This is the wait queue all cluster daemons sleep on */ static struct rpc_wait_queue flushd_queue = RPC_INIT_WAITQ("nfs_flushd"); +static spinlock_t nfs_flushd_lock = SPIN_LOCK_UNLOCKED; + +static inline void +nfs_lock_flushd(void) +{ + spin_lock(&nfs_flushd_lock); +} + +static inline void +nfs_unlock_flushd(void) +{ + spin_unlock(&nfs_flushd_lock); +} /* * Local function declarations. 
@@ -67,12 +80,11 @@ dprintk("NFS: writecache_init\n"); - lock_kernel(); - status = -ENOMEM; /* Create the RPC task */ if (!(task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC))) - goto out_unlock; + return -ENOMEM; + nfs_lock_flushd(); cache = server->rw_requests; status = 0; @@ -89,22 +101,21 @@ cache->auth = server->client->cl_auth; task->tk_action = nfs_flushd; task->tk_exit = nfs_flushd_exit; + nfs_unlock_flushd(); rpc_execute(task); - unlock_kernel(); return 0; out_unlock: - if (task) - rpc_release_task(task); - unlock_kernel(); - return status; + nfs_unlock_flushd(); + rpc_release_task(task); + return 0; } void nfs_reqlist_exit(struct nfs_server *server) { struct nfs_reqlist *cache; - lock_kernel(); + nfs_lock_flushd(); cache = server->rw_requests; if (!cache) goto out; @@ -114,11 +125,13 @@ while (cache->task) { rpc_exit(cache->task, 0); rpc_wake_up_task(cache->task); + nfs_unlock_flushd(); interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ); + nfs_lock_flushd(); } out: - unlock_kernel(); + nfs_unlock_flushd(); } int nfs_reqlist_alloc(struct nfs_server *server) @@ -183,11 +196,13 @@ } dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid); + nfs_lock_flushd(); if (task->tk_action) { task->tk_timeout = NFS_FLUSHD_TIMEOUT; cache->runat = jiffies + task->tk_timeout; rpc_sleep_on(&flushd_queue, task, NULL, NULL); } + nfs_unlock_flushd(); } static void @@ -196,10 +211,13 @@ struct nfs_server *server; struct nfs_reqlist *cache; server = (struct nfs_server *) task->tk_calldata; + + nfs_lock_flushd(); cache = server->rw_requests; if (cache->task == task) cache->task = NULL; wake_up(&cache->request_wait); + nfs_unlock_flushd(); } diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/inode.c linux-2.4.15-jukebox/fs/nfs/inode.c --- linux-2.4.15-pre9/fs/nfs/inode.c Thu Nov 22 14:09:19 2001 +++ linux-2.4.15-jukebox/fs/nfs/inode.c Thu Nov 22 20:51:55 2001 @@ -83,6 +83,9 @@ &nfs_rpcstat, }; +/* Spinlock to protect the NFS inode update */ +static 
spinlock_t nfs_inode_lock = SPIN_LOCK_UNLOCKED; + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -255,6 +258,69 @@ } /* + * Set up the NFS superblock private area using probed values + */ +static int +nfs_setup_superblock(struct super_block *sb, struct nfs_fh *rootfh) +{ + struct nfs_server *server = &sb->u.nfs_sb.s_server; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { &fattr, }; + struct nfs_pathconf pathinfo = { &fattr, }; + int maxlen, res; + + res = server->rpc_ops->fsinfo(server, rootfh, &fsinfo); + if (res < 0) + return res; + + /* Work out a lot of parameters */ + if (!server->rsize) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (!server->wsize) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ + if (!fsinfo.wtmult) + fsinfo.wtmult = 512; + sb->s_blocksize = nfs_block_bits(fsinfo.wtmult, &sb->s_blocksize_bits); + + if (server->rsize > fsinfo.rtmax) + server->rsize = fsinfo.rtmax; + if (server->wsize > fsinfo.wtmax) + server->wsize = fsinfo.wtmax; + + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->rpages > NFS_READ_MAXIOV) { + server->rpages = NFS_READ_MAXIOV; + server->rsize = server->rpages << PAGE_CACHE_SHIFT; + } + + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->wpages > NFS_WRITE_MAXIOV) { + server->wpages = NFS_WRITE_MAXIOV; + server->wsize = server->wpages << PAGE_CACHE_SHIFT; + } + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + maxlen = (server->rpc_ops->version == 2) ? 
NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; + if (!server->namelen) { + res = server->rpc_ops->pathconf(server, rootfh, &pathinfo); + if (!res) + server->namelen = pathinfo.name_max; + } + if (!server->namelen || server->namelen > maxlen) + server->namelen = maxlen; + + sb->s_maxbytes = fsinfo.maxfilesize; + return 0; +} + +/* * The way this works is that the mount process passes a structure * in the data argument which contains the server's IP address * and the root file handle obtained from the server's mount @@ -272,8 +338,7 @@ unsigned int authflavor; struct sockaddr_in srvaddr; struct rpc_timeout timeparms; - struct nfs_fsinfo fsinfo; - int tcp, version, maxlen; + int tcp, version; memset(&sb->u.nfs_sb, 0, sizeof(sb->u.nfs_sb)); if (!data) @@ -302,11 +367,11 @@ sb->s_magic = NFS_SUPER_MAGIC; sb->s_op = &nfs_sops; - sb->s_blocksize_bits = 0; - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); server = &sb->u.nfs_sb.s_server; - server->rsize = nfs_block_size(data->rsize, NULL); - server->wsize = nfs_block_size(data->wsize, NULL); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags & NFS_MOUNT_FLAGMASK; if (data->flags & NFS_MOUNT_NOAC) { @@ -336,6 +401,7 @@ #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -413,61 +479,11 @@ sb->s_root->d_op = &nfs_dentry_operations; /* Get some general file system info */ - if (server->rpc_ops->statfs(server, root, &fsinfo) >= 0) { - if (server->namelen == 0) - server->namelen = fsinfo.namelen; - } else { + if (nfs_setup_superblock(sb, root) < 0) { printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); goto out_no_root; } - /* Work out a lot of parameters */ - if (data->rsize == 0) - server->rsize = 
nfs_block_size(fsinfo.rtpref, NULL); - if (data->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ - if (!fsinfo.bsize) - fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult; - /* Also make sure we don't go below rsize/wsize since - * RPC calls are expensive */ - if (fsinfo.bsize < server->rsize) - fsinfo.bsize = server->rsize; - if (fsinfo.bsize < server->wsize) - fsinfo.bsize = server->wsize; - - if (data->bsize == 0) - sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits); - if (server->rsize > fsinfo.rtmax) - server->rsize = fsinfo.rtmax; - if (server->wsize > fsinfo.wtmax) - server->wsize = fsinfo.wtmax; - - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } - - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - maxlen = (version == 2) ? 
NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; - - if (server->namelen == 0 || server->namelen > maxlen) - server->namelen = maxlen; - - sb->s_maxbytes = fsinfo.maxfilesize; - /* Fire up the writeback cache */ if (nfs_reqlist_alloc(server) < 0) { printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n"); @@ -527,7 +543,8 @@ struct nfs_server *server = &sb->u.nfs_sb.s_server; unsigned char blockbits; unsigned long blockres; - struct nfs_fsinfo res; + struct nfs_fattr attr; + struct nfs_fsstat res = { &attr, }; int error; error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res); @@ -535,18 +552,15 @@ if (error < 0) goto out_err; - if (res.bsize == 0) - res.bsize = sb->s_blocksize; - buf->f_bsize = nfs_block_bits(res.bsize, &blockbits); + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; buf->f_bavail = (res.abytes + blockres) >> blockbits; buf->f_files = res.tfiles; buf->f_ffree = res.afiles; - if (res.namelen == 0 || res.namelen > server->namelen) - res.namelen = server->namelen; - buf->f_namelen = res.namelen; + buf->f_namelen = server->namelen; return 0; out_err: printk("nfs_statfs: statfs error = %d\n", -error); @@ -555,18 +569,30 @@ } /* + * Reset the read time on the local caches + */ +void +nfs_invalidate_caches(struct inode *inode) +{ + spin_lock(&nfs_inode_lock); + NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; + spin_unlock(&nfs_inode_lock); +} + +/* * Invalidate the local caches */ void nfs_zap_caches(struct inode *inode) { - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - invalidate_inode_pages(inode); + spin_lock(&nfs_inode_lock); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - NFS_CACHEINV(inode); + NFS_READTIME(inode) = 
jiffies - NFS_MAXATTRTIMEO(inode) - 1; + spin_unlock(&nfs_inode_lock); } /* @@ -582,50 +608,49 @@ nfs_zap_caches(inode); } +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + /* * Fill in inode information from the fattr. */ static void nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) { - /* - * Check whether the mode has been set, as we only want to - * do this once. (We don't allow inodes to change types.) + NFS_FILEID(inode) = fattr->fileid; + NFS_FSID(inode) = fattr->fsid; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. */ - if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; - NFS_FSID(inode) = fattr->fsid; - inode->i_mode = fattr->mode; - /* Why so? Because we want revalidate for devices/FIFOs, and - * that's precisely what we have in nfs_file_inode_operations. - */ - inode->i_op = &nfs_file_inode_operations; - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; - inode->i_data.a_ops = &nfs_file_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; - inode->i_fop = &nfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else - init_special_inode(inode, inode->i_mode, fattr->rdev); - /* - * Preset the size and mtime, as there's no need - * to invalidate the caches. 
- */ - inode->i_size = nfs_size_to_loff_t(fattr->size); - inode->i_mtime = nfs_time_to_secs(fattr->mtime); - inode->i_atime = nfs_time_to_secs(fattr->atime); - inode->i_ctime = nfs_time_to_secs(fattr->ctime); - NFS_CACHE_CTIME(inode) = fattr->ctime; - NFS_CACHE_MTIME(inode) = fattr->mtime; - NFS_CACHE_ISIZE(inode) = fattr->size; - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nfs_dir_inode_operations; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + /* + * Preset the size and mtime, as there's no need + * to invalidate the caches. 
+ */ + inode->i_size = nfs_size_to_loff_t(fattr->size); + inode->i_mtime = nfs_time_to_secs(fattr->mtime); + inode->i_atime = nfs_time_to_secs(fattr->atime); + inode->i_ctime = nfs_time_to_secs(fattr->ctime); + NFS_CACHE_CTIME(inode) = fattr->ctime; + NFS_CACHE_MTIME(inode) = fattr->mtime; + NFS_CACHE_ISIZE(inode) = fattr->size; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } struct nfs_find_desc { @@ -655,27 +680,6 @@ return 1; } -int -nfs_inode_is_stale(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) -{ - /* Empty inodes are not stale */ - if (!inode->i_mode) - return 0; - - if ((fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT)) - return 1; - - if (is_bad_inode(inode) || NFS_STALE(inode)) - return 1; - - /* Has the filehandle changed? If so is the old one stale? */ - if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0 && - __nfs_revalidate_inode(NFS_SERVER(inode),inode) == -ESTALE) - return 1; - - return 0; -} - /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -718,7 +722,19 @@ if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + /* + * Check whether the mode has been set, as we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) { + nfs_fill_inode(inode, fh, fattr); + nfs_refresh_inode(inode, fattr); + + /* We don't trust READDIRPLUS attributes */ + if (fattr->valid & NFS_ATTR_RDPLUS) + NFS_CACHEINV(inode); + } else if (!(fattr->valid & NFS_ATTR_RDPLUS)) + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); @@ -741,7 +757,7 @@ /* * Make sure the inode is up-to-date. 
*/ - error = nfs_revalidate(dentry); + error = nfs_revalidate_inode(NFS_SERVER(inode),inode); if (error) { #ifdef NFS_PARANOIA printk("nfs_notify_change: revalidate failed, error=%d\n", error); @@ -809,7 +825,26 @@ nfs_revalidate(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - return nfs_revalidate_inode(NFS_SERVER(inode), inode); + int status; + lock_kernel(); + status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + unlock_kernel(); + return status; +} + +/* + * Another revalidation function: This one checks inodes for staleness + * when we've bypassed the ordinary dcache revalidation routines. + * e.g. open(".") + */ +int +nfs_check_stale(struct inode *inode) +{ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) + NFS_CACHEINV(inode); + if (NFS_STALE(inode)) + return -ESTALE; + return 0; } /* @@ -838,13 +873,11 @@ struct rpc_auth *auth; struct rpc_cred *cred; - lock_kernel(); auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; if (filp->f_mode & FMODE_WRITE) nfs_set_mmcred(inode, cred); - unlock_kernel(); return 0; } @@ -852,11 +885,9 @@ { struct rpc_cred *cred; - lock_kernel(); cred = nfs_file_cred(filp); if (cred) put_rpccred(cred); - unlock_kernel(); return 0; } @@ -873,7 +904,6 @@ dfprintk(PAGECACHE, "NFS: revalidating (%x/%Ld)\n", inode->i_dev, (long long)NFS_FILEID(inode)); - lock_kernel(); if (!inode || is_bad_inode(inode)) goto out_nowait; if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) @@ -916,7 +946,6 @@ NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; wake_up(&inode->i_wait); out_nowait: - unlock_kernel(); return status; } @@ -962,6 +991,8 @@ new_size = fattr->size; new_isize = nfs_size_to_loff_t(fattr->size); + spin_lock(&nfs_inode_lock); + /* * Update the read time so we don't revalidate too often. 
*/ @@ -1044,6 +1075,7 @@ NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; } + spin_unlock(&nfs_inode_lock); if (invalid) nfs_zap_caches(inode); diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/nfs2xdr.c linux-2.4.15-jukebox/fs/nfs/nfs2xdr.c --- linux-2.4.15-pre9/fs/nfs/nfs2xdr.c Sat Nov 3 02:40:09 2001 +++ linux-2.4.15-jukebox/fs/nfs/nfs2xdr.c Thu Nov 22 16:30:18 2001 @@ -419,7 +419,7 @@ bufsiz = bufsiz >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + *p++ = htonl(args->cookie & 0xFFFFFFFF); *p++ = htonl(bufsiz); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -506,7 +506,7 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; @@ -631,36 +631,18 @@ * Decode STATFS reply */ static int -nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_statfs *res) { int status; - u32 xfer_size; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); - /* For NFSv2, we more or less have to guess the preferred - * read/write/readdir sizes from the single 'transfer size' - * value. 
- */ - xfer_size = ntohl(*p++); /* tsize */ - res->rtmax = 8 * 1024; - res->rtpref = xfer_size; - res->rtmult = xfer_size; - res->wtmax = 8 * 1024; - res->wtpref = xfer_size; - res->wtmult = xfer_size; - res->dtpref = PAGE_CACHE_SIZE; - res->maxfilesize = 0x7FFFFFFF; /* just a guess */ + res->tsize = ntohl(*p++); res->bsize = ntohl(*p++); - - res->tbytes = ntohl(*p++) * res->bsize; - res->fbytes = ntohl(*p++) * res->bsize; - res->abytes = ntohl(*p++) * res->bsize; - res->tfiles = 0; - res->ffiles = 0; - res->afiles = 0; - res->namelen = 0; + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); return 0; } diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/nfs3proc.c linux-2.4.15-jukebox/fs/nfs/nfs3proc.c --- linux-2.4.15-pre9/fs/nfs/nfs3proc.c Mon Oct 1 22:45:37 2001 +++ linux-2.4.15-jukebox/fs/nfs/nfs3proc.c Thu Nov 22 18:06:49 2001 @@ -80,7 +80,8 @@ status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply lookup: %d\n", status); - nfs_refresh_inode(dir, &dir_attr); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); return status; } @@ -462,24 +463,42 @@ return status; } -/* - * This is a combo call of fsstat and fsinfo - */ static int nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; - dprintk("NFS call fsstat\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0); - if (status < 0) - goto error; + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call fsinfo\n"); status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, 
info, 0); + dprintk("NFS reply fsinfo: %d\n", status); + return status; +} -error: - dprintk("NFS reply statfs: %d\n", status); +static int +nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call pathconf\n"); + status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + dprintk("NFS reply pathconf: %d\n", status); return status; } @@ -508,5 +527,7 @@ nfs3_proc_readdir, nfs3_proc_mknod, nfs3_proc_statfs, + nfs3_proc_fsinfo, + nfs3_proc_pathconf, nfs3_decode_dirent, }; diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/nfs3xdr.c linux-2.4.15-jukebox/fs/nfs/nfs3xdr.c --- linux-2.4.15-pre9/fs/nfs/nfs3xdr.c Sat Nov 3 02:40:09 2001 +++ linux-2.4.15-jukebox/fs/nfs/nfs3xdr.c Thu Nov 22 18:20:18 2001 @@ -523,6 +523,13 @@ return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -533,7 +540,7 @@ int buflen, replen; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { @@ -644,6 +651,7 @@ nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; + u64 cookie; if (!*p++) { if (!*p) @@ -657,24 +665,25 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { - p = xdr_decode_post_op_attr(p, &entry->fattr); + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); + if (entry->fattr->valid != 0) + entry->fattr->valid |= NFS_ATTR_RDPLUS; /* In fact, a post_op_fh3: */ if (*p++) { - p = 
xdr_decode_fhandle(p, &entry->fh); + p = xdr_decode_fhandle(p, entry->fh); /* Ugh -- server reply was truncated */ if (p == NULL) { dprintk("NFS: FH truncated\n"); *entry = old; return ERR_PTR(-EAGAIN); } - } else { - /* If we don't get a file handle, the attrs - * aren't worth a lot. */ - entry->fattr.valid = 0; - } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } entry->eof = !p[0] && p[1]; @@ -958,14 +967,13 @@ * Decode FSSTAT reply */ static int -nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -975,8 +983,7 @@ p = xdr_decode_hyper(p, &res->tfiles); p = xdr_decode_hyper(p, &res->ffiles); p = xdr_decode_hyper(p, &res->afiles); - - /* ignore invarsec */ + res->invarsec = ntohl(*p++); return 0; } @@ -986,12 +993,11 @@ static int nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -1003,8 +1009,8 @@ res->wtmult = ntohl(*p++); res->dtpref = ntohl(*p++); p = xdr_decode_hyper(p, &res->maxfilesize); - - /* ignore time_delta and properties */ + p = xdr_decode_time3(p, &res->time_delta); + res->properties = ntohl(*p++); return 0; } @@ -1012,20 +1018,21 @@ * Decode PATHCONF reply */ static int -nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return 
-nfs_stat_to_errno(status); res->linkmax = ntohl(*p++); - res->namelen = ntohl(*p++); - - /* ignore remaining fields */ + res->name_max = ntohl(*p++); + res->no_trunc = ntohl(*p++) != 0; + res->chown_restricted = ntohl(*p++) != 0; + res->case_insensitive = ntohl(*p++) != 0; + res->case_preserving = ntohl(*p++) != 0; return 0; } diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/pagelist.c linux-2.4.15-jukebox/fs/nfs/pagelist.c --- linux-2.4.15-pre9/fs/nfs/pagelist.c Thu Nov 22 14:09:19 2001 +++ linux-2.4.15-jukebox/fs/nfs/pagelist.c Thu Nov 22 18:21:20 2001 @@ -187,7 +187,7 @@ BUG(); } #endif - for (pos = head->prev; pos != head; pos = pos->prev) { + list_for_each_prev(pos, head) { struct nfs_page *p = nfs_list_entry(pos); if (page_index(p->wb_page) < pg_idx) break; diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/proc.c linux-2.4.15-jukebox/fs/nfs/proc.c --- linux-2.4.15-pre9/fs/nfs/proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.15-jukebox/fs/nfs/proc.c Thu Nov 22 18:46:54 2001 @@ -361,17 +361,62 @@ static int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; + struct nfs2_statfs fsinfo; - dprintk("NFS call statfs\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0); + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); dprintk("NFS reply statfs: %d\n", status); + if (status) + goto out; + stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize; + stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize; + stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize; + stat->tfiles = 0; + stat->ffiles = 0; + stat->afiles = 0; + stat->invarsec = 0; + out: return status; } +static int +nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + struct nfs2_statfs fsinfo; + + info->fattr->valid = 0; + 
dprintk("NFS call fsinfo\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("NFS reply fsinfo: %d\n", status); + if (status) + goto out; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->time_delta = 0; + info->properties = 0x1b; + out: + return status; +} + +static int +nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + return -ENOTSUPP; +} + extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); struct nfs_rpc_ops nfs_v2_clientops = { @@ -397,5 +442,7 @@ nfs_proc_readdir, nfs_proc_mknod, nfs_proc_statfs, + nfs_proc_fsinfo, + nfs_proc_pathconf, nfs_decode_dirent, }; diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/read.c linux-2.4.15-jukebox/fs/nfs/read.c --- linux-2.4.15-pre9/fs/nfs/read.c Thu Nov 22 14:09:19 2001 +++ linux-2.4.15-jukebox/fs/nfs/read.c Thu Nov 22 18:27:46 2001 @@ -113,11 +113,9 @@ inode->i_dev, (long long)NFS_FILEID(inode), (long long)offset, rsize, buffer); - lock_kernel(); result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags, offset, rsize, buffer, &eof); nfs_refresh_inode(inode, &fattr); - unlock_kernel(); /* * Even if we had a partial success we can't mark the page @@ -272,9 +270,7 @@ rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); - lock_kernel(); rpc_execute(task); - unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); return 0; out_bad: diff -u --recursive --new-file linux-2.4.15-pre9/fs/nfs/write.c linux-2.4.15-jukebox/fs/nfs/write.c --- linux-2.4.15-pre9/fs/nfs/write.c Thu Nov 22 14:09:19 2001 +++ linux-2.4.15-jukebox/fs/nfs/write.c Thu Nov 22 18:27:46 2001 @@ -125,16 +125,18 @@ * under NFSv2 when the NFSv3 attribute patch is included. * For the moment, we just call nfs_refresh_inode(). 
*/ -static __inline__ int +static inline int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) { + int status; if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { fattr->pre_size = NFS_CACHE_ISIZE(inode); fattr->pre_mtime = NFS_CACHE_MTIME(inode); fattr->pre_ctime = NFS_CACHE_CTIME(inode); fattr->valid |= NFS_ATTR_WCC; } - return nfs_refresh_inode(inode, fattr); + status = nfs_refresh_inode(inode, fattr); + return status; } /* @@ -260,7 +262,6 @@ if (page->index >= end_index+1 || !offset) goto out; do_it: - lock_kernel(); if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) @@ -270,7 +271,6 @@ if (err == offset) err = 0; } - unlock_kernel(); out: UnlockPage(page); return err; @@ -305,18 +305,30 @@ /* * Insert a write request into an inode + * Note: we sort the list in order to be able to optimize nfs_find_request() + * & co. for the 'write append' case. For 2.5 we may want to consider + * some form of hashing so as to perform well on random writes. 
*/ static inline void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { + struct list_head *pos, *head; + unsigned long pg_idx = page_index(req->wb_page); + if (!list_empty(&req->wb_hash)) return; if (!NFS_WBACK_BUSY(req)) printk(KERN_ERR "NFS: unlocked request attempted hashed!\n"); - if (list_empty(&inode->u.nfs_i.writeback)) + head = &inode->u.nfs_i.writeback; + if (list_empty(head)) igrab(inode); + list_for_each_prev(pos, head) { + struct nfs_page *entry = nfs_inode_wb_entry(pos); + if (page_index(entry->wb_page) < pg_idx) + break; + } inode->u.nfs_i.npages++; - list_add(&req->wb_hash, &inode->u.nfs_i.writeback); + list_add(&req->wb_hash, pos); req->wb_count++; } @@ -354,15 +366,18 @@ static inline struct nfs_page * _nfs_find_request(struct inode *inode, struct page *page) { - struct list_head *head, *next; + struct list_head *head, *pos; + unsigned long pg_idx = page_index(page); head = &inode->u.nfs_i.writeback; - next = head->next; - while (next != head) { - struct nfs_page *req = nfs_inode_wb_entry(next); - next = next->next; - if (page_index(req->wb_page) != page_index(page)) + list_for_each_prev(pos, head) { + struct nfs_page *req = nfs_inode_wb_entry(pos); + unsigned long found_idx = page_index(req->wb_page); + + if (pg_idx < found_idx) continue; + if (pg_idx != found_idx) + break; req->wb_count++; return req; } @@ -444,20 +459,20 @@ else idx_end = idx_start + npages - 1; - spin_lock(&nfs_wreq_lock); head = &inode->u.nfs_i.writeback; - p = head->next; - while (p != head) { + restart: + spin_lock(&nfs_wreq_lock); + list_for_each_prev(p, head) { unsigned long pg_idx; struct nfs_page *req = nfs_inode_wb_entry(p); - p = p->next; - if (file && req->wb_file != file) continue; pg_idx = page_index(req->wb_page); - if (pg_idx < idx_start || pg_idx > idx_end) + if (pg_idx < idx_start) + break; + if (pg_idx > idx_end) continue; if (!NFS_WBACK_BUSY(req)) @@ -468,9 +483,8 @@ nfs_release_request(req); if (error < 0) return error; - 
spin_lock(&nfs_wreq_lock); - p = head->next; res++; + goto restart; } spin_unlock(&nfs_wreq_lock); return res; @@ -932,9 +946,7 @@ rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); - lock_kernel(); rpc_execute(task); - unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); return 0; out_bad: @@ -1155,9 +1167,7 @@ dprintk("NFS: %4d initiated commit call\n", task->tk_pid); rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); - lock_kernel(); rpc_execute(task); - unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); return 0; out_bad: diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/dcache.h linux-2.4.15-jukebox/include/linux/dcache.h --- linux-2.4.15-pre9/include/linux/dcache.h Mon Nov 5 21:42:13 2001 +++ linux-2.4.15-jukebox/include/linux/dcache.h Thu Nov 22 18:37:32 2001 @@ -80,6 +80,8 @@ struct super_block * d_sb; /* The root of the dentry tree */ unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + unsigned long d_rtime_sec; /* used by nfs d_revalidate */ + unsigned long d_rtime_nsec; /* used by nfs d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ }; diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/fs.h linux-2.4.15-jukebox/include/linux/fs.h --- linux-2.4.15-pre9/include/linux/fs.h Thu Nov 22 14:09:24 2001 +++ linux-2.4.15-jukebox/include/linux/fs.h Thu Nov 22 18:37:33 2001 @@ -851,6 +851,7 @@ int (*revalidate) (struct dentry *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); + int (*check_stale) (struct inode *); }; /* diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/list.h linux-2.4.15-jukebox/include/linux/list.h --- linux-2.4.15-pre9/include/linux/list.h Mon Nov 5 21:42:13 2001 +++ linux-2.4.15-jukebox/include/linux/list.h Thu Nov 22 18:37:32 2001 @@ -162,6 +162,16 @@ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) +/** + * list_for_each_prev - iterate over a list in 
reverse order + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + pos = pos->prev, prefetch(pos->prev)) + + #endif /* __KERNEL__ || _LVM_H_INCLUDE */ #endif diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/nfs_fs.h linux-2.4.15-jukebox/include/linux/nfs_fs.h --- linux-2.4.15-pre9/include/linux/nfs_fs.h Thu Nov 22 14:09:25 2001 +++ linux-2.4.15-jukebox/include/linux/nfs_fs.h Thu Nov 22 20:26:42 2001 @@ -81,10 +81,7 @@ #define NFS_CACHE_MTIME(inode) ((inode)->u.nfs_i.read_cache_mtime) #define NFS_CACHE_ISIZE(inode) ((inode)->u.nfs_i.read_cache_isize) #define NFS_NEXTSCAN(inode) ((inode)->u.nfs_i.nextscan) -#define NFS_CACHEINV(inode) \ -do { \ - NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \ -} while (0) +#define NFS_CACHEINV(inode) nfs_invalidate_caches(inode) #define NFS_ATTRTIMEO(inode) ((inode)->u.nfs_i.attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ @@ -101,8 +98,15 @@ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) #define NFS_FSID(inode) ((inode)->u.nfs_i.fsid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 
1 : 0) +static inline int nfs_server_capable(struct inode *inode, int cap) +{ + return NFS_SERVER(inode)->caps & cap; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; +} /* * These are the default flags for swap requests @@ -141,6 +145,7 @@ * linux/fs/nfs/inode.c */ extern struct super_block *nfs_read_super(struct super_block *, void *, int); +extern void nfs_invalidate_caches(struct inode *); extern void nfs_zap_caches(struct inode *); extern int nfs_inode_is_stale(struct inode *, struct nfs_fh *, struct nfs_fattr *); @@ -152,6 +157,7 @@ extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); +extern int nfs_check_stale(struct inode *); extern int nfs_notify_change(struct dentry *, struct iattr *); /* diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/nfs_fs_sb.h linux-2.4.15-jukebox/include/linux/nfs_fs_sb.h --- linux-2.4.15-pre9/include/linux/nfs_fs_sb.h Thu Nov 22 14:09:25 2001 +++ linux-2.4.15-jukebox/include/linux/nfs_fs_sb.h Thu Nov 22 18:37:32 2001 @@ -10,6 +10,7 @@ struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -36,4 +37,8 @@ struct nfs_server s_server; }; +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + + #endif diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/nfs_xdr.h linux-2.4.15-jukebox/include/linux/nfs_xdr.h --- linux-2.4.15-pre9/include/linux/nfs_xdr.h Mon Jan 29 21:07:43 2001 +++ linux-2.4.15-jukebox/include/linux/nfs_xdr.h Thu Nov 22 20:26:11 2001 @@ -35,11 +35,13 @@ #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ #define NFS_ATTR_FATTR 0x0002 /* post-op 
attributes */ #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ +#define NFS_ATTR_RDPLUS 0x0008 /* Made in readdirplus */ /* * Info on the file system */ struct nfs_fsinfo { + struct nfs_fattr *fattr; __u32 rtmax; /* max. read transfer size */ __u32 rtpref; /* pref. read transfer size */ __u32 rtmult; /* reads should be multiple of this */ @@ -48,15 +50,37 @@ __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - __u64 bsize; /* block size */ + __u64 time_delta; + __u32 properties; +}; + +struct nfs_fsstat { + struct nfs_fattr *fattr; __u64 tbytes; /* total size in bytes */ __u64 fbytes; /* # of free bytes */ __u64 abytes; /* # of bytes available to user */ __u64 tfiles; /* # of files */ __u64 ffiles; /* # of free files */ __u64 afiles; /* # of files available to user */ + __u32 invarsec; +}; + +struct nfs_pathconf { + struct nfs_fattr *fattr; /* Post-op attributes */ __u32 linkmax;/* max # of hard links */ - __u32 namelen;/* max name length */ + __u32 name_max;/* max name length */ + int no_trunc : 1, + chown_restricted : 1, + case_insensitive : 1, + case_preserving : 1; +}; + +struct nfs2_statfs { + __u32 tsize; /* Server transfer size */ + __u32 bsize; /* Filesystem block size */ + __u32 blocks; /* No. of "bsize" blocks on filesystem */ + __u32 bfree; /* No. of free "bsize" blocks */ + __u32 bavail; /* No. of available "bsize" blocks */ }; /* Arguments to the read call. 
@@ -112,8 +136,8 @@ const char * name; unsigned int len; int eof; - struct nfs_fh fh; - struct nfs_fattr fattr; + struct nfs_fh *fh; + struct nfs_fattr *fattr; }; /* @@ -353,7 +377,11 @@ int (*mknod) (struct inode *, struct qstr *, struct iattr *, dev_t, struct nfs_fh *, struct nfs_fattr *); int (*statfs) (struct nfs_server *, struct nfs_fh *, + struct nfs_fsstat *); + int (*fsinfo) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*pathconf) (struct nfs_server *, struct nfs_fh *, + struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); }; diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/sunrpc/clnt.h linux-2.4.15-jukebox/include/linux/sunrpc/clnt.h --- linux-2.4.15-pre9/include/linux/sunrpc/clnt.h Mon Nov 5 21:42:54 2001 +++ linux-2.4.15-jukebox/include/linux/sunrpc/clnt.h Thu Nov 22 19:02:08 2001 @@ -111,6 +111,8 @@ void rpc_release_client(struct rpc_clnt *); void rpc_getport(struct rpc_task *, struct rpc_clnt *); int rpc_register(u32, u32, int, unsigned short, int *); +u32 * rpc_call_header(struct rpc_task *task); +u32 * rpc_call_verify(struct rpc_task *task); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -144,5 +146,10 @@ */ int rpc_getport_external(struct sockaddr_in *, __u32, __u32, int); +/* + * Ping function + */ +void rpc_ping(struct rpc_task *task); + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff -u --recursive --new-file linux-2.4.15-pre9/include/linux/sunrpc/xprt.h linux-2.4.15-jukebox/include/linux/sunrpc/xprt.h --- linux-2.4.15-pre9/include/linux/sunrpc/xprt.h Mon Nov 5 21:42:54 2001 +++ linux-2.4.15-jukebox/include/linux/sunrpc/xprt.h Thu Nov 22 19:02:08 2001 @@ -39,12 +39,14 @@ * Come Linux 2.3, we'll handle fragments directly. 
*/ #define RPC_MAXCONG 16 -#define RPC_MAXREQS (RPC_MAXCONG + 1) +#define RPC_MAXREQS (RPC_MAXCONG + 2) #define RPC_CWNDSCALE 256 #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_INITCWND RPC_CWNDSCALE #define RPCXPRT_CONGESTED(xprt) \ ((xprt)->cong >= (xprt)->cwnd) +#define RPCXPRT_SUPERCONGESTED(xprt) \ + ((xprt)->cwnd < 2*RPC_CWNDSCALE) /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) @@ -135,6 +137,7 @@ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ + struct rpc_wait_queue pingwait; /* waiting on ping() */ struct rpc_rqst * free; /* free slots */ struct rpc_rqst slot[RPC_MAXREQS]; unsigned long sockstate; /* Socket state */ @@ -179,10 +182,12 @@ unsigned long); int xprt_reserve(struct rpc_task *); +int xprt_ping_reserve(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); void xprt_release(struct rpc_task *); +void xprt_ping_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int xprt_clear_backlog(struct rpc_xprt *); int xprt_tcp_pending(void); @@ -190,6 +195,8 @@ #define XPRT_WSPACE 0 #define XPRT_CONNECT 1 +#define XPRT_PING 2 +#define XPRT_NORESPOND 3 #define xprt_wspace(xp) (test_bit(XPRT_WSPACE, &(xp)->sockstate)) #define xprt_test_and_set_wspace(xp) (test_and_set_bit(XPRT_WSPACE, &(xp)->sockstate)) @@ -200,6 +207,32 @@ #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) +static inline int xprt_pinging(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_PING, &xprt->sockstate); +} +static inline int xprt_test_and_set_pinging(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_PING, &xprt->sockstate); +} +static inline void xprt_clear_pinging(struct rpc_xprt *xprt) +{ + 
clear_bit(XPRT_PING, &xprt->sockstate); +} + +static inline int xprt_norespond(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline int xprt_test_and_set_norespond(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_NORESPOND, &xprt->sockstate); +} +static inline void xprt_clear_norespond(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_NORESPOND, &xprt->sockstate); +} + static inline void rpciod_tcp_dispatcher(void) { diff -u --recursive --new-file linux-2.4.15-pre9/net/sunrpc/Makefile linux-2.4.15-jukebox/net/sunrpc/Makefile --- linux-2.4.15-pre9/net/sunrpc/Makefile Fri Dec 29 23:07:24 2000 +++ linux-2.4.15-jukebox/net/sunrpc/Makefile Thu Nov 22 18:20:43 2001 @@ -14,7 +14,7 @@ obj-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o \ - pmap_clnt.o xdr.o sunrpc_syms.o + ping.o pmap_clnt.o xdr.o sunrpc_syms.o obj-$(CONFIG_PROC_FS) += stats.o obj-$(CONFIG_SYSCTL) += sysctl.o diff -u --recursive --new-file linux-2.4.15-pre9/net/sunrpc/clnt.c linux-2.4.15-jukebox/net/sunrpc/clnt.c --- linux-2.4.15-pre9/net/sunrpc/clnt.c Fri Sep 21 20:24:50 2001 +++ linux-2.4.15-jukebox/net/sunrpc/clnt.c Thu Nov 22 18:20:43 2001 @@ -57,8 +57,8 @@ static void call_reconnect(struct rpc_task *task); static void child_reconnect(struct rpc_task *); static void child_reconnect_status(struct rpc_task *); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static void call_ping(struct rpc_task *task); +static void call_pingresult(struct rpc_task *task); /* @@ -491,7 +491,7 @@ /* Encode header and provided arguments */ encode = rpcproc_encode(clnt, task->tk_msg.rpc_proc); - if (!(p = call_header(task))) { + if (!(p = rpc_call_header(task))) { printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); rpc_exit(task, -EIO); } else @@ -618,11 +618,10 @@ task->tk_action = call_reconnect; break; } - /* - * Sleep and dream of an open connection - */ - task->tk_timeout = 5 * 
HZ; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; + break; + } case -ENOMEM: case -EAGAIN: task->tk_action = call_transmit; @@ -646,6 +645,7 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; + int major = 0; if (req) { struct rpc_timeout *to = &req->rq_timeout; @@ -666,17 +666,7 @@ rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { - task->tk_flags |= RPC_CALL_MAJORSEEN; - if (req) - printk(KERN_NOTICE "%s: server %s not responding, still trying\n", - clnt->cl_protname, clnt->cl_server); -#ifdef RPC_DEBUG - else - printk(KERN_NOTICE "%s: task %d can't get a request slot\n", - clnt->cl_protname, task->tk_pid); -#endif - } + major = 1; if (clnt->cl_autobind) clnt->cl_port = 0; @@ -689,6 +679,8 @@ } else if (!xprt_connected(clnt->cl_xprt)) { task->tk_action = call_reconnect; clnt->cl_stats->rpcretrans++; + } else if (major && RPCXPRT_SUPERCONGESTED(clnt->cl_xprt)) { + task->tk_action = call_ping; } else { task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; @@ -710,12 +702,6 @@ dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { - printk(KERN_NOTICE "%s: server %s OK\n", - clnt->cl_protname, clnt->cl_server); - task->tk_flags &= ~RPC_CALL_MAJORSEEN; - } - if (task->tk_status < 12) { if (!clnt->cl_softrtry) { task->tk_action = call_transmit; @@ -729,7 +715,7 @@ } /* Verify the RPC header */ - if (!(p = call_verify(task))) + if (!(p = rpc_call_verify(task))) return; /* @@ -788,8 +774,8 @@ /* * Call header serialization */ -static u32 * -call_header(struct rpc_task *task) +u32 * +rpc_call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; @@ -809,10 +795,63 @@ } /* + * Ping a non-responding server + */ +static void +call_ping(struct rpc_task *task) 
+{ + task->tk_action = call_pingresult; + rpc_ping(task); +} + +/* + * Interpret the result from ping + */ +static void +call_pingresult(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + int status = task->tk_status; + + task->tk_status = 0; + if (status >= 0) { + task->tk_action = call_transmit; + return; + } + + switch(status) { + case -ECONNREFUSED: + case -ENOTCONN: + if (clnt->cl_autobind || !clnt->cl_port) { + clnt->cl_port = 0; + task->tk_action = call_bind; + break; + } + if (xprt->stream) { + task->tk_action = call_reconnect; + break; + } + case -ENOMEM: + case -ENOBUFS: + rpc_delay(task, HZ >> 4); + case -ETIMEDOUT: + task->tk_action = call_ping; + break; + default: + if (clnt->cl_chatty) + printk("%s: RPC call returned error %d\n", + clnt->cl_protname, -status); + rpc_exit(task,status); + return; + } +} + +/* * Reply header verification */ -static u32 * -call_verify(struct rpc_task *task) +u32 * +rpc_call_verify(struct rpc_task *task) { u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n; diff -u --recursive --new-file linux-2.4.15-pre9/net/sunrpc/ping.c linux-2.4.15-jukebox/net/sunrpc/ping.c --- linux-2.4.15-pre9/net/sunrpc/ping.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.15-jukebox/net/sunrpc/ping.c Thu Nov 22 18:20:43 2001 @@ -0,0 +1,218 @@ +/* + * linux/net/sunrpc/ping.c + * + * Ping routing. 
+ * + * Copyright (C) 2000, Trond Myklebust + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define RPC_SLACK_SPACE 512 /* total overkill */ +#define RPC_PING_DELAY (15*HZ) + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_XPRT +#endif + +static void ping_call_reserve(struct rpc_task *); +static void ping_call_allocate(struct rpc_task *); +static void ping_call_encode(struct rpc_task *); +static void ping_call_transmit(struct rpc_task *); +static void ping_call_receive(struct rpc_task *); +static void ping_call_exit(struct rpc_task *); + + +static void +ping_call_reserve(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_reserve\n", task->tk_pid); + task->tk_status = 0; + task->tk_action = ping_call_allocate; + task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; + xprt_ping_reserve(task); +} + +static void +ping_call_allocate(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rqst *req = task->tk_rqstp; + unsigned int bufsiz; + + dprintk("RPC: %4d, ping_call_allocate (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = ping_call_exit; + if (task->tk_status < 0) + return; + + bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE; + if (!(task->tk_buffer = rpc_malloc(task, bufsiz << 1))) { + task->tk_status = -ENOMEM; + return; + } + req->rq_svec[0].iov_base = (void *)task->tk_buffer; + req->rq_svec[0].iov_len = bufsiz; + req->rq_slen = 0; + req->rq_snr = 1; + req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + req->rq_rvec[0].iov_len = bufsiz; + req->rq_rlen = bufsiz; + req->rq_rnr = 1; + task->tk_action = ping_call_encode; +} + +static void +ping_call_encode(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + u32 *p; + + dprintk("RPC: %4d, ping_call_encode (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status < 0) { + task->tk_action = ping_call_exit; + return; + 
} + p = rpc_call_header(task); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + task->tk_action = ping_call_transmit; +} + +static void +ping_call_transmit(struct rpc_task *task) +{ + dprintk("RPC: %4d, ping_call_transmit\n", task->tk_pid); + task->tk_action = ping_call_receive; + xprt_transmit(task); +} + +static void +ping_call_receive(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_timeout *to = &req->rq_timeout; + u32 *p; + + dprintk("RPC: %4d, ping_call_receive (status %d)\n", + task->tk_pid, task->tk_status); + + if (task->tk_status >= 0) + p = rpc_call_verify(task); + + task->tk_action = ping_call_exit; + + if (task->tk_status >= 0 || task->tk_status == -EACCES) { + task->tk_status = 0; + if (xprt_norespond(xprt)) { + if (clnt->cl_chatty) + printk(KERN_NOTICE "%s: server %s OK\n", + clnt->cl_protname, clnt->cl_server); + xprt_clear_norespond(xprt); + } + return; + } + + switch (task->tk_status) { + case -ENOTCONN: + break; + case -ENOMEM: + case -EAGAIN: + case -ECONNREFUSED: + case -ETIMEDOUT: + if (!xprt_adjust_timeout(to)) { + task->tk_status = 0; + task->tk_action = ping_call_transmit; + break; + } + default: + if (clnt->cl_softrtry) { + task->tk_status = -EIO; + break; + } + if (clnt->cl_chatty) { + if (!xprt_test_and_set_norespond(xprt)) { + printk(KERN_NOTICE + "%s: server %s is not responding\n", + clnt->cl_protname, clnt->cl_server); + } else { + printk(KERN_NOTICE + "%s: server %s still not responding\n", + clnt->cl_protname, clnt->cl_server); + } + } + rpc_delay(task, RPC_PING_DELAY); + } +} + +static void +ping_call_exit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + dprintk("RPC: %4d, ping_call_exit (status %d)\n", + task->tk_pid, task->tk_status); + + task->tk_action = NULL; + xprt_ping_release(task); + + /* Sigh. 
rpc_delay() clears task->tk_status */ + if (task->tk_status == 0 && xprt_norespond(xprt)) + task->tk_status = -ETIMEDOUT; + + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, task->tk_status); +} + +void +rpc_ping(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_task *child; + struct rpc_message msg = {0, NULL, NULL, NULL}; + + dprintk("RPC: %4d, rpc_ping\n", task->tk_pid); + + again: + if (xprt_test_and_set_pinging(xprt)) { + rpc_sleep_on(&xprt->pingwait, task, NULL, 0); + if (!xprt_pinging(xprt)) { + rpc_wake_up_task(task); + goto again; + } + dprintk("RPC: %4d, rpc_ping, waiting on completion\n", + task->tk_pid); + return; + } + + child = rpc_new_child(clnt, task); + if (!child) { + dprintk("RPC: %4d, rpc_ping, failed to create child process\n", + task->tk_pid); + xprt_clear_pinging(xprt); + rpc_wake_up_status(&xprt->pingwait, -ENOMEM); + task->tk_status = -ENOMEM; + return; + } + rpc_call_setup(child, &msg, 0); + child->tk_action = ping_call_reserve; + + dprintk("RPC: %4d, rpc_ping, running child process %4d\n", + task->tk_pid, child->tk_pid); + rpc_run_child(task, child, NULL); +} diff -u --recursive --new-file linux-2.4.15-pre9/net/sunrpc/sched.c linux-2.4.15-jukebox/net/sunrpc/sched.c --- linux-2.4.15-pre9/net/sunrpc/sched.c Thu Oct 11 17:12:52 2001 +++ linux-2.4.15-jukebox/net/sunrpc/sched.c Thu Nov 22 18:27:46 2001 @@ -1052,7 +1052,6 @@ int rounds = 0; MOD_INC_USE_COUNT; - lock_kernel(); /* * Let our maker know we're running ... 
*/ diff -u --recursive --new-file linux-2.4.15-pre9/net/sunrpc/xprt.c linux-2.4.15-jukebox/net/sunrpc/xprt.c --- linux-2.4.15-pre9/net/sunrpc/xprt.c Mon Oct 8 21:36:07 2001 +++ linux-2.4.15-jukebox/net/sunrpc/xprt.c Thu Nov 22 18:20:43 2001 @@ -85,7 +85,7 @@ */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void do_xprt_transmit(struct rpc_task *); -static void xprt_reserve_status(struct rpc_task *task); +static void xprt_alloc_slot(struct rpc_xprt *, struct rpc_task *); static void xprt_disconnect(struct rpc_xprt *); static void xprt_reconn_status(struct rpc_task *task); static struct socket *xprt_create_socket(int, struct rpc_timeout *); @@ -1247,15 +1247,8 @@ rpc_sleep_on(&xprt->sending, task, NULL, NULL); } spin_unlock_bh(&xprt->sock_lock); - return; case -EAGAIN: - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); return; - case -ECONNREFUSED: - case -ENOTCONN: - if (!xprt->stream) - return; default: if (xprt->stream) xprt_disconnect(xprt); @@ -1306,9 +1299,11 @@ dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", task->tk_pid, xprt->cong, xprt->cwnd); spin_lock_bh(&xprt->xprt_lock); - xprt_reserve_status(task); + if (!RPCXPRT_CONGESTED(xprt)) + xprt_alloc_slot(xprt, task); if (task->tk_rqstp) { task->tk_timeout = 0; + xprt->cong += RPC_CWNDSCALE; } else if (!task->tk_timeout) { task->tk_status = -ENOBUFS; } else { @@ -1323,35 +1318,48 @@ } /* - * Reservation callback + * Reserve a ping RPC call slot. */ -static void -xprt_reserve_status(struct rpc_task *task) +int +xprt_ping_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_rqst *req; - if (xprt->shutdown) { - task->tk_status = -EIO; - } else if (task->tk_status < 0) { - /* NOP */ - } else if (task->tk_rqstp) { - /* We've already been given a request slot: NOP */ - } else { - if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) - goto out_nofree; - /* OK: There's room for us. 
Grab a free slot and bump - * congestion value */ - xprt->free = req->rq_next; - req->rq_next = NULL; - xprt->cong += RPC_CWNDSCALE; - task->tk_rqstp = req; - xprt_request_init(task, xprt); + /* We already have an initialized request. */ + if (task->tk_rqstp) + return 0; - if (xprt->free) - xprt_clear_backlog(xprt); - } + dprintk("RPC: %4d xprt_ping_reserve cong = %ld cwnd = %ld\n", + task->tk_pid, xprt->cong, xprt->cwnd); + spin_lock_bh(&xprt->xprt_lock); + xprt_alloc_slot(xprt, task); + if (!task->tk_rqstp) + task->tk_status = -ENOBUFS; + spin_unlock_bh(&xprt->xprt_lock); + dprintk("RPC: %4d xprt_ping_reserve returns %d\n", + task->tk_pid, task->tk_status); + return task->tk_status; +} +/* + * Reserve a slot + */ +static void +xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (!(req = xprt->free)) + goto out_nofree; + /* OK: There's room for us. Grab a free slot; congestion + * accounting is left to the caller */ + xprt->free = req->rq_next; + req->rq_next = NULL; + task->tk_rqstp = req; + xprt_request_init(task, xprt); + + if (xprt->free) + xprt_clear_backlog(xprt); return; out_nofree: @@ -1383,8 +1391,8 @@ /* * Release an RPC call slot */ -void -xprt_release(struct rpc_task *task) +static void +__xprt_release(struct rpc_task *task, int congvalue) { struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; @@ -1405,13 +1413,26 @@ req->rq_next = xprt->free; xprt->free = req; - /* Decrease congestion value. */ - xprt->cong -= RPC_CWNDSCALE; - - xprt_clear_backlog(xprt); + if (congvalue) { + /* Decrease congestion value. 
*/ + xprt->cong -= congvalue; + xprt_clear_backlog(xprt); + } spin_unlock_bh(&xprt->xprt_lock); } +void +xprt_release(struct rpc_task *task) +{ + __xprt_release(task, RPC_CWNDSCALE); +} + +void +xprt_ping_release(struct rpc_task *task) +{ + __xprt_release(task, 0); +} + /* * Set default timeout parameters */ @@ -1481,6 +1502,7 @@ xprt->pending = RPC_INIT_WAITQ("xprt_pending"); xprt->sending = RPC_INIT_WAITQ("xprt_sending"); xprt->backlog = RPC_INIT_WAITQ("xprt_backlog"); + xprt->pingwait= RPC_INIT_WAITQ("xprt_pingwait"); /* initialize free list */ for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++) @@ -1616,6 +1638,7 @@ rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); + rpc_wake_up(&xprt->pingwait); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); }