fs/nfs/dir.c | 281 ++++++++++++++++++++++++++++++++++++-------- fs/nfs/inode.c | 233 +++++++++++++++++++++--------------- fs/nfs/nfs2xdr.c | 33 +---- fs/nfs/nfs3proc.c | 58 ++++++--- fs/nfs/nfs3xdr.c | 60 +++++---- fs/nfs/proc.c | 55 +++++++- fs/nfs/unlink.c | 13 +- include/linux/nfs_fs.h | 11 + include/linux/nfs_fs_i.h | 12 + include/linux/nfs_fs_sb.h | 5 include/linux/nfs_xdr.h | 36 +++++ include/linux/sunrpc/xprt.h | 7 + net/sunrpc/xprt.c | 107 +++++++++++++--- 13 files changed, 670 insertions(+), 241 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/dir.c linux-2.4.26-07-sock_disconnect/fs/nfs/dir.c --- linux-2.4.26/fs/nfs/dir.c 2004-04-14 11:28:23.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/dir.c 2004-04-16 09:35:36.000000000 -0700 @@ -34,8 +34,11 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static int nfs_cached_lookup(struct inode *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -48,6 +51,7 @@ static int nfs_rename(struct inode *, st static int nfs_fsync_dir(struct file *, struct dentry *, int); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, @@ -70,6 +74,25 @@ struct inode_operations nfs_dir_inode_op setattr: nfs_notify_change, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -109,13 +132,15 @@ int nfs_readdir_filler(nfs_readdir_descr error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } SetPageUptodate(page); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either @@ -194,8 +219,7 @@ int find_dirent_page(nfs_readdir_descrip dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); - page = read_cache_page(&inode->i_data, desc->page_index, + page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { status = PTR_ERR(page); @@ -246,6 +270,24 @@ int readdir_search_pagecache(nfs_readdir return res; } +static unsigned int nfs_type2dtype[] = { + DT_UNKNOWN, + DT_REG, + DT_DIR, + DT_BLK, + DT_CHR, + DT_LNK, + DT_SOCK, + DT_UNKNOWN, + DT_FIFO +}; + +static inline +unsigned int nfs_type_to_d_type(enum nfs_ftype type) +{ + return nfs_type2dtype[type]; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -262,11 +304,17 @@ int nfs_do_filldir(nfs_readdir_descripto dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr->type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -333,7 +381,8 @@ int uncached_readdir(nfs_readdir_descrip /* Reset read descriptor so it searches the page cache from * the start upon the next call to readdir_search_pagecache() */ desc->page_index = 0; - memset(desc->entry, 0, sizeof(*desc->entry)); + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; out: dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); return status; @@ -352,9 +401,11 @@ static int nfs_readdir(struct file *filp nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) return res; @@ -365,12 +416,16 @@ static int nfs_readdir(struct file *filp * itself. */ memset(desc, 0, sizeof(*desc)); - memset(&my_entry, 0, sizeof(my_entry)); - desc->file = filp; desc->target = filp->f_pos; - desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -434,16 +489,9 @@ static inline void nfs_renew_times(struc } static inline -int nfs_lookup_verify_inode(struct inode *inode, int flags) +int nfs_lookup_verify_inode(struct inode *inode) { - struct nfs_server *server = NFS_SERVER(inode); - /* - * If we're interested in close-to-open cache consistency, - * then we revalidate the inode upon lookup. - */ - if (!(server->flags & NFS_MOUNT_NOCTO) && !(flags & LOOKUP_CONTINUE)) - NFS_CACHEINV(inode); - return nfs_revalidate_inode(server, inode); + return nfs_revalidate_inode(NFS_SERVER(inode), inode); } /* @@ -497,11 +545,20 @@ static int nfs_lookup_revalidate(struct /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, flags)) - goto out_bad; + if (nfs_lookup_verify_inode(inode)) + goto out_zap_parent; goto out_valid; } + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if (nfs_lookup_verify_inode(inode)) + goto out_zap_parent; + goto out_valid_renew; + } + if (NFS_STALE(inode)) goto out_bad; @@ -513,10 +570,13 @@ static int nfs_lookup_revalidate(struct if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; + out_valid_renew: nfs_renew_times(dentry); out_valid: unlock_kernel(); return 1; +out_zap_parent: + nfs_zap_caches(dir); out_bad: NFS_CACHEINV(dir); if (inode && S_ISDIR(inode->i_mode)) { @@ -593,6 +653,18 @@ static struct dentry *nfs_lookup(struct error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + error = -EACCES; + inode = nfs_fhget(dentry, &fhandle, &fattr); + if (inode) { + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + goto out; + } + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); inode = NULL; if (error == -ENOENT) @@ -611,6 +683,79 @@ out: return ERR_PTR(error); } +static inline +int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +{ + struct nfs_entry *entry = desc->entry; + int status; + + while((status = dir_decode(desc)) == 0) { + if (entry->len != dentry->d_name.len) + continue; + if (memcmp(entry->name, dentry->d_name.name, entry->len)) + continue; + if (!(entry->fattr->valid & NFS_ATTR_FATTR)) + continue; + break; + } + return status; +} + +/* + * Use the cached Readdirplus results in order to avoid a LOOKUP call + * whenever we believe that the parent directory has not changed. + * + * We assume that any file creation/rename changes the directory mtime. + * As this results in a page cache invalidation whenever it occurs, + * we don't require any other tests for cache coherency. + */ +static +int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + nfs_readdir_descriptor_t desc; + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; + unsigned long timestamp = NFS_MTIME_UPDATE(dir); + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); + if (server->flags & NFS_MOUNT_NOAC) + return -ENOENT; + nfs_revalidate_inode(server, dir); + + entry.fh = fh; + entry.fattr = fattr; + + desc.decode = NFS_PROTO(dir)->decode_dirent; + desc.entry = &entry; + desc.page_index = 0; + desc.plus = 1; + + for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) { + + res = -EIO; + if (Page_Uptodate(page)) { + desc.ptr = kmap(page); + res = find_dirent_name(&desc, page, dentry); + kunmap(page); + } + page_cache_release(page); + + if (res == 0) + goto out_found; + if (res != -EAGAIN) + break; + } + return -ENOENT; + out_found: + fattr->timestamp = timestamp; + return 0; +} + /* * Code common to create, mkdir, and mknod. */ @@ -999,7 +1144,7 @@ static int nfs_rename(struct inode *old_ struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct dentry *dentry = NULL, *rehash = NULL; - int error = -EBUSY; + int error; /* * To prevent any new references to the target during the rename, @@ -1025,6 +1170,12 @@ static int nfs_rename(struct inode *old_ */ if (!new_inode) goto go_ahead; + /* If target is a hard link to the source, then noop */ + error = 0; + if (NFS_FILEID(new_inode) == NFS_FILEID(old_inode)) + goto out; + + error = -EBUSY; if (S_ISDIR(new_inode->i_mode)) goto out; else if (atomic_read(&new_dentry->d_count) > 1) { @@ -1088,34 +1239,62 @@ out: int nfs_permission(struct inode *inode, int mask) { - int error = vfs_permission(inode, mask); - - if (!NFS_PROTO(inode)->access) - goto out; - - if (error == -EROFS) - goto out; - - /* - * Trust UNIX mode bits except: - * - * 1) When override capabilities may have been invoked - * 2) When root squashing may be involved - * 3) When ACLs may overturn a negative answer */ - if (!capable(CAP_DAC_OVERRIDE) && !capable(CAP_DAC_READ_SEARCH) - && (current->fsuid != 0) && (current->fsgid != 0) - && error != -EACCES) - goto out; + struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; + struct rpc_cred *cred; + int mode = inode->i_mode; + int error; - error = NFS_PROTO(inode)->access(inode, mask, 0); + if (mask & MAY_WRITE) { + /* + * + * Nobody gets write access to a read-only fs. + * + */ + if (IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; - if (error == -EACCES && NFS_CLIENT(inode)->cl_droppriv && - current->uid != 0 && current->gid != 0 && - (current->fsuid != current->uid || current->fsgid != current->gid)) - error = NFS_PROTO(inode)->access(inode, mask, 1); + /* + * + * Nobody gets write access to an immutable file. + * + */ + if (IS_IMMUTABLE(inode)) + return -EACCES; + } - out: - return error; + if (!NFS_PROTO(inode)->access) + goto out_notsup; + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (cache->cred == cred + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + if (!cache->err) { + /* Is the mask a subset of an accepted mask? */ + if ((cache->mask & mask) == mask) + goto out_cached; + } else { + /* ...or is it a superset of a rejected mask? */ + if ((cache->mask & mask) == cache->mask) + goto out_cached; + } + } + error = NFS_PROTO(inode)->access(inode, cred, mask); + if (!error || error == -EACCES) { + cache->jiffies = jiffies; + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = cred; + cache->mask = mask; + cache->err = error; + return error; + } + put_rpccred(cred); +out_notsup: + nfs_revalidate_inode(NFS_SERVER(inode), inode); + return vfs_permission(inode, mask); +out_cached: + put_rpccred(cred); + return cache->err; } /* diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/inode.c linux-2.4.26-07-sock_disconnect/fs/nfs/inode.c --- linux-2.4.26/fs/nfs/inode.c 2004-04-14 11:28:36.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/inode.c 2004-04-16 09:35:36.000000000 -0700 @@ -146,10 +146,14 @@ nfs_delete_inode(struct inode * inode) static void nfs_clear_inode(struct inode *inode) { - struct rpc_cred *cred = NFS_I(inode)->mm_cred; + struct nfs_inode_info *nfsi = NFS_I(inode); + struct rpc_cred *cred = nfsi->mm_cred; if (cred) put_rpccred(cred); + cred = nfsi->cache_access.cred; + if (cred) + put_rpccred(cred); } void @@ -251,6 +255,72 @@ nfs_get_root(struct super_block *sb, str } /* + * Set up the NFS superblock private area using probed values + */ +static int +nfs_setup_superblock(struct super_block *sb, struct nfs_fh *rootfh) +{ + struct nfs_server *server = &sb->u.nfs_sb.s_server; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { &fattr, }; + struct nfs_pathconf pathinfo = { &fattr, }; + int maxlen, res; + + res = server->rpc_ops->fsinfo(server, rootfh, &fsinfo); + if (res < 0) + return res; + + /* Work out a lot of parameters */ + if (!server->rsize) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (!server->wsize) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ + if (!fsinfo.wtmult) + fsinfo.wtmult = 512; + sb->s_blocksize = nfs_block_bits(fsinfo.wtmult, &sb->s_blocksize_bits); + + if (server->rsize > fsinfo.rtmax) + server->rsize = fsinfo.rtmax; + if (server->wsize > fsinfo.wtmax) + server->wsize = fsinfo.wtmax; + + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->rpages > NFS_READ_MAXIOV) { + server->rpages = NFS_READ_MAXIOV; + server->rsize = server->rpages << PAGE_CACHE_SHIFT; + } + + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->wpages > NFS_WRITE_MAXIOV) { + server->wpages = NFS_WRITE_MAXIOV; + server->wsize = server->wpages << PAGE_CACHE_SHIFT; + } + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + maxlen = (server->rpc_ops->version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; + if (!server->namelen) { + res = server->rpc_ops->pathconf(server, rootfh, &pathinfo); + if (!res) + server->namelen = pathinfo.name_max; + } + if (!server->namelen || server->namelen > maxlen) + server->namelen = maxlen; + + sb->s_maxbytes = fsinfo.maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE) + sb->s_maxbytes = MAX_LFS_FILESIZE; + + return 0; +} + +/* * The way this works is that the mount process passes a structure * in the data argument which contains the server's IP address * and the root file handle obtained from the server's mount @@ -268,8 +338,7 @@ nfs_read_super(struct super_block *sb, v unsigned int authflavor; struct sockaddr_in srvaddr; struct rpc_timeout timeparms; - struct nfs_fsinfo fsinfo; - int tcp, version, maxlen; + int tcp, version; memset(&sb->u.nfs_sb, 0, sizeof(sb->u.nfs_sb)); if (!data) @@ -298,11 +367,11 @@ nfs_read_super(struct super_block *sb, v sb->s_magic = NFS_SUPER_MAGIC; sb->s_op = &nfs_sops; - sb->s_blocksize_bits = 0; - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); server = &sb->u.nfs_sb.s_server; - server->rsize = nfs_block_size(data->rsize, NULL); - server->wsize = nfs_block_size(data->wsize, NULL); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags & NFS_MOUNT_FLAGMASK; if (data->flags & NFS_MOUNT_NOAC) { @@ -326,12 +395,14 @@ nfs_read_super(struct super_block *sb, v INIT_LIST_HEAD(&server->lru_busy); nfsv3_try_again: + server->caps = 0; /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ if (data->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -409,63 +480,11 @@ nfs_read_super(struct super_block *sb, v sb->s_root->d_op = &nfs_dentry_operations; /* Get some general file system info */ - if (server->rpc_ops->statfs(server, root, &fsinfo) >= 0) { - if (server->namelen == 0) - server->namelen = fsinfo.namelen; - } else { + if (nfs_setup_superblock(sb, root) < 0) { printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); goto out_no_root; } - /* Work out a lot of parameters */ - if (data->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (data->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ - if (!fsinfo.bsize) - fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult; - /* Also make sure we don't go below rsize/wsize since - * RPC calls are expensive */ - if (fsinfo.bsize < server->rsize) - fsinfo.bsize = server->rsize; - if (fsinfo.bsize < server->wsize) - fsinfo.bsize = server->wsize; - - if (data->bsize == 0) - sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits); - if (server->rsize > fsinfo.rtmax) - server->rsize = fsinfo.rtmax; - if (server->wsize > fsinfo.wtmax) - server->wsize = fsinfo.wtmax; - - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } - - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - maxlen = (version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; - - if (server->namelen == 0 || server->namelen > maxlen) - server->namelen = maxlen; - - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; - /* Fire up the writeback cache */ if (nfs_reqlist_alloc(server) < 0) { printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n"); @@ -526,7 +545,8 @@ nfs_statfs(struct super_block *sb, struc struct nfs_server *server = &sb->u.nfs_sb.s_server; unsigned char blockbits; unsigned long blockres; - struct nfs_fsinfo res; + struct nfs_fattr attr; + struct nfs_fsstat res = { &attr, }; int error; error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res); @@ -534,18 +554,15 @@ nfs_statfs(struct super_block *sb, struc if (error < 0) goto out_err; - if (res.bsize == 0) - res.bsize = sb->s_blocksize; - buf->f_bsize = nfs_block_bits(res.bsize, &blockbits); + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; buf->f_bavail = (res.abytes + blockres) >> blockbits; buf->f_files = res.tfiles; buf->f_ffree = res.afiles; - if (res.namelen == 0 || res.namelen > server->namelen) - res.namelen = server->namelen; - buf->f_namelen = res.namelen; + buf->f_namelen = server->namelen; return 0; out_err: printk("nfs_statfs: statfs error = %d\n", -error); @@ -623,36 +640,35 @@ nfs_invalidate_inode(struct inode *inode nfs_zap_caches(inode); } +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + /* * Fill in inode information from the fattr. */ static void nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) { - /* - * Check whether the mode has been set, as we only want to - * do this once. (We don't allow inodes to change types.) + NFS_FILEID(inode) = fattr->fileid; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. */ - if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; - inode->i_mode = fattr->mode; - /* Why so? Because we want revalidate for devices/FIFOs, and - * that's precisely what we have in nfs_file_inode_operations. - */ - inode->i_op = &nfs_file_inode_operations; - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; - inode->i_data.a_ops = &nfs_file_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; - inode->i_fop = &nfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else - init_special_inode(inode, inode->i_mode, fattr->rdev); - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nfs_dir_inode_operations; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } struct nfs_find_desc { @@ -727,7 +743,14 @@ __nfs_fhget(struct super_block *sb, stru if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + /* + * Check whether the mode has been set, as we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) + nfs_fill_inode(inode, fh, fattr); + + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); @@ -850,15 +873,23 @@ int nfs_open(struct inode *inode, struct { struct rpc_auth *auth; struct rpc_cred *cred; + int err = 0; lock_kernel(); + /* Ensure that we revalidate the data cache */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) { + err = __nfs_revalidate_inode(NFS_SERVER(inode),inode); + if (err) + goto out; + } auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; if (filp->f_mode & FMODE_WRITE) nfs_set_mmcred(inode, cred); +out: unlock_kernel(); - return 0; + return err; } int nfs_release(struct inode *inode, struct file *filp) @@ -993,6 +1024,9 @@ __nfs_refresh_inode(struct inode *inode, goto out_err; } + /* Throw out obsolete READDIRPLUS attributes */ + if (time_before(fattr->timestamp, NFS_READTIME(inode))) + return 0; /* * Make sure the inode's type hasn't changed. */ @@ -1011,7 +1045,7 @@ __nfs_refresh_inode(struct inode *inode, /* * Update the read time so we don't revalidate too often. */ - NFS_READTIME(inode) = jiffies; + NFS_READTIME(inode) = fattr->timestamp; /* * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. @@ -1060,7 +1094,8 @@ __nfs_refresh_inode(struct inode *inode, inode->i_atime = new_atime; if (NFS_CACHE_MTIME(inode) != new_mtime) { - NFS_MTIME_UPDATE(inode) = jiffies; + if (invalid) + NFS_MTIME_UPDATE(inode) = fattr->timestamp; NFS_CACHE_MTIME(inode) = new_mtime; inode->i_mtime = nfs_time_to_secs(new_mtime); } @@ -1068,6 +1103,16 @@ __nfs_refresh_inode(struct inode *inode, NFS_CACHE_ISIZE(inode) = new_size; inode->i_size = new_isize; + if (inode->i_mode != fattr->mode || + inode->i_uid != fattr->uid || + inode->i_gid != fattr->gid) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/nfs2xdr.c linux-2.4.26-07-sock_disconnect/fs/nfs/nfs2xdr.c --- linux-2.4.26/fs/nfs/nfs2xdr.c 2004-04-14 11:28:20.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/nfs2xdr.c 2004-04-16 09:35:36.000000000 -0700 @@ -118,6 +118,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } + fattr->timestamp = jiffies; return p; } @@ -369,7 +370,7 @@ nfs_xdr_readdirargs(struct rpc_rqst *req count = count >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + *p++ = htonl(args->cookie & 0xFFFFFFFF); *p++ = htonl(count); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -466,7 +467,7 @@ nfs_decode_dirent(u32 *p, struct nfs_ent entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; @@ -595,36 +596,18 @@ nfs_xdr_writeres(struct rpc_rqst *req, u * Decode STATFS reply */ static int -nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_statfs *res) { int status; - u32 xfer_size; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); - /* For NFSv2, we more or less have to guess the preferred - * read/write/readdir sizes from the single 'transfer size' - * value. - */ - xfer_size = ntohl(*p++); /* tsize */ - res->rtmax = 8 * 1024; - res->rtpref = xfer_size; - res->rtmult = xfer_size; - res->wtmax = 8 * 1024; - res->wtpref = xfer_size; - res->wtmult = xfer_size; - res->dtpref = PAGE_CACHE_SIZE; - res->maxfilesize = 0x7FFFFFFF; /* just a guess */ + res->tsize = ntohl(*p++); res->bsize = ntohl(*p++); - - res->tbytes = ntohl(*p++) * res->bsize; - res->fbytes = ntohl(*p++) * res->bsize; - res->abytes = ntohl(*p++) * res->bsize; - res->tfiles = 0; - res->ffiles = 0; - res->afiles = 0; - res->namelen = 0; + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); return 0; } diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/nfs3proc.c linux-2.4.26-07-sock_disconnect/fs/nfs/nfs3proc.c --- linux-2.4.26/fs/nfs/nfs3proc.c 2004-04-14 11:28:23.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/nfs3proc.c 2004-04-16 09:35:36.000000000 -0700 @@ -117,12 +117,13 @@ nfs3_proc_lookup(struct inode *dir, stru } static int -nfs3_proc_access(struct inode *inode, int mode, int ruid) +nfs3_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) { struct nfs_fattr fattr; struct nfs3_accessargs arg = { NFS_FH(inode), 0 }; struct nfs3_accessres res = { &fattr, 0 }; - int status, flags; + struct rpc_message msg = { NFS3PROC_ACCESS, &arg, &res, cred }; + int status; dprintk("NFS call access\n"); fattr.valid = 0; @@ -140,8 +141,7 @@ nfs3_proc_access(struct inode *inode, in if (mode & MAY_EXEC) arg.access |= NFS3_ACCESS_EXECUTE; } - flags = (ruid) ? RPC_CALL_REALUID : 0; - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_ACCESS, &arg, &res, flags); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply access\n"); @@ -433,6 +433,8 @@ nfs3_proc_rmdir(struct inode *dir, struc * The decode function itself doesn't perform any decoding, it just makes * sure the reply is syntactically correct. * + * Also note that this implementation handles both plain readdir and + * readdirplus. */ static int nfs3_proc_readdir(struct inode *dir, struct rpc_cred *cred, @@ -446,7 +448,11 @@ nfs3_proc_readdir(struct inode *dir, str struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; int status; - dprintk("NFS call readdir %d\n", (unsigned int) cookie); + if (plus) + msg.rpc_proc = NFS3PROC_READDIRPLUS; + + dprintk("NFS call readdir%s %d\n", + plus? "plus" : "", (unsigned int) cookie); dir_attr.valid = 0; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); @@ -482,24 +488,42 @@ nfs3_proc_mknod(struct inode *dir, struc return status; } -/* - * This is a combo call of fsstat and fsinfo - */ static int nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; - dprintk("NFS call fsstat\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0); - if (status < 0) - goto error; + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call fsinfo\n"); status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("NFS reply fsinfo: %d\n", status); + return status; +} -error: - dprintk("NFS reply statfs: %d\n", status); +static int +nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call pathconf\n"); + status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + dprintk("NFS reply pathconf: %d\n", status); return status; } @@ -528,5 +552,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { nfs3_proc_readdir, nfs3_proc_mknod, nfs3_proc_statfs, + nfs3_proc_fsinfo, + nfs3_proc_pathconf, nfs3_decode_dirent, }; diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/nfs3xdr.c linux-2.4.26-07-sock_disconnect/fs/nfs/nfs3xdr.c --- linux-2.4.26/fs/nfs/nfs3xdr.c 2004-04-14 11:28:57.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/nfs3xdr.c 2004-04-16 09:35:36.000000000 -0700 @@ -181,6 +181,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt /* Update the mode bits */ fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + fattr->timestamp = jiffies; return p; } @@ -465,6 +466,13 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -476,7 +484,7 @@ nfs3_xdr_readdirargs(struct rpc_rqst *re u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { @@ -599,6 +607,9 @@ err_unmap: u32 * nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { + struct nfs_entry old = *entry; + u64 cookie; + if (!*p++) { if (!*p) return ERR_PTR(-EAGAIN); @@ -611,16 +622,23 @@ nfs3_decode_dirent(u32 *p, struct nfs_en entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { - struct nfs_fattr fattr; - p = xdr_decode_post_op_attr(p, &fattr); + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); /* In fact, a post_op_fh3: */ if (*p++) { - struct nfs_fh fh; - p = xdr_decode_fhandle(p, &fh); - } + p = xdr_decode_fhandle(p, entry->fh); + /* Ugh -- server reply was truncated */ + if (p == NULL) { + dprintk("NFS: FH truncated\n"); + *entry = old; + return ERR_PTR(-EAGAIN); + } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } entry->eof = !p[0] && p[1]; @@ -903,14 +921,13 @@ nfs3_xdr_linkres(struct rpc_rqst *req, u * Decode FSSTAT reply */ static int -nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -920,8 +937,7 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, p = xdr_decode_hyper(p, &res->tfiles); p = xdr_decode_hyper(p, &res->ffiles); p = xdr_decode_hyper(p, &res->afiles); - - /* ignore invarsec */ + res->invarsec = ntohl(*p++); return 0; } @@ -931,12 +947,11 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, static int nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -948,8 +963,8 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, res->wtmult = ntohl(*p++); res->dtpref = ntohl(*p++); p = xdr_decode_hyper(p, &res->maxfilesize); - - /* ignore time_delta and properties */ + p = xdr_decode_time3(p, &res->time_delta); + res->properties = ntohl(*p++); return 0; } @@ -957,20 +972,21 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, * Decode PATHCONF reply */ static int -nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); res->linkmax = ntohl(*p++); - res->namelen = ntohl(*p++); - - /* ignore remaining fields */ + res->name_max = ntohl(*p++); + res->no_trunc = ntohl(*p++) != 0; + res->chown_restricted = ntohl(*p++) != 0; + res->case_insensitive = ntohl(*p++) != 0; + res->case_preserving = ntohl(*p++) != 0; return 0; } diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/proc.c linux-2.4.26-07-sock_disconnect/fs/nfs/proc.c --- linux-2.4.26/fs/nfs/proc.c 2004-04-14 11:28:21.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/proc.c 2004-04-16 09:35:30.000000000 -0700 @@ -351,17 +351,62 @@ nfs_proc_readdir(struct inode *dir, stru static int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; + struct nfs2_statfs fsinfo; - dprintk("NFS call statfs\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0); + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); dprintk("NFS reply statfs: %d\n", status); + if (status) + goto out; + stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize; + stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize; + stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize; + stat->tfiles = 0; + stat->ffiles = 0; + stat->afiles = 0; + stat->invarsec = 0; + out: return status; } +static int +nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + struct nfs2_statfs fsinfo; + + info->fattr->valid = 0; + dprintk("NFS call fsinfo\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("NFS reply fsinfo: %d\n", status); + if (status) + goto out; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->time_delta = 0; + info->properties = 0x1b; + out: + return status; +} + +static int +nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + return -ENOTSUPP; +} + extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); struct nfs_rpc_ops nfs_v2_clientops = { @@ -387,5 +432,7 @@ struct nfs_rpc_ops nfs_v2_clientops nfs_proc_readdir, nfs_proc_mknod, nfs_proc_statfs, + nfs_proc_fsinfo, + nfs_proc_pathconf, nfs_decode_dirent, }; diff -u --recursive --new-file --show-c-function linux-2.4.26/fs/nfs/unlink.c linux-2.4.26-07-sock_disconnect/fs/nfs/unlink.c --- linux-2.4.26/fs/nfs/unlink.c 2004-04-14 11:28:50.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/fs/nfs/unlink.c 2004-04-16 09:35:23.000000000 -0700 @@ -12,6 +12,7 @@ #include #include #include +#include struct nfs_unlinkdata { @@ -21,6 +22,9 @@ struct nfs_unlinkdata { struct rpc_task task; struct rpc_cred *cred; unsigned int count; + + wait_queue_head_t waitq; + int completed; }; static struct nfs_unlinkdata *nfs_deletes; @@ -133,6 +137,8 @@ nfs_async_unlink_done(struct rpc_task *t put_rpccred(data->cred); data->cred = NULL; dput(dir); + data->completed = 1; + wake_up(&data->waitq); } /** @@ -175,6 +181,8 @@ nfs_async_unlink(struct dentry *dentry) nfs_deletes = data; data->count = 1; + init_waitqueue_head(&data->waitq); + task = &data->task; rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC); task->tk_calldata = data; @@ -212,7 +220,10 @@ nfs_complete_unlink(struct dentry *dentr data->count++; nfs_copy_dname(dentry, data); dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; - if (data->task.tk_rpcwait == &nfs_delete_queue) + if (data->task.tk_rpcwait == &nfs_delete_queue) { + struct rpc_clnt *clnt = data->task.tk_client; rpc_wake_up_task(&data->task); + nfs_wait_event(clnt, data->waitq, data->completed == 1); + } nfs_put_unlinkdata(data); } diff -u --recursive --new-file --show-c-function linux-2.4.26/include/linux/nfs_fs.h linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs.h --- linux-2.4.26/include/linux/nfs_fs.h 2004-04-14 11:28:18.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs.h 2004-04-16 09:35:36.000000000 -0700 @@ -102,8 +102,15 @@ do { \ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 1 : 0) +static inline int nfs_server_capable(struct inode *inode, int cap) +{ + return NFS_SERVER(inode)->caps & cap; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; +} /* * These are the default flags for swap requests diff -u --recursive --new-file --show-c-function linux-2.4.26/include/linux/nfs_fs_i.h linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs_i.h --- linux-2.4.26/include/linux/nfs_fs_i.h 2004-04-14 11:28:18.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs_i.h 2004-04-16 09:35:26.000000000 -0700 @@ -6,6 +6,16 @@ #include /* + * NFSv3 Access mode cache + */ +struct nfs_access_cache { + unsigned long jiffies; + struct rpc_cred * cred; + int mask; + int err; +}; + +/* * nfs fs inode data in memory */ struct nfs_inode_info { @@ -54,6 +64,8 @@ struct nfs_inode_info { */ unsigned long cache_mtime_jiffies; + struct nfs_access_cache cache_access; + /* * This is the cookie verifier used for NFSv3 readdir * operations diff -u --recursive --new-file --show-c-function linux-2.4.26/include/linux/nfs_fs_sb.h linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs_sb.h --- linux-2.4.26/include/linux/nfs_fs_sb.h 2004-04-14 11:28:57.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/include/linux/nfs_fs_sb.h 2004-04-16 09:35:36.000000000 -0700 @@ -10,6 +10,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -36,4 +37,8 @@ struct nfs_sb_info { struct nfs_server s_server; }; +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + + #endif diff -u --recursive --new-file --show-c-function linux-2.4.26/include/linux/nfs_xdr.h linux-2.4.26-07-sock_disconnect/include/linux/nfs_xdr.h --- linux-2.4.26/include/linux/nfs_xdr.h 2004-04-14 11:28:46.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/include/linux/nfs_xdr.h 2004-04-16 09:35:36.000000000 -0700 @@ -27,6 +27,7 @@ struct nfs_fattr { __u64 atime; __u64 mtime; __u64 ctime; + unsigned long timestamp; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ @@ -37,6 +38,7 @@ struct nfs_fattr { * Info on the file system */ struct nfs_fsinfo { + struct nfs_fattr *fattr; __u32 rtmax; /* max. read transfer size */ __u32 rtpref; /* pref. read transfer size */ __u32 rtmult; /* reads should be multiple of this */ @@ -45,15 +47,37 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - __u64 bsize; /* block size */ + __u64 time_delta; + __u32 properties; +}; + +struct nfs_fsstat { + struct nfs_fattr *fattr; __u64 tbytes; /* total size in bytes */ __u64 fbytes; /* # of free bytes */ __u64 abytes; /* # of bytes available to user */ __u64 tfiles; /* # of files */ __u64 ffiles; /* # of free files */ __u64 afiles; /* # of files available to user */ + __u32 invarsec; +}; + +struct nfs_pathconf { + struct nfs_fattr *fattr; /* Post-op attributes */ __u32 linkmax;/* max # of hard links */ - __u32 namelen;/* max name length */ + __u32 name_max;/* max name length */ + int no_trunc : 1, + chown_restricted : 1, + case_insensitive : 1, + case_preserving : 1; +}; + +struct nfs2_statfs { + __u32 tsize; /* Server transfer size */ + __u32 bsize; /* Filesystem block size */ + __u32 blocks; /* No. of "bsize" blocks on filesystem */ + __u32 bfree; /* No. of free "bsize" blocks */ + __u32 bavail; /* No. of available "bsize" blocks */ }; /* Arguments to the read call. @@ -109,6 +133,8 @@ struct nfs_entry { const char * name; unsigned int len; int eof; + struct nfs_fh *fh; + struct nfs_fattr *fattr; }; /* @@ -298,7 +324,7 @@ struct nfs_rpc_ops { struct iattr *); int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, int , int); + int (*access) (struct inode *, struct rpc_cred *, int); int (*readlink)(struct inode *, struct page *); int (*read) (struct inode *, struct rpc_cred *, struct nfs_fattr *, @@ -330,7 +356,11 @@ struct nfs_rpc_ops { int (*mknod) (struct inode *, struct qstr *, struct iattr *, dev_t, struct nfs_fh *, struct nfs_fattr *); int (*statfs) (struct nfs_server *, struct nfs_fh *, + struct nfs_fsstat *); + int (*fsinfo) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*pathconf) (struct nfs_server *, struct nfs_fh *, + struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); }; diff -u --recursive --new-file --show-c-function linux-2.4.26/include/linux/sunrpc/xprt.h linux-2.4.26-07-sock_disconnect/include/linux/sunrpc/xprt.h --- linux-2.4.26/include/linux/sunrpc/xprt.h 2004-04-14 11:28:35.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/include/linux/sunrpc/xprt.h 2004-04-16 09:35:42.000000000 -0700 @@ -162,6 +162,12 @@ struct rpc_xprt { tcp_offset; /* fragment offset */ unsigned long tcp_copied, /* copied to request */ tcp_flags; + /* + * Disconnection of idle sockets + */ + struct tq_struct task_cleanup; + struct timer_list timer; + unsigned long last_used; /* * Send stuff @@ -200,6 +206,7 @@ int xprt_clear_backlog(struct rpc_xprt void xprt_sock_setbufsize(struct rpc_xprt *); #define XPRT_CONNECT 0 +#define XPRT_LOCKED 1 #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff -u --recursive --new-file --show-c-function linux-2.4.26/net/sunrpc/xprt.c linux-2.4.26-07-sock_disconnect/net/sunrpc/xprt.c --- linux-2.4.26/net/sunrpc/xprt.c 2004-04-14 11:28:33.000000000 -0700 +++ linux-2.4.26-07-sock_disconnect/net/sunrpc/xprt.c 2004-04-16 09:35:42.000000000 -0700 @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,7 @@ #endif #define XPRT_MAX_BACKOFF (8) +#define XPRT_IDLE_TIMEOUT (5*60*HZ) /* * Local functions @@ -139,25 +141,34 @@ static int __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt->snd_task) { - if (xprt->nocong || __xprt_get_cong(xprt, task)) { - xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } - } + + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (xprt->snd_task == task) + return 1; + if (task == NULL) + return 0; + goto out_sleep; } - if (xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full\n", task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); - else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (xprt->nocong || __xprt_get_cong(xprt, task)) { + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; } - return xprt->snd_task == task; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); +out_sleep: + dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; } static inline int @@ -175,15 +186,15 @@ __xprt_lock_write_next(struct rpc_xprt * { struct rpc_task *task; - if (xprt->snd_task) + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) return; + if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) - return; task = rpc_wake_up_next(&xprt->sending); if (!task) - return; + goto out_unlock; } if (xprt->nocong || __xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; @@ -192,7 +203,12 @@ __xprt_lock_write_next(struct rpc_xprt * req->rq_bytes_sent = 0; req->rq_ntrans++; } + return; } +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); } /* @@ -201,9 +217,13 @@ __xprt_lock_write_next(struct rpc_xprt * static void __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { - if (xprt->snd_task == task) + if (xprt->snd_task == task) { xprt->snd_task = NULL; - __xprt_lock_write_next(xprt); + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); + __xprt_lock_write_next(xprt); + } } static inline void @@ -416,6 +436,15 @@ xprt_close(struct rpc_xprt *xprt) sock_release(sock); } +static void +xprt_socket_autoclose(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + + xprt_close(xprt); + xprt_release_write(xprt, NULL); +} + /* * Mark a transport as disconnected */ @@ -430,6 +459,27 @@ xprt_disconnect(struct rpc_xprt *xprt) } /* + * Used to allow disconnection when we've been idle + */ +static void +xprt_init_autodisconnect(unsigned long data) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)data; + + spin_lock(&xprt->sock_lock); + if (!list_empty(&xprt->recv) || xprt->shutdown) + goto out_abort; + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + goto out_abort; + spin_unlock(&xprt->sock_lock); + /* Let keventd close the socket */ + schedule_task(&xprt->task_cleanup); + return; +out_abort: + spin_unlock(&xprt->sock_lock); +} + +/* * Reconnect a broken TCP connection. * */ @@ -1254,6 +1304,8 @@ xprt_reserve(struct rpc_task *task) spin_lock(&xprt->xprt_lock); do_xprt_reserve(task); spin_unlock(&xprt->xprt_lock); + if (task->tk_rqstp) + del_timer_sync(&xprt->timer); } } @@ -1333,6 +1385,9 @@ xprt_release(struct rpc_task *task) __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); + xprt->last_used = jiffies; + if (list_empty(&xprt->recv) && !xprt->shutdown) + mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -1403,6 +1458,11 @@ xprt_setup(int proto, struct sockaddr_in init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->recv); + INIT_TQUEUE(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; /* Set timeout parameters */ if (to) { @@ -1580,6 +1640,7 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); + del_timer_sync(&xprt->timer); } /*