diff -u --recursive --new-file linux-2.4.19/Documentation/Configure.help linux-2.4.19-29-fix_lockd4/Documentation/Configure.help --- linux-2.4.19/Documentation/Configure.help Sat Aug 3 02:39:42 2002 +++ linux-2.4.19-29-fix_lockd4/Documentation/Configure.help Sat Oct 5 03:55:07 2002 @@ -14840,6 +14840,30 @@ If unsure, say N. +Allow direct I/O on files in NFS +CONFIG_NFS_DIRECTIO + There are important applications whose performance or correctness + depends on uncached access to file data. Database clusters (multiple + copies of the same instance running on separate hosts) implement their + own cache coherency protocol that subsumes the NFS cache protocols. + Applications that process datasets considerably larger than the client's + memory do not always benefit from a local cache. A streaming video + server, for instance, has no need to cache the contents of a file. + + This option enables applications to perform direct I/O on files in NFS + file systems using the O_DIRECT open() flag. When O_DIRECT is set for + files, their data is not cached in the system's page cache. Direct + read and write operations are aligned to block boundaries. Data is + moved to and from user-level application buffers directly. + + Unless your program is designed to use O_DIRECT properly, you are much + better off allowing the NFS client to manage caching for you. Misusing + O_DIRECT can cause poor server performance or network storms. This + kernel build option defaults OFF to avoid exposing system administrators + unwittingly to a potentially hazardous feature. + + If unsure, say N. 
+ Root file system on NFS CONFIG_ROOT_NFS If you want your Linux box to mount its whole root file system (the diff -u --recursive --new-file linux-2.4.19/arch/i386/kernel/i386_ksyms.c linux-2.4.19-29-fix_lockd4/arch/i386/kernel/i386_ksyms.c --- linux-2.4.19/arch/i386/kernel/i386_ksyms.c Sat Aug 3 02:39:42 2002 +++ linux-2.4.19-29-fix_lockd4/arch/i386/kernel/i386_ksyms.c Sat Oct 5 03:50:54 2002 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,11 @@ EXPORT_SYMBOL(atomic_dec_and_lock); #endif +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + extern int is_sony_vaio_laptop; EXPORT_SYMBOL(is_sony_vaio_laptop); diff -u --recursive --new-file linux-2.4.19/arch/mips/kernel/mips_ksyms.c linux-2.4.19-29-fix_lockd4/arch/mips/kernel/mips_ksyms.c --- linux-2.4.19/arch/mips/kernel/mips_ksyms.c Sat Aug 3 02:39:43 2002 +++ linux-2.4.19-29-fix_lockd4/arch/mips/kernel/mips_ksyms.c Sat Oct 5 03:50:54 2002 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -127,3 +128,8 @@ #endif EXPORT_SYMBOL(get_wchan); + +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif diff -u --recursive --new-file linux-2.4.19/arch/ppc/kernel/ppc_ksyms.c linux-2.4.19-29-fix_lockd4/arch/ppc/kernel/ppc_ksyms.c --- linux-2.4.19/arch/ppc/kernel/ppc_ksyms.c Sat Aug 3 02:39:43 2002 +++ linux-2.4.19-29-fix_lockd4/arch/ppc/kernel/ppc_ksyms.c Sat Oct 5 03:50:55 2002 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -364,3 +365,8 @@ EXPORT_SYMBOL_NOVERS(agp_special_page); #endif /* defined(CONFIG_ALL_PPC) */ +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + diff -u --recursive --new-file linux-2.4.19/arch/sparc/kernel/sparc_ksyms.c linux-2.4.19-29-fix_lockd4/arch/sparc/kernel/sparc_ksyms.c --- linux-2.4.19/arch/sparc/kernel/sparc_ksyms.c Sat Aug 3 02:39:43 2002 +++ linux-2.4.19-29-fix_lockd4/arch/sparc/kernel/sparc_ksyms.c Sat 
Oct 5 03:50:55 2002 @@ -23,6 +23,7 @@ #include #endif #include +#include #include #include @@ -300,3 +301,8 @@ /* Sun Power Management Idle Handler */ EXPORT_SYMBOL(pm_idle); + +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif diff -u --recursive --new-file linux-2.4.19/fs/Config.in linux-2.4.19-29-fix_lockd4/fs/Config.in --- linux-2.4.19/fs/Config.in Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/Config.in Sat Oct 5 03:55:07 2002 @@ -95,6 +95,7 @@ dep_tristate 'InterMezzo file system support (experimental, replicating fs)' CONFIG_INTERMEZZO_FS $CONFIG_INET $CONFIG_EXPERIMENTAL dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS + dep_mbool ' Allow direct I/O on NFS files (EXPERIMENTAL)' CONFIG_NFS_DIRECTIO $CONFIG_NFS_FS $CONFIG_EXPERIMENTAL dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET diff -u --recursive --new-file linux-2.4.19/fs/block_dev.c linux-2.4.19-29-fix_lockd4/fs/block_dev.c --- linux-2.4.19/fs/block_dev.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/block_dev.c Sat Oct 5 03:55:07 2002 @@ -131,8 +131,9 @@ return 0; } -static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) +static int blkdev_direct_IO(int rw, struct file * filp, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) { + struct inode * inode = filp->f_dentry->d_inode->i_mapping->host; return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block); } diff -u --recursive --new-file linux-2.4.19/fs/ext2/inode.c linux-2.4.19-29-fix_lockd4/fs/ext2/inode.c --- linux-2.4.19/fs/ext2/inode.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/ext2/inode.c Sat Oct 5 03:55:07 2002 @@ -583,8 +583,9 @@ { return generic_block_bmap(mapping,block,ext2_get_block); } -static int 
ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) +static int ext2_direct_IO(int rw, struct file * filp, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) { + struct inode * inode = filp->f_dentry->d_inode->i_mapping->host; return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block); } struct address_space_operations ext2_aops = { diff -u --recursive --new-file linux-2.4.19/fs/lockd/clntproc.c linux-2.4.19-29-fix_lockd4/fs/lockd/clntproc.c --- linux-2.4.19/fs/lockd/clntproc.c Thu Oct 11 16:52:18 2001 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/clntproc.c Sat Oct 5 03:55:46 2002 @@ -460,7 +460,7 @@ } if (status < 0) return status; - } while (resp->status == NLM_LCK_BLOCKED); + } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; diff -u --recursive --new-file linux-2.4.19/fs/lockd/svc4proc.c linux-2.4.19-29-fix_lockd4/fs/lockd/svc4proc.c --- linux-2.4.19/fs/lockd/svc4proc.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/svc4proc.c Sat Oct 5 03:55:52 2002 @@ -462,6 +462,24 @@ } /* + * client sent a GRANTED_RES, let's remove the associated block + */ +static int +nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, + void *resp) +{ + if (!nlmsvc_ops) + return rpc_success; + + dprintk("lockd: GRANTED_RES called\n"); + + nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + return rpc_success; +} + + + +/* * This is the generic lockd callback for async RPC calls */ static u32 @@ -524,7 +542,6 @@ #define nlm4svc_proc_lock_res nlm4svc_proc_null #define nlm4svc_proc_cancel_res nlm4svc_proc_null #define nlm4svc_proc_unlock_res nlm4svc_proc_null -#define nlm4svc_proc_granted_res nlm4svc_proc_null struct nlm_void { int dummy; }; @@ -554,7 +571,7 @@ PROC(lock_res, lockres, norep, res, void), PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, 
norep, res, void), - PROC(granted_res, grantedres, norep, res, void), + PROC(granted_res, res, norep, res, void), /* statd callback */ PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), diff -u --recursive --new-file linux-2.4.19/fs/lockd/svclock.c linux-2.4.19-29-fix_lockd4/fs/lockd/svclock.c --- linux-2.4.19/fs/lockd/svclock.c Thu Oct 11 16:52:18 2001 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/svclock.c Sat Oct 5 03:56:08 2002 @@ -64,7 +64,7 @@ if (when != NLM_NEVER) { if ((when += jiffies) == NLM_NEVER) when ++; - while ((b = *bp) && time_before_eq(b->b_when,when)) + while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) bp = &b->b_next; } else while ((b = *bp)) @@ -143,14 +143,15 @@ * Find a block with a given NLM cookie. */ static inline struct nlm_block * -nlmsvc_find_block(struct nlm_cookie *cookie) +nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) { struct nlm_block *block; for (block = nlm_blocked; block; block = block->b_next) { dprintk("cookie: head of blocked queue %p, block %p\n", nlm_blocked, block); - if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)) + if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie) + && nlm_cmp_addr(sin, &block->b_host->h_addr)) break; } @@ -572,12 +573,16 @@ struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; struct nlm_block *block; unsigned long timeout; + struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); dprintk("lockd: GRANT_MSG RPC callback\n"); - dprintk("callback: looking for cookie %x \n", - *(unsigned int *)(call->a_args.cookie.data)); - if (!(block = nlmsvc_find_block(&call->a_args.cookie))) { - dprintk("lockd: no block for cookie %x\n", *(u32 *)(call->a_args.cookie.data)); + dprintk("callback: looking for cookie %x, host (%08x)\n", + *(unsigned int *)(call->a_args.cookie.data), + ntohl(peer_addr->sin_addr.s_addr)); + if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) { + dprintk("lockd: no 
block for cookie %x, host (%08x)\n", + *(u32 *)(call->a_args.cookie.data), + ntohl(peer_addr->sin_addr.s_addr)); return; } @@ -606,18 +611,21 @@ * block. */ void -nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status) +nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) { struct nlm_block *block; struct nlm_file *file; - if (!(block = nlmsvc_find_block(cookie))) + dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", + *(unsigned int *)(cookie->data), + ntohl(rqstp->rq_addr.sin_addr.s_addr), status); + if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr))) return; file = block->b_file; file->f_count++; down(&file->f_sema); - if ((block = nlmsvc_find_block(cookie)) != NULL) { + if ((block = nlmsvc_find_block(cookie,&rqstp->rq_addr)) != NULL) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); diff -u --recursive --new-file linux-2.4.19/fs/lockd/svcproc.c linux-2.4.19-29-fix_lockd4/fs/lockd/svcproc.c --- linux-2.4.19/fs/lockd/svcproc.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/svcproc.c Sat Oct 5 03:55:52 2002 @@ -490,6 +490,22 @@ } /* + * client sent a GRANTED_RES, let's remove the associated block + */ +static int +nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, + void *resp) +{ + if (!nlmsvc_ops) + return rpc_success; + + dprintk("lockd: GRANTED_RES called\n"); + + nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + return rpc_success; +} + +/* * This is the generic lockd callback for async RPC calls */ static u32 @@ -552,7 +568,6 @@ #define nlmsvc_proc_lock_res nlmsvc_proc_null #define nlmsvc_proc_cancel_res nlmsvc_proc_null #define nlmsvc_proc_unlock_res nlmsvc_proc_null -#define nlmsvc_proc_granted_res nlmsvc_proc_null struct nlm_void { int dummy; }; @@ -582,7 +597,7 @@ PROC(lock_res, lockres, norep, res, void), PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, 
res, void), - PROC(granted_res, grantedres, norep, res, void), + PROC(granted_res, res, norep, res, void), /* statd callback */ PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), diff -u --recursive --new-file linux-2.4.19/fs/lockd/xdr.c linux-2.4.19-29-fix_lockd4/fs/lockd/xdr.c --- linux-2.4.19/fs/lockd/xdr.c Mon Oct 1 22:45:47 2001 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/xdr.c Sat Oct 5 03:51:45 2002 @@ -561,11 +561,10 @@ #define nlmclt_decode_norep NULL #define PROC(proc, argtype, restype) \ - { "nlm_" #proc, \ - (kxdrproc_t) nlmclt_encode_##argtype, \ - (kxdrproc_t) nlmclt_decode_##restype, \ - MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ - 0 \ + { .p_procname = "nlm_" #proc, \ + .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ + .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ + .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \ } static struct rpc_procinfo nlm_procedures[] = { diff -u --recursive --new-file linux-2.4.19/fs/lockd/xdr4.c linux-2.4.19-29-fix_lockd4/fs/lockd/xdr4.c --- linux-2.4.19/fs/lockd/xdr4.c Mon Oct 1 22:45:47 2001 +++ linux-2.4.19-29-fix_lockd4/fs/lockd/xdr4.c Sat Oct 5 03:51:45 2002 @@ -566,12 +566,11 @@ */ #define nlm4clt_decode_norep NULL -#define PROC(proc, argtype, restype) \ - { "nlm4_" #proc, \ - (kxdrproc_t) nlm4clt_encode_##argtype, \ - (kxdrproc_t) nlm4clt_decode_##restype, \ - MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ - 0 \ +#define PROC(proc, argtype, restype) \ + { .p_procname = "nlm4_" #proc, \ + .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ + .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ + .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \ } static struct rpc_procinfo nlm4_procedures[] = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/Makefile linux-2.4.19-29-fix_lockd4/fs/nfs/Makefile --- linux-2.4.19/fs/nfs/Makefile Fri Nov 9 23:28:15 2001 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/Makefile Sat Oct 5 03:55:07 2002 @@ -14,6 +14,7 
@@ obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +obj-$(CONFIG_NFS_DIRECTIO) += direct.o obj-m := $(O_TARGET) diff -u --recursive --new-file linux-2.4.19/fs/nfs/dir.c linux-2.4.19-29-fix_lockd4/fs/nfs/dir.c --- linux-2.4.19/fs/nfs/dir.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/dir.c Sat Oct 5 03:55:12 2002 @@ -34,8 +34,11 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static int nfs_cached_lookup(struct inode *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -45,12 +48,15 @@ static int nfs_mknod(struct inode *, struct dentry *, int, int); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +static int nfs_fsync_dir(struct file *, struct dentry *, int); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, release: nfs_release, + fsync: nfs_fsync_dir }; struct inode_operations nfs_dir_inode_operations = { @@ -68,6 +74,25 @@ setattr: nfs_notify_change, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 
0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -99,24 +124,24 @@ struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); - void *buffer = kmap(page); int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: - error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, + error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } SetPageUptodate(page); - kunmap(page); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. @@ -195,8 +220,7 @@ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); - page = read_cache_page(&inode->i_data, desc->page_index, + page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { status = PTR_ERR(page); @@ -247,6 +271,24 @@ return res; } +static unsigned int nfs_type2dtype[] = { + DT_UNKNOWN, + DT_REG, + DT_DIR, + DT_BLK, + DT_CHR, + DT_LNK, + DT_SOCK, + DT_UNKNOWN, + DT_FIFO +}; + +static inline +unsigned int nfs_type_to_d_type(enum nfs_ftype type) +{ + return nfs_type2dtype[type]; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... 
*/ @@ -263,11 +305,17 @@ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr->type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -315,12 +363,12 @@ status = -ENOMEM; goto out; } - desc->page = page; - desc->ptr = kmap(page); desc->error = NFS_PROTO(inode)->readdir(inode, cred, desc->target, - desc->ptr, + page, NFS_SERVER(inode)->dtsize, desc->plus); + desc->page = page; + desc->ptr = kmap(page); if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = desc->target; @@ -334,7 +382,8 @@ /* Reset read descriptor so it searches the page cache from * the start upon the next call to readdir_search_pagecache() */ desc->page_index = 0; - memset(desc->entry, 0, sizeof(*desc->entry)); + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; out: dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); return status; @@ -353,9 +402,11 @@ nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; + struct nfs_fh fh; + struct nfs_fattr fattr; long res; - res = nfs_revalidate(dentry); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res < 0) return res; @@ -366,12 +417,16 @@ * itself. 
*/ memset(desc, 0, sizeof(*desc)); - memset(&my_entry, 0, sizeof(my_entry)); - desc->file = filp; desc->target = filp->f_pos; - desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); + + my_entry.cookie = my_entry.prev_cookie = 0; + my_entry.eof = 0; + my_entry.fh = &fh; + my_entry.fattr = &fattr; + desc->entry = &my_entry; while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -402,6 +457,15 @@ } /* + * All directory operations under NFS are synchronous, so fsync() + * is a dummy operation. + */ +int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +{ + return 0; +} + +/* * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. @@ -426,16 +490,9 @@ } static inline -int nfs_lookup_verify_inode(struct inode *inode, int flags) +int nfs_lookup_verify_inode(struct inode *inode) { - struct nfs_server *server = NFS_SERVER(inode); - /* - * If we're interested in close-to-open cache consistency, - * then we revalidate the inode upon lookup. 
- */ - if (!(server->flags & NFS_MOUNT_NOCTO) && !(flags & LOOKUP_CONTINUE)) - NFS_CACHEINV(inode); - return nfs_revalidate_inode(server, inode); + return nfs_revalidate_inode(NFS_SERVER(inode), inode); } /* @@ -489,11 +546,20 @@ /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, flags)) + if (nfs_lookup_verify_inode(inode)) goto out_bad; goto out_valid; } + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if (nfs_lookup_verify_inode(inode)) + goto out_bad; + goto out_valid_renew; + } + if (NFS_STALE(inode)) goto out_bad; @@ -505,6 +571,7 @@ if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; + out_valid_renew: nfs_renew_times(dentry); out_valid: unlock_kernel(); @@ -580,6 +647,18 @@ error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + error = -EACCES; + inode = nfs_fhget(dentry, &fhandle, &fattr); + if (inode) { + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + goto out; + } + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); inode = NULL; if (error == -ENOENT) @@ -598,6 +677,79 @@ return ERR_PTR(error); } +static inline +int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +{ + struct nfs_entry *entry = desc->entry; + int status; + + while((status = dir_decode(desc)) == 0) { + if (entry->len != dentry->d_name.len) + continue; + if (memcmp(entry->name, dentry->d_name.name, entry->len)) + continue; + if (!(entry->fattr->valid & NFS_ATTR_FATTR)) + continue; + break; + } + return status; +} + +/* + * Use the cached Readdirplus results in order to avoid a LOOKUP call + * whenever we believe that the parent directory has not changed. 
+ * + * We assume that any file creation/rename changes the directory mtime. + * As this results in a page cache invalidation whenever it occurs, + * we don't require any other tests for cache coherency. + */ +static +int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + nfs_readdir_descriptor_t desc; + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; + unsigned long timestamp = NFS_MTIME_UPDATE(dir); + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); + if (server->flags & NFS_MOUNT_NOAC) + return -ENOENT; + nfs_revalidate_inode(server, dir); + + entry.fh = fh; + entry.fattr = fattr; + + desc.decode = NFS_PROTO(dir)->decode_dirent; + desc.entry = &entry; + desc.page_index = 0; + desc.plus = 1; + + for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) { + + res = -EIO; + if (Page_Uptodate(page)) { + desc.ptr = kmap(page); + res = find_dirent_name(&desc, page, dentry); + kunmap(page); + } + page_cache_release(page); + + if (res == 0) + goto out_found; + if (res != -EAGAIN) + break; + } + return -ENOENT; + out_found: + fattr->timestamp = timestamp; + return 0; +} + /* * Code common to create, mkdir, and mknod. 
*/ @@ -1074,34 +1226,62 @@ int nfs_permission(struct inode *inode, int mask) { - int error = vfs_permission(inode, mask); - - if (!NFS_PROTO(inode)->access) - goto out; - - if (error == -EROFS) - goto out; - - /* - * Trust UNIX mode bits except: - * - * 1) When override capabilities may have been invoked - * 2) When root squashing may be involved - * 3) When ACLs may overturn a negative answer */ - if (!capable(CAP_DAC_OVERRIDE) && !capable(CAP_DAC_READ_SEARCH) - && (current->fsuid != 0) && (current->fsgid != 0) - && error != -EACCES) - goto out; + struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; + struct rpc_cred *cred; + int mode = inode->i_mode; + int error; - error = NFS_PROTO(inode)->access(inode, mask, 0); + if (mask & MAY_WRITE) { + /* + * + * Nobody gets write access to a read-only fs. + * + */ + if (IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; - if (error == -EACCES && NFS_CLIENT(inode)->cl_droppriv && - current->uid != 0 && current->gid != 0 && - (current->fsuid != current->uid || current->fsgid != current->gid)) - error = NFS_PROTO(inode)->access(inode, mask, 1); + /* + * + * Nobody gets write access to an immutable file. + * + */ + if (IS_IMMUTABLE(inode)) + return -EACCES; + } - out: - return error; + if (!NFS_PROTO(inode)->access) + goto out_notsup; + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (cache->cred == cred + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + if (!cache->err) { + /* Is the mask a subset of an accepted mask? */ + if ((cache->mask & mask) == mask) + goto out_cached; + } else { + /* ...or is it a superset of a rejected mask? 
*/ + if ((cache->mask & mask) == cache->mask) + goto out_cached; + } + } + error = NFS_PROTO(inode)->access(inode, cred, mask); + if (!error || error == -EACCES) { + cache->jiffies = jiffies; + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = cred; + cache->mask = mask; + cache->err = error; + return error; + } + put_rpccred(cred); +out_notsup: + nfs_revalidate_inode(NFS_SERVER(inode), inode); + return vfs_permission(inode, mask); +out_cached: + put_rpccred(cred); + return cache->err; } /* diff -u --recursive --new-file linux-2.4.19/fs/nfs/direct.c linux-2.4.19-29-fix_lockd4/fs/nfs/direct.c --- linux-2.4.19/fs/nfs/direct.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/direct.c Sat Oct 5 03:55:07 2002 @@ -0,0 +1,374 @@ +/* + * linux/fs/nfs/direct.c + * + * High-performance direct I/O for the NFS client + * + * When an application requests uncached I/O, all read and write requests + * are made directly to the server; data stored or fetched via these + * requests is not cached in the Linux page cache. The client does not + * correct unaligned requests from applications. All requested bytes are + * held on permanent storage before a direct write system call returns to + * an application. Applications that manage their own data caching, such + * as databases, make very good use of direct I/O on local file systems. + * + * Solaris implements an uncached I/O facility called directio() that + * is used for backups and sequential I/O to very large files. Solaris + * also supports uncaching whole NFS partitions with "-o forcedirectio," + * an undocumented mount option. + * + * Note that I/O to read in executables (e.g. kernel_read) cannot use + * direct (kiobuf) reads because there is no vma backing the passed-in + * data buffer. + * + * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust. + * + * Initial implementation: 12/2001 by Chuck Lever + * + * TODO: + * + * 1. 
Use concurrent asynchronous network requests rather than + * serialized synchronous network requests for normal (non-sync) + * direct I/O. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NFSDBG_FACILITY (NFSDBG_PAGECACHE | NFSDBG_VFS) +#define VERF_SIZE (2 * sizeof(__u32)) + +static /* inline */ int +nfs_direct_read_rpc(struct file *file, struct nfs_readargs *arg) +{ + int result; + struct inode * inode = file->f_dentry->d_inode; + struct nfs_fattr fattr; + struct rpc_message msg; + struct nfs_readres res = { &fattr, arg->count, 0 }; + +#ifdef CONFIG_NFS_V3 + msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? + NFS3PROC_READ : NFSPROC_READ; +#else + msg.rpc_proc = NFSPROC_READ; +#endif + msg.rpc_argp = arg; + msg.rpc_resp = &res; + + lock_kernel(); + msg.rpc_cred = nfs_file_cred(file); + fattr.valid = 0; + result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_refresh_inode(inode, &fattr); + unlock_kernel(); + + return result; +} + +static /* inline */ int +nfs_direct_write_rpc(struct file *file, struct nfs_writeargs *arg, + struct nfs_writeverf *verf) +{ + int result; + struct inode *inode = file->f_dentry->d_inode; + struct nfs_fattr fattr; + struct rpc_message msg; + struct nfs_writeres res = { &fattr, verf, 0 }; + +#ifdef CONFIG_NFS_V3 + msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? 
+ NFS3PROC_WRITE : NFSPROC_WRITE; +#else + msg.rpc_proc = NFSPROC_WRITE; +#endif + msg.rpc_argp = arg; + msg.rpc_resp = &res; + + lock_kernel(); + msg.rpc_cred = get_rpccred(nfs_file_cred(file)); + fattr.valid = 0; + result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_write_attributes(inode, &fattr); + put_rpccred(msg.rpc_cred); + unlock_kernel(); + +#ifdef CONFIG_NFS_V3 + if (NFS_PROTO(inode)->version == 3) { + if (result > 0) { + if ((arg->stable == NFS_FILE_SYNC) && + (verf->committed != NFS_FILE_SYNC)) { + printk(KERN_ERR __FUNCTION__ + ": server didn't sync stable write request\n"); + return -EIO; + } + + if (result != arg->count) { + printk(KERN_INFO __FUNCTION__ + ": short write, count=%u, result=%d\n", + arg->count, result); + } + } + return result; + } else { +#endif + verf->committed = NFS_FILE_SYNC; /* NFSv2 always syncs data */ + if (result == 0) + return arg->count; + return result; +#ifdef CONFIG_NFS_V3 + } +#endif +} + +#ifdef CONFIG_NFS_V3 +static /* inline */ int +nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count, + struct nfs_writeverf *verf) +{ + int result; + struct nfs_fattr fattr; + struct nfs_writeargs arg = { NFS_FH(inode), offset, count, 0, 0, + NULL }; + struct nfs_writeres res = { &fattr, verf, 0 }; + struct rpc_message msg = { NFS3PROC_COMMIT, &arg, &res, NULL }; + + fattr.valid = 0; + + lock_kernel(); + result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_write_attributes(inode, &fattr); + unlock_kernel(); + + return result; +} +#else +static inline int +nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count, + struct nfs_writeverf *verf) +{ + return 0; +} +#endif + +/* + * Walk through the iobuf and create an iovec for each "rsize" bytes. 
+ */ +static int +nfs_direct_read(struct file *file, struct kiobuf *iobuf, loff_t offset, + size_t count) +{ + int curpage, total; + struct inode *inode = file->f_dentry->d_inode; + int rsize = NFS_SERVER(inode)->rsize; + struct page **src = iobuf->maplist, + **end = iobuf->maplist + iobuf->nr_pages; + struct page *pages[NFS_READ_MAXIOV]; + struct nfs_readargs args = { NFS_FH(inode), + offset, + 0, + iobuf->offset, + pages }; + + total = 0; + curpage = 0; + while (count) { + int request, result; + struct page **dst = pages; + + request = count; + if (count > rsize) + request = rsize; + args.count = request; + + do { + if (!*src) + return -EFAULT; + + *dst++ = *src; + /* zero after the first iov */ + if (request < PAGE_SIZE) + break; + request -= PAGE_SIZE; + src++; + } while (request != 0 && src != end); + + result = nfs_direct_read_rpc(file, &args); + + if (result < 0) { + if (result == -EISDIR) + total = -EINVAL; + else + total = result; + break; + } + + total += result; + if (result < args.count) /* NFSv2ism */ + break; + count -= result; + args.offset += result; + args.pgbase += result; + args.pgbase &= ~PAGE_MASK; + }; + return total; +} + +/* + * Walk through the iobuf and create an iovec for each "wsize" bytes. + * If only one network write is necessary, or if the O_SYNC flag or + * 'sync' mount option are present, or if this is a V2 inode, use + * FILE_SYNC. Otherwise, use UNSTABLE and finish with a COMMIT. + * + * The mechanics of this function are much the same as nfs_direct_read, + * with the added complexity of committing unstable writes. 
+ */ +static int +nfs_direct_write(struct file *file, struct kiobuf *iobuf, + loff_t offset, size_t count) +{ + int curpage, total; + int need_commit = 0; + loff_t save_offset = offset; + struct inode *inode = file->f_dentry->d_inode; + int wsize = NFS_SERVER(inode)->wsize; + struct nfs_writeverf first_verf, ret_verf; + struct page *pages[NFS_WRITE_MAXIOV]; + struct nfs_writeargs args = { NFS_FH(inode), 0, 0, NFS_FILE_SYNC, 0, pages }; + +#ifdef CONFIG_NFS_V3 + if ((NFS_PROTO(inode)->version == 3) && (count > wsize) && + (!IS_SYNC(inode))) + args.stable = NFS_UNSTABLE; +#endif + +retry: + total = 0; + curpage = 0; + while (count) { + int request, result; + struct page **dest = pages; + + request = count; + if (count > wsize) + request = wsize; + args.count = request; + args.offset = offset; + args.pgbase = (iobuf->offset + total) & ~PAGE_MASK; + + do { + struct page *page = iobuf->maplist[curpage]; + + if (!page) + return -EFAULT; + + *dest++ = page; + /* zero after the first iov */ + if (request < PAGE_SIZE) /* under a page left: this was the last page to gather */ + break; + request -= PAGE_SIZE; + curpage++; + } while (request != 0 && curpage < iobuf->nr_pages); + + result = nfs_direct_write_rpc(file, &args, &ret_verf); + + if (result < 0) { + total = result; + break; + } + + if (!total) + memcpy(&first_verf.verifier, &ret_verf.verifier, + VERF_SIZE); + if (ret_verf.committed != NFS_FILE_SYNC) { + need_commit = 1; + if (memcmp(&first_verf.verifier, &ret_verf.verifier, + VERF_SIZE)) + goto print_retry; + } + + total += result; + count -= result; + offset += result; + }; + + /* + * Commit data written so far, even in the event of an error + */ + if (need_commit) { + if (nfs_direct_commit_rpc(inode, save_offset, + iobuf->length - count, &ret_verf)) + goto print_retry; + if (memcmp(&first_verf.verifier, &ret_verf.verifier, + VERF_SIZE)) + goto print_retry; + } + + return total; + +print_retry: + printk(KERN_INFO __FUNCTION__ + ": detected server restart; retrying with FILE_SYNC\n"); + args.stable = NFS_FILE_SYNC; + offset
= save_offset; + count = iobuf->length; + goto retry; +} + +/* + * Read or write data, moving the data directly to/from the + * application's buffer without caching in the page cache. + * + * Rules for direct I/O + * + * 1. block size = 512 bytes or more + * 2. file byte offset is block aligned + * 3. byte count is a multiple of block size + * 4. user buffer is not aligned + * 5. user buffer is faulted in and pinned + * + * These are verified before we get here. + */ +int +nfs_direct_IO(int rw, struct file *file, struct kiobuf *iobuf, + unsigned long blocknr, int blocksize) +{ + int result = -EINVAL; + size_t count = iobuf->length; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + loff_t offset = (loff_t) blocknr << inode->i_blkbits; /* widen first: unsigned long may be 32 bits */ + + switch (rw) { + case READ: + dfprintk(VFS, + "NFS: direct_IO(READ) (%s/%s) off/cnt(%Lu/%d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, offset, count); + + result = nfs_direct_read(file, iobuf, offset, count); + break; + case WRITE: + dfprintk(VFS, + "NFS: direct_IO(WRITE) (%s/%s) off/cnt(%Lu/%d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, offset, count); + + result = nfs_direct_write(file, iobuf, offset, count); + break; + default: + break; + } + + dfprintk(VFS, "NFS: direct_IO result = %d\n", result); + return result; +} diff -u --recursive --new-file linux-2.4.19/fs/nfs/file.c linux-2.4.19-29-fix_lockd4/fs/nfs/file.c --- linux-2.4.19/fs/nfs/file.c Mon Feb 25 20:38:09 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/file.c Sat Oct 5 03:55:07 2002 @@ -16,6 +16,7 @@ * nfs regular file handling functions */ +#include #include #include #include @@ -199,6 +200,9 @@ sync_page: nfs_sync_page, writepage: nfs_writepage, prepare_write: nfs_prepare_write, +#ifdef CONFIG_NFS_DIRECTIO + direct_IO: nfs_direct_IO, +#endif commit_write: nfs_commit_write }; diff -u --recursive --new-file linux-2.4.19/fs/nfs/inode.c linux-2.4.19-29-fix_lockd4/fs/nfs/inode.c --- linux-2.4.19/fs/nfs/inode.c Sat Aug
3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/inode.c Sat Oct 5 03:54:52 2002 @@ -146,10 +146,14 @@ static void nfs_clear_inode(struct inode *inode) { - struct rpc_cred *cred = NFS_I(inode)->mm_cred; + struct nfs_inode_info *nfsi = NFS_I(inode); + struct rpc_cred *cred = nfsi->mm_cred; if (cred) put_rpccred(cred); + cred = nfsi->cache_access.cred; + if (cred) + put_rpccred(cred); } void @@ -251,6 +255,72 @@ } /* + * Set up the NFS superblock private area using probed values + */ +static int +nfs_setup_superblock(struct super_block *sb, struct nfs_fh *rootfh) +{ + struct nfs_server *server = &sb->u.nfs_sb.s_server; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { &fattr, }; + struct nfs_pathconf pathinfo = { &fattr, }; + int maxlen, res; + + res = server->rpc_ops->fsinfo(server, rootfh, &fsinfo); + if (res < 0) + return res; + + /* Work out a lot of parameters */ + if (!server->rsize) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (!server->wsize) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + /* NFSv3: we don't have bsize, but rather rtmult and wtmult... 
*/ + if (!fsinfo.wtmult) + fsinfo.wtmult = 512; + sb->s_blocksize = nfs_block_bits(fsinfo.wtmult, &sb->s_blocksize_bits); + + if (server->rsize > fsinfo.rtmax) + server->rsize = fsinfo.rtmax; + if (server->wsize > fsinfo.wtmax) + server->wsize = fsinfo.wtmax; + + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->rpages > NFS_READ_MAXIOV) { + server->rpages = NFS_READ_MAXIOV; + server->rsize = server->rpages << PAGE_CACHE_SHIFT; + } + + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (server->wpages > NFS_WRITE_MAXIOV) { + server->wpages = NFS_WRITE_MAXIOV; + server->wsize = server->wpages << PAGE_CACHE_SHIFT; + } + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + maxlen = (server->rpc_ops->version == 2) ? NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; + if (!server->namelen) { + res = server->rpc_ops->pathconf(server, rootfh, &pathinfo); + if (!res) + server->namelen = pathinfo.name_max; + } + if (!server->namelen || server->namelen > maxlen) + server->namelen = maxlen; + + sb->s_maxbytes = fsinfo.maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE) + sb->s_maxbytes = MAX_LFS_FILESIZE; + + return 0; +} + +/* * The way this works is that the mount process passes a structure * in the data argument which contains the server's IP address * and the root file handle obtained from the server's mount @@ -268,8 +338,7 @@ unsigned int authflavor; struct sockaddr_in srvaddr; struct rpc_timeout timeparms; - struct nfs_fsinfo fsinfo; - int tcp, version, maxlen; + int tcp, version; memset(&sb->u.nfs_sb, 0, sizeof(sb->u.nfs_sb)); if (!data) @@ -298,11 +367,11 @@ sb->s_magic = NFS_SUPER_MAGIC; sb->s_op = &nfs_sops; - sb->s_blocksize_bits = 0; - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); server = &sb->u.nfs_sb.s_server; - server->rsize = 
nfs_block_size(data->rsize, NULL); - server->wsize = nfs_block_size(data->wsize, NULL); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags & NFS_MOUNT_FLAGMASK; if (data->flags & NFS_MOUNT_NOAC) { @@ -326,12 +395,14 @@ INIT_LIST_HEAD(&server->lru_busy); nfsv3_try_again: + server->caps = 0; /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ if (data->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -409,70 +480,19 @@ sb->s_root->d_op = &nfs_dentry_operations; /* Get some general file system info */ - if (server->rpc_ops->statfs(server, root, &fsinfo) >= 0) { - if (server->namelen == 0) - server->namelen = fsinfo.namelen; - } else { + if (nfs_setup_superblock(sb, root) < 0) { printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); goto out_no_root; } - /* Work out a lot of parameters */ - if (data->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (data->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ - if (!fsinfo.bsize) - fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? 
fsinfo.rtmult : fsinfo.wtmult; - /* Also make sure we don't go below rsize/wsize since - * RPC calls are expensive */ - if (fsinfo.bsize < server->rsize) - fsinfo.bsize = server->rsize; - if (fsinfo.bsize < server->wsize) - fsinfo.bsize = server->wsize; - - if (data->bsize == 0) - sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits); - if (server->rsize > fsinfo.rtmax) - server->rsize = fsinfo.rtmax; - if (server->wsize > fsinfo.wtmax) - server->wsize = fsinfo.wtmax; - - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } - - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - maxlen = (version == 2) ? 
NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; - - if (server->namelen == 0 || server->namelen > maxlen) - server->namelen = maxlen; - - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; - /* Fire up the writeback cache */ if (nfs_reqlist_alloc(server) < 0) { printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n"); goto failure_kill_reqlist; } - /* We're airborne */ + /* We're airborne Set socket buffersize */ + rpc_setbufsize(clnt, server->wsize + 100, server->rsize + 100); /* Check whether to start the lockd process */ if (!(server->flags & NFS_MOUNT_NONLM)) @@ -525,7 +545,8 @@ struct nfs_server *server = &sb->u.nfs_sb.s_server; unsigned char blockbits; unsigned long blockres; - struct nfs_fsinfo res; + struct nfs_fattr attr; + struct nfs_fsstat res = { &attr, }; int error; error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root->d_inode), &res); @@ -533,18 +554,15 @@ if (error < 0) goto out_err; - if (res.bsize == 0) - res.bsize = sb->s_blocksize; - buf->f_bsize = nfs_block_bits(res.bsize, &blockbits); + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; buf->f_bavail = (res.abytes + blockres) >> blockbits; buf->f_files = res.tfiles; buf->f_ffree = res.afiles; - if (res.namelen == 0 || res.namelen > server->namelen) - res.namelen = server->namelen; - buf->f_namelen = res.namelen; + buf->f_namelen = server->namelen; return 0; out_err: printk("nfs_statfs: statfs error = %d\n", -error); @@ -622,36 +640,35 @@ nfs_zap_caches(inode); } +/* Don't use READDIRPLUS on directories that we believe are too large */ +#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) + /* * Fill in inode information from the fattr. 
*/ static void nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) { - /* - * Check whether the mode has been set, as we only want to - * do this once. (We don't allow inodes to change types.) + NFS_FILEID(inode) = fattr->fileid; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. */ - if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; - inode->i_mode = fattr->mode; - /* Why so? Because we want revalidate for devices/FIFOs, and - * that's precisely what we have in nfs_file_inode_operations. - */ - inode->i_op = &nfs_file_inode_operations; - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; - inode->i_data.a_ops = &nfs_file_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; - inode->i_fop = &nfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else - init_special_inode(inode, inode->i_mode, fattr->rdev); - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); - } - nfs_refresh_inode(inode, fattr); + inode->i_op = &nfs_file_inode_operations; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nfs_dir_inode_operations; + inode->i_fop = &nfs_dir_operations; + if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) + && fattr->size <= NFS_LIMIT_READDIRPLUS) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + } else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else + init_special_inode(inode, inode->i_mode, fattr->rdev); + memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } struct nfs_find_desc { @@ -726,7 +743,14 @@ if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + /* + * Check whether the mode has been set, as 
we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) + nfs_fill_inode(inode, fh, fattr); + + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); @@ -849,15 +873,23 @@ { struct rpc_auth *auth; struct rpc_cred *cred; + int err = 0; lock_kernel(); + /* Ensure that we revalidate the data cache, unless mounted nocto */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) { + err = __nfs_revalidate_inode(NFS_SERVER(inode),inode); + if (err) + goto out; + } auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; if (filp->f_mode & FMODE_WRITE) nfs_set_mmcred(inode, cred); +out: unlock_kernel(); - return 0; + return err; } int nfs_release(struct inode *inode, struct file *filp) @@ -992,6 +1024,9 @@ goto out_err; } + /* Throw out obsolete READDIRPLUS attributes */ + if (time_before(fattr->timestamp, NFS_READTIME(inode))) + return 0; /* * Make sure the inode's type hasn't changed. */ @@ -1010,7 +1045,7 @@ /* * Update the read time so we don't revalidate too often. */ - NFS_READTIME(inode) = jiffies; + NFS_READTIME(inode) = fattr->timestamp; /* * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache.
@@ -1059,7 +1094,8 @@ inode->i_atime = new_atime; if (NFS_CACHE_MTIME(inode) != new_mtime) { - NFS_MTIME_UPDATE(inode) = jiffies; + if (invalid) + NFS_MTIME_UPDATE(inode) = fattr->timestamp; NFS_CACHE_MTIME(inode) = new_mtime; inode->i_mtime = nfs_time_to_secs(new_mtime); } @@ -1067,6 +1103,16 @@ NFS_CACHE_ISIZE(inode) = new_size; inode->i_size = new_isize; + if (inode->i_mode != fattr->mode || + inode->i_uid != fattr->uid || + inode->i_gid != fattr->gid) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs2xdr.c linux-2.4.19-29-fix_lockd4/fs/nfs/nfs2xdr.c --- linux-2.4.19/fs/nfs/nfs2xdr.c Mon Feb 25 20:38:09 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/nfs2xdr.c Sat Oct 5 03:54:52 2002 @@ -24,9 +24,6 @@ #include #include -/* Uncomment this to support servers requiring longword lengths */ -#define NFS_PAD_WRITES 1 - #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ @@ -90,17 +87,6 @@ return p + XDR_QUADLEN(NFS2_FHSIZE); } -static inline u32 * -xdr_decode_string2(u32 *p, char **string, unsigned int *len, - unsigned int maxlen) -{ - *len = ntohl(*p++); - if (*len > maxlen) - return NULL; - *string = (char *) p; - return p + XDR_QUADLEN(*len); -} - static inline u32* xdr_decode_time(u32 *p, u64 *timep) { @@ -109,7 +95,7 @@ return p; } -static inline u32 * +static u32 * xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) { fattr->type = (enum nfs_ftype) ntohl(*p++); @@ -132,6 +118,7 @@ fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } + fattr->timestamp = jiffies; return p; } @@ -223,35 +210,20 @@ nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; - unsigned int nr; + unsigned int replen; + u32 offset = (u32)args->offset; + u32 count = 
args->count; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->count); - *p++ = htonl(args->count); + *p++ = htonl(offset); + *p++ = htonl(count); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* Get the number of buffers in the receive iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); - return -EINVAL; - } - - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - /* Copy the iovec */ - memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); - - req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[nr+1].iov_len = buflen - replen; - req->rq_rlen = args->count + buflen; - req->rq_rnr += nr+1; - + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); return 0; } @@ -272,10 +244,10 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READ header is short. 
iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); } - recvd = req->rq_rlen - hdrlen; + recvd = req->rq_received - hdrlen; if (count > recvd) { printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); @@ -284,7 +256,6 @@ dprintk("RPC: readres OK count %d\n", count); if (count < res->count) { - xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); res->count = count; res->eof = 1; /* Silly NFSv3ism which can't be helped */ } else @@ -300,46 +271,19 @@ static int nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { - unsigned int nr; + struct xdr_buf *sndbuf = &req->rq_snd_buf; + u32 offset = (u32)args->offset; u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->offset); + *p++ = htonl(offset); + *p++ = htonl(offset); *p++ = htonl(count); *p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - - /* Get the number of buffers in the send iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs " - "(nr %d max %d)\n", nr, MAX_IOVEC); - return -EINVAL; - } - - /* Copy the iovec */ - memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); - -#ifdef NFS_PAD_WRITES - /* - * Some old servers require that the message length - * be a multiple of 4, so we pad it here if needed. 
- */ - if (count & 3) { - struct iovec *iov = req->rq_svec + nr + 1; - int pad = 4 - (count & 3); - - iov->iov_base = (void *) "\0\0\0"; - iov->iov_len = pad; - count += pad; - nr++; - } -#endif - req->rq_slen += count; - req->rq_snr += nr; + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); return 0; } @@ -406,32 +350,24 @@ { struct rpc_task *task = req->rq_task; struct rpc_auth *auth = task->tk_auth; - u32 bufsiz = args->bufsiz; - int buflen, replen; + unsigned int replen; + u32 count = args->count; /* * Some servers (e.g. HP OS 9.5) seem to expect the buffer size * to be in longwords ... check whether to convert the size. */ if (task->tk_client->cl_flags & NFS_CLNTF_BUFSIZE) - bufsiz = bufsiz >> 2; + count = count >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); - *p++ = htonl(bufsiz); /* see above */ + *p++ = htonl(args->cookie & 0xFFFFFFFF); + *p++ = htonl(count); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; - + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -443,12 +379,15 @@ * from nfs_readdir for each entry. 
*/ static int -nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) +nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - int status, nr; - u32 *end, *entry, len; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *end, *entry; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); @@ -456,15 +395,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - - /* Get start and end address of XDR data */ - p = (u32 *) iov[1].iov_base; - end = (u32 *) ((u8 *) p + iov[1].iov_len); + pglen = rcvbuf->page_len; + recvd = req->rq_received - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + p = kmap(*page); + end = (u32 *)((char *)p + pglen); + entry = p; for (nr = 0; *p++; nr++) { - entry = p - 1; if (p + 2 > end) goto short_pkt; p++; /* fileid */ @@ -473,16 +415,28 @@ if (len > NFS2_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } if (p + 2 > end) goto short_pkt; + entry = p; } + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + out: + kunmap(*page); return nr; short_pkt: - printk(KERN_NOTICE "NFS: short packet in readdir reply!\n"); entry[0] = entry[1] = 0; - return nr; + /* truncate listing? 
*/ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + kunmap(*page); + return -errno_NFSERR_IO; } u32 * @@ -500,7 +454,7 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; @@ -568,21 +522,16 @@ static int nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args) { - struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; - int buflen, replen; + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count - 4; p = xdr_encode_fhandle(p, args->fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -590,32 +539,33 @@ * Decode READLINK reply */ static int -nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy) { - struct iovec *iov = req->rq_rvec; - u32 *strlen; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + unsigned int hdrlen; + u32 *strlen, len; char *string; - int hdrlen; int status; - unsigned int len; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READLINK header is short. 
iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - strlen = (u32*)res->buffer; + strlen = (u32*)kmap(rcvbuf->pages[0]); /* Convert length of symlink */ len = ntohl(*strlen); - if (len > res->bufsiz - 5) - len = res->bufsiz - 5; + if (len > rcvbuf->page_len) + len = rcvbuf->page_len; *strlen = len; /* NULL terminate the string we got */ string = (char *)(strlen + 1); string[len] = 0; + kunmap(rcvbuf->pages[0]); return 0; } @@ -633,36 +583,18 @@ * Decode STATFS reply */ static int -nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_statfs *res) { int status; - u32 xfer_size; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); - /* For NFSv2, we more or less have to guess the preferred - * read/write/readdir sizes from the single 'transfer size' - * value. - */ - xfer_size = ntohl(*p++); /* tsize */ - res->rtmax = 8 * 1024; - res->rtpref = xfer_size; - res->rtmult = xfer_size; - res->wtmax = 8 * 1024; - res->wtpref = xfer_size; - res->wtmult = xfer_size; - res->dtpref = PAGE_CACHE_SIZE; - res->maxfilesize = 0x7FFFFFFF; /* just a guess */ + res->tsize = ntohl(*p++); res->bsize = ntohl(*p++); - - res->tbytes = ntohl(*p++) * res->bsize; - res->fbytes = ntohl(*p++) * res->bsize; - res->abytes = ntohl(*p++) * res->bsize; - res->tfiles = 0; - res->ffiles = 0; - res->afiles = 0; - res->namelen = 0; + res->blocks = ntohl(*p++); + res->bfree = ntohl(*p++); + res->bavail = ntohl(*p++); return 0; } @@ -732,33 +664,32 @@ # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif -#define PROC(proc, argtype, restype) \ - { "nfs_" #proc, \ - (kxdrproc_t) nfs_xdr_##argtype, \ - (kxdrproc_t) nfs_xdr_##restype, \ - MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - 0 \ +#define PROC(proc, argtype, restype, timer) \ + { .p_procname = "nfs_" #proc, \ + .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ + .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + .p_timer = timer \ } - static struct rpc_procinfo nfs_procedures[18] = { - PROC(null, enc_void, dec_void), - PROC(getattr, fhandle, attrstat), - PROC(setattr, sattrargs, attrstat), - PROC(root, enc_void, dec_void), - PROC(lookup, diropargs, diropres), - PROC(readlink, readlinkargs, readlinkres), - PROC(read, readargs, readres), - PROC(writecache, enc_void, dec_void), - PROC(write, writeargs, writeres), - PROC(create, createargs, diropres), - PROC(remove, diropargs, stat), - PROC(rename, renameargs, stat), - PROC(link, linkargs, stat), - PROC(symlink, symlinkargs, stat), - PROC(mkdir, createargs, diropres), - PROC(rmdir, diropargs, stat), - PROC(readdir, readdirargs, readdirres), - PROC(statfs, fhandle, statfsres), + PROC(null, enc_void, dec_void, 0), + PROC(getattr, fhandle, attrstat, 1), + PROC(setattr, sattrargs, attrstat, 0), + PROC(root, enc_void, dec_void, 0), + PROC(lookup, diropargs, diropres, 2), + PROC(readlink, readlinkargs, readlinkres, 3), + PROC(read, readargs, readres, 3), + PROC(writecache, enc_void, dec_void, 0), + PROC(write, writeargs, writeres, 4), + PROC(create, createargs, diropres, 0), + PROC(remove, diropargs, stat, 0), + PROC(rename, renameargs, stat, 0), + PROC(link, linkargs, stat, 0), + PROC(symlink, symlinkargs, stat, 0), + PROC(mkdir, createargs, diropres, 0), + PROC(rmdir, diropargs, stat, 0), + PROC(readdir, readdirargs, readdirres, 3), + PROC(statfs, fhandle, statfsres, 0), }; struct rpc_version nfs_version2 = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs3proc.c 
linux-2.4.19-29-fix_lockd4/fs/nfs/nfs3proc.c --- linux-2.4.19/fs/nfs/nfs3proc.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/nfs3proc.c Sat Oct 5 03:54:52 2002 @@ -117,12 +117,13 @@ } static int -nfs3_proc_access(struct inode *inode, int mode, int ruid) +nfs3_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) { struct nfs_fattr fattr; struct nfs3_accessargs arg = { NFS_FH(inode), 0 }; struct nfs3_accessres res = { &fattr, 0 }; - int status, flags; + struct rpc_message msg = { NFS3PROC_ACCESS, &arg, &res, cred }; + int status; dprintk("NFS call access\n"); fattr.valid = 0; @@ -140,8 +141,7 @@ if (mode & MAY_EXEC) arg.access |= NFS3_ACCESS_EXECUTE; } - flags = (ruid) ? RPC_CALL_REALUID : 0; - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_ACCESS, &arg, &res, flags); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply access\n"); @@ -151,17 +151,16 @@ } static int -nfs3_proc_readlink(struct inode *inode, void *buffer, unsigned int buflen) +nfs3_proc_readlink(struct inode *inode, struct page *page) { struct nfs_fattr fattr; - struct nfs3_readlinkargs args = { NFS_FH(inode), buffer, buflen }; - struct nfs3_readlinkres res = { &fattr, buffer, buflen }; + struct nfs3_readlinkargs args = { NFS_FH(inode), PAGE_CACHE_SIZE, &page }; int status; dprintk("NFS call readlink\n"); fattr.valid = 0; status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &res, 0); + &args, &fattr, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -170,11 +169,12 @@ static int nfs3_proc_read(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, void *buffer, int *eofp) + unsigned int base, unsigned int count, struct page *page, + int *eofp) { - struct nfs_readargs arg = { NFS_FH(inode), offset, count, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + u64 offset = 
page_offset(page) + base; + struct nfs_readargs arg = { NFS_FH(inode), offset, count, + base, &page }; struct nfs_readres res = { fattr, count, 0 }; struct rpc_message msg = { NFS3PROC_READ, &arg, &res, cred }; int status; @@ -190,13 +190,12 @@ static int nfs3_proc_write(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, - void *buffer, struct nfs_writeverf *verf) + unsigned int base, unsigned int count, + struct page *page, struct nfs_writeverf *verf) { + u64 offset = page_offset(page) + base; struct nfs_writeargs arg = { NFS_FH(inode), offset, count, - NFS_FILE_SYNC, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + NFS_FILE_SYNC, base, &page }; struct nfs_writeres res = { fattr, verf, 0 }; struct rpc_message msg = { NFS3PROC_WRITE, &arg, &res, cred }; int status, rpcflags = 0; @@ -434,26 +433,16 @@ */ static int nfs3_proc_readdir(struct inode *dir, struct rpc_cred *cred, - u64 cookie, void *entry, - unsigned int size, int plus) + u64 cookie, struct page *page, unsigned int count, int plus) { struct nfs_fattr dir_attr; - struct nfs3_readdirargs arg = { NFS_FH(dir), cookie, {0, 0}, 0, 0, 0 }; - struct nfs3_readdirres res = { &dir_attr, 0, 0, 0, 0 }; - struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; u32 *verf = NFS_COOKIEVERF(dir); + struct nfs3_readdirargs arg = { NFS_FH(dir), cookie, {verf[0], verf[1]}, + plus, count, &page }; + struct nfs3_readdirres res = { &dir_attr, verf, plus }; + struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, cred }; int status; - arg.buffer = entry; - arg.bufsiz = size; - arg.verf[0] = verf[0]; - arg.verf[1] = verf[1]; - arg.plus = plus; - res.buffer = entry; - res.bufsiz = size; - res.verf = verf; - res.plus = plus; - if (plus) msg.rpc_proc = NFS3PROC_READDIRPLUS; @@ -494,24 +483,42 @@ return status; } -/* - * This is a combo call of fsstat and fsinfo - */ static int nfs3_proc_statfs(struct nfs_server *server, struct 
nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; - dprintk("NFS call fsstat\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0); - if (status < 0) - goto error; + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call fsinfo\n"); status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("NFS reply fsinfo: %d\n", status); + return status; +} -error: - dprintk("NFS reply statfs: %d\n", status); +static int +nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + int status; + + info->fattr->valid = 0; + dprintk("NFS call pathconf\n"); + status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + dprintk("NFS reply pathconf: %d\n", status); return status; } @@ -540,5 +547,7 @@ nfs3_proc_readdir, nfs3_proc_mknod, nfs3_proc_statfs, + nfs3_proc_fsinfo, + nfs3_proc_pathconf, nfs3_decode_dirent, }; diff -u --recursive --new-file linux-2.4.19/fs/nfs/nfs3xdr.c linux-2.4.19-29-fix_lockd4/fs/nfs/nfs3xdr.c --- linux-2.4.19/fs/nfs/nfs3xdr.c Sat Nov 3 02:40:09 2001 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/nfs3xdr.c Sat Oct 5 03:54:52 2002 @@ -22,9 +22,6 @@ #include #include -/* Uncomment this to support servers requiring longword lengths */ -#define NFS_PAD_WRITES 1 - #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. 
*/ @@ -156,18 +153,7 @@ return p; } -static inline u32 * -xdr_decode_string2(u32 *p, char **string, unsigned int *len, - unsigned int maxlen) -{ - *len = ntohl(*p++); - if (*len > maxlen) - return NULL; - *string = (char *) p; - return p + XDR_QUADLEN(*len); -} - -static inline u32 * +static u32 * xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) { unsigned int type; @@ -195,6 +181,7 @@ /* Update the mode bits */ fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + fattr->timestamp = jiffies; return p; } @@ -350,35 +337,18 @@ nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; - unsigned int nr; + unsigned int replen; + u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_hyper(p, args->offset); - *p++ = htonl(args->count); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* Get the number of buffers in the receive iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); - return -EINVAL; - } - - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - - /* Copy the iovec */ - memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); - - req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[nr+1].iov_len = buflen - replen; - req->rq_rlen = args->count + buflen; - req->rq_rnr += nr+1; - + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->pages, args->pgbase, count); return 0; } @@ -388,7 +358,7 @@ static int nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { - unsigned int nr; + struct xdr_buf *sndbuf = &req->rq_snd_buf; u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); @@ -396,37 +366,10 @@ *p++ = htonl(count); *p++ = htonl(args->stable); 
*p++ = htonl(count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - - /* Get the number of buffers in the send iovec */ - nr = args->nriov; - - if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs\n"); - return -EINVAL; - } - - /* Copy the iovec */ - memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); - -#ifdef NFS_PAD_WRITES - /* - * Some old servers require that the message length - * be a multiple of 4, so we pad it here if needed. - */ - if (count & 3) { - struct iovec *iov = req->rq_svec + nr + 1; - int pad = 4 - (count & 3); - - iov->iov_base = (void *) "\0\0\0"; - iov->iov_len = pad; - count += pad; - nr++; - } -#endif - req->rq_slen += count; - req->rq_snr += nr; + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + /* Copy the page array */ + xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); return 0; } @@ -523,6 +466,13 @@ return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -530,31 +480,24 @@ nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int buflen, replen; + unsigned int replen; + u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { /* readdirplus: need dircount + buffer size. 
* We just make sure we make dircount big enough */ - *p++ = htonl(args->bufsiz >> 3); + *p++ = htonl(count); } - *p++ = htonl(args->bufsiz); + *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - /* set up reply iovec */ + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; - + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -565,11 +508,13 @@ static int nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - int status, nr; - unsigned int len; - u32 *entry, *end; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + struct page **page; + int hdrlen, recvd; + int status, nr; + unsigned int len, pglen; + u32 *entry, *end; status = ntohl(*p++); /* Decode post_op_attrs */ @@ -587,13 +532,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READDIR header is short. 
iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - p = (u32 *) iov[1].iov_base; - end = (u32 *) ((u8 *) p + iov[1].iov_len); + pglen = rcvbuf->page_len; + recvd = req->rq_received - hdrlen; + if (pglen > recvd) + pglen = recvd; + page = rcvbuf->pages; + p = kmap(*page); + entry = p; + end = (u32 *)((char *)p + pglen); for (nr = 0; *p++; nr++) { - entry = p - 1; if (p + 3 > end) goto short_pkt; p += 2; /* inode # */ @@ -602,7 +552,7 @@ if (len > NFS3_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } if (res->plus) { @@ -622,7 +572,7 @@ if (len > NFS3_FHSIZE) { printk(KERN_WARNING "NFS: giant filehandle in " "readdir (len %x)!\n", len); - return -errno_NFSERR_IO; + goto err_unmap; } p += XDR_QUADLEN(len); } @@ -630,20 +580,31 @@ if (p + 2 > end) goto short_pkt; + entry = p; } - + if (!nr && (entry[0] != 0 || entry[1] == 0)) + goto short_pkt; + out: + kunmap(*page); return nr; short_pkt: - printk(KERN_NOTICE "NFS: short packet in readdir reply!\n"); - /* truncate listing */ entry[0] = entry[1] = 0; - return nr; + /* truncate listing? 
*/ + if (!nr) { + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); + entry[1] = 1; + } + goto out; +err_unmap: + kunmap(*page); + return -errno_NFSERR_IO; } u32 * nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; + u64 cookie; if (!*p++) { if (!*p) @@ -657,24 +618,23 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { - p = xdr_decode_post_op_attr(p, &entry->fattr); + entry->fattr->valid = 0; + p = xdr_decode_post_op_attr(p, entry->fattr); /* In fact, a post_op_fh3: */ if (*p++) { - p = xdr_decode_fhandle(p, &entry->fh); + p = xdr_decode_fhandle(p, entry->fh); /* Ugh -- server reply was truncated */ if (p == NULL) { dprintk("NFS: FH truncated\n"); *entry = old; return ERR_PTR(-EAGAIN); } - } else { - /* If we don't get a file handle, the attrs - * aren't worth a lot. 
*/ - entry->fattr.valid = 0; - } + } else + memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } entry->eof = !p[0] && p[1]; @@ -772,21 +732,16 @@ static int nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args) { - struct rpc_task *task = req->rq_task; - struct rpc_auth *auth = task->tk_auth; - int buflen, replen; + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + u32 count = args->count - 4; p = xdr_encode_fhandle(p, args->fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Inline the page array */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; - buflen = req->rq_rvec[0].iov_len; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; - req->rq_rlen = buflen + args->bufsiz; - req->rq_rnr += 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count); return 0; } @@ -794,17 +749,17 @@ * Decode READLINK reply */ static int -nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkres *res) +nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) { - struct iovec *iov = req->rq_rvec; - int hdrlen; - u32 *strlen; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; + unsigned int hdrlen; + u32 *strlen, len; char *string; int status; - unsigned int len; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, res->fattr); + p = xdr_decode_post_op_attr(p, fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -812,18 +767,19 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READLINK header is short. 
iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } - strlen = (u32*)res->buffer; + strlen = (u32*)kmap(rcvbuf->pages[0]); /* Convert length of symlink */ len = ntohl(*strlen); - if (len > res->bufsiz - 5) - len = res->bufsiz - 5; + if (len > rcvbuf->page_len) + len = rcvbuf->page_len; *strlen = len; /* NULL terminate the string we got */ string = (char *)(strlen + 1); string[len] = 0; + kunmap(rcvbuf->pages[0]); return 0; } @@ -857,20 +813,18 @@ hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len > hdrlen) { dprintk("NFS: READ header is short. iovec will be shifted.\n"); - xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen); } - recvd = req->rq_rlen - hdrlen; + recvd = req->rq_received - hdrlen; if (count > recvd) { printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; } - if (count < res->count) { - xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); + if (count < res->count) res->count = count; - } return count; } @@ -958,14 +912,13 @@ * Decode FSSTAT reply */ static int -nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -975,8 +928,7 @@ p = xdr_decode_hyper(p, &res->tfiles); p = xdr_decode_hyper(p, &res->ffiles); p = xdr_decode_hyper(p, &res->afiles); - - /* ignore invarsec */ + res->invarsec = ntohl(*p++); return 0; } @@ -986,12 +938,11 @@ static int nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = 
xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); @@ -1003,8 +954,8 @@ res->wtmult = ntohl(*p++); res->dtpref = ntohl(*p++); p = xdr_decode_hyper(p, &res->maxfilesize); - - /* ignore time_delta and properties */ + p = xdr_decode_time3(p, &res->time_delta); + res->properties = ntohl(*p++); return 0; } @@ -1012,20 +963,21 @@ * Decode PATHCONF reply */ static int -nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res) { - struct nfs_fattr dummy; int status; status = ntohl(*p++); - p = xdr_decode_post_op_attr(p, &dummy); + p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) return -nfs_stat_to_errno(status); res->linkmax = ntohl(*p++); - res->namelen = ntohl(*p++); - - /* ignore remaining fields */ + res->name_max = ntohl(*p++); + res->no_trunc = ntohl(*p++) != 0; + res->chown_restricted = ntohl(*p++) != 0; + res->case_insensitive = ntohl(*p++) != 0; + res->case_preserving = ntohl(*p++) != 0; return 0; } @@ -1051,37 +1003,37 @@ # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif -#define PROC(proc, argtype, restype) \ - { "nfs3_" #proc, \ - (kxdrproc_t) nfs3_xdr_##argtype, \ - (kxdrproc_t) nfs3_xdr_##restype, \ - MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - 0 \ +#define PROC(proc, argtype, restype, timer) \ + { .p_procname = "nfs3_" #proc, \ + .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ + .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + .p_timer = timer \ } static struct rpc_procinfo nfs3_procedures[22] = { - PROC(null, enc_void, dec_void), - PROC(getattr, fhandle, attrstat), - PROC(setattr, sattrargs, wccstat), - PROC(lookup, diropargs, lookupres), - PROC(access, accessargs, accessres), - PROC(readlink, readlinkargs, readlinkres), - PROC(read, readargs, readres), - PROC(write, writeargs, writeres), - PROC(create, createargs, createres), - PROC(mkdir, mkdirargs, createres), - PROC(symlink, symlinkargs, createres), - PROC(mknod, mknodargs, createres), - PROC(remove, diropargs, wccstat), - PROC(rmdir, diropargs, wccstat), - PROC(rename, renameargs, renameres), - PROC(link, linkargs, linkres), - PROC(readdir, readdirargs, readdirres), - PROC(readdirplus, readdirargs, readdirres), - PROC(fsstat, fhandle, fsstatres), - PROC(fsinfo, fhandle, fsinfores), - PROC(pathconf, fhandle, pathconfres), - PROC(commit, commitargs, commitres), + PROC(null, enc_void, dec_void, 0), + PROC(getattr, fhandle, attrstat, 1), + PROC(setattr, sattrargs, wccstat, 0), + PROC(lookup, diropargs, lookupres, 2), + PROC(access, accessargs, accessres, 1), + PROC(readlink, readlinkargs, readlinkres, 3), + PROC(read, readargs, readres, 3), + PROC(write, writeargs, writeres, 4), + PROC(create, createargs, createres, 0), + PROC(mkdir, mkdirargs, createres, 0), + PROC(symlink, symlinkargs, createres, 0), + PROC(mknod, mknodargs, createres, 0), + PROC(remove, diropargs, wccstat, 0), + PROC(rmdir, diropargs, wccstat, 0), + PROC(rename, renameargs, renameres, 0), + PROC(link, linkargs, 
linkres, 0), + PROC(readdir, readdirargs, readdirres, 3), + PROC(readdirplus, readdirargs, readdirres, 3), + PROC(fsstat, fhandle, fsstatres, 0), + PROC(fsinfo, fhandle, fsinfores, 0), + PROC(pathconf, fhandle, pathconfres, 0), + PROC(commit, commitargs, commitres, 5), }; struct rpc_version nfs_version3 = { diff -u --recursive --new-file linux-2.4.19/fs/nfs/proc.c linux-2.4.19-29-fix_lockd4/fs/nfs/proc.c --- linux-2.4.19/fs/nfs/proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/proc.c Sat Oct 5 03:54:52 2002 @@ -106,15 +106,13 @@ } static int -nfs_proc_readlink(struct inode *inode, void *buffer, unsigned int bufsiz) +nfs_proc_readlink(struct inode *inode, struct page *page) { - struct nfs_readlinkargs args = { NFS_FH(inode), buffer, bufsiz }; - struct nfs_readlinkres res = { buffer, bufsiz }; + struct nfs_readlinkargs args = { NFS_FH(inode), PAGE_CACHE_SIZE, &page }; int status; dprintk("NFS call readlink\n"); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, - &args, &res, 0); + status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); dprintk("NFS reply readlink: %d\n", status); return status; } @@ -122,11 +120,12 @@ static int nfs_proc_read(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int flags, - loff_t offset, unsigned int count, void *buffer, int *eofp) + unsigned int base, unsigned int count, + struct page *page, int *eofp) { - struct nfs_readargs arg = { NFS_FH(inode), offset, count, 1, - {{ buffer, count }, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}} }; + u64 offset = page_offset(page) + base; + struct nfs_readargs arg = { NFS_FH(inode), offset, count, + base, &page }; struct nfs_readres res = { fattr, count, 0}; struct rpc_message msg = { NFSPROC_READ, &arg, &res, cred }; int status; @@ -143,13 +142,12 @@ static int nfs_proc_write(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, int how, - loff_t offset, unsigned int count, - void *buffer, struct nfs_writeverf *verf) 
+ unsigned int base, unsigned int count, + struct page *page, struct nfs_writeverf *verf) { - struct nfs_writeargs arg = {NFS_FH(inode), offset, count, - NFS_FILE_SYNC, 1, - {{buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}}}; + u64 offset = page_offset(page) + base; + struct nfs_writeargs arg = { NFS_FH(inode), offset, count, + NFS_FILE_SYNC, base, &page }; struct nfs_writeres res = {fattr, verf, count}; struct rpc_message msg = { NFSPROC_WRITE, &arg, &res, cred }; int status, flags = 0; @@ -337,21 +335,13 @@ */ static int nfs_proc_readdir(struct inode *dir, struct rpc_cred *cred, - __u64 cookie, void *entry, - unsigned int size, int plus) + __u64 cookie, struct page *page, + unsigned int count, int plus) { - struct nfs_readdirargs arg; - struct nfs_readdirres res; - struct rpc_message msg = { NFSPROC_READDIR, &arg, &res, cred }; + struct nfs_readdirargs arg = { NFS_FH(dir), cookie, count, &page }; + struct rpc_message msg = { NFSPROC_READDIR, &arg, NULL, cred }; int status; - arg.fh = NFS_FH(dir); - arg.cookie = cookie; - arg.buffer = entry; - arg.bufsiz = size; - res.buffer = entry; - res.bufsiz = size; - dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); @@ -361,17 +351,62 @@ static int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsstat *stat) { int status; + struct nfs2_statfs fsinfo; - dprintk("NFS call statfs\n"); - memset((char *)info, 0, sizeof(*info)); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0); + stat->fattr->valid = 0; + dprintk("NFS call statfs\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); dprintk("NFS reply statfs: %d\n", status); + if (status) + goto out; + stat->tbytes = (u64)fsinfo.blocks * fsinfo.bsize; + stat->fbytes = (u64)fsinfo.bfree * fsinfo.bsize; + stat->abytes = (u64)fsinfo.bavail * fsinfo.bsize; + stat->tfiles = 0; + stat->ffiles = 0; + 
stat->afiles = 0; + stat->invarsec = 0; + out: + return status; +} + +static int +nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + struct nfs2_statfs fsinfo; + + info->fattr->valid = 0; + dprintk("NFS call fsinfo\n"); + status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("NFS reply fsinfo: %d\n", status); + if (status) + goto out; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->time_delta = 0; + info->properties = 0x1b; + out: return status; } +static int +nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *info) +{ + return -ENOTSUPP; +} + extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); struct nfs_rpc_ops nfs_v2_clientops = { @@ -397,5 +432,7 @@ nfs_proc_readdir, nfs_proc_mknod, nfs_proc_statfs, + nfs_proc_fsinfo, + nfs_proc_pathconf, nfs_decode_dirent, }; diff -u --recursive --new-file linux-2.4.19/fs/nfs/read.c linux-2.4.19-29-fix_lockd4/fs/nfs/read.c --- linux-2.4.19/fs/nfs/read.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/read.c Sat Oct 5 03:50:55 2002 @@ -42,6 +42,7 @@ struct nfs_readres res; /* ... 
and result struct */ struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ + struct page *pagevec[NFS_READ_MAXIOV]; }; /* @@ -63,6 +64,7 @@ if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->args.pages = p->pagevec; } return p; } @@ -86,8 +88,7 @@ { struct rpc_cred *cred = NULL; struct nfs_fattr fattr; - loff_t offset = page_offset(page); - char *buffer; + unsigned int offset = 0; int rsize = NFS_SERVER(inode)->rsize; int result; int count = PAGE_CACHE_SIZE; @@ -103,19 +104,18 @@ * This works now because the socket layer never tries to DMA * into this buffer directly. */ - buffer = kmap(page); do { if (count < rsize) rsize = count; - dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %Ld, %d, %p)\n", + dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %u, %u, %p)\n", NFS_SERVER(inode)->hostname, inode->i_dev, (long long)NFS_FILEID(inode), - (long long)offset, rsize, buffer); + offset, rsize, page); lock_kernel(); result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags, - offset, rsize, buffer, &eof); + offset, rsize, page, &eof); nfs_refresh_inode(inode, &fattr); unlock_kernel(); @@ -130,12 +130,15 @@ } count -= result; offset += result; - buffer += result; if (result < rsize) /* NFSv2ism */ break; } while (count); - memset(buffer, 0, count); + if (count) { + char *kaddr = kmap(page); + memset(kaddr + offset, 0, count); + kunmap(page); + } flush_dcache_page(page); SetPageUptodate(page); if (PageError(page)) @@ -143,7 +146,6 @@ result = 0; io_error: - kunmap(page); UnlockPage(page); return result; } @@ -186,26 +188,24 @@ nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data) { struct nfs_page *req; - struct iovec *iov; + struct page **pages; unsigned int count; - iov = data->args.iov; + pages = data->args.pages; count = 0; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - iov->iov_base = kmap(req->wb_page) 
+ req->wb_offset; - iov->iov_len = req->wb_bytes; + *pages++ = req->wb_page; count += req->wb_bytes; - iov++; - data->args.nriov++; } req = nfs_list_entry(data->pages.next); data->inode = req->wb_inode; data->cred = req->wb_cred; data->args.fh = NFS_FH(req->wb_inode); data->args.offset = page_offset(req->wb_page) + req->wb_offset; + data->args.pgbase = req->wb_offset; data->args.count = count; data->res.fattr = &data->fattr; data->res.count = count; @@ -266,10 +266,10 @@ msg.rpc_cred = data->cred; /* Start the async call */ - dprintk("NFS: %4d initiated read call (req %x/%Ld count %d nriov %d.\n", + dprintk("NFS: %4d initiated read call (req %x/%Ld count %u.\n", task->tk_pid, inode->i_dev, (long long)NFS_FILEID(inode), - data->args.count, data->args.nriov); + data->args.count); rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); @@ -424,7 +424,6 @@ } else SetPageError(page); flush_dcache_page(page); - kunmap(page); UnlockPage(page); dprintk("NFS: read (%x/%Ld %d@%Ld)\n", diff -u --recursive --new-file linux-2.4.19/fs/nfs/symlink.c linux-2.4.19-29-fix_lockd4/fs/nfs/symlink.c --- linux-2.4.19/fs/nfs/symlink.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/symlink.c Sat Oct 5 03:50:55 2002 @@ -29,7 +29,6 @@ */ static int nfs_symlink_filler(struct inode *inode, struct page *page) { - void *buffer = kmap(page); int error; /* We place the length at the beginning of the page, @@ -37,13 +36,11 @@ * XDR response verification will NULL terminate it. 
*/ lock_kernel(); - error = NFS_PROTO(inode)->readlink(inode, buffer, - PAGE_CACHE_SIZE - sizeof(u32)-4); + error = NFS_PROTO(inode)->readlink(inode, page); unlock_kernel(); if (error < 0) goto error; SetPageUptodate(page); - kunmap(page); UnlockPage(page); return 0; diff -u --recursive --new-file linux-2.4.19/fs/nfs/write.c linux-2.4.19-29-fix_lockd4/fs/nfs/write.c --- linux-2.4.19/fs/nfs/write.c Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/fs/nfs/write.c Sat Oct 5 03:55:07 2002 @@ -77,6 +77,7 @@ struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ + struct page *pagevec[NFS_WRITE_MAXIOV]; }; /* @@ -105,6 +106,7 @@ if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->args.pages = p->pagevec; } return p; } @@ -121,23 +123,6 @@ } /* - * This function will be used to simulate weak cache consistency - * under NFSv2 when the NFSv3 attribute patch is included. - * For the moment, we just call nfs_refresh_inode(). - */ -static __inline__ int -nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) -{ - if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - return nfs_refresh_inode(inode, fattr); -} - -/* * Write a page synchronously. * Offset is the data offset within the page. */ @@ -163,7 +148,6 @@ inode->i_dev, (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); - buffer = kmap(page) + offset; base = page_offset(page) + offset; flags = ((IS_SWAPFILE(inode)) ? 
NFS_RW_SWAP : 0) | NFS_RW_SYNC; @@ -173,7 +157,7 @@ wsize = count; result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags, - base, wsize, buffer, &verf); + offset, wsize, page, &verf); nfs_write_attributes(inode, &fattr); if (result < 0) { @@ -186,7 +170,8 @@ wsize, result); refresh = 1; buffer += wsize; - base += wsize; + base += wsize; + offset += wsize; written += wsize; count -= wsize; /* @@ -201,7 +186,6 @@ ClearPageError(page); io_error: - kunmap(page); if (cred) put_rpccred(cred); @@ -811,8 +795,15 @@ * If wsize is smaller than page size, update and write * page synchronously. */ - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) - return nfs_writepage_sync(file, inode, page, offset, count); + if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) { + status = nfs_writepage_sync(file, inode, page, offset, count); + if (status > 0) { + if (offset == 0 && status == PAGE_CACHE_SIZE) + SetPageUptodate(page); + return 0; + } + return status; + } /* * Try to find an NFS request corresponding to this page @@ -861,29 +852,27 @@ nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data) { struct nfs_page *req; - struct iovec *iov; + struct page **pages; unsigned int count; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. 
*/ - iov = data->args.iov; + pages = data->args.pages; count = 0; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); - iov->iov_base = kmap(req->wb_page) + req->wb_offset; - iov->iov_len = req->wb_bytes; + *pages++ = req->wb_page; count += req->wb_bytes; - iov++; - data->args.nriov++; } req = nfs_list_entry(data->pages.next); data->inode = req->wb_inode; data->cred = req->wb_cred; data->args.fh = NFS_FH(req->wb_inode); data->args.offset = page_offset(req->wb_page) + req->wb_offset; + data->args.pgbase = req->wb_offset; data->args.count = count; data->res.fattr = &data->fattr; data->res.count = count; @@ -948,11 +937,11 @@ msg.rpc_resp = &data->res; msg.rpc_cred = data->cred; - dprintk("NFS: %4d initiated write call (req %x/%Ld count %d nriov %d)\n", + dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n", task->tk_pid, inode->i_dev, (long long)NFS_FILEID(inode), - data->args.count, data->args.nriov); + data->args.count); rpc_clnt_sigmask(clnt, &oldset); rpc_call_setup(task, &msg, 0); @@ -1064,8 +1053,6 @@ nfs_list_remove_request(req); page = req->wb_page; - kunmap(page); - dprintk("NFS: write (%x/%Ld %d@%Ld)", req->wb_inode->i_dev, (long long)NFS_FILEID(req->wb_inode), diff -u --recursive --new-file linux-2.4.19/include/asm-i386/kmap_types.h linux-2.4.19-29-fix_lockd4/include/asm-i386/kmap_types.h --- linux-2.4.19/include/asm-i386/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-29-fix_lockd4/include/asm-i386/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-mips/kmap_types.h linux-2.4.19-29-fix_lockd4/include/asm-mips/kmap_types.h --- linux-2.4.19/include/asm-mips/kmap_types.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/include/asm-mips/kmap_types.h Sat Oct 5 03:50:55 2002 @@ 
-3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-ppc/kmap_types.h linux-2.4.19-29-fix_lockd4/include/asm-ppc/kmap_types.h --- linux-2.4.19/include/asm-ppc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-29-fix_lockd4/include/asm-ppc/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -7,7 +7,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/asm-sparc/kmap_types.h linux-2.4.19-29-fix_lockd4/include/asm-sparc/kmap_types.h --- linux-2.4.19/include/asm-sparc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux-2.4.19-29-fix_lockd4/include/asm-sparc/kmap_types.h Sat Oct 5 03:50:55 2002 @@ -3,7 +3,7 @@ enum km_type { KM_BOUNCE_READ, - KM_SKB_DATA, + KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, diff -u --recursive --new-file linux-2.4.19/include/linux/fs.h linux-2.4.19-29-fix_lockd4/include/linux/fs.h --- linux-2.4.19/include/linux/fs.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/include/linux/fs.h Sat Oct 5 04:01:40 2002 @@ -394,7 +394,7 @@ int (*flushpage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ - int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); + int (*direct_IO)(int, struct file *, struct kiobuf *, unsigned long, int); }; struct address_space { diff -u --recursive --new-file linux-2.4.19/include/linux/lockd/lockd.h linux-2.4.19-29-fix_lockd4/include/linux/lockd/lockd.h --- linux-2.4.19/include/linux/lockd/lockd.h Thu Nov 22 20:47:20 2001 +++ linux-2.4.19-29-fix_lockd4/include/linux/lockd/lockd.h Sat Oct 5 04:20:16 2002 @@ -164,6 +164,7 @@ unsigned long nlmsvc_retry_blocked(void); int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, int action); +void 
nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32); /* * File handling for the server personality diff -u --recursive --new-file linux-2.4.19/include/linux/nfs_fs.h linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs.h --- linux-2.4.19/include/linux/nfs_fs.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs.h Sat Oct 5 04:01:58 2002 @@ -102,8 +102,15 @@ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 1 : 0) +static inline int nfs_server_capable(struct inode *inode, int cap) +{ + return NFS_SERVER(inode)->caps & cap; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; +} /* * These are the default flags for swap requests @@ -270,6 +277,11 @@ extern int nfs_scan_lru_read_timeout(struct nfs_server *, struct list_head *); /* + * linux/fs/nfs/direct.c + */ +extern int nfs_direct_IO(int, struct file *, struct kiobuf *, unsigned long, int); + +/* * linux/fs/mount_clnt.c * (Used only by nfsroot module) */ @@ -298,6 +310,23 @@ return __nfs_refresh_inode(inode,fattr); } +/* + * This function will be used to simulate weak cache consistency + * under NFSv2 when the NFSv3 attribute patch is included. + * For the moment, we just call nfs_refresh_inode(). 
+ */ +static __inline__ int +nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) +{ + if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { + fattr->pre_size = NFS_CACHE_ISIZE(inode); + fattr->pre_mtime = NFS_CACHE_MTIME(inode); + fattr->pre_ctime = NFS_CACHE_CTIME(inode); + fattr->valid |= NFS_ATTR_WCC; + } + return nfs_refresh_inode(inode, fattr); +} + static inline loff_t nfs_size_to_loff_t(__u64 size) { diff -u --recursive --new-file linux-2.4.19/include/linux/nfs_fs_i.h linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs_i.h --- linux-2.4.19/include/linux/nfs_fs_i.h Sat Aug 3 02:39:45 2002 +++ linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs_i.h Sat Oct 5 04:01:40 2002 @@ -6,6 +6,16 @@ #include /* + * NFSv3 Access mode cache + */ +struct nfs_access_cache { + unsigned long jiffies; + struct rpc_cred * cred; + int mask; + int err; +}; + +/* * nfs fs inode data in memory */ struct nfs_inode_info { @@ -54,6 +64,8 @@ */ unsigned long cache_mtime_jiffies; + struct nfs_access_cache cache_access; + /* * This is the cookie verifier used for NFSv3 readdir * operations diff -u --recursive --new-file linux-2.4.19/include/linux/nfs_fs_sb.h linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs_sb.h --- linux-2.4.19/include/linux/nfs_fs_sb.h Thu Nov 22 20:46:19 2001 +++ linux-2.4.19-29-fix_lockd4/include/linux/nfs_fs_sb.h Sat Oct 5 04:01:40 2002 @@ -10,6 +10,7 @@ struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -36,4 +37,8 @@ struct nfs_server s_server; }; +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + + #endif diff -u --recursive --new-file linux-2.4.19/include/linux/nfs_xdr.h linux-2.4.19-29-fix_lockd4/include/linux/nfs_xdr.h --- linux-2.4.19/include/linux/nfs_xdr.h Mon Jan 29 
21:07:43 2001 +++ linux-2.4.19-29-fix_lockd4/include/linux/nfs_xdr.h Sat Oct 5 03:55:07 2002 @@ -30,6 +30,7 @@ __u64 atime; __u64 mtime; __u64 ctime; + unsigned long timestamp; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ @@ -40,6 +41,7 @@ * Info on the file system */ struct nfs_fsinfo { + struct nfs_fattr *fattr; __u32 rtmax; /* max. read transfer size */ __u32 rtpref; /* pref. read transfer size */ __u32 rtmult; /* reads should be multiple of this */ @@ -48,28 +50,50 @@ __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - __u64 bsize; /* block size */ + __u64 time_delta; + __u32 properties; +}; + +struct nfs_fsstat { + struct nfs_fattr *fattr; __u64 tbytes; /* total size in bytes */ __u64 fbytes; /* # of free bytes */ __u64 abytes; /* # of bytes available to user */ __u64 tfiles; /* # of files */ __u64 ffiles; /* # of free files */ __u64 afiles; /* # of files available to user */ + __u32 invarsec; +}; + +struct nfs_pathconf { + struct nfs_fattr *fattr; /* Post-op attributes */ __u32 linkmax;/* max # of hard links */ - __u32 namelen;/* max name length */ + __u32 name_max;/* max name length */ + int no_trunc : 1, + chown_restricted : 1, + case_insensitive : 1, + case_preserving : 1; +}; + +struct nfs2_statfs { + __u32 tsize; /* Server transfer size */ + __u32 bsize; /* Filesystem block size */ + __u32 blocks; /* No. of "bsize" blocks on filesystem */ + __u32 bfree; /* No. of free "bsize" blocks */ + __u32 bavail; /* No. of available "bsize" blocks */ }; /* Arguments to the read call. * Note that NFS_READ_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h */ -#define NFS_READ_MAXIOV 8 +#define NFS_READ_MAXIOV (9) struct nfs_readargs { struct nfs_fh * fh; __u64 offset; __u32 count; - unsigned int nriov; - struct iovec iov[NFS_READ_MAXIOV]; + unsigned int pgbase; + struct page ** pages; }; struct nfs_readres { @@ -81,14 +105,14 @@ /* Arguments to the write call. 
* Note that NFS_WRITE_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h */ -#define NFS_WRITE_MAXIOV 8 +#define NFS_WRITE_MAXIOV (9) struct nfs_writeargs { struct nfs_fh * fh; __u64 offset; __u32 count; enum nfs3_stable_how stable; - unsigned int nriov; - struct iovec iov[NFS_WRITE_MAXIOV]; + unsigned int pgbase; + struct page ** pages; }; struct nfs_writeverf { @@ -112,8 +136,8 @@ const char * name; unsigned int len; int eof; - struct nfs_fh fh; - struct nfs_fattr fattr; + struct nfs_fh *fh; + struct nfs_fattr *fattr; }; /* @@ -165,8 +189,8 @@ struct nfs_readdirargs { struct nfs_fh * fh; __u32 cookie; - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs_diropok { @@ -176,18 +200,8 @@ struct nfs_readlinkargs { struct nfs_fh * fh; - void * buffer; - unsigned int bufsiz; -}; - -struct nfs_readlinkres { - void * buffer; - unsigned int bufsiz; -}; - -struct nfs_readdirres { - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs3_sattrargs { @@ -262,9 +276,9 @@ struct nfs_fh * fh; __u64 cookie; __u32 verf[2]; - void * buffer; - unsigned int bufsiz; int plus; + unsigned int count; + struct page ** pages; }; struct nfs3_diropres { @@ -280,14 +294,8 @@ struct nfs3_readlinkargs { struct nfs_fh * fh; - void * buffer; - unsigned int bufsiz; -}; - -struct nfs3_readlinkres { - struct nfs_fattr * fattr; - void * buffer; - unsigned int bufsiz; + unsigned int count; + struct page ** pages; }; struct nfs3_renameres { @@ -303,8 +311,6 @@ struct nfs3_readdirres { struct nfs_fattr * dir_attr; __u32 * verf; - void * buffer; - unsigned int bufsiz; int plus; }; @@ -321,16 +327,16 @@ struct iattr *); int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, int , int); - int (*readlink)(struct inode *, void *, unsigned int); + int (*access) (struct inode *, struct rpc_cred *, int); + int (*readlink)(struct inode *, struct page *); int 
(*read) (struct inode *, struct rpc_cred *, struct nfs_fattr *, - int, loff_t, unsigned int, - void *buffer, int *eofp); + int, unsigned int, unsigned int, + struct page *, int *eofp); int (*write) (struct inode *, struct rpc_cred *, struct nfs_fattr *, - int, loff_t, unsigned int, - void *buffer, struct nfs_writeverf *verfp); + int, unsigned int, unsigned int, + struct page *, struct nfs_writeverf *verfp); int (*commit) (struct inode *, struct nfs_fattr *, unsigned long, unsigned int); int (*create) (struct inode *, struct qstr *, struct iattr *, @@ -349,11 +355,15 @@ struct nfs_fh *, struct nfs_fattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct inode *, struct rpc_cred *, - u64, void *, unsigned int, int); + u64, struct page *, unsigned int, int); int (*mknod) (struct inode *, struct qstr *, struct iattr *, dev_t, struct nfs_fh *, struct nfs_fattr *); int (*statfs) (struct nfs_server *, struct nfs_fh *, + struct nfs_fsstat *); + int (*fsinfo) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*pathconf) (struct nfs_server *, struct nfs_fh *, + struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); }; diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/clnt.h linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/clnt.h --- linux-2.4.19/include/linux/sunrpc/clnt.h Mon Feb 25 20:38:13 2002 +++ linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/clnt.h Sat Oct 5 04:01:57 2002 @@ -15,6 +15,7 @@ #include #include #include +#include /* * This defines an RPC port mapping @@ -51,6 +52,8 @@ unsigned int cl_flags; /* misc client flags */ unsigned long cl_hardmax; /* max hard timeout */ + struct rpc_rtt cl_rtt; /* RTO estimator data */ + struct rpc_portmap cl_pmap; /* port mapping */ struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ @@ -90,6 +93,7 @@ kxdrproc_t p_decode; /* XDR decode function */ unsigned int p_bufsiz; /* req. 
buffer size */ unsigned int p_count; /* call count */ + unsigned int p_timer; /* Which RTT timer to use */ }; #define rpcproc_bufsiz(clnt, proc) ((clnt)->cl_procinfo[proc].p_bufsiz) @@ -97,6 +101,7 @@ #define rpcproc_decode(clnt, proc) ((clnt)->cl_procinfo[proc].p_decode) #define rpcproc_name(clnt, proc) ((clnt)->cl_procinfo[proc].p_procname) #define rpcproc_count(clnt, proc) ((clnt)->cl_procinfo[proc].p_count) +#define rpcproc_timer(clnt, proc) ((clnt)->cl_procinfo[proc].p_timer) #define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) #define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) @@ -121,6 +126,7 @@ void rpc_restart_call(struct rpc_task *); void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); +void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); static __inline__ int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/sched.h linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/sched.h --- linux-2.4.19/include/linux/sunrpc/sched.h Thu Nov 22 20:46:19 2001 +++ linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/sched.h Sat Oct 5 03:51:30 2002 @@ -77,9 +77,7 @@ wait_queue_head_t tk_wait; /* sync: sleep on this q */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ - unsigned short tk_lock; /* Task lock counter */ - unsigned char tk_active : 1,/* Task has been activated */ - tk_wakeup : 1;/* Task waiting to wake up */ + unsigned char tk_active : 1;/* Task has been activated */ unsigned long tk_runstate; /* Task run status */ #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ @@ -161,15 +159,11 @@ void rpc_remove_wait_queue(struct rpc_task *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); -void rpc_sleep_locked(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, 
rpc_action timer); void rpc_add_timer(struct rpc_task *, rpc_action); void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); -int __rpc_lock_task(struct rpc_task *); -void rpc_unlock_task(struct rpc_task *); void rpc_delay(struct rpc_task *, unsigned long); void * rpc_allocate(unsigned int flags, unsigned int); void rpc_free(void *); diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/timer.h linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/timer.h --- linux-2.4.19/include/linux/sunrpc/timer.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/timer.h Sat Oct 5 04:01:57 2002 @@ -0,0 +1,41 @@ +/* + * linux/include/linux/sunrpc/timer.h + * + * Declarations for the RPC transport timer. + * + * Copyright (C) 2002 Trond Myklebust + */ + +#ifndef _LINUX_SUNRPC_TIMER_H +#define _LINUX_SUNRPC_TIMER_H + +#include + +struct rpc_rtt { + long timeo; /* default timeout value */ + long srtt[5]; /* smoothed round trip time << 3 */ + long sdrtt[5]; /* smoothed medium deviation of RTT */ + atomic_t ntimeouts; /* Global count of the number of timeouts */ +}; + + +extern void rpc_init_rtt(struct rpc_rtt *rt, long timeo); +extern void rpc_update_rtt(struct rpc_rtt *rt, int timer, long m); +extern long rpc_calc_rto(struct rpc_rtt *rt, int timer); + +static inline void rpc_inc_timeo(struct rpc_rtt *rt) +{ + atomic_inc(&rt->ntimeouts); +} + +static inline void rpc_clear_timeo(struct rpc_rtt *rt) +{ + atomic_set(&rt->ntimeouts, 0); +} + +static inline int rpc_ntimeo(struct rpc_rtt *rt) +{ + return atomic_read(&rt->ntimeouts); +} + +#endif /* _LINUX_SUNRPC_TIMER_H */ diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/xdr.h linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/xdr.h --- linux-2.4.19/include/linux/sunrpc/xdr.h Thu Nov 22 20:47:20 2001 +++
linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/xdr.h Sat Oct 5 04:01:57 2002 @@ -34,6 +34,31 @@ typedef int (*kxdrproc_t)(void *rqstp, u32 *data, void *obj); /* + * Basic structure for transmission/reception of a client XDR message. + * Features a header (for a linear buffer containing RPC headers + * and the data payload for short messages), and then an array of + * pages. + * The tail iovec allows you to append data after the page array. Its + * main interest is for appending padding to the pages in order to + * satisfy the int_32-alignment requirements in RFC1832. + * + * For the future, we might want to string several of these together + * in a list if anybody wants to make use of NFSv4 COMPOUND + * operations and/or has a need for scatter/gather involving pages. + */ +struct xdr_buf { + struct iovec head[1], /* RPC header + non-page data */ + tail[1]; /* Appended after page data */ + + struct page ** pages; /* Array of contiguous pages */ + unsigned int page_base, /* Start of page data */ + page_len; /* Length of page data */ + + unsigned int len; /* Total length of data */ + +}; + +/* * pre-xdr'ed macros. */ @@ -67,6 +92,11 @@ u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *); u32 * xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len); +void xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int, + unsigned int); +void xdr_inline_pages(struct xdr_buf *, unsigned int, + struct page **, unsigned int, unsigned int); + /* * Decode 64bit quantities (NFSv3 support) */ @@ -98,6 +128,40 @@ void xdr_shift_iovec(struct iovec *, int, size_t); void xdr_zero_iovec(struct iovec *, int, size_t); +/* + * Maximum number of iov's we use. 
+ */ +#define MAX_IOVEC (12) + +/* + * XDR buffer helper functions + */ +extern int xdr_kmap(struct iovec *, struct xdr_buf *, unsigned int); +extern void xdr_kunmap(struct xdr_buf *, unsigned int); +extern void xdr_shift_buf(struct xdr_buf *, unsigned int); +extern void xdr_zero_buf(struct xdr_buf *, unsigned int); + +/* + * Helper structure for copying from an sk_buff. + */ +typedef struct { + struct sk_buff *skb; + unsigned int offset; + size_t count; + unsigned int csum; +} skb_reader_t; + +typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); + +extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, + skb_reader_t *, skb_read_actor_t); + +extern int xdr_copy_skb(struct xdr_buf *xdr, unsigned int base, + struct sk_buff *skb, unsigned int offset); + +extern int xdr_copy_and_csum_skb(struct xdr_buf *xdr, unsigned int base, + struct sk_buff *skb, unsigned int offset, unsigned int csum); + #endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ diff -u --recursive --new-file linux-2.4.19/include/linux/sunrpc/xprt.h linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/xprt.h --- linux-2.4.19/include/linux/sunrpc/xprt.h Sat Aug 3 02:39:46 2002 +++ linux-2.4.19-29-fix_lockd4/include/linux/sunrpc/xprt.h Sat Oct 5 04:01:57 2002 @@ -13,17 +13,13 @@ #include #include #include - -/* - * Maximum number of iov's we use. - */ -#define MAX_IOVEC 10 +#include /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion * avoidance implemented in 44BSD. 
This is basically the Van Jacobson - * slow start algorithm: If a retransmit occurs, the congestion window is + * congestion algorithm: If a retransmit occurs, the congestion window is * halved; otherwise, it is incremented by 1/cwnd when * * - a reply is received and @@ -36,15 +32,13 @@ * Note: on machines with low memory we should probably use a smaller * MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment * reassembly will frequently run out of memory. - * Come Linux 2.3, we'll handle fragments directly. */ -#define RPC_MAXCONG 16 -#define RPC_MAXREQS (RPC_MAXCONG + 1) -#define RPC_CWNDSCALE 256 +#define RPC_MAXCONG (16) +#define RPC_MAXREQS RPC_MAXCONG +#define RPC_CWNDSCALE (256) #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_INITCWND RPC_CWNDSCALE -#define RPCXPRT_CONGESTED(xprt) \ - ((xprt)->cong >= (xprt)->cwnd) +#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) @@ -63,22 +57,12 @@ unsigned long to_current, /* current timeout */ to_initval, /* initial timeout */ to_maxval, /* max timeout */ - to_increment, /* if !exponential */ - to_resrvval; /* reserve timeout */ + to_increment; /* if !exponential */ short to_retries; /* max # of retries */ unsigned char to_exponential; }; /* - * This is the RPC buffer - */ -struct rpc_iov { - struct iovec io_vec[MAX_IOVEC]; - unsigned int io_nr; - unsigned int io_len; -}; - -/* * This describes a complete RPC request */ struct rpc_rqst { @@ -87,8 +71,8 @@ */ struct rpc_xprt * rq_xprt; /* RPC client */ struct rpc_timeout rq_timeout; /* timeout parms */ - struct rpc_iov rq_snd_buf; /* send buffer */ - struct rpc_iov rq_rcv_buf; /* recv buffer */ + struct xdr_buf rq_snd_buf; /* send buffer */ + struct xdr_buf rq_rcv_buf; /* recv buffer */ /* * This is the private part @@ -96,7 +80,10 @@ struct rpc_task * rq_task; /* RPC task data */ __u32 rq_xid; /* request XID */ struct rpc_rqst * rq_next; /* free list */ - volatile 
unsigned char rq_received : 1;/* receive completed */ + int rq_cong; /* has incremented xprt->cong */ + int rq_received; /* receive completed */ + + struct list_head rq_list; /* * For authentication (e.g. auth_des) @@ -109,16 +96,14 @@ u32 rq_bytes_sent; /* Bytes we have sent */ -#ifdef RPC_PROFILE - unsigned long rq_xtime; /* when transmitted */ -#endif + long rq_xtime; /* when transmitted */ + int rq_ntimeo; + int rq_nresend; }; -#define rq_svec rq_snd_buf.io_vec -#define rq_snr rq_snd_buf.io_nr -#define rq_slen rq_snd_buf.io_len -#define rq_rvec rq_rcv_buf.io_vec -#define rq_rnr rq_rcv_buf.io_nr -#define rq_rlen rq_rcv_buf.io_len +#define rq_svec rq_snd_buf.head +#define rq_slen rq_snd_buf.len +#define rq_rvec rq_rcv_buf.head +#define rq_rlen rq_rcv_buf.len #define XPRT_LAST_FRAG (1 << 0) #define XPRT_COPY_RECM (1 << 1) @@ -135,9 +120,12 @@ unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned long congtime; /* hold cwnd until then */ + + unsigned int rcvsize, /* socket receive buffer size */ + sndsize; /* socket send buffer size */ struct rpc_wait_queue sending; /* requests waiting to send */ + struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct rpc_rqst * free; /* free slots */ @@ -164,6 +152,8 @@ spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ + struct list_head recv; + void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); @@ -182,20 +172,16 @@ void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); -int xprt_reserve(struct rpc_task *); +void xprt_reserve(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_timeout *); void xprt_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int 
xprt_clear_backlog(struct rpc_xprt *); +void xprt_sock_setbufsize(struct rpc_xprt *); -#define XPRT_WSPACE 0 -#define XPRT_CONNECT 1 - -#define xprt_wspace(xp) (test_bit(XPRT_WSPACE, &(xp)->sockstate)) -#define xprt_test_and_set_wspace(xp) (test_and_set_bit(XPRT_WSPACE, &(xp)->sockstate)) -#define xprt_clear_wspace(xp) (clear_bit(XPRT_WSPACE, &(xp)->sockstate)) +#define XPRT_CONNECT 0 #define xprt_connected(xp) (!(xp)->stream || test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff -u --recursive --new-file linux-2.4.19/mm/filemap.c linux-2.4.19-29-fix_lockd4/mm/filemap.c --- linux-2.4.19/mm/filemap.c Sat Aug 3 02:39:46 2002 +++ linux-2.4.19-29-fix_lockd4/mm/filemap.c Sat Oct 5 03:55:08 2002 @@ -1581,7 +1581,7 @@ if (retval) break; - retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize); + retval = mapping->a_ops->direct_IO(rw, filp, iobuf, (offset+progress) >> blocksize_bits, blocksize); if (rw == READ && retval > 0) mark_dirty_kiobuf(iobuf, retval); diff -u --recursive --new-file linux-2.4.19/net/sunrpc/Makefile linux-2.4.19-29-fix_lockd4/net/sunrpc/Makefile --- linux-2.4.19/net/sunrpc/Makefile Fri Dec 29 23:07:24 2000 +++ linux-2.4.19-29-fix_lockd4/net/sunrpc/Makefile Sat Oct 5 03:51:46 2002 @@ -14,7 +14,7 @@ obj-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o \ - pmap_clnt.o xdr.o sunrpc_syms.o + pmap_clnt.o timer.o xdr.o sunrpc_syms.o obj-$(CONFIG_PROC_FS) += stats.o obj-$(CONFIG_SYSCTL) += sysctl.o diff -u --recursive --new-file linux-2.4.19/net/sunrpc/clnt.c linux-2.4.19-29-fix_lockd4/net/sunrpc/clnt.c --- linux-2.4.19/net/sunrpc/clnt.c Fri Sep 21 20:24:50 2001 +++ linux-2.4.19-29-fix_lockd4/net/sunrpc/clnt.c Sat Oct 5 03:53:49 2002 @@ -43,6 +43,7 @@ static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); +static void call_start(struct rpc_task *task); static void call_reserve(struct rpc_task *task); static void 
call_reserveresult(struct rpc_task *task); static void call_allocate(struct rpc_task *task); @@ -108,6 +109,8 @@ if (!clnt->cl_port) clnt->cl_autobind = 1; + rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); + if (!rpcauth_create(flavor, clnt)) goto out_no_auth; @@ -328,13 +331,23 @@ rpcauth_bindcred(task); if (task->tk_status == 0) - task->tk_action = call_reserve; + task->tk_action = call_start; else task->tk_action = NULL; +} - /* Increment call count */ - if (task->tk_msg.rpc_proc < task->tk_client->cl_maxproc) - rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++; +void +rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + + xprt->sndsize = 0; + if (sndsize) + xprt->sndsize = sndsize + RPC_SLACK_SPACE; + xprt->rcvsize = 0; + if (rcvsize) + xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; + xprt_sock_setbufsize(xprt); } /* @@ -347,26 +360,46 @@ if (RPC_ASSASSINATED(task)) return; - task->tk_action = call_reserve; - rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++; + task->tk_action = call_start; } /* - * 1. Reserve an RPC call slot + * 0. Initial state + * + * Other FSM states can be visited zero or more times, but + * this state is visited exactly once for each RPC. */ static void -call_reserve(struct rpc_task *task) +call_start(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; if (task->tk_msg.rpc_proc > clnt->cl_maxproc) { - printk(KERN_WARNING "%s (vers %d): bad procedure number %d\n", - clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc); + printk(KERN_ERR "%s (vers %d): bad procedure number %d\n", + clnt->cl_protname, clnt->cl_vers, + task->tk_msg.rpc_proc); rpc_exit(task, -EIO); return; } + dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid, + clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc, + (RPC_IS_ASYNC(task) ? 
"async" : "sync")); + + /* Increment call count */ + rpcproc_count(clnt, task->tk_msg.rpc_proc)++; + clnt->cl_stats->rpccnt++; + task->tk_action = call_reserve; +} + +/* + * 1. Reserve an RPC call slot + */ +static void +call_reserve(struct rpc_task *task) +{ dprintk("RPC: %4d call_reserve\n", task->tk_pid); + if (!rpcauth_uptodatecred(task)) { task->tk_action = call_refresh; return; @@ -374,8 +407,6 @@ task->tk_status = 0; task->tk_action = call_reserveresult; - task->tk_timeout = clnt->cl_timeout.to_resrvval; - clnt->cl_stats->rpccnt++; xprt_reserve(task); } @@ -389,38 +420,46 @@ dprintk("RPC: %4d call_reserveresult (status %d)\n", task->tk_pid, task->tk_status); + /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. */ - if ((task->tk_status >= 0 && !task->tk_rqstp) || - (task->tk_status < 0 && task->tk_rqstp)) - printk(KERN_ERR "call_reserveresult: status=%d, request=%p??\n", - task->tk_status, task->tk_rqstp); + task->tk_status = 0; + if (status >= 0) { + if (task->tk_rqstp) { + task->tk_action = call_allocate; + return; + } - if (task->tk_status >= 0) { - task->tk_action = call_allocate; + printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", + __FUNCTION__, status); + rpc_exit(task, -EIO); return; } - task->tk_status = 0; + /* + * Even though there was an error, we may have acquired + * a request slot somehow. Make sure not to leak it. 
+ */ + if (task->tk_rqstp) { + printk(KERN_ERR "%s: status=%d, request allocated anyway\n", + __FUNCTION__, status); + xprt_release(task); + } + switch (status) { - case -EAGAIN: - case -ENOBUFS: - task->tk_timeout = task->tk_client->cl_timeout.to_resrvval; + case -EAGAIN: /* woken up; retry */ task->tk_action = call_reserve; - break; - case -ETIMEDOUT: - dprintk("RPC: task timed out\n"); - task->tk_action = call_timeout; + return; + case -EIO: /* probably a shutdown */ break; default: - if (!task->tk_rqstp) { - printk(KERN_INFO "RPC: task has no request, exit EIO\n"); - rpc_exit(task, -EIO); - } else - rpc_exit(task, status); + printk(KERN_ERR "%s: unrecognized error %d, exiting\n", + __FUNCTION__, status); + break; } + rpc_exit(task, status); } /* @@ -465,6 +504,8 @@ { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; + struct xdr_buf *sndbuf = &req->rq_snd_buf; + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; int status; @@ -477,14 +518,16 @@ /* Default buffer setup */ bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE; - req->rq_svec[0].iov_base = (void *)task->tk_buffer; - req->rq_svec[0].iov_len = bufsiz; - req->rq_slen = 0; - req->rq_snr = 1; - req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); - req->rq_rvec[0].iov_len = bufsiz; - req->rq_rlen = bufsiz; - req->rq_rnr = 1; + sndbuf->head[0].iov_base = (void *)task->tk_buffer; + sndbuf->head[0].iov_len = bufsiz; + sndbuf->tail[0].iov_len = 0; + sndbuf->page_len = 0; + sndbuf->len = 0; + rcvb