All of the above --- fs/lockd/clntlock.c | 89 +--- fs/lockd/clntproc.c | 66 +-- fs/lockd/host.c | 5 fs/lockd/mon.c | 11 fs/lockd/svc4proc.c | 2 fs/lockd/svclock.c | 258 ++++++----- fs/lockd/svcproc.c | 2 fs/lockd/svcsubs.c | 2 fs/lockd/xdr.c | 17 - fs/lockd/xdr4.c | 21 + fs/locks.c | 53 +- fs/namespace.c | 38 ++ fs/nfs/callback_xdr.c | 28 + fs/nfs/dir.c | 104 +++- fs/nfs/direct.c | 949 +++++++++++++++++++++++----------------- fs/nfs/file.c | 27 + fs/nfs/idmap.c | 2 fs/nfs/inode.c | 187 ++++++-- fs/nfs/iostat.h | 163 +++++++ fs/nfs/mount_clnt.c | 17 + fs/nfs/nfs2xdr.c | 4 fs/nfs/nfs3acl.c | 16 + fs/nfs/nfs3proc.c | 246 ++++++---- fs/nfs/nfs3xdr.c | 6 fs/nfs/nfs4proc.c | 94 +--- fs/nfs/nfs4xdr.c | 2 fs/nfs/proc.c | 156 ++++--- fs/nfs/read.c | 102 ++++ fs/nfs/write.c | 145 +++++- fs/nfsd/nfs4callback.c | 2 fs/nfsd/nfs4state.c | 13 - fs/proc/base.c | 39 ++ include/linux/fs.h | 7 include/linux/lockd/lockd.h | 13 - include/linux/lockd/xdr.h | 1 include/linux/nfs_fs.h | 80 --- include/linux/nfs_fs_i.h | 8 include/linux/nfs_fs_sb.h | 6 include/linux/nfs_xdr.h | 5 include/linux/sunrpc/clnt.h | 19 - include/linux/sunrpc/metrics.h | 77 +++ include/linux/sunrpc/sched.h | 9 include/linux/sunrpc/xprt.h | 13 + net/sunrpc/auth.c | 16 + net/sunrpc/auth_gss/auth_gss.c | 2 net/sunrpc/clnt.c | 14 - net/sunrpc/pmap_clnt.c | 41 +- net/sunrpc/rpc_pipe.c | 9 net/sunrpc/sched.c | 7 net/sunrpc/stats.c | 115 +++++ net/sunrpc/xprt.c | 26 + net/sunrpc/xprtsock.c | 49 ++ 52 files changed, 2179 insertions(+), 1204 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 3eaf6e7..0fc0ee2 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, /* * The server lockd has called us back to tell us the lock was granted */ -u32 -nlmclnt_grant(struct nlm_lock *lock) +u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) { + const struct file_lock *fl = &lock->fl; + const struct nfs_fh *fh = 
&lock->fh; struct nlm_wait *block; u32 res = nlm_lck_denied; @@ -122,14 +123,28 @@ nlmclnt_grant(struct nlm_lock *lock) * Warning: must not use cookie to match it! */ list_for_each_entry(block, &nlm_blocked, b_list) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) { - /* Alright, we found a lock. Set the return status - * and wake up the caller - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - res = nlm_granted; - } + struct file_lock *fl_blocked = block->b_lock; + + if (fl_blocked->fl_start != fl->fl_start) + continue; + if (fl_blocked->fl_end != fl->fl_end) + continue; + /* + * Careful! The NLM server will return the 32-bit "pid" that + * we put on the wire: in this case the lockowner "pid". + */ + if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) + continue; + if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) + continue; + if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0) + continue; + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; } return res; } @@ -140,34 +155,6 @@ nlmclnt_grant(struct nlm_lock *lock) */ /* - * Mark the locks for reclaiming. - * FIXME: In 2.5 we don't want to iterate through any global file_lock_list. - * Maintain NLM lock reclaiming lists in the nlm_host instead. - */ -static -void nlmclnt_mark_reclaim(struct nlm_host *host) -{ - struct file_lock *fl; - struct inode *inode; - struct list_head *tmp; - - list_for_each(tmp, &file_lock_list) { - fl = list_entry(tmp, struct file_lock, fl_link); - - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) - continue; - if (fl->fl_u.nfs_fl.owner == NULL) - continue; - if (fl->fl_u.nfs_fl.owner->host != host) - continue; - if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) - continue; - fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM; - } -} - -/* * Someone has sent us an SM_NOTIFY. 
Ensure we bind to the new port number, * that we mark locks for reclaiming, and that we bump the pseudo NSM state. */ @@ -179,7 +166,12 @@ void nlmclnt_prepare_reclaim(struct nlm_ host->h_state++; host->h_nextrebind = 0; nlm_rebind_host(host); - nlmclnt_mark_reclaim(host); + + /* + * Mark the locks for reclaiming. + */ + list_splice_init(&host->h_granted, &host->h_reclaim); + dprintk("NLM: reclaiming locks for host %s", host->h_name); } @@ -208,9 +200,7 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; - struct list_head *tmp; - struct file_lock *fl; - struct inode *inode; + struct file_lock *fl, *next; daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); @@ -222,20 +212,9 @@ reclaimer(void *ptr) /* First, reclaim all locks that have been marked. */ restart: - list_for_each(tmp, &file_lock_list) { - fl = list_entry(tmp, struct file_lock, fl_link); - - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) - continue; - if (fl->fl_u.nfs_fl.owner == NULL) - continue; - if (fl->fl_u.nfs_fl.owner->host != host) - continue; - if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM)) - continue; + list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { + list_del(&fl->fl_u.nfs_fl.list); - fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM; nlmclnt_reclaim(host, fl); if (signalled()) break; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 220058d..7e89655 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -132,8 +132,10 @@ static void nlmclnt_setlockargs(struct n memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); lock->caller = system_utsname.nodename; lock->oh.data = req->a_owner; - lock->oh.len = sprintf(req->a_owner, "%d@%s", - current->pid, system_utsname.nodename); + lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", + (unsigned int)fl->fl_u.nfs_fl.owner->pid, + system_utsname.nodename); + lock->svid = 
fl->fl_u.nfs_fl.owner->pid; locks_copy_lock(&lock->fl, fl); } @@ -146,48 +148,6 @@ static void nlmclnt_release_lockargs(str } /* - * Initialize arguments for GRANTED call. The nlm_rqst structure - * has been cleared already. - */ -int -nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) -{ - locks_copy_lock(&call->a_args.lock.fl, &lock->fl); - memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); - call->a_args.lock.caller = system_utsname.nodename; - call->a_args.lock.oh.len = lock->oh.len; - - /* set default data area */ - call->a_args.lock.oh.data = call->a_owner; - - if (lock->oh.len > NLMCLNT_OHSIZE) { - void *data = kmalloc(lock->oh.len, GFP_KERNEL); - if (!data) { - nlmclnt_freegrantargs(call); - return 0; - } - call->a_args.lock.oh.data = (u8 *) data; - } - - memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len); - return 1; -} - -void -nlmclnt_freegrantargs(struct nlm_rqst *call) -{ - struct file_lock *fl = &call->a_args.lock.fl; - /* - * Check whether we allocated memory for the owner. - */ - if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { - kfree(call->a_args.lock.oh.data); - } - if (fl->fl_ops && fl->fl_ops->fl_release_private) - fl->fl_ops->fl_release_private(fl); -} - -/* * This is the main entry point for the NLM client. 
*/ int @@ -487,11 +447,16 @@ static void nlmclnt_locks_copy_lock(stru { memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl)); nlm_get_lockowner(new->fl_u.nfs_fl.owner); + if (!list_empty(&fl->fl_u.nfs_fl.list)) + list_add(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.list); + else + INIT_LIST_HEAD(&new->fl_u.nfs_fl.list); } static void nlmclnt_locks_release_private(struct file_lock *fl) { nlm_put_lockowner(fl->fl_u.nfs_fl.owner); + list_del(&fl->fl_u.nfs_fl.list); fl->fl_ops = NULL; } @@ -504,8 +469,8 @@ static void nlmclnt_locks_init_private(s { BUG_ON(fl->fl_ops != NULL); fl->fl_u.nfs_fl.state = 0; - fl->fl_u.nfs_fl.flags = 0; fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list); fl->fl_ops = &nlmclnt_lock_ops; } @@ -591,8 +556,8 @@ nlmclnt_lock(struct nlm_rqst *req, struc if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; - fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; fl->fl_flags |= FL_SLEEP; + list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); @@ -658,9 +623,11 @@ nlmclnt_unlock(struct nlm_rqst *req, str struct nlm_res *resp = &req->a_res; int status; - /* Clean the GRANTED flag now so the lock doesn't get - * reclaimed while we're stuck in the unlock call. */ - fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED; + /* + * Remove from the granted list now so the lock doesn't get + * reclaimed while we're stuck in the unlock call. 
+ */ + list_del_init(&fl->fl_u.nfs_fl.list); if (req->a_flags & RPC_TASK_ASYNC) { status = nlmclnt_async_call(req, NLMPROC_UNLOCK, @@ -786,6 +753,7 @@ static void nlmclnt_cancel_callback(stru switch (req->a_res.status) { case NLM_LCK_GRANTED: case NLM_LCK_DENIED_GRACE_PERIOD: + case NLM_LCK_DENIED: /* Everything's good */ break; case NLM_LCK_DENIED_NOLOCKS: diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 82f7a0b..f456f8e 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct socka nlm_hosts[hash] = host; INIT_LIST_HEAD(&host->h_lockowners); spin_lock_init(&host->h_lock); + INIT_LIST_HEAD(&host->h_granted); + INIT_LIST_HEAD(&host->h_reclaim); if (++nrhosts > NLM_HOST_MAX) next_gc = 0; @@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host) xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ - clnt = rpc_create_client(xprt, host->h_name, &nlm_program, + clnt = rpc_new_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); if (IS_ERR(clnt)) goto forgetit; clnt->cl_autobind = 1; /* turn on pmap queries */ + clnt->cl_softrtry = 1; /* All queries are soft */ host->h_rpcclnt = clnt; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 0edc03e..5dd52b7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 struct rpc_clnt *clnt; int status; struct nsm_args args; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = res, + }; clnt = nsm_create(); if (IS_ERR(clnt)) { @@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); - status = rpc_call(clnt, proc, &args, res, 0); + msg.rpc_proc = &clnt->cl_procinfo[proc]; + status = rpc_call_sync(clnt, &msg, 0); if (status < 0) printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n", status); @@ -214,12 +219,16 @@ static struct rpc_procinfo nsm_procedure .p_encode = (kxdrproc_t) 
xdr_encode_mon, .p_decode = (kxdrproc_t) xdr_decode_stat_res, .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, + .p_statidx = SM_MON, + .p_name = "MONITOR", }, [SM_UNMON] = { .p_proc = SM_UNMON, .p_encode = (kxdrproc_t) xdr_encode_unmon, .p_decode = (kxdrproc_t) xdr_decode_stat, .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, + .p_statidx = SM_UNMON, + .p_name = "UNMONITOR", }, }; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4063095..b10f913 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rq resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9cfced6..da854e6 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -39,9 +39,12 @@ #define nlm_deadlock nlm_lck_denied #endif +static void nlmsvc_release_block(struct nlm_block *block); static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); +static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); +static void nlmsvc_freegrantargs(struct nlm_rqst *call); static const struct rpc_call_ops nlmsvc_grant_ops; /* @@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *bl struct nlm_block **bp, *b; dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); + kref_get(&block->b_count); if (block->b_queued) nlmsvc_remove_block(block); bp = &nlm_blocked; @@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *bl if (b == block) { *bp = block->b_next; block->b_queued = 0; + nlmsvc_release_block(block); return 1; } } @@ -123,6 +128,7 @@ nlmsvc_lookup_block(struct nlm_file *fil *head = block->b_next; block->b_queued = 0; } + kref_get(&block->b_count); return block; } } @@ -155,6 +161,8 @@ 
nlmsvc_find_block(struct nlm_cookie *coo break; } + if (block != NULL) + kref_get(&block->b_count); return block; } @@ -177,8 +185,7 @@ nlmsvc_create_block(struct svc_rqst *rqs struct nlm_rqst *call; /* Create host handle for callback */ - host = nlmclnt_lookup_host(&rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers); + host = nlmsvc_lookup_host(rqstp); if (host == NULL) return NULL; @@ -188,11 +195,13 @@ nlmsvc_create_block(struct svc_rqst *rqs memset(block, 0, sizeof(*block)); locks_init_lock(&block->b_call.a_args.lock.fl); locks_init_lock(&block->b_call.a_res.lock.fl); + kref_init(&block->b_count); - if (!nlmclnt_setgrantargs(&block->b_call, lock)) + if (!nlmsvc_setgrantargs(&block->b_call, lock)) goto failed_free; /* Set notifier function for VFS, and init args */ + block->b_call.a_args.lock.fl.fl_flags |= FL_SLEEP; block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; block->b_call.a_args.cookie = *cookie; /* see above */ @@ -227,28 +236,24 @@ failed: * It is the caller's responsibility to check whether the file * can be closed hereafter. */ -static int -nlmsvc_delete_block(struct nlm_block *block, int unlock) +static int nlmsvc_unlink_block(struct nlm_block *block) { - struct file_lock *fl = &block->b_call.a_args.lock.fl; - struct nlm_file *file = block->b_file; - struct nlm_block **bp; - int status = 0; - - dprintk("lockd: deleting block %p...\n", block); + int status; + dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ + status = posix_unblock_lock(block->b_file->f_file, &block->b_call.a_args.lock.fl); nlmsvc_remove_block(block); - if (unlock) - status = posix_unblock_lock(file->f_file, fl); + return status; +} - /* If the block is in the middle of a GRANT callback, - * don't kill it yet. 
*/ - if (block->b_incall) { - nlmsvc_insert_block(block, NLM_NEVER); - block->b_done = 1; - return status; - } +static void nlmsvc_free_block(struct kref *kref) +{ + struct nlm_block *block = container_of(kref, struct nlm_block, b_count); + struct nlm_file *file = block->b_file; + struct nlm_block **bp; + + dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { @@ -260,9 +265,14 @@ nlmsvc_delete_block(struct nlm_block *bl if (block->b_host) nlm_release_host(block->b_host); - nlmclnt_freegrantargs(&block->b_call); + nlmsvc_freegrantargs(&block->b_call); kfree(block); - return status; +} + +static void nlmsvc_release_block(struct nlm_block *block) +{ + if (block != NULL) + kref_put(&block->b_count, nlmsvc_free_block); } /* @@ -282,7 +292,7 @@ nlmsvc_traverse_blocks(struct nlm_host * block->b_host->h_inuse = 1; else if (action == NLM_ACT_UNLOCK) { if (host == NULL || host == block->b_host) - nlmsvc_delete_block(block, 1); + nlmsvc_unlink_block(block); } } up(&file->f_sema); @@ -290,6 +300,49 @@ nlmsvc_traverse_blocks(struct nlm_host * } /* + * Initialize arguments for GRANTED call. The nlm_rqst structure + * has been cleared already. 
+ */ +static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) +{ + locks_copy_lock(&call->a_args.lock.fl, &lock->fl); + memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); + call->a_args.lock.caller = system_utsname.nodename; + call->a_args.lock.oh.len = lock->oh.len; + + /* set default data area */ + call->a_args.lock.oh.data = call->a_owner; + call->a_args.lock.svid = lock->fl.fl_pid; + + if (lock->oh.len > NLMCLNT_OHSIZE) { + void *data = kmalloc(lock->oh.len, GFP_KERNEL); + if (!data) { + nlmsvc_freegrantargs(call); + return 0; + } + call->a_args.lock.oh.data = (u8 *) data; + } + + memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len); + return 1; +} + +static void nlmsvc_freegrantargs(struct nlm_rqst *call) +{ + struct file_lock *fl = &call->a_args.lock.fl; + /* + * Check whether we allocated memory for the owner. + */ + if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { + kfree(call->a_args.lock.oh.data); + } + if (fl->fl_ops && fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); + if (fl->fl_lmops && fl->fl_lmops->fl_release_private) + fl->fl_lmops->fl_release_private(fl); +} + +/* * Attempt to establish a lock, and if it can't be granted, block it * if required. 
*/ @@ -297,9 +350,9 @@ u32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) { - struct file_lock *conflock; - struct nlm_block *block; + struct nlm_block *block, *newblock = NULL; int error; + u32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, @@ -310,69 +363,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru wait); - /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock, 0); - - lock->fl.fl_flags |= FL_LOCKD; - + lock->fl.fl_flags &= ~FL_SLEEP; again: /* Lock file against concurrent access */ down(&file->f_sema); + /* Get existing block (in case client is busy-waiting) */ + block = nlmsvc_lookup_block(file, lock, 0); + if (block == NULL) { + if (newblock != NULL) + lock = &newblock->b_call.a_args.lock; + } else + lock = &block->b_call.a_args.lock; - if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) { - error = posix_lock_file(file->f_file, &lock->fl); + error = posix_lock_file(file->f_file, &lock->fl); + lock->fl.fl_flags &= ~FL_SLEEP; - if (block) - nlmsvc_delete_block(block, 0); - up(&file->f_sema); + dprintk("lockd: posix_lock_file returned %d\n", error); - dprintk("lockd: posix_lock_file returned %d\n", -error); - switch(-error) { + switch(error) { case 0: - return nlm_granted; - case EDEADLK: - return nlm_deadlock; - case EAGAIN: - return nlm_lck_denied; + ret = nlm_granted; + goto out; + case -EAGAIN: + break; + case -EDEADLK: + ret = nlm_deadlock; + goto out; default: /* includes ENOLCK */ - return nlm_lck_denied_nolocks; - } + ret = nlm_lck_denied_nolocks; + goto out; } - if (!wait) { - up(&file->f_sema); - return nlm_lck_denied; - } + ret = nlm_lck_denied; + if (!wait) + goto out; - if (posix_locks_deadlock(&lock->fl, conflock)) { - up(&file->f_sema); - return nlm_deadlock; - } + ret = nlm_lck_blocked; + if (block != NULL) + goto out; /* If we don't have a block, 
create and initialize it. Then * retry because we may have slept in kmalloc. */ /* We have to release f_sema as nlmsvc_create_block may try to * to claim it while doing host garbage collection */ - if (block == NULL) { + if (newblock == NULL) { up(&file->f_sema); dprintk("lockd: blocking on this lock (allocating).\n"); - if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie))) + if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) return nlm_lck_denied_nolocks; goto again; } /* Append to list of blocked */ - nlmsvc_insert_block(block, NLM_NEVER); - - if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) { - /* Now add block to block list of the conflicting lock - if we haven't done so. */ - dprintk("lockd: blocking on this lock.\n"); - posix_block_lock(conflock, &block->b_call.a_args.lock.fl); - } - + nlmsvc_insert_block(newblock, NLM_NEVER); +out: up(&file->f_sema); - return nlm_lck_blocked; + nlmsvc_release_block(newblock); + nlmsvc_release_block(block); + dprintk("lockd: nlmsvc_lock returned %u\n", ret); + return ret; } /* @@ -382,8 +431,6 @@ u32 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, struct nlm_lock *conflock) { - struct file_lock *fl; - dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, file->f_file->f_dentry->d_inode->i_ino, @@ -391,13 +438,14 @@ nlmsvc_testlock(struct nlm_file *file, s (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) { + if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) { dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", - fl->fl_type, (long long)fl->fl_start, - (long long)fl->fl_end); + conflock->fl.fl_type, + (long long)conflock->fl.fl_start, + (long long)conflock->fl.fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->oh.len = 0; /* don't return OH info */ - conflock->fl = *fl; + conflock->svid = conflock->fl.fl_pid; return nlm_lck_denied; } @@ 
-453,8 +501,10 @@ nlmsvc_cancel_blocked(struct nlm_file *f (long long)lock->fl.fl_end); down(&file->f_sema); - if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) - status = nlmsvc_delete_block(block, 1); + if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) { + status = nlmsvc_unlink_block(block); + nlmsvc_release_block(block); + } up(&file->f_sema); return status ? nlm_lck_denied : nlm_granted; } @@ -509,7 +559,6 @@ nlmsvc_grant_blocked(struct nlm_block *b { struct nlm_file *file = block->b_file; struct nlm_lock *lock = &block->b_call.a_args.lock; - struct file_lock *conflock; int error; dprintk("lockd: grant blocked lock %p\n", block); @@ -518,7 +567,7 @@ nlmsvc_grant_blocked(struct nlm_block *b down(&file->f_sema); /* Unlink block request from list */ - nlmsvc_remove_block(block); + nlmsvc_unlink_block(block); /* If b_granted is true this means we've been here before. * Just retry the grant callback, possibly refreshing the RPC @@ -529,41 +578,38 @@ nlmsvc_grant_blocked(struct nlm_block *b } /* Try the lock operation again */ - if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) { - /* Bummer, we blocked again */ + lock->fl.fl_flags |= FL_SLEEP; + error = posix_lock_file(file->f_file, &lock->fl); + lock->fl.fl_flags &= ~FL_SLEEP; + + switch (error) { + case 0: + break; + case -EAGAIN: dprintk("lockd: lock still blocked\n"); nlmsvc_insert_block(block, NLM_NEVER); - posix_block_lock(conflock, &lock->fl); - up(&file->f_sema); - return; - } - - /* Alright, no conflicting lock. Now lock it for real. If the - * following yields an error, this is most probably due to low - * memory. Retry the lock in a few seconds. - */ - if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) { + goto out_unlock; + default: printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", -error, __FUNCTION__); nlmsvc_insert_block(block, 10 * HZ); - up(&file->f_sema); - return; + goto out_unlock; } callback: /* Lock was granted by VFS. 
*/ dprintk("lockd: GRANTing blocked lock.\n"); block->b_granted = 1; - block->b_incall = 1; /* Schedule next grant callback in 30 seconds */ nlmsvc_insert_block(block, 30 * HZ); /* Call the client */ - nlm_get_host(block->b_call.a_host); + kref_get(&block->b_count); if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops) < 0) - nlm_release_host(block->b_call.a_host); + nlmsvc_release_block(block); +out_unlock: up(&file->f_sema); } @@ -578,20 +624,10 @@ callback: static void nlmsvc_grant_callback(struct rpc_task *task, void *data) { struct nlm_rqst *call = data; - struct nlm_block *block; + struct nlm_block *block = container_of(call, struct nlm_block, b_call); unsigned long timeout; - struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); dprintk("lockd: GRANT_MSG RPC callback\n"); - dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n", - nlmdbg_cookie2a(&call->a_args.cookie), - NIPQUAD(peer_addr->sin_addr.s_addr)); - if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) { - dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n", - nlmdbg_cookie2a(&call->a_args.cookie), - NIPQUAD(peer_addr->sin_addr.s_addr)); - return; - } /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm @@ -608,9 +644,7 @@ static void nlmsvc_grant_callback(struct } nlmsvc_insert_block(block, timeout); svc_wake_up(block->b_daemon); - block->b_incall = 0; - - nlm_release_host(call->a_host); + nlmsvc_release_block(block); } static const struct rpc_call_ops nlmsvc_grant_ops = { @@ -636,23 +670,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqst file->f_count++; down(&file->f_sema); - block = nlmsvc_find_block(cookie, &rqstp->rq_addr); if (block) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); - up(&file->f_sema); } else { /* Lock is now held by client, or has been rejected. 
* In both cases, the block should be removed. */ - up(&file->f_sema); - if (status == NLM_LCK_GRANTED) - nlmsvc_delete_block(block, 0); - else - nlmsvc_delete_block(block, 1); + nlmsvc_unlink_block(block); } } + up(&file->f_sema); nlm_release_file(file); + nlmsvc_release_block(block); } /* @@ -675,10 +705,12 @@ nlmsvc_retry_blocked(void) break; dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", block, block->b_when, block->b_done); + kref_get(&block->b_count); if (block->b_done) - nlmsvc_delete_block(block, 0); + nlmsvc_unlink_block(block); else nlmsvc_grant_blocked(block); + nlmsvc_release_block(block); } if ((block = nlm_blocked) && block->b_when != NLM_NEVER) diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3bc437e..35681d9 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqs resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 62f4a38..601e5b3 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host again: file->f_locks = 0; for (fl = inode->i_flock; fl; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_LOCKD)) + if (fl->fl_lmops != &nlmsvc_lock_operations) continue; /* update current lock count */ diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 200fbda..766ce06 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock || !(p = nlm_decode_fh(p, &lock->fh)) || !(p = nlm_decode_oh(p, &lock->oh))) return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = ntohl(*p++); + fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything 
else */ start = ntohl(*p++); @@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock else len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); - *p++ = htonl(fl->fl_pid); + *p++ = htonl(lock->svid); *p++ = htonl(start); *p++ = htonl(len); @@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_re struct file_lock *fl = &resp->lock.fl; *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(resp->lock.svid); /* Encode owner handle. */ if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) @@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->fl.fl_pid = ~(u32) 0; + lock->svid = ~(u32) 0; + lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, @@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *r memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); excl = ntohl(*p++); - fl->fl_pid = ntohl(*p++); + resp->lock.svid = ntohl(*p++); + fl->fl_pid = (pid_t)resp->lock.svid; if (!(p = nlm_decode_oh(p, &resp->lock.oh))) return -EIO; @@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ - .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \ + .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ + .p_statidx = NLMPROC_##proc, \ + .p_name = #proc, \ } static struct rpc_procinfo nlm_procedures[] = { diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index fdcf105..36eb175 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock || !(p = nlm4_decode_fh(p, &lock->fh)) || !(p = nlm4_decode_oh(p, &lock->oh))) return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = ntohl(*p++); + fl->fl_pid = (pid_t)lock->svid; 
fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX)) return NULL; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(lock->svid); start = loff_t_to_s64(fl->fl_start); if (fl->fl_end == OFFSET_MAX) @@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_r struct file_lock *fl = &resp->lock.fl; *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; - *p++ = htonl(fl->fl_pid); + *p++ = htonl(resp->lock.svid); /* Encode owner handle. */ if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) @@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_r p = xdr_encode_hyper(p, start); p = xdr_encode_hyper(p, len); - dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n", - resp->status, fl->fl_pid, fl->fl_type, + dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n", + resp->status, (int)resp->lock.svid, fl->fl_type, (long long)fl->fl_start, (long long)fl->fl_end); } @@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->fl.fl_pid = ~(u32) 0; + lock->svid = ~(u32) 0; + lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, @@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst * memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); excl = ntohl(*p++); - fl->fl_pid = ntohl(*p++); + resp->lock.svid = ntohl(*p++); + fl->fl_pid = (pid_t)resp->lock.svid; if (!(p = nlm4_decode_oh(p, &resp->lock.oh))) return -EIO; @@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ - .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \ + .p_bufsiz = 
MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ + .p_statidx = NLMPROC_##proc, \ + .p_name = #proc, \ } static struct rpc_procinfo nlm4_procedures[] = { diff --git a/fs/locks.c b/fs/locks.c index 909eab8..75fe32b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -139,10 +139,7 @@ int lease_break_time = 45; #define for_each_lock(inode, lockp) \ for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) -LIST_HEAD(file_lock_list); - -EXPORT_SYMBOL(file_lock_list); - +static LIST_HEAD(file_lock_list); static LIST_HEAD(blocked_list); static kmem_cache_t *filelock_cache; @@ -221,15 +218,23 @@ static void init_once(void *foo, kmem_ca /* * Initialize a new lock from an existing file_lock structure. */ -void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +static void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; - new->fl_file = fl->fl_file; + new->fl_file = NULL; new->fl_flags = fl->fl_flags; new->fl_type = fl->fl_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; + new->fl_ops = NULL; + new->fl_lmops = NULL; +} + +void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + __locks_copy_lock(new, fl); + new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; if (fl->fl_ops && fl->fl_ops->fl_copy_lock) @@ -654,8 +659,9 @@ static int locks_block_on_timeout(struct return result; } -struct file_lock * -posix_test_lock(struct file *filp, struct file_lock *fl) +int +posix_test_lock(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) { struct file_lock *cfl; @@ -666,9 +672,13 @@ posix_test_lock(struct file *filp, struc if (posix_locks_conflict(cfl, fl)) break; } + if (cfl) { + __locks_copy_lock(conflock, cfl); + unlock_kernel(); + return 1; + } unlock_kernel(); - - return (cfl); + return 0; } EXPORT_SYMBOL(posix_test_lock); @@ -1544,7 +1554,7 @@ asmlinkage long sys_flock(unsigned int f */ int 
fcntl_getlk(struct file *filp, struct flock __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock flock; int error; @@ -1568,7 +1578,7 @@ int fcntl_getlk(struct file *filp, struc else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL); } flock.l_type = F_UNLCK; @@ -1698,7 +1708,7 @@ out: */ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) { - struct file_lock *fl, file_lock; + struct file_lock *fl, cfl, file_lock; struct flock64 flock; int error; @@ -1722,7 +1732,7 @@ int fcntl_getlk64(struct file *filp, str else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { - fl = posix_test_lock(filp, &file_lock); + fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL); } flock.l_type = F_UNLCK; @@ -1936,21 +1946,6 @@ void locks_remove_flock(struct file *fil } /** - * posix_block_lock - blocks waiting for a file lock - * @blocker: the lock which is blocking - * @waiter: the lock which conflicts and has to wait - * - * lockd needs to block waiting for locks. 
- */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); -} - -EXPORT_SYMBOL(posix_block_lock); - -/** * posix_unblock_lock - stop waiting for a file lock * @filp: how the file was opened * @waiter: the lock which was waiting diff --git a/fs/namespace.c b/fs/namespace.c index 058a448..70bba4b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -399,6 +399,44 @@ struct seq_operations mounts_op = { .show = show_vfsmnt }; +static int show_vfsstat(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = v; + int err = 0; + + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + + /* file system type */ + seq_puts(m, "with fstype "); + mangle(m, mnt->mnt_sb->s_type->name); + + /* optional statistics */ + if (mnt->mnt_sb->s_op->show_stats) { + seq_putc(m, ' '); + err = mnt->mnt_sb->s_op->show_stats(m, mnt); + } + + seq_putc(m, '\n'); + return err; +} + +struct seq_operations mountstats_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_vfsstat, +}; + /** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 7c33b9a..05c38cf 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) { - uint32_t *savep; + uint32_t *savep = NULL; unsigned status = res->status; if (unlikely(status != 0)) @@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rq struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { - struct callback_op *op; - unsigned int op_nr; + struct callback_op *op = &callback_ops[0]; + 
unsigned int op_nr = OP_CB_ILLEGAL; unsigned int status = 0; long maxlen; unsigned res; dprintk("%s: start\n", __FUNCTION__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status != 0)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } else - op = &callback_ops[op_nr]; + if (likely(status == 0)) { + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + op = &callback_ops[op_nr]; + break; + default: + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = htonl(NFS4ERR_OP_ILLEGAL); + } + } maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { @@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct decode_compound_hdr_arg(&xdr_in, &hdr_arg); hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; + hdr_res.nops = NULL; encode_compound_hdr_res(&xdr_out, &hdr_res); for (;;) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a1554be..609185a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct { int res = 0; + dfprintk(VFS, "NFS: opendir(%s/%ld)\n", + inode->i_sb->s_id, inode->i_ino); + lock_kernel(); /* Call generic open code in order to cache credentials */ if (!res) @@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descr unsigned long timestamp; int error; - dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", + __FUNCTION__, (long long)desc->entry->cookie, + page->index); again: timestamp = jiffies; @@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t status; while((status = dir_decode(desc)) == 0) { - 
dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", + __FUNCTION__, (unsigned long long)entry->cookie); if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { @@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t schedule(); } } - dfprintk(VFS, "NFS: find_dirent() returns %d\n", status); return status; } @@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descri if (status) break; - dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", + (unsigned long long)entry->cookie, desc->current_index); if (desc->file->f_pos == desc->current_index) { *desc->dir_cookie = entry->cookie; @@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descri schedule(); } } - dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); return status; } @@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descrip struct page *page; int status; - dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); + dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", + __FUNCTION__, desc->page_index, + (long long) *desc->dir_cookie); page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); @@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descrip if (status < 0) dir_page_release(desc); out: - dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); return status; read_error: page_cache_release(page); @@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir /* Always search-by-index from the beginning of the cache */ if (*desc->dir_cookie == 0) { - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + dfprintk(DIRCACHE, "NFS: 
readdir_search_pagecache() searching for offset %Ld\n", + (long long)desc->file->f_pos); desc->page_index = 0; desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; desc->current_index = 0; } else - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); for (;;) { res = find_dirent_page(desc); @@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir schedule(); } } - dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); + + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res); return res; } @@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descripto int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", + (unsigned long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descripto dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); return res; } @@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descrip struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { @@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descrip desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; out: - 
dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __FUNCTION__, status); return status; out_release: dir_page_release(desc); @@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp struct nfs_fattr fattr; long res; + dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp } } unlock_kernel(); - if (res < 0) - return res; - return 0; + if (res > 0) + res = 0; + dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); + return res; } loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) @@ -599,6 +622,10 @@ out: */ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { + dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + return 0; } @@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct } if (is_bad_inode(inode)) { - dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); goto out_bad; } @@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct out_valid: unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -771,6 +803,9 @@ 
out_zap_parent: d_drop(dentry); unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; } @@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup( struct dentry *res = NULL; int error; + dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) goto no_open; @@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, int error; int open_flags = 0; - dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dent struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), 
%s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); @@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d sillycounter++; sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - dfprintk(VFS, "trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* @@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ @@ -1679,13 +1721,15 @@ force_lookup: res = PTR_ERR(cred); unlock_kernel(); out: + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask, NULL); unlock_kernel(); - return res; + goto out; } /* diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 04ab2fc..193ef4c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -7,11 +7,11 @@ * * There are important applications whose performance or correctness * depends on uncached access to file data. Database clusters - * (multiple copies of the same instance running on separate hosts) + * (multiple copies of the same instance running on separate hosts) * implement their own cache coherency protocol that subsumes file - * system cache protocols. Applications that process datasets - * considerably larger than the client's memory do not always benefit - * from a local cache. 
A streaming video server, for instance, has no + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no * need to cache the contents of a file. * * When an application requests uncached I/O, all read and write requests @@ -34,6 +34,7 @@ * 08 Jun 2003 Port to 2.5 APIs --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel * 15 Sep 2004 Parallel async reads --cel + * 04 May 2005 support O_DIRECT with aio --cel * */ @@ -54,8 +55,9 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_VFS -#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) static kmem_cache_t *nfs_direct_cachep; @@ -64,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ - struct list_head list; /* nfs_read_data structs */ - wait_queue_head_t wait; /* wait for i/o completion */ + + /* I/O parameters */ + struct list_head list, /* nfs_read/write_data structs */ + rewrite_list; /* saved nfs_write_data structs */ + struct nfs_open_context *ctx; /* file open context info */ + struct kiocb * iocb; /* controlling i/o request */ + struct inode * inode; /* target file of i/o */ + unsigned long user_addr; /* location of user's buffer */ + size_t user_count; /* total bytes to move */ + loff_t pos; /* starting offset in file */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ - atomic_t complete, /* i/os we're waiting for */ - count, /* bytes actually processed */ + + /* completion state */ + spinlock_t lock; /* protect completion state */ + int outstanding; /* i/os we're waiting for */ + ssize_t count, /* bytes actually processed */ error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ + + /* commit state */ + struct nfs_write_data * commit_data; /* special write_data for commits */ + int 
flags; +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + struct nfs_writeverf verf; /* unstable write verifier */ }; +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); /** - * nfs_get_user_pages - find and set up pages underlying user's buffer - * rw: direction (read or write) - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * @pages: returned array of page struct pointers underlying user's buffer - */ -static inline int -nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, - struct page ***pages) + * nfs_direct_IO - NFS address space operation for direct I/O + * @rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * @pos: offset in file to begin the operation + * @nr_segs: size of iovec array + * + * The presence of this routine in the address space ops vector means + * the NFS client supports direct I/O. However, we shunt off direct + * read and write requests before the VFS gets them, so this method + * should never be called. 
+ */ +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +{ + struct dentry *dentry = iocb->ki_filp->f_dentry; + + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + dentry->d_name.name, (long long) pos, nr_segs); + + return -EINVAL; +} + +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +{ + int i; + for (i = 0; i < npages; i++) { + struct page *page = pages[i]; + if (do_dirty && !PageCompound(page)) + set_page_dirty_lock(page); + page_cache_release(page); + } + kfree(pages); +} + +static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; unsigned long page_count; size_t array_size; - /* set an arbitrary limit to prevent type overflow */ - /* XXX: this can probably be as large as INT_MAX */ - if (size > MAX_DIRECTIO_SIZE) { - *pages = NULL; - return -EFBIG; - } - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count -= user_addr >> PAGE_SHIFT; @@ -107,66 +149,117 @@ nfs_get_user_pages(int rw, unsigned long page_count, (rw == READ), 0, *pages, NULL); up_read(&current->mm->mmap_sem); + if (result != page_count) { + /* + * If we got fewer pages than expected from + * get_user_pages(), the user buffer runs off the + * end of a mapping; return EFAULT. 
+ */ + if (result >= 0) { + nfs_free_user_pages(*pages, result, 0); + result = -EFAULT; + } else + kfree(*pages); + *pages = NULL; + } } return result; } -/** - * nfs_free_user_pages - tear down page struct array - * @pages: array of page struct pointers underlying target buffer - * @npages: number of pages in the array - * @do_dirty: dirty the pages as we release them - */ -static void -nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { - int i; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) - set_page_dirty_lock(page); - page_cache_release(page); - } - kfree(pages); + struct nfs_direct_req *dreq; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_completion(&dreq->completion); + INIT_LIST_HEAD(&dreq->list); + INIT_LIST_HEAD(&dreq->rewrite_list); + dreq->iocb = NULL; + dreq->ctx = NULL; + spin_lock_init(&dreq->lock); + dreq->outstanding = 0; + dreq->count = 0; + dreq->error = 0; + dreq->flags = 0; + + return dreq; } -/** - * nfs_direct_req_release - release nfs_direct_req structure for direct read - * @kref: kref object embedded in an nfs_direct_req structure - * - */ static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + + if (dreq->ctx != NULL) + put_nfs_open_context(dreq->ctx); kmem_cache_free(nfs_direct_cachep, dreq); } -/** - * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read - * @count: count of bytes for the read request - * @rsize: local rsize setting +/* + * Collects and returns the final error value/byte-count. 
+ */ +static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) +{ + ssize_t result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + + result = wait_for_completion_interruptible(&dreq->completion); + + if (!result) + result = dreq->error; + if (!result) + result = dreq->count; + +out: + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. + */ +static void nfs_direct_complete(struct nfs_direct_req *dreq) +{ + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + + if (dreq->iocb) { + long res = (long) dreq->error; + if (!res) + res = (long) dreq->count; + aio_complete(dreq->iocb, res, 0); + } + complete_all(&dreq->completion); + + kref_put(&dreq->kref, nfs_direct_req_release); +} + +/* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. 
*/ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = nfs_direct_req_alloc(); if (!dreq) return NULL; - kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); - INIT_LIST_HEAD(&dreq->list); - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); - list = &dreq->list; for(;;) { struct nfs_read_data *data = nfs_readdata_alloc(rpages); @@ -186,72 +279,70 @@ static struct nfs_direct_req *nfs_direct list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - reads++; + dreq->outstanding++; if (nbytes <= rsize) break; nbytes -= rsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, reads); return dreq; } -/** - * nfs_direct_read_result - handle a read reply for a direct read request - * @data: address of NFS READ operation control block - * @status: status of this NFS READ operation - * - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). 
- */ -static void nfs_direct_read_result(struct nfs_read_data *data, int status) +static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (likely(status >= 0)) - atomic_add(data->res.count, &dreq->count); + if (nfs_readpage_result(task, data) != 0) + return; + + spin_lock(&dreq->lock); + + if (likely(task->tk_status >= 0)) + dreq->count += data->res.count; else - atomic_set(&dreq->error, status); + dreq->error = task->tk_status; - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - wake_up(&dreq->wait); - kref_put(&dreq->kref, nfs_direct_req_release); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; } + + spin_unlock(&dreq->lock); + nfs_direct_complete(dreq); } -/** - * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read - * @dreq: address of nfs_direct_req struct for this request - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * +static const struct rpc_call_ops nfs_read_direct_ops = { + .rpc_call_done = nfs_direct_read_result, + .rpc_release = nfs_readdata_release, +}; + +/* * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, - struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; + size_t rsize = 
NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; - unsigned int rsize = NFS_SERVER(inode)->rsize; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; - unsigned int bytes; + size_t bytes; bytes = rsize; if (count < rsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_read_data, pages); list_del_init(&data->pages); @@ -259,7 +350,7 @@ static void nfs_direct_read_schedule(str data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -267,77 +358,38 @@ static void nfs_direct_read_schedule(str data->res.eof = 0; data->res.count = bytes; + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_read_direct_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->complete = nfs_direct_read_result; lock_kernel(); rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } -/** - * nfs_direct_read_wait - wait for I/O completion for direct reads - * @dreq: request on which we are to wait - * @intr: whether or not this wait can be interrupted - * - * Collects and returns the final error value/byte-count. 
- */ -static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = 0; - - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - -/** - * nfs_direct_read_seg - Read in one iov segment. Generate separate - * read RPCs for each "rsize" bytes. - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * @nr_pages: number of pages in the array - * - */ -static ssize_t nfs_direct_read_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -345,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struc if (!dreq) return -ENOMEM; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, - file_offset); - result = 
nfs_direct_read_wait(dreq, clnt->cl_intr); + nfs_direct_read_schedule(dreq); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; } -/** - * nfs_direct_read - For each iov segment, map the user's buffer - * then generate read RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - * We've already pushed out any non-direct writes so that this read - * will see them when we read from the server. - */ -static ssize_t -nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(READ, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } +static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) +{ + list_splice_init(&dreq->rewrite_list, &dreq->list); + while (!list_empty(&dreq->list)) { + struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_release(data); + } +} - result = nfs_direct_read_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) +{ + struct list_head *pos; - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - tot_bytes += result; - file_offset += result; - if (result < size) - break; + list_splice_init(&dreq->rewrite_list, &dreq->list); + 
list_for_each(pos, &dreq->list) + dreq->outstanding++; + dreq->count = 0; + + nfs_direct_write_schedule(dreq, FLUSH_STABLE); +} + +static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + if (unlikely(task->tk_status < 0)) { + dreq->error = task->tk_status; + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + dprintk("NFS: %5u commit verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } - return tot_bytes; + dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); + nfs_direct_write_complete(dreq, data->inode); } -/** - * nfs_direct_write_seg - Write out one iov segment. Generate separate - * write RPCs for each "wsize" bytes, then commit. - * @inode: target inode - * @ctx: target file open context - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * nr_pages: size of pages array - */ -static ssize_t nfs_direct_write_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - int nr_pages) -{ - const unsigned int wsize = NFS_SERVER(inode)->wsize; - size_t request; - int curpage, need_commit; - ssize_t result, tot_bytes; - struct nfs_writeverf first_verf; - struct nfs_write_data *wdata; +static const struct rpc_call_ops nfs_commit_direct_ops = { + .rpc_call_done = nfs_direct_commit_result, + .rpc_release = nfs_commit_release, +}; - wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); - if (!wdata) - return -ENOMEM; +static void nfs_direct_commit_schedule(struct 
nfs_direct_req *dreq) +{ + struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task = &data->task; - wdata->inode = inode; - wdata->cred = ctx->cred; - wdata->args.fh = NFS_FH(inode); - wdata->args.context = ctx; - wdata->args.stable = NFS_UNSTABLE; - if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) - wdata->args.stable = NFS_FILE_SYNC; - wdata->res.fattr = &wdata->fattr; - wdata->res.verf = &wdata->verf; + data->inode = dreq->inode; + data->cred = dreq->ctx->cred; - nfs_begin_data_update(inode); -retry: - need_commit = 0; - tot_bytes = 0; - curpage = 0; - request = count; - wdata->args.pgbase = user_addr & ~PAGE_MASK; - wdata->args.offset = file_offset; - do { - wdata->args.count = request; - if (wdata->args.count > wsize) - wdata->args.count = wsize; - wdata->args.pages = &pages[curpage]; - - dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - wdata->args.count, (long long) wdata->args.offset, - user_addr + tot_bytes, wdata->args.pgbase, curpage); + data->args.fh = NFS_FH(data->inode); + data->args.offset = dreq->pos; + data->args.count = dreq->user_count; + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; - lock_kernel(); - result = NFS_PROTO(inode)->write(wdata); - unlock_kernel(); + rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, + &nfs_commit_direct_ops, data); + NFS_PROTO(data->inode)->commit_setup(data, 0); - if (result <= 0) { - if (tot_bytes > 0) - break; - goto out; - } + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long)data->inode; + /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ + dreq->commit_data = NULL; + + dprintk("NFS: %5u initiated commit call\n", task->tk_pid); - if (tot_bytes == 0) - memcpy(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier)); - if (wdata->verf.committed != NFS_FILE_SYNC) { - need_commit = 1; - if (memcmp(&first_verf.verifier, 
&wdata->verf.verifier, - sizeof(first_verf.verifier))) - goto sync_retry; - } + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); +} - tot_bytes += result; +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + int flags = dreq->flags; - /* in case of a short write: stop now, let the app recover */ - if (result < wdata->args.count) + dreq->flags = 0; + switch (flags) { + case NFS_ODIRECT_DO_COMMIT: + nfs_direct_commit_schedule(dreq); break; + case NFS_ODIRECT_RESCHED_WRITES: + nfs_direct_write_reschedule(dreq); + break; + default: + nfs_end_data_update(inode); + if (dreq->commit_data != NULL) + nfs_commit_free(dreq->commit_data); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); + } +} - wdata->args.offset += result; - wdata->args.pgbase += result; - curpage += wdata->args.pgbase >> PAGE_SHIFT; - wdata->args.pgbase &= ~PAGE_MASK; - request -= result; - } while (request != 0); +static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = nfs_commit_alloc(0); + if (dreq->commit_data != NULL) + dreq->commit_data->req = (struct nfs_page *) dreq; +} +#else +static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = NULL; +} - /* - * Commit data written so far, even in the event of an error - */ - if (need_commit) { - wdata->args.count = tot_bytes; - wdata->args.offset = file_offset; +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + nfs_end_data_update(inode); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); +} +#endif - lock_kernel(); - result = NFS_PROTO(inode)->commit(wdata); - unlock_kernel(); +static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = 
&dreq->list; + for(;;) { + struct nfs_write_data *data = nfs_writedata_alloc(wpages); - if (result < 0 || memcmp(&first_verf.verifier, - &wdata->verf.verifier, - sizeof(first_verf.verifier)) != 0) - goto sync_retry; + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + dreq->outstanding++; + if (nbytes <= wsize) + break; + nbytes -= wsize; } - result = tot_bytes; -out: - nfs_end_data_update(inode); - nfs_writedata_free(wdata); - return result; + nfs_alloc_commit_data(dreq); -sync_retry: - wdata->args.stable = NFS_FILE_SYNC; - goto retry; + kref_get(&dreq->kref); + return dreq; } -/** - * nfs_direct_write - For each iov segment, map the user's buffer - * then generate write and commit RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - * Upon return, generic_file_direct_IO invalidates any cached pages - * that non-direct readers might access, so they will pick up these - * writes immediately. 
- */ -static ssize_t nfs_direct_write(struct inode *inode, - struct nfs_open_context *ctx, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } +static void nfs_direct_write_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = task->tk_status; + + if (nfs_writeback_done(task, data) != 0) + return; + + spin_lock(&dreq->lock); - result = nfs_direct_write_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - nfs_free_user_pages(pages, page_count, 0); + if (likely(status >= 0)) + dreq->count += data->res.count; + else + dreq->error = task->tk_status; - if (result <= 0) { - if (tot_bytes > 0) + if (data->res.verf->committed != NFS_FILE_SYNC) { + switch (dreq->flags) { + case 0: + memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); + dreq->flags = NFS_ODIRECT_DO_COMMIT; break; - return result; + case NFS_ODIRECT_DO_COMMIT: + if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { + dprintk("NFS: %5u write verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } } - tot_bytes += result; - file_offset += result; - if (result < size) - break; } - return tot_bytes; + /* In case we have to resend */ + data->args.stable = NFS_FILE_SYNC; + + spin_unlock(&dreq->lock); } -/** - * nfs_direct_IO - NFS address space operation for direct I/O - * rw: direction (read or write) - * @iocb: target I/O control block - * @iov: array of 
vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. */ -ssize_t -nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) +static void nfs_direct_write_release(void *calldata) { - ssize_t result = -EINVAL; - struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - /* - * No support for async yet - */ + spin_lock(&dreq->lock); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; + } + spin_unlock(&dreq->lock); + + nfs_direct_write_complete(dreq, data->inode); +} + +static const struct rpc_call_ops nfs_write_direct_ops = { + .rpc_call_done = nfs_direct_write_result, + .rpc_release = nfs_direct_write_release, +}; + +/* + * For each nfs_write_data struct that was allocated on the list, dispatch + * an NFS WRITE operation + */ +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) +{ + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int curpage, pgbase; + + curpage = 0; + pgbase = dreq->user_addr & ~PAGE_MASK; + do { + struct nfs_write_data *data; + size_t bytes; + + bytes = wsize; + if (count < wsize) + bytes = count; + + BUG_ON(list_empty(list)); + data = list_entry(list->next, struct nfs_write_data, pages); + list_move_tail(&data->pages, &dreq->rewrite_list); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = 
ctx; + data->args.offset = pos; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.count = bytes; + data->res.verf = &data->verf; + + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, sync); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; + + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + + dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + pos += bytes; + pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; + + count -= bytes; + } while (count != 0); + BUG_ON(!list_empty(list)); +} + +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; + size_t wsize = NFS_SERVER(inode)->wsize; + int sync = 0; + + dreq = nfs_direct_write_alloc(count, wsize); + if (!dreq) + return -ENOMEM; + if (dreq->commit_data == NULL || count < wsize) + sync = FLUSH_STABLE; + + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; + dreq->pages = pages; + dreq->npages = nr_pages; + dreq->inode = inode; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) - return result; + dreq->iocb = iocb; + + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); + + nfs_begin_data_update(inode); + + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_write_schedule(dreq, sync); + result = nfs_direct_wait(dreq); + rpc_clnt_sigunmask(clnt, &oldset); 
- ctx = (struct nfs_open_context *)file->private_data; - switch (rw) { - case READ: - dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_read(inode, ctx, iov, - file_offset, nr_segs); - break; - case WRITE: - dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_write(inode, ctx, iov, - file_offset, nr_segs); - break; - default: - break; - } return result; } @@ -630,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @buf: user's buffer into which to read data - * count: number of bytes to read - * pos: byte offset in file where reading starts + * @count: number of bytes to read + * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if * the request starts before the end of the file. For that check * to work, we must generate a GETATTR before each direct read, and * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. So our preference is simply + * READ where the file size could change. Our preference is simply * to do all reads the application wants, and the server will take * care of managing the end of file boundary. - * + * * This function also eliminates unnecessarily updating the file's * atime locally, as the NFS server sets the file's atime, and this * client must read the updated atime from the server back into its * cache. 
*/ -ssize_t -nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = buf, - .iov_len = count, - }; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - if (!is_sync_kiocb(iocb)) - goto out; if (count < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_WRITE, buf, count)) goto out; retval = 0; if (!count) @@ -682,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, if (retval) goto out; - retval = nfs_direct_read(inode, ctx, &iov, pos, 1); + retval = nfs_get_user_pages(READ, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, + pages, page_count); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; @@ -694,8 +810,8 @@ out: * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @buf: user's buffer from which to write data - * count: number of bytes to write - * pos: byte offset in file where writing starts + * @count: number of bytes to write + * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -715,28 +831,19 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in 
the NFS protocol. */ -ssize_t -nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = (char __user *)buf, - }; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - retval = -EINVAL; - if (!is_sync_kiocb(iocb)) - goto out; - retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; @@ -747,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb retval = 0; if (!count) goto out; - iov.iov_len = count, retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_READ, buf, count)) goto out; retval = nfs_sync_mapping(mapping); if (retval) goto out; - retval = nfs_direct_write(inode, ctx, &iov, pos, 1); + retval = nfs_get_user_pages(WRITE, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_write(iocb, (unsigned long) buf, count, + pos, pages, page_count); + + /* + * XXX: nfs_end_data_update() already ensures this file's + * cached data is subsequently invalidated. Do we really + * need to call invalidate_inode_pages2() again here? + * + * For aio writes, this invalidation will almost certainly + * occur before the writes complete. Kind of racey. 
+ */ if (mapping->nrpages) invalidate_inode_pages2(mapping); + if (retval > 0) iocb->ki_pos = pos + retval; @@ -767,6 +890,10 @@ out: return retval; } +/** + * nfs_init_directcache - create a slab cache for nfs_direct_req structures + * + */ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", @@ -779,6 +906,10 @@ int nfs_init_directcache(void) return 0; } +/** + * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures + * + */ void nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7a79fbe..cc2fe48 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -32,6 +32,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -102,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, st /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -199,6 +198,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char _ (unsigned long) count, (unsigned long) pos); result = 
nfs_revalidate_file(inode, iocb->ki_filp); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dent dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -365,6 +367,7 @@ nfs_file_write(struct kiocb *iocb, const if (!count) goto out; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, buf, count, pos); out: return result; @@ -376,15 +379,14 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { - struct file_lock *cfl; + struct file_lock cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); /* Try local locking first */ - cfl = posix_test_lock(filp, fl); - if (cfl != NULL) { - locks_copy_lock(fl, cfl); + if (posix_test_lock(filp, fl, &cfl)) { + locks_copy_lock(fl, &cfl); goto out; } @@ -504,9 +506,7 @@ static int nfs_lock(struct file *filp, i inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && @@ -531,9 +531,6 @@ static int nfs_flock(struct file *filp, inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags); - if (!inode) - return -EINVAL; - /* * No BSD flocks over NFS allowed. 
* Note: we could try to fake a POSIX lock request here by diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 821edd3..32c95a0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -132,6 +132,8 @@ nfs_idmap_delete(struct nfs4_client *clp if (!idmap) return; + dput(idmap->idmap_dentry); + idmap->idmap_dentry = NULL; rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95..4705b9d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -78,6 +81,7 @@ static struct super_operations nfs_sops .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -290,6 +294,15 @@ nfs_sb_init(struct super_block *sb, rpc_ } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + + /* mount time stamp, in jiffies */ + server->mount_time = jiffies; + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -396,6 +409,9 @@ nfs_create_client(struct nfs_server *ser nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; 
+ /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -579,7 +595,7 @@ nfs_statfs(struct super_block *sb, struc } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -588,28 +604,26 @@ static int nfs_show_options(struct seq_f } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); char buf[12]; char *proto; - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -629,8 +643,96 @@ static int nfs_show_options(struct seq_f proto = buf; } seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct 
nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += 
stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); + return 0; } @@ -660,6 +762,8 @@ static void nfs_zap_caches_locked(struct struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -847,6 +951,8 @@ nfs_setattr(struct dentry *dentry, struc struct nfs_fattr fattr; int error; + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; @@ -902,6 +1008,7 @@ void nfs_setattr_update_inode(struct ino spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } @@ -1185,6 +1292,7 @@ int nfs_attribute_timeout(struct inode * */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1201,6 +1309,7 @@ void nfs_revalidate_mapping(struct inode struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); if (S_ISREG(inode->i_mode)) nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); @@ -1299,39 +1408,37 @@ static int nfs_check_inode_attributes(st if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; + /* Has the inode gone and changed behind our back? 
*/ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + return -EIO; + } + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { + if (nfsi->change_attr == fattr->change_attr) + goto out; nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + if (cur_size != new_isize && nfsi->npages == 0) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? 
*/ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) @@ -1343,6 +1450,7 @@ static int nfs_check_inode_attributes(st if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; +out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1481,15 +1589,6 @@ static int nfs_update_inode(struct inode nfsi->cache_change_attribute = jiffies; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } - /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; @@ -1519,8 +1618,20 @@ static int nfs_update_inode(struct inode inode->i_blksize = fattr->du.nfs2.blocksize; } + if ((fattr->valid & NFS_ATTR_FATTR_V4)) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; + } else + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { @@ -1738,6 +1849,7 @@ static struct super_operations nfs4_sops .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = 
nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -1800,6 +1912,9 @@ static int nfs4_fill_super(struct super_ nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); @@ -2009,6 +2124,7 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -2024,10 +2140,11 @@ static void nfs4_kill_super(struct super if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ destroy_nfsv4_state(server); + rpciod_down(); + kfree(server->hostname); kfree(server); } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 0000000..7a74951 --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,163 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the health of + * the NFS client and the health of each NFS mount point. Generally these + * are not for detailed problem diagnosis, but simply to indicate that there + * is a problem. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. 
SERVER - the number of payload bytes read from or written to the + * server by the NFS client via an NFS READ or WRITE request. + * + * 2. NORMAL - the number of bytes read or written by applications via + * the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files opened + * with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out of the NFS + * client. Comparing the number of bytes requested by an application with the + * number of bytes the client requests from the server can provide an + * indication of client efficiency (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods are in + * use. DIRECT by itself shows whether there is any O_DIRECT traffic. + * NORMAL + DIRECT shows how much data is going through the system call + * interface. A large amount of SERVER traffic without much NORMAL or + * DIRECT traffic shows that applications are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client activity + * without enabling NFS trace debugging. The counters show the rate at + * which VFS requests are made, and how often the client invalidates its + * data and attribute caches. This allows system administrators to monitor + * such things as how close-to-open is working, and answer questions such + * as "why are there so many GETATTR requests on the wire?" 
+ * + * They also count anomalous events such as short reads and writes, silly + * renames due to close-after-delete, and operations that change the size + * of a file (such operations can often be the source of data corruption + * if applications aren't using file locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + NFSIOS_DELAY, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include +#include + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + nfs_inc_server_stats(NFS_SERVER(inode), stat); +} + +static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + nfs_add_server_stats(NFS_SERVER(inode), stat, addend); +} + +static inline struct
nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + free_percpu(stats); +} + +#endif +#endif diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index db99b8f..c44d87b 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, struct mnt_fhstatus result = { .fh = fh }; + struct rpc_message msg = { + .rpc_argp = path, + .rpc_resp = &result, + }; char hostname[32]; int status; - int call; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); @@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); - call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; - status = rpc_call(mnt_clnt, call, path, &result, 0); + if (version == NFS_MNT3_VERSION) + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); return status < 0? status : (result.status? 
-EACCES : 0); } @@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedure .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; @@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedur .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fc0560..8cdc792 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFSPROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs_procedures[] = { PROC(GETATTR, fhandle, attrstat, 1), diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6a5bbc0..3328787 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struc struct nfs3_getaclres res = { .fattr = &fattr, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &res, + }; struct posix_acl *acl; int status, count; @@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struc return NULL; dprintk("NFS call getacl\n"); - status = rpc_call(server->client_acl, ACLPROC3_GETACL, - &args, &res, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); /* pages may have been allocated at the xdr layer. 
*/ @@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inod .acl_access = acl, .pages = pages, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status, count; status = -EOPNOTSUPP; @@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inod dprintk("NFS call setacl\n"); nfs_begin_data_update(inode); - status = rpc_call(server->client_acl, ACLPROC3_SETACL, - &args, &fattr, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ed67567..cf186f0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -19,6 +19,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PROC extern struct rpc_procinfo nfs3_procedures[]; @@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, return res; } -static inline int -nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - }; - return nfs3_rpc_wrapper(clnt, &msg, flags); -} - -#define rpc_call(clnt, proc, argp, resp, flags) \ - nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) -#define rpc_call_sync(clnt, msg, flags) \ - nfs3_rpc_wrapper(clnt, msg, flags) +#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags) static int -nfs3_async_handle_jukebox(struct rpc_task *task) +nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { if (task->tk_status != -EJUKEBOX) return 0; + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); @@ -72,14 +61,21 @@ static int do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + 
.rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("%s: call fsinfo\n", __FUNCTION__); nfs_fattr_init(info->fattr); - status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; @@ -107,12 +103,16 @@ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, .fh = NFS_FH(inode), .sattr = sattr, }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, stru .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(&dir_attr); 
nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); - if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_argp = fhandle; + msg.rpc_resp = fattr; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } dprintk("NFS reply lookup: %d\n", status); if (status >= 0) status = nfs_refresh_inode(dir, &dir_attr); @@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = entry->cred + .rpc_cred = entry->cred, }; int mode = entry->mask; int status; @@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct ino .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status; dprintk("NFS call readlink\n"); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, stru .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, stru again: nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_post_op_update_inode(dir, &dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation 
semantics, * try again with simple 'guarded' mode. */ @@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir struct rpc_message *msg = &task->tk_msg; struct nfs_fattr *dir_attr; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, dir->d_inode)) return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; @@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, .fromattr = &old_dir_attr, .toattr = &new_dir_attr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); nfs_fattr_init(&old_dir_attr); nfs_fattr_init(&new_dir_attr); - status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_post_op_update_inode(old_dir, &old_dir_attr); nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); @@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, stru .dir_attr = &dir_attr, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call link %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: %d\n", status); @@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, str .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; if (path->len > NFS3_MAXPATHLEN) @@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, str dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 
nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; @@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struc .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int mode = sattr->ia_mode; int status; @@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struc nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struc .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; int status; dprintk("NFS call rmdir %s\n", name->name); nfs_fattr_init(&dir_attr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struc .fh = &fh, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struc nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -707,11 +759,16 @@ static int nfs3_proc_statfs(struct nfs_server 
*server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT], + .rpc_argp = fhandle, + .rpc_resp = stat, + }; int status; dprintk("NFS call fsstat\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); return status; } @@ -720,11 +777,16 @@ static int nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } @@ -733,40 +795,34 @@ static int nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call pathconf\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply pathconf: %d\n", status); return status; } extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs3_read_done(struct rpc_task *task, void *calldata) +static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task, 
calldata); + return 0; } -static const struct rpc_call_ops nfs3_read_ops = { - .rpc_call_done = nfs3_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs3_proc_read_setup(struct nfs_read_data *data) +static void nfs3_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &data->args, @@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_dat .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_write_done(struct rpc_task *task, void *calldata) +static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_write_ops = { - .rpc_call_done = nfs3_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs3_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int stable; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &data->args, @@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_d .rpc_cred = data->cred, }; + data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { - if (!NFS_I(inode)->ncommit) - stable = 
NFS_FILE_SYNC; - else - stable = NFS_DATA_SYNC; - } else - stable = NFS_UNSTABLE; - data->args.stable = stable; - - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + data->args.stable = NFS_FILE_SYNC; + if (NFS_I(data->inode)->ncommit) + data->args.stable = NFS_DATA_SYNC; + } /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_commit_done(struct rpc_task *task, void *calldata) +static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_commit_ops = { - .rpc_call_done = nfs3_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &data->args, @@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_ .rpc_cred = data->cred, }; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int @@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_done = nfs3_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b6c0b50..2d8701a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFS3PROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs3_procedures[] = { @@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_proc .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, .p_timer = 1, + .p_name = "GETACL", }, [ACLPROC3_SETACL] = { .p_proc = ACLPR