diff -u --recursive --new-file linux-2.4.9/fs/lockd/clntlock.c linux-2.4.9-tune/fs/lockd/clntlock.c --- linux-2.4.9/fs/lockd/clntlock.c Tue Nov 7 19:18:57 2000 +++ linux-2.4.9-tune/fs/lockd/clntlock.c Fri Aug 17 12:29:20 2001 @@ -138,7 +138,7 @@ void nlmclnt_recovery(struct nlm_host *host, u32 newstate) { - if (!host->h_reclaiming++) { + if (host->h_reclaiming++) { if (host->h_nsmstate == newstate) return; printk(KERN_WARNING @@ -153,7 +153,7 @@ host->h_nsmstate = newstate; host->h_state++; nlm_get_host(host); - kernel_thread(reclaimer, host, 0); + kernel_thread(reclaimer, host, CLONE_SIGNAL); } } @@ -167,12 +167,24 @@ /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ lock_kernel(); + + daemonize(); + strcpy(current->comm, "lockd-reclaim"); + + /* Block signals */ + spin_lock_irq(&current->sigmask_lock); + siginitsetinv(&current->blocked, sigmask(SIGKILL)); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + lockd_up(); + exit_files(current); + /* First, reclaim all locks that have been granted previously. */ restart: tmp = file_lock_list.next; - while (tmp != &file_lock_list) { + while (tmp != &file_lock_list && ! signalled()) { struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); struct inode *inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && diff -u --recursive --new-file linux-2.4.9/fs/lockd/clntproc.c linux-2.4.9-tune/fs/lockd/clntproc.c --- linux-2.4.9/fs/lockd/clntproc.c Mon Dec 4 03:01:01 2000 +++ linux-2.4.9-tune/fs/lockd/clntproc.c Fri Aug 17 12:28:26 2001 @@ -142,7 +142,8 @@ /* If we're cleaning up locks because the process is exiting, * perform the RPC call asynchronously. 
*/ - if ((cmd == F_SETLK || cmd == F_SETLKW) + if ((cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) && fl->fl_type == F_UNLCK && (current->flags & PF_EXITING)) { sigfillset(&current->blocked); /* Mask all signals */ @@ -166,13 +167,15 @@ /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); - if (cmd == F_GETLK) { + if (cmd == F_GETLK || cmd == F_GETLK64) { status = nlmclnt_test(call, fl); - } else if ((cmd == F_SETLK || cmd == F_SETLKW) + } else if ((cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) && fl->fl_type == F_UNLCK) { status = nlmclnt_unlock(call, fl); - } else if (cmd == F_SETLK || cmd == F_SETLKW) { - call->a_args.block = (cmd == F_SETLKW)? 1 : 0; + } else if (cmd == F_SETLK || cmd == F_SETLKW + || cmd == F_SETLK64 || cmd == F_SETLKW64) { + call->a_args.block = (cmd == F_SETLKW) || cmd == F_SETLKW64? 1 : 0; status = nlmclnt_lock(call, fl); } else { status = -EINVAL; diff -u --recursive --new-file linux-2.4.9/fs/lockd/host.c linux-2.4.9-tune/fs/lockd/host.c --- linux-2.4.9/fs/lockd/host.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/lockd/host.c Fri Aug 17 12:29:20 2001 @@ -51,7 +51,8 @@ struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *rqstp) { - return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, 0, 0); + return nlm_lookup_host(rqstp->rq_client, &rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers); } /* @@ -97,7 +98,9 @@ nlm_gc_hosts(); for (hp = &nlm_hosts[hash]; (host = *hp); hp = &host->h_next) { - if (host->h_version != version || host->h_proto != proto) + if (proto && host->h_proto != proto) + continue; + if (version && host->h_version != version) continue; if (nlm_match_host(host, clnt, sin)) { diff -u --recursive --new-file linux-2.4.9/fs/lockd/mon.c linux-2.4.9-tune/fs/lockd/mon.c --- linux-2.4.9/fs/lockd/mon.c Tue Jun 12 04:15:27 2001 +++ linux-2.4.9-tune/fs/lockd/mon.c Fri Aug 17 12:29:20 2001 @@ -43,7 +43,7 @@ args.addr = host->h_addr.sin_addr.s_addr; args.prog = 
NLM_PROGRAM; - args.vers = 1; + args.vers = host->h_version; args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); diff -u --recursive --new-file linux-2.4.9/fs/lockd/svc.c linux-2.4.9-tune/fs/lockd/svc.c --- linux-2.4.9/fs/lockd/svc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/lockd/svc.c Fri Aug 17 12:29:20 2001 @@ -65,6 +65,7 @@ struct svc_serv *serv = rqstp->rq_server; int err = 0; unsigned long grace_period_expire; + struct k_sigaction sa; /* Lock module and set up kernel thread */ MOD_INC_USE_COUNT; @@ -76,16 +77,20 @@ nlmsvc_pid = current->pid; up(&lockd_start); - exit_mm(current); - current->session = 1; - current->pgrp = 1; + daemonize(); sprintf(current->comm, "lockd"); /* Process request with signals blocked. */ spin_lock_irq(&current->sigmask_lock); - siginitsetinv(&current->blocked, sigmask(SIGKILL)); + siginitsetinv(&current->blocked, sigmask(SIGKILL)|sigmask(SIGCHLD)); recalc_sigpending(current); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock_irq(&current->sigmask_lock); + + /* Install a handler so SIGCLD is ignored */ + sa.sa.sa_handler = SIG_IGN; + sa.sa.sa_flags = 0; + siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); + do_sigaction(SIGCHLD, &sa, NULL); /* kick rpciod */ rpciod_up(); @@ -105,10 +110,10 @@ nlmsvc_grace_period = 10 * HZ; #else if (nlm_grace_period) { - nlmsvc_grace_period += (1 + nlm_grace_period / nlm_timeout) + nlmsvc_grace_period = (1 + nlm_grace_period / nlm_timeout) * nlm_timeout * HZ; } else { - nlmsvc_grace_period += 5 * nlm_timeout * HZ; + nlmsvc_grace_period = 5 * nlm_timeout * HZ; } #endif @@ -135,10 +140,12 @@ * (Theoretically, there shouldn't even be blocked locks * during grace period). 
*/ - if (!nlmsvc_grace_period) { + if (!grace_period_expire) { timeout = nlmsvc_retry_blocked(); - } else if (time_before(nlmsvc_grace_period, jiffies)) + } else if (time_before(grace_period_expire, jiffies)) { + grace_period_expire = 0; nlmsvc_grace_period = 0; + } /* * Find a socket with data available and call its @@ -339,7 +346,7 @@ * Define NLM program and procedures */ static struct svc_version nlmsvc_version1 = { - 1, 16, nlmsvc_procedures, NULL + 1, 17, nlmsvc_procedures, NULL }; static struct svc_version nlmsvc_version3 = { 3, 24, nlmsvc_procedures, NULL diff -u --recursive --new-file linux-2.4.9/fs/lockd/svc4proc.c linux-2.4.9-tune/fs/lockd/svc4proc.c --- linux-2.4.9/fs/lockd/svc4proc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/lockd/svc4proc.c Fri Aug 17 12:29:20 2001 @@ -420,6 +420,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -435,8 +437,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. 
*/ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = htonl(argp->addr); + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -444,7 +446,7 @@ /* If we run on an NFS server, delete all locks held by the client */ if (nlmsvc_ops != NULL) { struct svc_client *clnt; - saddr.sin_addr.s_addr = argp->addr; + saddr.sin_addr.s_addr = argp->addr; if ((clnt = nlmsvc_ops->exp_getclient(&saddr)) != NULL && (host = nlm_lookup_host(clnt, &saddr, 0, 0)) != NULL) { nlmsvc_free_host_resources(host); @@ -549,7 +551,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -558,6 +561,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.9/fs/lockd/svcproc.c linux-2.4.9-tune/fs/lockd/svcproc.c --- linux-2.4.9/fs/lockd/svcproc.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/lockd/svcproc.c Fri Aug 17 12:29:20 2001 @@ -445,6 +445,8 @@ void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; + int vers = rqstp->rq_vers; + int prot = rqstp->rq_prot; struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); @@ -460,8 +462,8 @@ /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. 
*/ - saddr.sin_addr.s_addr = argp->addr; - if ((host = nlm_lookup_host(NULL, &saddr, IPPROTO_UDP, 1)) != NULL) { + saddr.sin_addr.s_addr = htonl(argp->addr); + if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { nlmclnt_recovery(host, argp->state); nlm_release_host(host); } @@ -574,7 +576,8 @@ PROC(cancel_res, cancelres, norep, res, void), PROC(unlock_res, unlockres, norep, res, void), PROC(granted_res, grantedres, norep, res, void), - PROC(none, void, void, void, void), + /* statd callback */ + PROC(sm_notify, reboot, void, reboot, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), PROC(none, void, void, void, void), @@ -583,6 +586,4 @@ PROC(nm_lock, lockargs, res, args, res), PROC(free_all, notify, void, args, void), - /* statd callback */ - PROC(sm_notify, reboot, void, reboot, void), }; diff -u --recursive --new-file linux-2.4.9/fs/locks.c linux-2.4.9-tune/fs/locks.c --- linux-2.4.9/fs/locks.c Thu Jul 5 00:39:28 2001 +++ linux-2.4.9-tune/fs/locks.c Fri Aug 17 12:28:26 2001 @@ -257,7 +257,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l) { - loff_t start; + off_t start, end; switch (l->l_whence) { case 0: /*SEEK_SET*/ @@ -270,17 +270,16 @@ start = filp->f_dentry->d_inode->i_size; break; default: - return (0); + return -EINVAL; } if (((start += l->l_start) < 0) || (l->l_len < 0)) - return (0); - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return (0); - if (fl->fl_end > OFFT_OFFSET_MAX) - return 0; + return -EINVAL; + end = start + l->l_len - 1; + if (l->l_len > 0 && end < 0) + return -EOVERFLOW; fl->fl_start = start; /* we record the absolute position */ + fl->fl_end = end; if (l->l_len == 0) fl->fl_end = OFFSET_MAX; @@ -292,7 +291,7 @@ fl->fl_insert = NULL; fl->fl_remove = NULL; - return (assign_type(fl, l->l_type) == 0); + return assign_type(fl, l->l_type); } #if BITS_PER_LONG == 32 @@ -312,14 +311,14 @@ start = filp->f_dentry->d_inode->i_size; 
break; default: - return (0); + return -EINVAL; } if (((start += l->l_start) < 0) || (l->l_len < 0)) - return (0); + return -EINVAL; fl->fl_end = start + l->l_len - 1; if (l->l_len > 0 && fl->fl_end < 0) - return (0); + return -EOVERFLOW; fl->fl_start = start; /* we record the absolute position */ if (l->l_len == 0) fl->fl_end = OFFSET_MAX; @@ -339,10 +338,10 @@ fl->fl_type = l->l_type; break; default: - return (0); + return -EINVAL; } - return (1); + return (0); } #endif @@ -1353,8 +1352,8 @@ if (!filp) goto out; - error = -EINVAL; - if (!flock_to_posix_lock(filp, &file_lock, &flock)) + error = flock_to_posix_lock(filp, &file_lock, &flock); + if (error) goto out_putf; if (filp->f_op && filp->f_op->lock) { @@ -1443,8 +1442,8 @@ } } - error = -EINVAL; - if (!flock_to_posix_lock(filp, file_lock, &flock)) + error = flock_to_posix_lock(filp, file_lock, &flock); + if (error) goto out_putf; error = -EBADF; @@ -1518,8 +1517,8 @@ if (!filp) goto out; - error = -EINVAL; - if (!flock64_to_posix_lock(filp, &file_lock, &flock)) + error = flock64_to_posix_lock(filp, &file_lock, &flock); + if (error) goto out_putf; if (filp->f_op && filp->f_op->lock) { @@ -1596,8 +1595,8 @@ } } - error = -EINVAL; - if (!flock64_to_posix_lock(filp, file_lock, &flock)) + error = flock64_to_posix_lock(filp, file_lock, &flock); + if (error) goto out_putf; error = -EBADF; diff -u --recursive --new-file linux-2.4.9/fs/namei.c linux-2.4.9-tune/fs/namei.c --- linux-2.4.9/fs/namei.c Fri Jul 20 21:39:56 2001 +++ linux-2.4.9-tune/fs/namei.c Fri Aug 17 12:25:23 2001 @@ -418,7 +418,7 @@ while (*name=='/') name++; if (!*name) - goto return_base; + goto return_reval; inode = nd->dentry->d_inode; if (current->link_count) @@ -537,7 +537,7 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: - goto return_base; + goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); @@ -588,6 +588,10 @@ nd->last_type = LAST_DOT; else if (this.len == 2 && 
this.name[1] == '.') nd->last_type = LAST_DOTDOT; +return_reval: + dentry = nd->dentry; + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + dentry->d_op->d_revalidate(dentry, nd->flags); return_base: return 0; out_dput: diff -u --recursive --new-file linux-2.4.9/fs/nfs/dir.c linux-2.4.9-tune/fs/nfs/dir.c --- linux-2.4.9/fs/nfs/dir.c Tue Jun 12 20:15:08 2001 +++ linux-2.4.9-tune/fs/nfs/dir.c Fri Aug 17 12:26:01 2001 @@ -34,6 +34,7 @@ #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ +static loff_t nfs_dir_llseek(struct file *, loff_t, int); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *); static int nfs_create(struct inode *, struct dentry *, int); @@ -47,6 +48,7 @@ struct inode *, struct dentry *); struct file_operations nfs_dir_operations = { + llseek: nfs_dir_llseek, read: generic_read_dir, readdir: nfs_readdir, open: nfs_open, @@ -68,6 +70,25 @@ setattr: nfs_notify_change, }; +static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + if (offset == 0) { + offset = file->f_pos; + break; + } + case 2: + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_reada = 0; + file->f_version = ++event; + } + return (offset <= 0) ? 
0 : offset; +} + typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); typedef struct { struct file *file; @@ -108,13 +129,17 @@ error = NFS_PROTO(inode)->readdir(inode, cred, desc->entry->cookie, buffer, NFS_SERVER(inode)->dtsize, desc->plus); /* We requested READDIRPLUS, but the server doesn't grok it */ - if (desc->plus && error == -ENOTSUPP) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; - desc->plus = 0; - goto again; - } - if (error < 0) + if (error < 0) { + if (error == -ENOTSUPP && desc->plus) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + desc->plus = 0; + goto again; + } goto error; + } + if (desc->plus) + NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; SetPageUptodate(page); kunmap(page); /* Ensure consistent page alignment of the data. @@ -195,7 +220,6 @@ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); - desc->plus = NFS_USE_READDIRPLUS(inode); page = read_cache_page(&inode->i_data, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -247,6 +271,29 @@ return res; } +static struct { + unsigned int nfstype; + unsigned int dtype; +} nfs_type2dtype[] = { + { NFNON, DT_UNKNOWN }, + { NFREG, DT_REG }, + { NFDIR, DT_DIR }, + { NFBLK, DT_BLK }, + { NFCHR, DT_CHR }, + { NFLNK, DT_LNK }, + { NFSOCK, DT_SOCK }, + { NFBAD, DT_UNKNOWN }, + { NFFIFO, DT_FIFO }, +}; + +static inline +unsigned nfs_type_to_d_type(unsigned type) +{ + if (type < 8) + return nfs_type2dtype[type].dtype; + return DT_UNKNOWN; +} + /* * Once we've found the start of the dirent within a page: fill 'er up... 
*/ @@ -263,11 +310,17 @@ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); for(;;) { + unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + + /* Use readdirplus info */ + if (desc->plus && (entry->fattr.valid & NFS_ATTR_FATTR)) + d_type = nfs_type_to_d_type(entry->fattr.type); + res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, DT_UNKNOWN); + entry->prev_cookie, fileid, d_type); if (res < 0) break; file->f_pos = desc->target = entry->cookie; @@ -372,6 +425,7 @@ desc->target = filp->f_pos; desc->entry = &my_entry; desc->decode = NFS_PROTO(inode)->decode_dirent; + desc->plus = NFS_USE_READDIRPLUS(inode); while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -401,55 +455,74 @@ return 0; } +static inline +void nfs_renew_verifier(struct inode *dir, struct dentry *dentry) +{ + dentry->d_verifier = NFS_CACHE_MTIME(dir); +} + +/* + * A check for whether or not the parent directory has changed. + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. + */ +static inline +int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +{ + if (IS_ROOT(dentry)) + return 1; + nfs_revalidate_inode(NFS_SERVER(dir),dir); + return dentry->d_verifier == NFS_CACHE_MTIME(dir); +} + /* * Whenever an NFS operation succeeds, we know that the dentry * is valid, so we update the revalidation timestamp. 
*/ -static inline void nfs_renew_times(struct dentry * dentry) +static inline void __nfs_renew_times(struct dentry * dentry) { dentry->d_time = jiffies; } +static inline void nfs_renew_times(struct dentry * dentry) +{ + __nfs_renew_times(dentry); + nfs_renew_verifier(dentry->d_parent->d_inode, dentry); +} + static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags) { struct inode *inode = dentry->d_inode; unsigned long timeout = NFS_ATTRTIMEO(inode); /* - * If it's the last lookup in a series, we use a stricter - * cache consistency check by looking at the parent mtime. - * - * If it's been modified in the last hour, be really strict. - * (This still means that we can avoid doing unnecessary - * work on directories like /usr/share/bin etc which basically - * never change). + * If we're interested in close-to-open cache consistency, + * then we revalidate the inode upon lookup. */ - if (!(flags & LOOKUP_CONTINUE)) { - long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime; + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO) + && !(flags & LOOKUP_CONTINUE)) + return 1; - if (diff < 15*60) - timeout = 0; - } - - return time_after(jiffies,dentry->d_time + timeout); + if (time_after(jiffies, NFS_READTIME(inode) + timeout)) + return 1; + + return time_after(jiffies, dentry->d_time + timeout); } /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. * - * If mtime is close to present time, we revalidate - * more often. + * If parent mtime has changed, we revalidate, else we wait for a + * period corresponding to the parent's attribute cache timeout value. 
*/ -#define NFS_REVALIDATE_NEGATIVE (1 * HZ) static inline int nfs_neg_need_reval(struct dentry *dentry) { struct inode *dir = dentry->d_parent->d_inode; unsigned long timeout = NFS_ATTRTIMEO(dir); - long diff = CURRENT_TIME - dir->i_mtime; - if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE) - timeout = NFS_REVALIDATE_NEGATIVE; + if (!nfs_check_verifier(dir, dentry)) + return 1; return time_after(jiffies, dentry->d_time + timeout); } @@ -462,9 +535,8 @@ * NOTE! The hit can be a negative hit too, don't assume * we have an inode! * - * If the dentry is older than the revalidation interval, - * we do a new lookup and verify that the dentry is still - * correct. + * If the parent directory is seen to have changed, we throw out the + * cached dentry and do a new lookup. */ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) { @@ -477,11 +549,7 @@ lock_kernel(); dir = dentry->d_parent->d_inode; inode = dentry->d_inode; - /* - * If we don't have an inode, let's look at the parent - * directory mtime to get a hint about how often we - * should validate things.. - */ + if (!inode) { if (nfs_neg_need_reval(dentry)) goto out_bad; @@ -494,48 +562,50 @@ goto out_bad; } - if (!nfs_dentry_force_reval(dentry, flags)) - goto out_valid; - - if (IS_ROOT(dentry)) { - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - goto out_valid_renew; - } + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) + goto fast_getattr; - /* - * Do a new lookup and check the dentry attributes. - */ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; - - /* Inode number matches? */ - if (!(fattr.valid & NFS_ATTR_FATTR) || - NFS_FSID(inode) != fattr.fsid || - NFS_FILEID(inode) != fattr.fileid) + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - /* Ok, remember that we successfully checked it.. 
*/ - nfs_refresh_inode(inode, &fattr); + nfs_renew_times(dentry); + goto out_valid; + + fast_getattr: + if (!nfs_dentry_force_reval(dentry, flags)) + goto out_valid; - if (nfs_inode_is_stale(inode, &fhandle, &fattr)) + /* + * Revalidate the cached attributes. + */ + error = NFS_PROTO(inode)->getattr(inode, &fattr); + if (error) + goto out_bad; + if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - out_valid_renew: - nfs_renew_times(dentry); -out_valid: + __nfs_renew_times(dentry); + out_valid: unlock_kernel(); return 1; -out_bad: - shrink_dcache_parent(dentry); - /* If we have submounts, don't unhash ! */ - if (have_submounts(dentry)) - goto out_valid; - d_drop(dentry); - /* Purge readdir caches. */ - nfs_zap_caches(dir); - if (inode && S_ISDIR(inode->i_mode)) + out_bad: + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ nfs_zap_caches(inode); + /* If we are root, or have submounts, don't unhash ! */ + if (IS_ROOT(dentry) || have_submounts(dentry)) { + nfs_renew_verifier(dir, dentry); + goto out_valid; + } + shrink_dcache_parent(dentry); + } + d_drop(dentry); unlock_kernel(); return 0; } @@ -604,9 +674,9 @@ if (inode) { no_entry: d_add(dentry, inode); - nfs_renew_times(dentry); error = 0; } + nfs_renew_times(dentry); } out: return ERR_PTR(error); diff -u --recursive --new-file linux-2.4.9/fs/nfs/file.c linux-2.4.9-tune/fs/nfs/file.c --- linux-2.4.9/fs/nfs/file.c Thu Aug 16 18:39:37 2001 +++ linux-2.4.9-tune/fs/nfs/file.c Fri Aug 17 12:28:26 2001 @@ -155,9 +155,18 @@ */ static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { + int status; + kmap(page); - return nfs_flush_incompatible(file, page); + status = nfs_flush_incompatible(file, page); + if (status) + goto out_err; + return 0; + out_err: + kunmap(page); + return status; } + static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) { long status; @@ -265,7 +274,7 @@ /* Fake OK code if mounted 
without NLM support */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (cmd == F_GETLK) + if (cmd == F_GETLK || cmd == F_GETLK64) status = LOCK_USE_CLNT; goto out_ok; } @@ -305,13 +314,20 @@ * This makes locking act as a cache coherency point. */ out_ok: - if ((cmd == F_SETLK || cmd == F_SETLKW) && fl->fl_type != F_UNLCK) { - filemap_fdatasync(inode->i_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(inode->i_mapping); - nfs_zap_caches(inode); + switch (cmd) { + case F_SETLK: + case F_SETLKW: + case F_SETLK64: + case F_SETLKW64: + if (fl->fl_type != F_UNLCK) { + filemap_fdatasync(inode->i_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(inode->i_mapping); + nfs_zap_caches(inode); + } + default: } return status; } diff -u --recursive --new-file linux-2.4.9/fs/nfs/flushd.c linux-2.4.9-tune/fs/nfs/flushd.c --- linux-2.4.9/fs/nfs/flushd.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.9-tune/fs/nfs/flushd.c Tue Aug 21 12:31:19 2001 @@ -47,6 +47,8 @@ */ #define NFSDBG_FACILITY NFSDBG_PAGECACHE +#define NFS_SCAN_RESO (30*HZ) + /* * This is the wait queue all cluster daemons sleep on */ @@ -118,7 +120,7 @@ cache->task->tk_status = -ENOMEM; rpc_wake_up_task(cache->task); } - interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ); + interruptible_sleep_on_timeout(&cache->request_wait, NFS_SCAN_RESO); } out: unlock_kernel(); @@ -205,7 +207,7 @@ lock_kernel(); if (time_after(NFS_NEXTSCAN(inode), time)) NFS_NEXTSCAN(inode) = time; - mintimeout = jiffies + 1 * HZ; + mintimeout = jiffies + NFS_SCAN_RESO; if (time_before(mintimeout, NFS_NEXTSCAN(inode))) mintimeout = NFS_NEXTSCAN(inode); inode_append_flushd(inode); @@ -263,8 +265,8 @@ } dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid); - if (time_after(jiffies + 1 * HZ, delay)) - delay = 1 * HZ; + if (time_after(jiffies + NFS_SCAN_RESO, delay)) + delay = NFS_SCAN_RESO; else delay 
= delay - jiffies; task->tk_status = 0; diff -u --recursive --new-file linux-2.4.9/fs/nfs/inode.c linux-2.4.9-tune/fs/nfs/inode.c --- linux-2.4.9/fs/nfs/inode.c Thu Aug 16 18:39:37 2001 +++ linux-2.4.9-tune/fs/nfs/inode.c Thu Aug 30 14:13:59 2001 @@ -312,6 +312,7 @@ if (data->flags & NFS_MOUNT_NOAC) { data->acregmin = data->acregmax = 0; data->acdirmin = data->acdirmax = 0; + sb->s_flags |= MS_SYNCHRONOUS; } server->acregmin = data->acregmin*HZ; server->acregmax = data->acregmax*HZ; @@ -323,6 +324,7 @@ if (!server->hostname) goto out_unlock; strcpy(server->hostname, data->hostname); + server->caps = 0; nfsv3_try_again: /* Check NFS protocol revision and initialize RPC op vector @@ -331,6 +333,7 @@ #ifdef CONFIG_NFS_V3 server->rpc_ops = &nfs_v3_clientops; version = 3; + server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); goto out_unlock; @@ -650,27 +653,6 @@ return 1; } -int -nfs_inode_is_stale(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) -{ - /* Empty inodes are not stale */ - if (!inode->i_mode) - return 0; - - if ((fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT)) - return 1; - - if (is_bad_inode(inode)) - return 1; - - /* Has the filehandle changed? If so is the old one stale? */ - if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0 && - __nfs_revalidate_inode(NFS_SERVER(inode),inode) == -ESTALE) - return 1; - - return 0; -} - /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -736,7 +718,7 @@ /* * Make sure the inode is up-to-date. 
*/ - error = nfs_revalidate(dentry); + error = nfs_revalidate_inode(NFS_SERVER(inode),inode); if (error) { #ifdef NFS_PARANOIA printk("nfs_notify_change: revalidate failed, error=%d\n", error); @@ -862,24 +844,22 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - int status = 0; + int status = -ESTALE; struct nfs_fattr fattr; dfprintk(PAGECACHE, "NFS: revalidating (%x/%Ld)\n", inode->i_dev, (long long)NFS_FILEID(inode)); lock_kernel(); - if (!inode || is_bad_inode(inode) || NFS_STALE(inode)) { - unlock_kernel(); - return -ESTALE; - } + if (!inode || is_bad_inode(inode)) + goto out_nowait; + if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) + goto out_nowait; while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) { - unlock_kernel(); - return status; - } + if (status < 0) + goto out_nowait; if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { status = NFS_STALE(inode) ? -ESTALE : 0; goto out_nowait; @@ -893,7 +873,8 @@ inode->i_dev, (long long)NFS_FILEID(inode), status); if (status == -ESTALE) { NFS_FLAGS(inode) |= NFS_INO_STALE; - remove_inode_hash(inode); + if (inode != inode->i_sb->s_root->d_inode) + remove_inode_hash(inode); } goto out; } @@ -906,6 +887,8 @@ } dfprintk(PAGECACHE, "NFS: (%x/%Ld) revalidation complete\n", inode->i_dev, (long long)NFS_FILEID(inode)); + + NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; wake_up(&inode->i_wait); diff -u --recursive --new-file linux-2.4.9/fs/nfs/nfs2xdr.c linux-2.4.9-tune/fs/nfs/nfs2xdr.c --- linux-2.4.9/fs/nfs/nfs2xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/nfs/nfs2xdr.c Fri Aug 17 12:24:08 2001 @@ -419,7 +419,7 @@ bufsiz = bufsiz >> 2; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + *p++ = htonl(args->cookie & 0xFFFFFFFF); *p++ = htonl(bufsiz); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -504,7 +504,7 @@ 
entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - entry->cookie = ntohl(*p++); + entry->cookie = (s64)((off_t)ntohl(*p++)); entry->eof = !p[0] && p[1]; return p; diff -u --recursive --new-file linux-2.4.9/fs/nfs/nfs3proc.c linux-2.4.9-tune/fs/nfs/nfs3proc.c --- linux-2.4.9/fs/nfs/nfs3proc.c Mon Dec 4 03:01:01 2000 +++ linux-2.4.9-tune/fs/nfs/nfs3proc.c Fri Aug 17 12:25:23 2001 @@ -80,7 +80,8 @@ status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, fhandle, fattr, 0); dprintk("NFS reply lookup: %d\n", status); - nfs_refresh_inode(dir, &dir_attr); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); return status; } diff -u --recursive --new-file linux-2.4.9/fs/nfs/nfs3xdr.c linux-2.4.9-tune/fs/nfs/nfs3xdr.c --- linux-2.4.9/fs/nfs/nfs3xdr.c Fri Feb 9 20:29:44 2001 +++ linux-2.4.9-tune/fs/nfs/nfs3xdr.c Fri Aug 17 12:26:01 2001 @@ -523,6 +523,13 @@ return 0; } +/* Hack to sign-extending 32-bit cookies */ +static inline +u64 nfs_transform_cookie64(u64 cookie) +{ + return (cookie & 0x80000000) ? 
(cookie ^ 0xFFFFFFFF00000000) : cookie; +} + /* * Encode arguments to readdir call */ @@ -533,7 +540,7 @@ int buflen, replen; p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_hyper(p, args->cookie); + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie)); *p++ = args->verf[0]; *p++ = args->verf[1]; if (args->plus) { @@ -635,6 +642,7 @@ nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; + u64 cookie; if (!*p++) { if (!*p) @@ -648,9 +656,11 @@ entry->name = (const char *) p; p += XDR_QUADLEN(entry->len); entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &cookie); + entry->cookie = nfs_transform_cookie64(cookie); if (plus) { + entry->fattr.valid = 0; p = xdr_decode_post_op_attr(p, &entry->fattr); /* In fact, a post_op_fh3: */ if (*p++) { @@ -661,11 +671,8 @@ *entry = old; return ERR_PTR(-EAGAIN); } - } else { - /* If we don't get a file handle, the attrs - * aren't worth a lot. */ - entry->fattr.valid = 0; - } + } else + memset((u8*)(&entry->fh), 0, sizeof(entry->fh)); } entry->eof = !p[0] && p[1]; diff -u --recursive --new-file linux-2.4.9/fs/nfs/read.c linux-2.4.9-tune/fs/nfs/read.c --- linux-2.4.9/fs/nfs/read.c Wed Jun 27 23:02:29 2001 +++ linux-2.4.9-tune/fs/nfs/read.c Fri Aug 17 12:23:22 2001 @@ -59,7 +59,7 @@ static __inline__ struct nfs_read_data *nfs_readdata_alloc(void) { struct nfs_read_data *p; - p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); diff -u --recursive --new-file linux-2.4.9/fs/nfs/write.c linux-2.4.9-tune/fs/nfs/write.c --- linux-2.4.9/fs/nfs/write.c Thu Aug 16 18:39:37 2001 +++ linux-2.4.9-tune/fs/nfs/write.c Thu Aug 30 14:14:00 2001 @@ -109,7 +109,7 @@ static __inline__ struct nfs_page *nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + p = 
kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_hash); @@ -127,7 +127,7 @@ static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) { struct nfs_write_data *p; - p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS); + p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); @@ -288,7 +288,7 @@ goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { + if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) err = 0; @@ -1031,7 +1031,7 @@ * If wsize is smaller than page size, update and write * page synchronously. */ - if (NFS_SERVER(inode)->wsize < PAGE_SIZE) + if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) return nfs_writepage_sync(file, inode, page, offset, count); /* diff -u --recursive --new-file linux-2.4.9/include/linux/dcache.h linux-2.4.9-tune/include/linux/dcache.h --- linux-2.4.9/include/linux/dcache.h Fri Aug 17 12:03:17 2001 +++ linux-2.4.9-tune/include/linux/dcache.h Fri Aug 17 12:45:07 2001 @@ -80,6 +80,7 @@ struct super_block * d_sb; /* The root of the dentry tree */ unsigned long d_vfs_flags; void * d_fsdata; /* fs-specific data */ + unsigned long long d_verifier; /* used by nfs d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ }; diff -u --recursive --new-file linux-2.4.9/include/linux/lockd/nlm.h linux-2.4.9-tune/include/linux/lockd/nlm.h --- linux-2.4.9/include/linux/lockd/nlm.h Fri Aug 17 11:23:00 2001 +++ linux-2.4.9-tune/include/linux/lockd/nlm.h Fri Aug 17 12:49:31 2001 @@ -49,10 +49,10 @@ #define NLMPROC_CANCEL_RES 13 #define NLMPROC_UNLOCK_RES 14 #define NLMPROC_GRANTED_RES 15 +#define NLMPROC_NSM_NOTIFY 16 /* statd callback */ #define NLMPROC_SHARE 20 #define NLMPROC_UNSHARE 21 #define NLMPROC_NM_LOCK 22 #define NLMPROC_FREE_ALL 23 -#define NLMPROC_NSM_NOTIFY 24 /* statd 
callback */ #endif /* LINUX_LOCKD_NLM_H */ diff -u --recursive --new-file linux-2.4.9/include/linux/nfs_flushd.h linux-2.4.9-tune/include/linux/nfs_flushd.h --- linux-2.4.9/include/linux/nfs_flushd.h Fri Aug 17 12:49:55 2001 +++ linux-2.4.9-tune/include/linux/nfs_flushd.h Wed Aug 22 12:24:14 2001 @@ -13,8 +13,8 @@ * flushing out requests. If it exceeds the hard limit, we stall until * it drops again. */ -#define MAX_REQUEST_SOFT 192 -#define MAX_REQUEST_HARD 256 +#define MAX_REQUEST_SOFT 8192 +#define MAX_REQUEST_HARD 32768 /* * Maximum number of requests per write cluster. diff -u --recursive --new-file linux-2.4.9/include/linux/nfs_fs.h linux-2.4.9-tune/include/linux/nfs_fs.h --- linux-2.4.9/include/linux/nfs_fs.h Fri Aug 17 12:03:37 2001 +++ linux-2.4.9-tune/include/linux/nfs_fs.h Tue Aug 21 12:28:53 2001 @@ -46,10 +46,10 @@ * The upper limit on timeouts for the exponential backoff algorithm. */ #define NFS_MAX_RPC_TIMEOUT (6*HZ) -#define NFS_READ_DELAY (2*HZ) -#define NFS_WRITEBACK_DELAY (5*HZ) +#define NFS_READ_DELAY (60*HZ) +#define NFS_WRITEBACK_DELAY (60*HZ) #define NFS_WRITEBACK_LOCKDELAY (60*HZ) -#define NFS_COMMIT_DELAY (5*HZ) +#define NFS_COMMIT_DELAY (60*HZ) /* * Size of the lookup cache in units of number of entries cached. @@ -101,8 +101,19 @@ #define NFS_FILEID(inode) ((inode)->u.nfs_i.fileid) #define NFS_FSID(inode) ((inode)->u.nfs_i.fsid) -/* Inode Flags */ -#define NFS_USE_READDIRPLUS(inode) ((NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) ? 
1 : 0) +static inline int nfs_server_caps(struct inode *inode) +{ + return NFS_SERVER(inode)->caps; +} + +static inline int NFS_USE_READDIRPLUS(struct inode *inode) +{ + if (nfs_server_caps(inode) & NFS_CAP_READDIRPLUS) + return 1; + if (NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS) + return 1; + return 0; +} /* * These are the default flags for swap requests diff -u --recursive --new-file linux-2.4.9/include/linux/nfs_fs_sb.h linux-2.4.9-tune/include/linux/nfs_fs_sb.h --- linux-2.4.9/include/linux/nfs_fs_sb.h Wed Apr 26 02:28:56 2000 +++ linux-2.4.9-tune/include/linux/nfs_fs_sb.h Fri Aug 17 12:26:01 2001 @@ -8,6 +8,7 @@ struct rpc_clnt * client; /* RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ int flags; /* various flags */ + unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ @@ -29,5 +30,9 @@ struct nfs_sb_info { struct nfs_server s_server; }; + +/* Server capabilities */ +#define NFS_CAP_READDIRPLUS 1 + #endif diff -u --recursive --new-file linux-2.4.9/include/linux/sunrpc/xprt.h linux-2.4.9-tune/include/linux/sunrpc/xprt.h --- linux-2.4.9/include/linux/sunrpc/xprt.h Fri Aug 17 11:23:00 2001 +++ linux-2.4.9-tune/include/linux/sunrpc/xprt.h Thu Aug 30 15:09:00 2001 @@ -135,15 +135,13 @@ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ - struct rpc_wait_queue reconn; /* waiting for reconnect */ struct rpc_rqst * free; /* free slots */ struct rpc_rqst slot[RPC_MAXREQS]; unsigned long sockstate; /* Socket state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ stream : 1, /* TCP */ - tcp_more : 1, /* more record fragments */ - connecting : 1; /* being reconnected */ + tcp_more : 1; /* more record fragments */ /* * State of TCP reply receive stuff @@ -158,6 +156,8 
@@ /* * Send stuff */ + spinlock_t sock_lock; /* lock socket info */ + spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ @@ -185,10 +185,9 @@ void xprt_release(struct rpc_task *); void xprt_reconnect(struct rpc_task *); int xprt_clear_backlog(struct rpc_xprt *); +int xprt_tcp_pending(void); void __rpciod_tcp_dispatcher(void); -extern struct list_head rpc_xprt_pending; - #define XPRT_WSPACE 0 #define XPRT_CONNECT 1 @@ -200,12 +199,6 @@ #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) - -static inline -int xprt_tcp_pending(void) -{ - return !list_empty(&rpc_xprt_pending); -} static inline void rpciod_tcp_dispatcher(void) diff -u --recursive --new-file linux-2.4.9/kernel/ksyms.c linux-2.4.9-tune/kernel/ksyms.c --- linux-2.4.9/kernel/ksyms.c Mon Aug 13 02:35:38 2001 +++ linux-2.4.9-tune/kernel/ksyms.c Fri Aug 17 12:29:20 2001 @@ -477,6 +477,7 @@ EXPORT_SYMBOL(cap_bset); EXPORT_SYMBOL(daemonize); EXPORT_SYMBOL(csum_partial); /* for networking and md */ +EXPORT_SYMBOL(do_sigaction); /* Program loader interfaces */ EXPORT_SYMBOL(setup_arg_pages); diff -u --recursive --new-file linux-2.4.9/lib/Makefile linux-2.4.9-tune/lib/Makefile --- linux-2.4.9/lib/Makefile Wed Apr 25 22:31:03 2001 +++ linux-2.4.9-tune/lib/Makefile Fri Aug 17 12:38:24 2001 @@ -8,7 +8,7 @@ L_TARGET := lib.a -export-objs := cmdline.o rwsem-spinlock.o rwsem.o +export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o diff -u --recursive --new-file linux-2.4.9/lib/dec_and_lock.c linux-2.4.9-tune/lib/dec_and_lock.c --- linux-2.4.9/lib/dec_and_lock.c Sat Jul 8 01:22:48 2000 +++ linux-2.4.9-tune/lib/dec_and_lock.c Fri Aug 17 12:22:49 2001 @@ -1,3 +1,4 @@ +#include <linux/module.h> #include <linux/spinlock.h> #include <asm/atomic.h> @@ -34,4 +35,6 @@
spin_unlock(lock); return 0; } + +EXPORT_SYMBOL(atomic_dec_and_lock); #endif diff -u --recursive --new-file linux-2.4.9/net/sunrpc/clnt.c linux-2.4.9-tune/net/sunrpc/clnt.c --- linux-2.4.9/net/sunrpc/clnt.c Thu Apr 19 17:38:50 2001 +++ linux-2.4.9-tune/net/sunrpc/clnt.c Thu Aug 30 15:09:00 2001 @@ -55,6 +55,8 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_reconnect(struct rpc_task *task); +static void child_reconnect(struct rpc_task *); +static void child_reconnect_status(struct rpc_task *); static u32 * call_header(struct rpc_task *task); static u32 * call_verify(struct rpc_task *task); @@ -526,6 +528,7 @@ call_reconnect(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_task *child; dprintk("RPC: %4d call_reconnect status %d\n", task->tk_pid, task->tk_status); @@ -533,8 +536,29 @@ task->tk_action = call_transmit; if (task->tk_status < 0 || !clnt->cl_xprt->stream) return; - clnt->cl_stats->netreconn++; + + /* Run as a child to ensure it runs as an rpciod task */ + child = rpc_new_child(clnt, task); + if (child) { + child->tk_action = child_reconnect; + rpc_run_child(task, child, NULL); + } +} + +static void child_reconnect(struct rpc_task *task) +{ + task->tk_client->cl_stats->netreconn++; + task->tk_status = 0; + task->tk_action = child_reconnect_status; xprt_reconnect(task); +} + +static void child_reconnect_status(struct rpc_task *task) +{ + if (task->tk_status == -EAGAIN) + task->tk_action = child_reconnect; + else + task->tk_action = NULL; } /* diff -u --recursive --new-file linux-2.4.9/net/sunrpc/sched.c linux-2.4.9-tune/net/sunrpc/sched.c --- linux-2.4.9/net/sunrpc/sched.c Tue Apr 3 22:45:37 2001 +++ linux-2.4.9-tune/net/sunrpc/sched.c Fri Aug 17 12:29:20 2001 @@ -30,7 +30,7 @@ /* * We give RPC the same get_free_pages priority as NFS */ -#define GFP_RPC GFP_NFS +#define GFP_RPC GFP_NOFS static void __rpc_default_timer(struct rpc_task *task); static void 
rpciod_killall(void); @@ -744,7 +744,7 @@ * for readahead): * * sync user requests: GFP_KERNEL - * async requests: GFP_RPC (== GFP_NFS) + * async requests: GFP_RPC (== GFP_NOFS) * swap requests: GFP_ATOMIC (or new GFP_SWAPPER) */ void * @@ -772,8 +772,8 @@ } if (flags & RPC_TASK_ASYNC) return NULL; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ>>4); + current->policy |= SCHED_YIELD; + schedule(); } while (!signalled()); return NULL; @@ -1072,8 +1072,6 @@ current->pgrp = 1; strcpy(current->comm, "rpciod"); - current->flags |= PF_MEMALLOC; - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); while (rpciod_users) { if (signalled()) { @@ -1120,8 +1118,8 @@ __rpc_schedule(); if (all_tasks) { dprintk("rpciod_killall: waiting for tasks to exit\n"); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); } } @@ -1152,7 +1150,7 @@ /* * Create the rpciod thread and wait for it to start. */ - error = kernel_thread(rpciod, &rpciod_killer, 0); + error = kernel_thread(rpciod, &rpciod_killer, CLONE_SIGNAL); if (error < 0) { printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); rpciod_users--; @@ -1191,8 +1189,8 @@ * wait briefly before checking the process id. */ current->sigpending = 0; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + current->policy |= SCHED_YIELD; + schedule(); /* * Display a message if we're going to wait longer. 
*/ diff -u --recursive --new-file linux-2.4.9/net/sunrpc/svc.c linux-2.4.9-tune/net/sunrpc/svc.c --- linux-2.4.9/net/sunrpc/svc.c Thu Jun 21 02:42:19 2001 +++ linux-2.4.9-tune/net/sunrpc/svc.c Fri Aug 17 12:29:20 2001 @@ -136,7 +136,7 @@ serv->sv_nrthreads++; rqstp->rq_server = serv; - error = kernel_thread((int (*)(void *)) func, rqstp, 0); + error = kernel_thread((int (*)(void *)) func, rqstp, CLONE_SIGNAL); if (error < 0) goto out_thread; error = 0; diff -u --recursive --new-file linux-2.4.9/net/sunrpc/xprt.c linux-2.4.9-tune/net/sunrpc/xprt.c --- linux-2.4.9/net/sunrpc/xprt.c Wed Aug 15 10:22:18 2001 +++ linux-2.4.9-tune/net/sunrpc/xprt.c Thu Aug 30 15:09:00 2001 @@ -75,10 +75,6 @@ * Local variables */ -/* Spinlock for critical sections in the code. */ -spinlock_t xprt_sock_lock = SPIN_LOCK_UNLOCKED; -spinlock_t xprt_lock = SPIN_LOCK_UNLOCKED; - #ifdef RPC_DEBUG # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_XPRT @@ -172,6 +168,44 @@ } /* + * Serialize write access to sockets, in order to prevent different + * requests from interfering with each other. + * Also prevents TCP socket reconnections from colliding with writes. + */ +static int +xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + int retval; + spin_lock_bh(&xprt->sock_lock); + if (!xprt->snd_task) + xprt->snd_task = task; + else if (xprt->snd_task != task) { + dprintk("RPC: %4d TCP write queue full (task %d)\n", + task->tk_pid, xprt->snd_task->tk_pid); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + } + retval = xprt->snd_task == task; + spin_unlock_bh(&xprt->sock_lock); + return retval; +} + +/* + * Releases the socket for use by other requests. 
+ */ +static void +xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + spin_lock_bh(&xprt->sock_lock); + if (xprt->snd_task == task) { + xprt->snd_task = NULL; + rpc_wake_up_next(&xprt->sending); + } + spin_unlock_bh(&xprt->sock_lock); +} + +/* * Write data to socket. */ static inline int @@ -285,7 +319,10 @@ if (xprt->nocong) return; - spin_lock_bh(&xprt_sock_lock); + /* + * Note: we're in a BH context + */ + spin_lock(&xprt->xprt_lock); cwnd = xprt->cwnd; if (result >= 0) { if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) @@ -313,7 +350,7 @@ xprt->cwnd = cwnd; out: - spin_unlock_bh(&xprt_sock_lock); + spin_unlock(&xprt->xprt_lock); } /* @@ -394,6 +431,8 @@ /* * Reconnect a broken TCP connection. + * + * Note: This cannot collide with the TCP reads, as both run from rpciod */ void xprt_reconnect(struct rpc_task *task) @@ -416,15 +455,10 @@ return; } - spin_lock(&xprt_lock); - if (xprt->connecting) { - task->tk_timeout = 0; - rpc_sleep_on(&xprt->reconn, task, NULL, NULL); - spin_unlock(&xprt_lock); + if (!xprt_lock_write(xprt, task)) return; - } - xprt->connecting = 1; - spin_unlock(&xprt_lock); + if (xprt_connected(xprt)) + goto out_write; status = -ENOTCONN; if (!inet) { @@ -439,6 +473,7 @@ /* Reset TCP record info */ xprt->tcp_offset = 0; + xprt->tcp_reclen = 0; xprt->tcp_copied = 0; xprt->tcp_more = 0; @@ -467,24 +502,22 @@ dprintk("RPC: %4d connect status %d connected %d\n", task->tk_pid, status, xprt_connected(xprt)); - spin_lock_bh(&xprt_sock_lock); + spin_lock_bh(&xprt->sock_lock); if (!xprt_connected(xprt)) { task->tk_timeout = xprt->timeout.to_maxval; - rpc_sleep_on(&xprt->reconn, task, xprt_reconn_status, NULL); - spin_unlock_bh(&xprt_sock_lock); + rpc_sleep_on(&xprt->sending, task, xprt_reconn_status, NULL); + spin_unlock_bh(&xprt->sock_lock); return; } - spin_unlock_bh(&xprt_sock_lock); + spin_unlock_bh(&xprt->sock_lock); } defer: - spin_lock(&xprt_lock); - xprt->connecting = 0; if (status < 0) { rpc_delay(task, 5*HZ); 
task->tk_status = -ENOTCONN; } - rpc_wake_up(&xprt->reconn); - spin_unlock(&xprt_lock); + out_write: + xprt_release_write(xprt, task); } /* @@ -499,10 +532,7 @@ dprintk("RPC: %4d xprt_reconn_timeout %d\n", task->tk_pid, task->tk_status); - spin_lock(&xprt_lock); - xprt->connecting = 0; - rpc_wake_up(&xprt->reconn); - spin_unlock(&xprt_lock); + xprt_release_write(xprt, task); } /* @@ -699,10 +729,6 @@ struct iovec riov; int want, result; - if (xprt->tcp_offset >= xprt->tcp_reclen + sizeof(xprt->tcp_recm)) { - xprt->tcp_offset = 0; - xprt->tcp_reclen = 0; - } if (xprt->tcp_offset >= sizeof(xprt->tcp_recm)) goto done; @@ -718,10 +744,6 @@ want -= result; } while (want); - /* Is this another fragment in the last message */ - if (!xprt->tcp_more) - xprt->tcp_copied = 0; /* No, so we're reading a new message */ - /* Get the record length and mask out the last fragment bit */ xprt->tcp_reclen = ntohl(xprt->tcp_recm); xprt->tcp_more = (xprt->tcp_reclen & 0x80000000) ? 0 : 1; @@ -843,14 +865,15 @@ /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? 
*/ - if ((result = tcp_read_fraghdr(xprt)) <= 0) + if ((result = tcp_read_fraghdr(xprt)) < 0) return result; avail = result; /* Read in the xid if necessary */ - if ((result = tcp_read_xid(xprt, avail)) <= 0) + if ((result = tcp_read_xid(xprt, avail)) < 0) return result; - avail = result; + if (!(avail = result)) + goto out_ok; /* Find and lock the request corresponding to this xid */ req = xprt_lookup_rqst(xprt, xprt->tcp_xid); @@ -868,9 +891,14 @@ if ((result = tcp_read_discard(xprt, avail)) < 0) return result; + out_ok: dprintk("RPC: tcp_input_record done (off %d reclen %d copied %d)\n", xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_copied); result = xprt->tcp_reclen; + xprt->tcp_reclen = 0; + xprt->tcp_offset = 0; + if (!xprt->tcp_more) + xprt->tcp_copied = 0; return result; } @@ -885,11 +913,19 @@ rpciod_wake_up(); } +int xprt_tcp_pending(void) +{ + int retval; + + spin_lock_bh(&rpc_queue_lock); + retval = !list_empty(&rpc_xprt_pending); + spin_unlock_bh(&rpc_queue_lock); + return retval; +} + static inline void xprt_append_pending(struct rpc_xprt *xprt) { - if (!list_empty(&xprt->rx_pending)) - return; spin_lock_bh(&rpc_queue_lock); if (list_empty(&xprt->rx_pending)) { list_add(&xprt->rx_pending, rpc_xprt_pending.prev); @@ -1003,11 +1039,10 @@ case TCP_ESTABLISHED: if (xprt_test_and_set_connected(xprt)) break; - spin_lock_bh(&xprt_sock_lock); + spin_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - rpc_wake_up(&xprt->reconn); - spin_unlock_bh(&xprt_sock_lock); + spin_unlock(&xprt->sock_lock); break; case TCP_SYN_SENT: case TCP_SYN_RECV: @@ -1041,10 +1076,10 @@ return; if (!xprt_test_and_set_wspace(xprt)) { - spin_lock_bh(&xprt_sock_lock); + spin_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt_sock_lock); + spin_unlock(&xprt->sock_lock); } if (test_bit(SOCK_NOSPACE, &sock->flags)) { 
@@ -1071,10 +1106,10 @@ return; if (!xprt_test_and_set_wspace(xprt)) { - spin_lock_bh(&xprt_sock_lock); + spin_lock(&xprt->sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt_sock_lock); + spin_unlock(&xprt->sock_lock); } if (sk->sleep && waitqueue_active(sk->sleep)) @@ -1100,55 +1135,6 @@ rpc_wake_up_task(task); } - -/* - * Serialize access to sockets, in order to prevent different - * requests from interfering with each other. - */ -static int -xprt_down_transmit(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpc_rqst *req = task->tk_rqstp; - - spin_lock_bh(&xprt_sock_lock); - spin_lock(&xprt_lock); - if (xprt->snd_task && xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full (task %d)\n", - task->tk_pid, xprt->snd_task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - rpc_sleep_on(&xprt->sending, task, NULL, NULL); - } else if (!xprt->snd_task) { - xprt->snd_task = task; -#ifdef RPC_PROFILE - req->rq_xtime = jiffies; -#endif - req->rq_bytes_sent = 0; - } - spin_unlock(&xprt_lock); - spin_unlock_bh(&xprt_sock_lock); - return xprt->snd_task == task; -} - -/* - * Releases the socket for use by other requests. - */ -static inline void -xprt_up_transmit(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - - if (xprt->snd_task && xprt->snd_task == task) { - spin_lock_bh(&xprt_sock_lock); - spin_lock(&xprt_lock); - xprt->snd_task = NULL; - rpc_wake_up_next(&xprt->sending); - spin_unlock(&xprt_lock); - spin_unlock_bh(&xprt_sock_lock); - } -} - /* * Place the actual RPC call. * We have to copy the iovec because sendmsg fiddles with its contents. 
@@ -1182,9 +1168,12 @@ *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); } - if (!xprt_down_transmit(task)) + if (!xprt_lock_write(xprt, task)) return; +#ifdef RPC_PROFILE + req->rq_xtime = jiffies; +#endif do_xprt_transmit(task); } @@ -1252,12 +1241,12 @@ switch (status) { case -ENOMEM: /* Protect against (udp|tcp)_write_space */ - spin_lock_bh(&xprt_sock_lock); + spin_lock_bh(&xprt->sock_lock); if (!xprt_wspace(xprt)) { task->tk_timeout = req->rq_timeout.to_current; rpc_sleep_on(&xprt->sending, task, NULL, NULL); } - spin_unlock_bh(&xprt_sock_lock); + spin_unlock_bh(&xprt->sock_lock); return; case -EAGAIN: /* Keep holding the socket if it is blocked */ @@ -1268,6 +1257,9 @@ if (!xprt->stream) return; default: + if (xprt->stream) + xprt_disconnect(xprt); + req->rq_bytes_sent = 0; goto out_release; } @@ -1278,7 +1270,7 @@ rpc_add_timer(task, xprt_timer); rpc_unlock_task(task); out_release: - xprt_up_transmit(task); + xprt_release_write(xprt, task); } /* @@ -1313,7 +1305,7 @@ dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", task->tk_pid, xprt->cong, xprt->cwnd); - spin_lock_bh(&xprt_sock_lock); + spin_lock_bh(&xprt->xprt_lock); xprt_reserve_status(task); if (task->tk_rqstp) { task->tk_timeout = 0; @@ -1324,7 +1316,7 @@ task->tk_status = -EAGAIN; rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } - spin_unlock_bh(&xprt_sock_lock); + spin_unlock_bh(&xprt->xprt_lock); dprintk("RPC: %4d xprt_reserve returns %d\n", task->tk_pid, task->tk_status); return task->tk_status; @@ -1397,7 +1389,11 @@ struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; - xprt_up_transmit(task); + if (xprt->snd_task == task) { + if (xprt->stream) + xprt_disconnect(xprt); + xprt_release_write(xprt, task); + } if (!(req = task->tk_rqstp)) return; task->tk_rqstp = NULL; @@ -1411,7 +1407,7 @@ rpc_remove_wait_queue(task); } - spin_lock_bh(&xprt_sock_lock); + spin_lock_bh(&xprt->xprt_lock); req->rq_next = xprt->free; xprt->free = req; @@ -1419,7 +1415,7 @@ xprt->cong -= 
RPC_CWNDSCALE; xprt_clear_backlog(xprt); - spin_unlock_bh(&xprt_sock_lock); + spin_unlock_bh(&xprt->xprt_lock); } /* @@ -1476,6 +1472,8 @@ } else xprt->cwnd = RPC_INITCWND; xprt->congtime = jiffies; + spin_lock_init(&xprt->sock_lock); + spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); /* Set timeout parameters */ @@ -1489,7 +1487,6 @@ xprt->pending = RPC_INIT_WAITQ("xprt_pending"); xprt->sending = RPC_INIT_WAITQ("xprt_sending"); xprt->backlog = RPC_INIT_WAITQ("xprt_backlog"); - xprt->reconn = RPC_INIT_WAITQ("xprt_reconn"); /* initialize free list */ for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++) @@ -1625,7 +1622,6 @@ rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); - rpc_wake_up(&xprt->reconn); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); }