All of the above --- Documentation/filesystems/caching/backend-api.txt | 357 +++++ Documentation/filesystems/caching/cachefiles.txt | 281 ++++ Documentation/filesystems/caching/fscache.txt | 151 ++ Documentation/filesystems/caching/netfs-api.txt | 752 +++++++++++ Documentation/sysctl/fs.txt | 6 fs/Kconfig | 49 + fs/Makefile | 2 fs/afs/cache.h | 27 fs/afs/cell.c | 109 +- fs/afs/cell.h | 16 fs/afs/cmservice.c | 2 fs/afs/dir.c | 15 fs/afs/file.c | 246 ++-- fs/afs/fsclient.c | 4 fs/afs/inode.c | 45 - fs/afs/internal.h | 25 fs/afs/main.c | 24 fs/afs/mntpt.c | 12 fs/afs/proc.c | 1 fs/afs/server.c | 3 fs/afs/vlocation.c | 179 ++- fs/afs/vnode.c | 248 +++- fs/afs/vnode.h | 10 fs/afs/volume.c | 78 - fs/afs/volume.h | 28 fs/autofs4/autofs_i.h | 3 fs/autofs4/init.c | 2 fs/autofs4/inode.c | 22 fs/autofs4/waitq.c | 1 fs/buffer.c | 2 fs/cachefiles/Makefile | 18 fs/cachefiles/cf-bind.c | 283 ++++ fs/cachefiles/cf-interface.c | 1315 +++++++++++++++++++ fs/cachefiles/cf-key.c | 160 ++ fs/cachefiles/cf-main.c | 131 ++ fs/cachefiles/cf-namei.c | 837 ++++++++++++ fs/cachefiles/cf-proc.c | 510 +++++++ fs/cachefiles/cf-sysctl.c | 69 + fs/cachefiles/cf-xattr.c | 299 ++++ fs/cachefiles/internal.h | 308 ++++ fs/dcache.c | 297 ++++ fs/ext2/dir.c | 6 fs/ext3/inode.c | 10 fs/fcntl.c | 2 fs/file_table.c | 49 + fs/freevxfs/vxfs_subr.c | 2 fs/fscache/Makefile | 11 fs/fscache/cookie.c | 1063 ++++++++++++++++ fs/fscache/fscache-int.h | 93 + fs/fscache/fsdef.c | 113 ++ fs/fscache/main.c | 105 ++ fs/fscache/page.c | 548 ++++++++ fs/nfs/Makefile | 7 fs/nfs/callback.c | 31 fs/nfs/callback.h | 7 fs/nfs/callback_proc.c | 13 fs/nfs/client.c | 1439 +++++++++++++++++++++ fs/nfs/delegation.c | 35 - fs/nfs/delegation.h | 10 fs/nfs/dir.c | 18 fs/nfs/file.c | 35 - fs/nfs/fscache.c | 349 +++++ fs/nfs/fscache.h | 466 +++++++ fs/nfs/getroot.c | 306 ++++ fs/nfs/idmap.c | 39 - fs/nfs/inode.c | 35 - fs/nfs/internal.h | 134 +- fs/nfs/namespace.c | 33 fs/nfs/nfs3proc.c | 8 fs/nfs/nfs4_fs.h | 78 - 
fs/nfs/nfs4namespace.c | 118 +- fs/nfs/nfs4proc.c | 165 +- fs/nfs/nfs4renewd.c | 19 fs/nfs/nfs4state.c | 174 --- fs/nfs/nfs4xdr.c | 42 - fs/nfs/pagelist.c | 3 fs/nfs/proc.c | 4 fs/nfs/read.c | 32 fs/nfs/super.c | 1400 +++++++------------- fs/nfs/sysctl.c | 43 + fs/nfs/write.c | 13 fs/open.c | 20 fs/reiserfs/inode.c | 10 fs/super.c | 12 fs/ufs/dir.c | 6 include/linux/dcache.h | 2 include/linux/file.h | 1 include/linux/fs.h | 10 include/linux/fscache-cache.h | 243 ++++ include/linux/fscache.h | 496 +++++++ include/linux/nfs4_mount.h | 1 include/linux/nfs_fs.h | 8 include/linux/nfs_fs_sb.h | 94 + include/linux/nfs_idmap.h | 14 include/linux/nfs_mount.h | 1 include/linux/nfs_xdr.h | 4 include/linux/page-flags.h | 15 include/linux/pagemap.h | 17 include/linux/sysctl.h | 1 kernel/auditsc.c | 2 kernel/sysctl.c | 11 mm/filemap.c | 120 ++ mm/migrate.c | 4 mm/page_alloc.c | 2 mm/readahead.c | 25 105 files changed, 13153 insertions(+), 1951 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt new file mode 100644 index 0000000..1dd601b --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.txt @@ -0,0 +1,357 @@ + ========================== + FS-CACHE CACHE BACKEND API + ========================== + +The FS-Cache system provides an API by which actual caches can be supplied to +FS-Cache for it to then serve out to network filesystems and other interested +parties. + +This API is declared in <linux/fscache-cache.h>. + + +==================================== +INITIALISING AND REGISTERING A CACHE +==================================== + +To start off, a cache definition must be initialised and registered for each +cache the backend wants to make available. For instance, CacheFS does this in +the fill_super() operation on mounting. 
+ +The cache definition (struct fscache_cache) should be initialised by calling: + + void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...) + +Where: + + (*) "cache" is a pointer to the cache definition; + + (*) "ops" is a pointer to the table of operations that the backend supports on + this cache; + + (*) and a format and printf-style arguments for constructing a label for the + cache. + + +The cache should then be registered with FS-Cache by passing a pointer to the +previously initialised cache definition to: + + int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); + +Two extra arguments should also be supplied: + + (*) "fsdef" which should point to the object representation for the FS-Cache + master index in this cache. Netfs primary index entries will be created + here. + + (*) "tagname" which, if given, should be a text string naming this cache. If + this is NULL, the identifier will be used instead. For CacheFS, the + identifier is set to name the underlying block device and the tag can be + supplied by mount. + +This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag +is already in use. 0 will be returned on success. + + +===================== +UNREGISTERING A CACHE +===================== + +A cache can be withdrawn from the system by calling this function with a +pointer to the cache definition: + + void fscache_withdraw_cache(struct fscache_cache *cache) + +In CacheFS's case, this is called by put_super(). + + +================== +FS-CACHE UTILITIES +================== + +FS-Cache provides some utilities that a cache backend may make use of: + + (*) Find the parent of an object: + + struct fscache_object * + fscache_find_parent_object(struct fscache_object *object) + + This allows a backend to find the logical parent of an index or data file + in the cache hierarchy. 
+ + (*) Note occurrence of an I/O error in a cache: + + void fscache_io_error(struct fscache_cache *cache) + + This tells FS-Cache that an I/O error occurred in the cache. After this + has been called, only resource dissociation operations (object and page + release) will be passed from the netfs to the cache backend for the + specified cache. + + This does not actually withdraw the cache. That must be done separately. + + (*) Get an extra reference to a read or write context: + + void *fscache_get_context(struct fscache_cookie *cookie, void *context) + + and release a reference: + + void *fscache_put_context(struct fscache_cookie *cookie, void *context) + + These should be used to maintain the presence of the read or write context + passed to the cache read/write functions. This context must then be + passed to the I/O completion function. + + +======================== +RELEVANT DATA STRUCTURES +======================== + + (*) Index/Data file FS-Cache representation cookie: + + struct fscache_cookie { + struct fscache_object_def *def; + struct fscache_netfs *netfs; + void *netfs_data; + ... + }; + + The fields that might be of use to the backend describe the object + definition, the netfs definition and the netfs's data for this cookie. + The object definition contains functions supplied by the netfs for loading + and matching index entries; these are required to provide some of the + cache operations. + + (*) In-cache object representation: + + struct fscache_object { + struct fscache_cache *cache; + struct fscache_cookie *cookie; + unsigned long flags; + #define FSCACHE_OBJECT_RECYCLING 1 + ... + }; + + Structures of this type should be allocated by the cache backend and + passed to FS-Cache when requested by the appropriate cache operation. In + the case of CacheFS, they're embedded in CacheFS's internal object + structures. + + Each object contains a pointer to the cookie that represents the object it + is backing. 
It also contains a flag that indicates whether the object is + being retired when put_object() is called. This should be initialised by + calling fscache_object_init(object). + + +================ +CACHE OPERATIONS +================ + +The cache backend provides FS-Cache with a table of operations that can be +performed on the denizens of the cache. These are held in a structure of type: + + struct fscache_cache_ops + + (*) Name of cache provider [mandatory]: + + const char *name + + This isn't strictly an operation, but should be pointed at a string naming + the backend. + + (*) Object lookup [mandatory]: + + struct fscache_object *(*lookup_object)(struct fscache_cache *cache, + struct fscache_object *parent, + struct fscache_cookie *cookie) + + This method is used to look up an object in the specified cache, given a + pointer to the parent object and the cookie to which the object will be + attached. This should instantiate that object in the cache if it can, or + return -ENOBUFS or -ENOMEM if it can't. + + (*) Increment object refcount [mandatory]: + + struct fscache_object *(*grab_object)(struct fscache_object *object) + + This method is called to increment the reference count on an object. It + may fail (for instance if the cache is being withdrawn) by returning NULL. + It should return the object pointer if successful. + + (*) Lock/Unlock object [mandatory]: + + void (*lock_object)(struct fscache_object *object) + void (*unlock_object)(struct fscache_object *object) + + These methods are used to exclusively lock an object. It must be possible + to schedule with the lock held, so a spinlock isn't sufficient. + + (*) Pin/Unpin object [optional]: + + int (*pin_object)(struct fscache_object *object) + void (*unpin_object)(struct fscache_object *object) + + These methods are used to pin an object into the cache. Once pinned an + object cannot be reclaimed to make space. Return -ENOSPC if there's not + enough space in the cache to permit this. 
+ + (*) Update object [mandatory]: + + int (*update_object)(struct fscache_object *object) + + This is called to update the index entry for the specified object. The + new information should be in object->cookie->netfs_data. This can be + obtained by calling object->cookie->def->get_aux()/get_attr(). + + (*) Release object reference [mandatory]: + + void (*put_object)(struct fscache_object *object) + + This method is used to discard a reference to an object. The object may + be destroyed when all the references held by FS-Cache are released. + + (*) Synchronise a cache [mandatory]: + + void (*sync)(struct fscache_cache *cache) + + This is called to ask the backend to synchronise a cache with its backing + device. + + (*) Dissociate a cache [mandatory]: + + void (*dissociate_pages)(struct fscache_cache *cache) + + This is called to ask a cache to perform any page dissociations as part of + cache withdrawal. + + (*) Set the data size on a cache file [mandatory]: + + int (*set_i_size)(struct fscache_object *object, loff_t i_size); + + This is called to indicate to the cache the maximum size a file may reach. + The cache may use this to reserve space on the cache. It may also return + -ENOBUFS to indicate that insufficient space is available to expand the + metadata used to track the data. It should return 0 if successful or + -ENOMEM or -EIO on error. + + (*) Reserve cache space for an object's data [optional]: + + int (*reserve_space)(struct fscache_object *object, loff_t size); + + This is called to request that cache space be reserved to hold the data + for an object and the metadata used to track it. Zero size should be + taken as a request to cancel a reservation. + + This should return 0 if successful, -ENOSPC if there isn't enough space + available, or -ENOMEM or -EIO on other errors. + + The reservation may exceed the size of the object, thus permitting future + expansion. 
If the amount of space consumed by an object would exceed the + reservation, it's permitted to refuse requests to allocate pages, but not + required. An object may be pruned down to its reservation size if larger + than that already. + + (*) Request page be read from cache [mandatory]: + + int (*read_or_alloc_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to attempt to read a netfs page from the cache, or to + reserve a backing block if not. FS-Cache will have done as much checking + as it can before calling, but most of the work belongs to the backend. + + If there's no page in the cache, then -ENODATA should be returned if the + backend managed to reserve a backing block; -ENOBUFS, -ENOMEM or -EIO if + it didn't. + + If there is a page in the cache, then a read operation should be queued + and 0 returned. When the read finishes, end_io_func() should be called + with the following arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + The mark_pages_cached() cookie operation should be called for the page if + any cache metadata is retained. This will indicate to the netfs that the + page needs explicit uncaching. This operation takes a pagevec, thus + allowing several pages to be marked at once. + + (*) Request pages be read from cache [mandatory]: + + int (*read_or_alloc_pages)(struct fscache_object *object, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is like the previous operation, except it will be handed a list of + pages instead of one page. Any pages on which a read operation is started + must be added to the page cache for the specified mapping and also to the + LRU. Such pages must also be removed from the pages list and nr_pages + decremented per page. 
+ + If there was an error such as -ENOMEM, then that should be returned; else + if one or more pages couldn't be read or allocated, then -ENOBUFS should + be returned; else if one or more pages couldn't be read, then -ENODATA + should be returned. If all the pages are dispatched then 0 should be + returned. + + (*) Request page be allocated in the cache [mandatory]: + + int (*allocate_page)(struct fscache_object *object, + struct page *page, + gfp_t gfp) + + This is like read_or_alloc_page(), except that it shouldn't read from the + cache, even if there's data there that could be retrieved. It should, + however, set up any internal metadata required such that write_page() can + write to the cache. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). If a block is + successfully allocated, then the netfs page should be marked and 0 + returned. + + (*) Request page be written to cache [mandatory]: + + int (*write_page)(struct fscache_object *object, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp) + + This is called to write from a page on which there was a previously + successful read_or_alloc_page() call. FS-Cache filters out pages that + don't have mappings. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM or -EIO if there were other problems). + + If the write operation could be queued, then 0 should be returned. When + the write completes, end_io_func() should be called with the following + arguments: + + (*end_io_func)(object->cookie->netfs_data, + page, + end_io_data, + error); + + (*) Discard retained per-page metadata [mandatory]: + + void (*uncache_pages)(struct fscache_object *object, + struct pagevec *pagevec) + + This is called when one or more netfs pages are being evicted from the + pagecache. The cache backend should tear down any internal representation + or tracking it maintains. 
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt new file mode 100644 index 0000000..37b6385 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.txt @@ -0,0 +1,281 @@ + =============================================== + CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM + =============================================== + +Contents: + + (*) Overview. + + (*) Requirements. + + (*) Configuration. + + (*) Starting the cache. + + (*) Things to avoid. + + +======== +OVERVIEW +======== + +CacheFiles is a caching backend that's meant to use as a cache a directory on +an already mounted filesystem of a local type (such as Ext3). + +CacheFiles uses a userspace daemon to do some of the cache management - such as +reaping stale nodes and culling. This is called cachefilesd and lives in +/sbin. + +The filesystem and data integrity of the cache are only as good as those of the +filesystem providing the backing services. Note that CacheFiles does not +attempt to journal anything since the journalling interfaces of the various +filesystems are very specific in nature. + +CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for +communication with the daemon. Only one thing may have this open at once, and +whilst it is open, a cache is at least partially in existence. The daemon +opens this and sends commands down it to control the cache. + +CacheFiles is currently limited to a single cache. + +CacheFiles attempts to maintain at least a certain percentage of free space on +the filesystem, shrinking the cache by culling the objects it contains to make +space if necessary - see the "Cache Culling" section. This means it can be +placed on the same medium as a live set of data, and will expand to make use of +spare space and automatically contract when the set of data requires more +space. 
+ + +============ +REQUIREMENTS +============ + +The use of CacheFiles and its daemon requires the following features to be +available in the system and in the cache filesystem: + + - dnotify. + + - extended attributes (xattrs). + + - openat() and friends. + + - bmap() support on files in the filesystem (FIBMAP ioctl). + + - The use of bmap() to detect a partial page at the end of the file. + +It is strongly recommended that the "dir_index" option is enabled on Ext3 +filesystems being used as a cache. + + +============= +CONFIGURATION +============= + +The cache is configured by a script in /etc/cachefilesd.conf. These commands +set up the cache ready for use. The following script commands are available: + + (*) brun <N>% + (*) bcull <N>% + (*) bstop <N>% + + Configure the culling limits. Optional. See the section on culling. + The defaults are 7%, 5% and 1% respectively. + + (*) dir <path> + + Specify the directory containing the root of the cache. Mandatory. + + (*) tag <name> + + Specify a tag to FS-Cache to use in distinguishing multiple caches. + Optional. The default is "CacheFiles". + + (*) debug <mask> + + Specify a numeric bitmask to control debugging in the kernel module. + Optional. The default is zero (all off). The following values can be + OR'd into the mask to collect various information: + + 1 Turn on trace of function entry (_enter() macros) + 2 Turn on trace of function exit (_leave() macros) + 4 Turn on trace of internal debug points (_debug()) + + This mask can also be set through /proc/sys/fs/cachefiles/debug. + + +================== +STARTING THE CACHE +================== + +The cache is started by running the daemon. The daemon opens the cache proc +file, configures the cache and tells it to begin caching. At that point the +cache binds to fscache and the cache becomes live. + +The daemon is run as follows: + + /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>] + +The flags are: + + (*) -d + + Increase the debugging level. 
This can be specified multiple times and + is cumulative with itself. + + (*) -s + + Send messages to stderr instead of syslog. + + (*) -n + + Don't daemonise and go into background. + + (*) -f + + Use an alternative configuration file rather than the default one. + + +=============== +THINGS TO AVOID +=============== + +Do not mount other things within the cache as this will cause problems. The +kernel module contains its own very cut-down path walking facility that ignores +mountpoints, but the daemon can't avoid them. + +Do not create, rename or unlink files and directories in the cache whilst the +cache is active, as this may cause the state to become uncertain. + +Renaming files in the cache might make objects appear to be other objects (the +filename is part of the lookup key). + +Do not change or remove the extended attributes attached to cache files by the +cache as this will cause the cache state management to get confused. + +Do not create files or directories in the cache, lest the cache get confused or +serve incorrect data. + +Do not chmod files in the cache. The module creates things with minimal +permissions to prevent random users being able to access them directly. + + +============= +CACHE CULLING +============= + +The cache may need culling occasionally to make space. This involves +discarding objects from the cache that have been used less recently than +anything else. Culling is based on the access time of data objects. Empty +directories are culled if not in use. + +Cache culling is done on the basis of the percentage of blocks available in the +underlying filesystem. There are three "limits": + + (*) brun + + If the amount of available space in the cache rises above this limit, then + culling is turned off. + + (*) bcull + + If the amount of available space in the cache falls below this limit, then + culling is started. 
+ + (*) bstop + + If the amount of available space in the cache falls below this limit, then + no further allocation of disk space is permitted until culling has raised + the amount above this limit again. + +These must be configured thusly: + + 0 <= bstop < bcull < brun < 100 + +Note that these are percentages of available space, and do _not_ appear as 100 +minus the percentage displayed by the "df" program. + +The userspace daemon scans the cache to build up a table of cullable objects. +These are then culled in least recently used order. A new scan of the cache is +started as soon as space is made in the table. Objects will be skipped if +their atimes have changed or if the kernel module says it is still using them. + + +=============== +CACHE STRUCTURE +=============== + +The CacheFiles module will create two directories in the directory it was +given: + + (*) cache/ + + (*) graveyard/ + +The active cache objects all reside in the first directory. The CacheFiles +kernel module moves any retired or culled objects that it can't simply unlink +to the graveyard from which the daemon will actually delete them. + +The daemon uses dnotify to monitor the graveyard directory, and will delete +anything that appears therein. + + +The module represents index objects as directories with the filename "I..." or +"J...". Note that the "cache/" directory is itself a special index. + +Data objects are represented as files if they have no children, or directories +if they do. Their filenames all begin "D..." or "E...". If represented as a +directory, data objects will have a file in the directory called "data" that +actually holds the data. + +Special objects are similar to data objects, except their filenames begin +"S..." or "T...". + + +If an object has children, then it will be represented as a directory. +Immediately in the representative directory are a collection of directories +named for hash values of the child object keys with an '@' prepended. 
Into +this directory, if possible, will be placed the representations of the child +objects: + + INDEX INDEX INDEX DATA FILES + ========= ========== ================================= ================ + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry + + +If the key is so long that it exceeds NAME_MAX with the decorations added on to +it, then it will be cut into pieces, the first few of which will be used to +make a nest of directories, and the last one of which will be the objects +inside the last directory. The names of the intermediate directories will have +'+' prepended: + + J1223/@23/+xy...z/+kl...m/Epqr + + +Note that keys are raw data, and not only may they exceed NAME_MAX in size, +they may also contain things like '/' and NUL characters, and so they may not +be suitable for turning directly into a filename. + +To handle this, CacheFiles will use a suitably printable filename directly and +"base-64" encode ones that aren't directly suitable. The two versions of +object filenames indicate the encoding: + + OBJECT TYPE PRINTABLE ENCODED + =============== =============== =============== + Index "I..." "J..." + Data "D..." "E..." + Special "S..." "T..." + +Intermediate directories are always "@" or "+" as appropriate. + + +Each object in the cache has an extended attribute label that holds the object +type ID (required to distinguish special objects) and the auxiliary data from +the netfs. The latter is used to detect stale objects in the cache and update +or retire them. + + +Note that CacheFiles will erase from the cache any file it doesn't recognise or +any file of an incorrect type (such as a FIFO file or a device file). 
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt new file mode 100644 index 0000000..82c3168 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.txt @@ -0,0 +1,151 @@ + ========================== + General Filesystem Caching + ========================== + +======== +OVERVIEW +======== + +This facility is a general purpose cache for network filesystems, though it +could be used for caching other things such as ISO9660 filesystems too. + +FS-Cache mediates between cache backends (such as CacheFS) and network +filesystems: + + +---------+ + | | +--------------+ + | NFS |--+ | | + | | | +-->| CacheFS | + +---------+ | +----------+ | | /dev/hda5 | + | | | | +--------------+ + +---------+ +-->| | | + | | | |--+ + | AFS |----->| FS-Cache | + | | | |--+ + +---------+ +-->| | | + | | | | +--------------+ + +---------+ | +----------+ | | | + | | | +-->| CacheFiles | + | ISOFS |--+ | /var/cache | + | | +--------------+ + +---------+ + + +FS-Cache does not follow the idea of completely loading every netfs file +opened in its entirety into a cache before permitting it to be accessed and +then serving the pages out of that cache rather than the netfs inode because: + + (1) It must be practical to operate without a cache. + + (2) The size of any accessible file must not be limited to the size of the + cache. + + (3) The combined size of all opened files (this includes mapped libraries) + must not be limited to the size of the cache. + + (4) The user should not be forced to download an entire file just to do a + one-off access of a small portion of it (such as might be done with the + "file" program). + +It instead serves the cache out in PAGE_SIZE chunks as and when requested by +the netfs('s) using it. + + +FS-Cache provides the following facilities: + + (1) More than one cache can be used at once. Caches can be selected + explicitly by use of tags. + + (2) Caches can be added / removed at any time. 
+ + (3) The netfs is provided with an interface that allows either party to + withdraw caching facilities from a file (required for (2)). + + (4) The interface to the netfs returns as few errors as possible, preferring + rather to let the netfs remain oblivious. + + (5) Cookies are used to represent indices, files and other objects to the + netfs. The simplest cookie is just a NULL pointer - indicating nothing + cached there. + + (6) The netfs is allowed to propose - dynamically - any index hierarchy it + desires, though it must be aware that the index search function is + recursive, stack space is limited, and indices can only be children of + indices. + + (7) Data I/O is done direct to and from the netfs's pages. The netfs + indicates that page A is at index B of the data-file represented by cookie + C, and that it should be read or written. The cache backend may or may + not start I/O on that page, but if it does, a netfs callback will be + invoked to indicate completion. The I/O may be either synchronous or + asynchronous. + + (8) Cookies can be "retired" upon release. At this point FS-Cache will mark + them as obsolete and the index hierarchy rooted at that point will get + recycled. + + (9) The netfs provides a "match" function for index searches. In addition to + saying whether a match was made or not, this can also specify that an + entry should be updated or deleted. + + +FS-Cache maintains a virtual indexing tree in which all indices, files, objects +and pages are kept. Bits of this tree may actually reside in one or more +caches. 
+ + FSDEF + | + +------------------------------------+ + | | + NFS AFS + | | + +--------------------------+ +-----------+ + | | | | + homedir mirror afs.org redhat.com + | | | + +------------+ +---------------+ +----------+ + | | | | | | + 00001 00002 00007 00125 vol00001 vol00002 + | | | | | + +---+---+ +-----+ +---+ +------+------+ +-----+----+ + | | | | | | | | | | | | | +PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak + | | + PG0 +-------+ + | | + 00001 00003 + | + +---+---+ + | | | + PG0 PG1 PG2 + +In the example above, you can see two netfs's being backed: NFS and AFS. These +have different index hierarchies: + + (*) The NFS primary index contains per-server indices. Each server index is + indexed by NFS file handles to get data file objects. Each data file + object can have an array of pages, but may also have further child + objects, such as extended attributes and directory entries. Extended + attribute objects themselves have page-array contents. + + (*) The AFS primary index contains per-cell indices. Each cell index contains + per-logical-volume indices. Each volume index contains up to three + indices for the read-write, read-only and backup mirrors of those volumes. + Each of these contains vnode data file objects, each of which contains an + array of pages. + +The very top index is the FS-Cache master index in which individual netfs's +have entries. + +Any index object may reside in more than one cache, provided it only has index +children. Any index with non-index object children will be assumed to only +reside in one cache. 
+ + +The netfs API to FS-Cache can be found in: + + Documentation/filesystems/caching/netfs-api.txt + +The cache backend API to FS-Cache can be found in: + + Documentation/filesystems/caching/backend-api.txt diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt new file mode 100644 index 0000000..a1a182b --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -0,0 +1,752 @@ + =============================== + FS-CACHE NETWORK FILESYSTEM API + =============================== + +There's an API by which a network filesystem can make use of the FS-Cache +facilities. This is based around a number of principles: + + (1) Caches can store a number of different object types. There are two main + object types: indices and files. The first is a special type used by + FS-Cache to make finding objects faster and to make retiring of groups of + objects easier. + + (2) Every index, file or other object is represented by a cookie. This cookie + may or may not have anything associated with it, but the netfs doesn't + need to care. + + (3) Barring the top-level index (one entry per cached netfs), the index + hierarchy for each netfs is structured according to the whim of the netfs. + +This API is declared in <linux/fscache.h>. + +This document contains the following sections: + + (1) Network filesystem definition + (2) Index definition + (3) Object definition + (4) Network filesystem (un)registration + (5) Cache tag lookup + (6) Index registration + (7) Data file registration + (8) Miscellaneous object registration + (9) Setting the data file size + (10) Page alloc/read/write + (11) Page uncaching + (12) Index and data file update + (13) Miscellaneous cookie operations + (14) Cookie unregistration + (15) Index and data file invalidation + + +============================= +NETWORK FILESYSTEM DEFINITION +============================= + +FS-Cache needs a description of the network filesystem. 
This is specified +using a record of the following structure: + + struct fscache_netfs { + uint32_t version; + const char *name; + struct fscache_netfs_operations *ops; + struct fscache_cookie *primary_index; + ... + }; + +The first three fields should be filled in before registration, and the fourth +will be filled in by the registration function; any other fields should just be +ignored and are for internal use only. + +The fields are: + + (1) The name of the netfs (used as the key in the toplevel index). + + (2) The version of the netfs (if the name matches but the version doesn't, the + entire in-cache hierarchy for this netfs will be scrapped and begun + afresh). + + (3) The operations table is defined as follows: + + struct fscache_netfs_operations { + }; + + Currently there aren't any functions here. + + (4) The cookie representing the primary index will be allocated according to + another parameter passed into the registration function. + +For example, kAFS (linux/fs/afs/) uses the following definitions to describe +itself: + + static struct fscache_netfs_operations afs_cache_ops = { + }; + + struct fscache_netfs afs_cache_netfs = { + .version = 0, + .name = "afs", + .ops = &afs_cache_ops, + }; + + +================ +INDEX DEFINITION +================ + +Indices are used for two purposes: + + (1) To aid the finding of a file based on a series of keys (such as AFS's + "cell", "volume ID", "vnode ID"). + + (2) To make it easier to discard a subset of all the files cached based around + a particular key - for instance to mirror the removal of an AFS volume. + +However, since it's unlikely that any two netfs's are going to want to define +their index hierarchies in quite the same way, FS-Cache tries to impose as few +restraints as possible on how an index is structured and where it is placed in +the tree. The netfs can even mix indices and data files at the same level, but +it's not recommended.
+ +Each index entry consists of a key of indeterminate length plus some auxiliary +data, also of indeterminate length. + +There are some limits on indices: + + (1) Any index containing non-index objects should be restricted to a single + cache. Any such objects created within an index will be created in the + first cache only. The cache in which an index is created can be + controlled by cache tags (see below). + + (2) The entry data must be atomically journallable, so it is limited to about + 400 bytes at present. At least 400 bytes will be available. + + (3) The depth of the index tree should be judged with care as the search + function is recursive. Too many layers will run the kernel out of stack. + + +================= +OBJECT DEFINITION +================= + +To define an object, a structure of the following type should be filled out: + + struct fscache_object_def + { + uint8_t name[16]; + uint8_t type; + + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + uint16_t (*get_key)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + void (*get_attr)(const void *cookie_netfs_data, + uint64_t *size); + + uint16_t (*get_aux)(const void *cookie_netfs_data, + void *buffer, + uint16_t bufmax); + + fscache_checkaux_t (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen); + + void (*get_context)(void *cookie_netfs_data, void *context); + + void (*put_context)(void *cookie_netfs_data, void *context); + + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + + void (*now_uncached)(void *cookie_netfs_data); + }; + +This has the following fields: + + (1) The type of the object [mandatory]. + + This is one of the following values: + + (*) FSCACHE_COOKIE_TYPE_INDEX + + This defines an index, which is a special FS-Cache type. + + (*) FSCACHE_COOKIE_TYPE_DATAFILE + + This defines an ordinary data file.
+ + (*) Any other value between 2 and 255 + + This defines an extraordinary object such as an XATTR. + + (2) The name of the object type (NUL terminated unless all 16 chars are used) + [optional]. + + (3) A function to select the cache in which to store an index [optional]. + + This function is invoked when an index needs to be instantiated in a cache + during the instantiation of a non-index object. Only the immediate index + parent for the non-index object will be queried. Any indices above that + in the hierarchy may be stored in multiple caches. This function does not + need to be supplied for any non-index object or any index that will only + have index children. + + If this function is not supplied or if it returns NULL then the first + cache in the parent's list will be chosen, or failing that, the first + cache in the master list. + + (4) A function to retrieve an object's key from the netfs [mandatory]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of key data that it may + provide. It should write the required key data into the given buffer and + return the quantity it wrote. + + (5) A function to retrieve attribute data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function. It should return the size of the file if + this is a data file. The size may be used to govern how much cache must + be reserved for this file in the cache. + + If the function is absent, a file size of 0 is assumed. + + (6) A function to retrieve auxiliary data from the netfs [optional]. + + This function will be called with the netfs data that was passed to the + cookie acquisition function and the maximum length of auxiliary data that + it may provide. It should write the auxiliary data into the given buffer + and return the quantity it wrote. + + If this function is absent, the auxiliary data length will be set to 0.
+ + The length of the auxiliary data buffer may be dependent on the key + length. A netfs mustn't rely on being able to provide more than 400 bytes + for both. + + (7) A function to check the auxiliary data [optional]. + + This function will be called to check that a match found in the cache for + this object is valid. For instance with AFS it could check the auxiliary + data against the data version number returned by the server to determine + whether the index entry in a cache is still valid. + + If this function is absent, it will be assumed that matching objects in a + cache are always valid. + + If present, the function should return one of the following values: + + (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is + (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update + (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted + + This function can also be used to extract data from the auxiliary data in + the cache and copy it into the netfs's structures. + + (8) A pair of functions to manage contexts for the completion callback + [optional]. + + The cache read/write functions are passed a context which is then passed + to the I/O completion callback function. To ensure this context remains + valid until after the I/O completion is called, two functions may be + provided: one to get an extra reference on the context, and one to drop a + reference to it. + + If the context is not used or is a type of object that won't go out of + scope, then these functions are not required. These functions are not + required for indices as indices may not contain data. These functions may + be called in interrupt context and so may not sleep. + + (9) A function to mark a page as retaining cache metadata [mandatory]. + + This is called by the cache to indicate that it is retaining in-memory + information for this page and that the netfs should uncache the page when + it has finished. This does not indicate whether there's data on the disk + or not.
Note that several pages at once may be presented for marking. + + kAFS and NFS use the PG_private bit on the page structure for this, but + that may not be appropriate in all cases. + + This function is not required for indices as they're not permitted data. + +(10) A function to unmark all the pages retaining cache metadata [mandatory]. + + This is called by FS-Cache to indicate that a backing store is being + unbound from a cookie and that all the marks on the pages should be + cleared to prevent confusion. Note that the cache will have torn down all + its tracking information so that the pages don't need to be explicitly + uncached. + + This function is not required for indices as they're not permitted data. + + +=================================== +NETWORK FILESYSTEM (UN)REGISTRATION +=================================== + +The first step is to declare the network filesystem to the cache. This also +involves specifying the layout of the primary index (for AFS, this would be the +"cell" level). + +The registration function is: + + int fscache_register_netfs(struct fscache_netfs *netfs); + +It just takes a pointer to the netfs definition. It returns 0 or an error as +appropriate. + +For kAFS, registration is done as follows: + + ret = fscache_register_netfs(&afs_cache_netfs); + +The last step is, of course, unregistration: + + void fscache_unregister_netfs(struct fscache_netfs *netfs); + + +================ +CACHE TAG LOOKUP +================ + +FS-Cache permits the use of more than one cache. To permit particular index +subtrees to be bound to particular caches, the second step is to look up cache +representation tags. This step is optional; it can be left entirely up to +FS-Cache as to which cache should be used. The problem with doing that is that +FS-Cache will always pick the first cache that was registered. 
+ +To get the representation for a named tag: + + struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); + +This takes a text string as the name and returns a representation of a tag. It +will never return an error. It may return a dummy tag, however, if it runs out +of memory; this will inhibit caching with this tag. + +Any representation so obtained must be released by passing it to this function: + + void fscache_release_cache_tag(struct fscache_cache_tag *tag); + +The tag will be retrieved by FS-Cache when it calls the object definition +operation select_cache(). + + +================== +INDEX REGISTRATION +================== + +The third step is to inform FS-Cache about part of an index hierarchy that can +be used to locate files. This is done by requesting a cookie for each index in +the path to the file: + + struct fscache_cookie * + fscache_acquire_cookie(struct fscache_cookie *parent, + struct fscache_object_def *def, + void *netfs_data); + +This function creates an index entry in the index represented by parent, +filling in the index entry by calling the operations pointed to by def. + +Note that this function never returns an error - all errors are handled +internally. It may also return NULL to indicate no cookie. It is quite +acceptable to pass this token back to this function as the parent to another +acquisition (or even to the relinquish cookie, read page and write page +functions - see below). + +Note also that no indices are actually created in a cache until a non-index +object needs to be created somewhere down the hierarchy. Furthermore, an index +may be created in several different caches independently at different times. +This is all handled transparently, and the netfs doesn't see any of it. + +For example, with AFS, a cell would be added to the primary index. 
This index +entry would have a dependent inode containing a volume location index for the +volume mappings within this cell: + + cell->cache = + fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); + +Then when a volume location was accessed, it would be entered into the cell's +index and an inode would be allocated that acts as a volume type and hash chain +combination: + + vlocation->cache = + fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); + +And then a particular flavour of volume (R/O for example) could be added to +that index, creating another index for vnodes (AFS inode equivalents): + + volume->cache = + fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); + + +====================== +DATA FILE REGISTRATION +====================== + +The fourth step is to request a data file be created in the cache. This is +identical to index cookie acquisition. The only difference is that the type in +the object definition should be something other than index type. + + vnode->cache = + fscache_acquire_cookie(volume->cache, + &afs_vnode_cache_object_def, + vnode); + + +================================= +MISCELLANEOUS OBJECT REGISTRATION +================================= + +An optional step is to request an object of miscellaneous type be created in +the cache. This is almost identical to index cookie acquisition. The only +difference is that the type in the object definition should be something other +than index type. Whilst the parent object could be an index, it's more likely +it would be some other type of object such as a data file. + + xattr->cache = + fscache_acquire_cookie(vnode->cache, + &afs_xattr_cache_object_def, + xattr); + +Miscellaneous objects might be used to store extended attributes or directory +entries for example. 
+ + +========================== +SETTING THE DATA FILE SIZE +========================== + +The fifth step is to set the size of the file. This doesn't automatically +reserve any space in the cache, but permits the cache to adjust its metadata +for data tracking appropriately: + + int fscache_set_i_size(struct fscache_cookie *cookie, loff_t i_size); + +The cache will return -ENOBUFS if there is no backing cache or if there is no +space to allocate any extra metadata required in the cache. + +Note that attempts to read or write data pages in the cache over this size may +be rebuffed with -ENOBUFS. + + +===================== +PAGE READ/ALLOC/WRITE +===================== + +And the sixth step is to store and retrieve pages in the cache. There are +three functions that are used to do this. + +Note: + + (1) A page should not be re-read or re-allocated without uncaching it first. + + (2) A read or allocated page must be uncached when the netfs page is released + from the pagecache. + + (3) A page should only be written to the cache if previously read or allocated. + +This permits the cache to maintain its page tracking in proper order. + + +PAGE READ +--------- + +Firstly, the netfs should ask FS-Cache to examine the caches and read the +contents cached for a particular page of a particular file if present, or else +allocate space to store the contents if not: + + typedef + void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + + int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + gfp_t gfp); + +The cookie argument must specify a cookie for an object that isn't an index, +the page specified will have the data loaded into it (and is also used to +specify the page number), and the gfp argument is used to control how any +memory allocations made are satisfied. + +If the cookie indicates the inode is not cached: + + (1) The function will return -ENOBUFS.
+ +Else if there's a copy of the page resident in the cache: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) The function will submit a request to read the data from the cache's + backing device directly into the page specified. + + (3) The function will return 0. + + (4) When the read is complete, end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the above function. This will be + maintained with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error code. + + If an error occurs, it should be assumed that the page contains no usable + data. + + end_io_func() will be called in process context if the read results in + an error, but it might be called in interrupt context if the read is + successful. + +Otherwise, if there's not a copy available in cache, but the cache may be able +to store the page: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) A block may be reserved in the cache and attached to the object at the + appropriate place. + + (3) The function will return -ENODATA. + +This function may also return -ENOMEM or -EINTR, in which case it won't have +read any data from the cache. + + +PAGE ALLOCATE +------------- + +Alternatively, if there's not expected to be any data in the cache for a page +because the file has been extended, a block can simply be allocated instead: + + int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This is similar to the fscache_read_or_alloc_page() function, except that it +never reads from the cache. It will return 0 if a block has been allocated, +rather than -ENODATA as the other would. One or the other must be performed +before writing to the cache. + +The mark_pages_cached() cookie operation will be called on the page if +successful.
+ + +PAGE WRITE +---------- + +Secondly, if the netfs changes the contents of the page (either due to an +initial download or if a user performs a write), then the page should be +written back to the cache: + + int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +The cookie argument must specify a data file cookie, the page specified should +contain the data to be written (and is also used to specify the page number), +and the gfp argument is used to control how any memory allocations made are +satisfied. + +The page must have first been read or allocated successfully and must not have +been uncached before writing is performed. + +If the cookie indicates the inode is not cached then: + + (1) The function will return -ENOBUFS. + +Else if space can be allocated in the cache to hold this page: + + (1) The function will submit a request to write the data to cache's backing + device directly from the page specified. + + (2) The function will return 0. + + (3) When the write is complete the end_io_func() will be invoked with: + + (*) The netfs data supplied when the cookie was created. + + (*) The page descriptor. + + (*) The context argument passed to the function. This will be maintained + with the get_context/put_context functions mentioned above. + + (*) An argument that's 0 on success or negative for an error. + + If an error occurs, it can be assumed that the page has not been written + to the cache, and that either there's a block containing the old data or + no block at all in the cache. + + end_io_func() might be called in interrupt context. + +Else if there's no space available in the cache, -ENOBUFS will be returned. 
+ + +MULTIPLE PAGE READ +------------------ + +A facility is provided to read several pages at once, as requested by the +readpages() address space operation: + + int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + int *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +This works in a similar way to fscache_read_or_alloc_page(), except: + + (1) Any page it can retrieve data for is removed from pages and nr_pages and + dispatched for reading to the disk. Reads of adjacent pages on disk may + be merged for greater efficiency. + + (2) The mark_pages_cached() cookie operation will be called on several pages + at once if they're being read or allocated. + + (3) If there was a general error, then that error will be returned. + + Else if some pages couldn't be allocated or read, then -ENOBUFS will be + returned. + + Else if some pages couldn't be read but were allocated, then -ENODATA will + be returned. + + Otherwise, if all pages had reads dispatched, then 0 will be returned, the + list will be empty and *nr_pages will be 0. + + (4) end_io_func will be called once for each page being read as the reads + complete. It will be called in process context if error != 0, but it may + be called in interrupt context if there is no error. + +Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude +some of the pages being read and some being allocated. Those pages will have +been marked appropriately and will need uncaching. + + +============== +PAGE UNCACHING +============== + +To uncache a page, this function should be called: + + void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); + +This function permits the cache to release any in-memory representation it +might be holding for this netfs page.
This function must be called once for +each page on which the read or write page functions above have been called to +make sure the cache's in-memory tracking information gets torn down. + +Note that pages can't be explicitly deleted from a data file. The whole +data file must be retired (see the relinquish cookie function below). + +Furthermore, note that this does not cancel the asynchronous read or write +operation started by the read/alloc and write functions. + +There is another unbinding operation similar to the above that takes a set of +pages to unbind in one go: + + void fscache_uncache_pagevec(struct fscache_cookie *cookie, + struct pagevec *pagevec); + + +========================== +INDEX AND DATA FILE UPDATE +========================== + +To request an update of the index data for an index or other object, the +following function should be called: + + void fscache_update_cookie(struct fscache_cookie *cookie); + +This function will refer back to the netfs_data pointer stored in the cookie by +the acquisition function to obtain the data to write into each revised index +entry. The update method in the parent index definition will be called to +transfer the data. + +Note that partial updates may happen automatically at other times, such as when +data blocks are added to a data file object. + + +=============================== +MISCELLANEOUS COOKIE OPERATIONS +=============================== + +There are a number of operations that can be used to control cookies: + + (*) Cookie pinning: + + int fscache_pin_cookie(struct fscache_cookie *cookie); + void fscache_unpin_cookie(struct fscache_cookie *cookie); + + These operations permit data cookies to be pinned into the cache and to + have the pinning removed. They are not permitted on index cookies.
+ + The pinning function will return 0 if successful, -ENOBUFS if the cookie + isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + (*) Data space reservation: + + int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); + + This permits a netfs to request cache space be reserved to store up to the + given amount of a file. It is permitted to ask for more than the current + size of the file to allow for future file expansion. + + If size is given as zero then the reservation will be cancelled. + + The function will return 0 if successful, -ENOBUFS if the cookie isn't + backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + Note that this doesn't pin an object in a cache; it can still be culled to + make space if it's not in use. + + +===================== +COOKIE UNREGISTRATION +===================== + +To get rid of a cookie, this function should be called. + + void fscache_relinquish_cookie(struct fscache_cookie *cookie, + int retire); + +If retire is non-zero, then the object will be marked for recycling, and all +copies of it will be removed from all active caches in which it is present. +Not only that but all child objects will also be retired. + +If retire is zero, then the object may be available again when next the +acquisition function is called. Retirement here will overrule the pinning on a +cookie. + +One very important note - relinquish must NOT be called for a cookie unless all +the cookies for "child" indices, objects and pages have been relinquished +first. + + +================================ +INDEX AND DATA FILE INVALIDATION +================================ + +There is no direct way to invalidate an index subtree or a data file.
To do +this, the caller should relinquish and retire the cookie they have, and then +acquire a new one. diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 0b62c62..ead15f0 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -71,7 +71,7 @@ you might want to raise the limit. ============================================================== -file-max & file-nr: +file-max, file-nr & file-kernel: The kernel allocates file handles dynamically, but as yet it doesn't free them again. @@ -88,6 +88,10 @@ close to the maximum, but the number of significantly greater than 0, you've encountered a peak in your usage of file handles and you don't need to increase the maximum. +The value in file-kernel denotes the number of internal file handles +that the kernel has open. These do not contribute to ENFILE +accounting. + ============================================================== inode-max, inode-nr & inode-state: diff --git a/fs/Kconfig b/fs/Kconfig index 53f5c6d..12e77a1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -530,6 +530,41 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +menu "Caches" + +config FSCACHE + tristate "General filesystem cache manager" + depends on EXPERIMENTAL + help + This option enables a generic filesystem caching manager that can be + used by various network and other filesystems to cache data + locally. Different sorts of caches can be plugged in, depending on the + resources available. + + See Documentation/filesystems/caching/fscache.txt for more information. + +config CACHEFILES + tristate "Filesystem caching on files" + depends on FSCACHE + help + This permits use of a mounted filesystem as a cache for other + filesystems - primarily networking filesystems - thus allowing fast + local disk to enhance the speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. 
+ +config CACHEFILES_DEBUG + bool "Debug CacheFiles" + depends on CACHEFILES + help + This permits debugging to be dynamically enabled in the filesystem + caching on files module. If this is set, the debugging output may be + enabled by setting bits in /proc/sys/fs/cachefiles/debug or by + including a debugging specifier in /etc/cachefilesd.conf. + +endmenu + menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -1470,6 +1505,13 @@ config NFS_V4 If unsure, say N. +config NFS_FSCACHE + bool "Provide NFS client caching support (EXPERIMENTAL)" + depends on NFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want NFS data to be cached locally on disc through + the general filesystem cache manager + config NFS_DIRECTIO bool "Allow direct I/O on NFS files (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1906,6 +1948,13 @@ # for fs/nls/Config.in If unsure, say N. +config AFS_FSCACHE + bool "Provide AFS client caching support" + depends on AFS_FS && FSCACHE && EXPERIMENTAL + help + Say Y here if you want AFS data to be cached locally on through the + generic filesystem cache manager + config RXRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 8913542..e5efb4b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o # Do not add any filesystems before this line +obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 obj-$(CONFIG_JBD) += jbd/ @@ -100,5 +101,6 @@ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ +obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ diff --git a/fs/afs/cache.h b/fs/afs/cache.h deleted file mode 100644 index 9eb7722..0000000 --- a/fs/afs/cache.h +++ /dev/null @@ -1,27 +0,0 @@ -/* cache.h: AFS local cache management interface - * - * Copyright (C) 2002 Red 
Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_AFS_CACHE_H -#define _LINUX_AFS_CACHE_H - -#undef AFS_CACHING_SUPPORT - -#include -#ifdef AFS_CACHING_SUPPORT -#include -#endif -#include "types.h" - -#ifdef __KERNEL__ - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_AFS_CACHE_H */ diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bfc1fd2..3aaeada 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -31,17 +31,21 @@ static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static struct afs_cell *afs_cell_root; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry); -static void afs_cell_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_cache_cell_index_def = { - .name = "cell_ix", - .data_size = sizeof(struct afs_cache_cell), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_cell_cache_match, - .update = afs_cell_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_cell_cache_index_def = { + .name = "AFS cell", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_cell_cache_get_key, + .get_aux = afs_cell_cache_get_aux, + .check_aux = afs_cell_cache_check_aux, }; #endif @@ -115,12 +119,11 @@ int afs_cell_create(const char *name, ch if (ret < 0) goto error; -#ifdef 
AFS_CACHING_SUPPORT - /* put it up for caching */ - cachefs_acquire_cookie(afs_cache_netfs.primary_index, - &afs_vlocation_cache_index_def, - cell, - &cell->cache); +#ifdef CONFIG_AFS_FSCACHE + /* put it up for caching (this never returns an error) */ + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell); #endif /* add to the cell lists */ @@ -345,8 +348,8 @@ static void afs_cell_destroy(struct afs_ list_del_init(&cell->proc_link); up_write(&afs_proc_cells_sem); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(cell->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(cell->cache, 0); #endif up_write(&afs_cells_sem); @@ -525,44 +528,62 @@ void afs_cell_purge(void) /*****************************************************************************/ /* - * match a cell record obtained from the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_cell_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = target; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", ccell->name, cell->name); + _enter("%p,%p,%u", cell, buffer, bufmax); - if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } + klen = strlen(cell->name); + if (klen > bufmax) + return 0; + + memcpy(buffer, cell->name, klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_cell_cache_match() */ +} /* end afs_cell_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a cell record in the cache + * provide new auxilliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void 
afs_cell_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_cell *ccell = entry; - struct afs_cell *cell = source; + const struct afs_cell *cell = cookie_netfs_data; + uint16_t dlen; - _enter("%p,%p", source, entry); + _enter("%p,%p,%u", cell, buffer, bufmax); - strncpy(ccell->name, cell->name, sizeof(ccell->name)); + dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); + dlen = min(dlen, bufmax); + dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - memcpy(ccell->vl_servers, - cell->vl_addrs, - min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); + memcpy(buffer, cell->vl_addrs, dlen); + + return dlen; + +} /* end afs_cell_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxilliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_cell_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_cell_cache_update() */ +} /* end afs_cell_cache_check_aux() */ #endif diff --git a/fs/afs/cell.h b/fs/afs/cell.h index 4834910..d670502 100644 --- a/fs/afs/cell.h +++ b/fs/afs/cell.h @@ -13,7 +13,7 @@ #ifndef _LINUX_AFS_CELL_H #define _LINUX_AFS_CELL_H #include "types.h" -#include "cache.h" +#include #define AFS_CELL_MAX_ADDRS 15 @@ -21,16 +21,6 @@ extern volatile int afs_cells_being_purg /*****************************************************************************/ /* - * entry in the cached cell catalogue - */ -struct afs_cache_cell -{ - char name[64]; /* cell name (padded with NULs) */ - struct in_addr vl_servers[15]; /* cached cell VL servers */ -}; - -/*****************************************************************************/ -/* * AFS cell record */ struct afs_cell @@ -39,8 +29,8 @@ struct afs_cell 
struct list_head link; /* main cell list link */ struct list_head proc_link; /* /proc cell list link */ struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif /* server record management */ diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3d097fd..f87d5a7 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -24,7 +24,7 @@ #include "cmservice.h" #include "internal.h" static unsigned afscm_usage; /* AFS cache manager usage count */ -static struct rw_semaphore afscm_sem; /* AFS cache manager start/stop semaphore */ +static DECLARE_RWSEM(afscm_sem); /* AFS cache manager start/stop semaphore */ static int afscm_new_call(struct rxrpc_call *call); static void afscm_attention(struct rxrpc_call *call); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2fc9987..9800a07 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -145,7 +145,7 @@ #endif qty /= sizeof(union afs_dir_block); /* check them */ - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", @@ -154,12 +154,12 @@ #endif goto error; } } + kunmap_atomic(dbuf, KM_USER0); - SetPageChecked(page); return; error: - SetPageChecked(page); + kunmap_atomic(dbuf, KM_USER0); SetPageError(page); } /* end afs_dir_check_page() */ @@ -170,7 +170,6 @@ #endif */ static inline void afs_dir_put_page(struct page *page) { - kunmap(page); page_cache_release(page); } /* end afs_dir_put_page() */ @@ -188,11 +187,9 @@ static struct page *afs_dir_get_page(str page = read_mapping_page(dir->i_mapping, index, NULL); if (!IS_ERR(page)) { wait_on_page_locked(page); - kmap(page); if (!PageUptodate(page)) goto fail; - if (!PageChecked(page)) - afs_dir_check_page(dir, page); + afs_dir_check_page(dir, page); if 
(PageError(page)) goto fail; } @@ -357,7 +354,7 @@ static int afs_dir_iterate(struct inode limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap_atomic(page, KM_USER0); /* deal with the individual blocks stashed on this page */ do { @@ -366,6 +363,7 @@ static int afs_dir_iterate(struct inode ret = afs_dir_iterate_block(fpos, dblock, blkoff, cookie, filldir); if (ret != 1) { + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); goto out; } @@ -374,6 +372,7 @@ static int afs_dir_iterate(struct inode } while (*fpos < dir->i_size && blkoff < limit); + kunmap_atomic(dbuf, KM_USER0); afs_dir_put_page(page); ret = 0; } diff --git a/fs/afs/file.c b/fs/afs/file.c index 67d6634..e8e3680 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,12 +16,15 @@ #include #include #include #include +#include #include #include "volume.h" #include "vnode.h" #include #include "internal.h" +#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) + #if 0 static int afs_file_open(struct inode *inode, struct file *file); static int afs_file_release(struct inode *inode, struct file *file); @@ -30,34 +33,74 @@ #endif static int afs_file_readpage(struct file *file, struct page *page); static void afs_file_invalidatepage(struct page *page, unsigned long offset); static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_file_mmap(struct file * file, struct vm_area_struct * vma); + +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages); +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page); +#endif struct inode_operations afs_file_inode_operations = { .getattr = afs_inode_getattr, }; +const struct file_operations afs_file_file_operations = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .mmap = afs_file_mmap, + .sendfile = generic_file_sendfile, +}; + const struct address_space_operations 
afs_fs_aops = { .readpage = afs_file_readpage, +#ifdef CONFIG_AFS_FSCACHE + .readpages = afs_file_readpages, +#endif .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = afs_file_releasepage, .invalidatepage = afs_file_invalidatepage, }; +static struct vm_operations_struct afs_fs_vm_operations = { + .nopage = filemap_nopage, + .populate = filemap_populate, +#ifdef CONFIG_AFS_FSCACHE + .page_mkwrite = afs_file_page_mkwrite, +#endif +}; + +/*****************************************************************************/ +/* + * set up a memory mapping on an AFS file + * - we set our own VMA ops so that we can catch the page becoming writable for + * userspace for shared-writable mmap + */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + _enter(""); + + file_accessed(file); + vma->vm_ops = &afs_fs_vm_operations; + return 0; + +} /* end afs_file_mmap() */ + /*****************************************************************************/ /* * deal with notification that a page was read from the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_read_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_read_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + _enter("%p,%p,%d", page, data, error); - if (error) - SetPageError(page); - else + /* if the read completes with an error, we just unlock the page and let + * the VM reissue the readpage */ + if (!error) SetPageUptodate(page); unlock_page(page); @@ -68,15 +111,16 @@ #endif /* * deal with notification that a page was written to the cache */ -#ifdef AFS_CACHING_SUPPORT -static void afs_file_readpage_write_complete(void *cookie_data, - struct page *page, +#ifdef CONFIG_AFS_FSCACHE +static void afs_file_readpage_write_complete(struct page *page, void *data, int error) { - _enter("%p,%p,%p,%d", cookie_data, page, data, error); + 
_enter("%p,%p,%d", page, data, error); - unlock_page(page); + /* note that the page has been written to the cache and can now be + * modified */ + end_page_fs_misc(page); } /* end afs_file_readpage_write_complete() */ #endif @@ -88,16 +132,13 @@ #endif static int afs_file_readpage(struct file *file, struct page *page) { struct afs_rxfs_fetch_descriptor desc; -#ifdef AFS_CACHING_SUPPORT - struct cachefs_page *pageio; -#endif struct afs_vnode *vnode; struct inode *inode; int ret; inode = page->mapping->host; - _enter("{%lu},{%lu}", inode->i_ino, page->index); + _enter("{%lu},%p{%lu}", inode->i_ino, page, page->index); vnode = AFS_FS_I(inode); @@ -107,13 +148,9 @@ #endif if (vnode->flags & AFS_VNODE_DELETED) goto error; -#ifdef AFS_CACHING_SUPPORT - ret = cachefs_page_get_private(page, &pageio, GFP_NOIO); - if (ret < 0) - goto error; - +#ifdef CONFIG_AFS_FSCACHE /* is it cached? */ - ret = cachefs_read_or_alloc_page(vnode->cache, + ret = fscache_read_or_alloc_page(vnode->cache, page, afs_file_readpage_read_complete, NULL, @@ -123,18 +160,20 @@ #else #endif switch (ret) { - /* read BIO submitted and wb-journal entry found */ - case 1: - BUG(); // TODO - handle wb-journal match - /* read BIO submitted (page in cache) */ case 0: break; - /* no page available in cache */ - case -ENOBUFS: + /* page not yet cached */ case -ENODATA: + _debug("cache said ENODATA"); + goto go_on; + + /* page will not be cached */ + case -ENOBUFS: + _debug("cache said ENOBUFS"); default: + go_on: desc.fid = vnode->fid; desc.offset = page->index << PAGE_CACHE_SHIFT; desc.size = min((size_t) (inode->i_size - desc.offset), @@ -148,34 +187,40 @@ #endif ret = afs_vnode_fetch_data(vnode, &desc); kunmap(page); if (ret < 0) { - if (ret==-ENOENT) { - _debug("got NOENT from server" + if (ret == -ENOENT) { + kdebug("got NOENT from server" " - marking file deleted and stale"); vnode->flags |= AFS_VNODE_DELETED; ret = -ESTALE; } -#ifdef AFS_CACHING_SUPPORT - cachefs_uncache_page(vnode->cache, page); +#ifdef 
CONFIG_AFS_FSCACHE + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); #endif goto error; } SetPageUptodate(page); -#ifdef AFS_CACHING_SUPPORT - if (cachefs_write_page(vnode->cache, - page, - afs_file_readpage_write_complete, - NULL, - GFP_KERNEL) != 0 - ) { - cachefs_uncache_page(vnode->cache, page); - unlock_page(page); + /* send the page to the cache */ +#ifdef CONFIG_AFS_FSCACHE + if (PagePrivate(page)) { + if (TestSetPageFsMisc(page)) + BUG(); + if (fscache_write_page(vnode->cache, + page, + afs_file_readpage_write_complete, + NULL, + GFP_KERNEL) != 0 + ) { + fscache_uncache_page(vnode->cache, page); + ClearPagePrivate(page); + end_page_fs_misc(page); + } } -#else - unlock_page(page); #endif + unlock_page(page); } _leave(" = 0"); @@ -192,20 +237,63 @@ #endif /*****************************************************************************/ /* - * get a page cookie for the specified page + * read a set of pages */ -#ifdef AFS_CACHING_SUPPORT -int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie) +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) { - int ret; + struct afs_vnode *vnode; +#if 0 + struct pagevec lru_pvec; + unsigned page_idx; +#endif + int ret = 0; - _enter(""); - ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); + _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); - _leave(" = %d", ret); + vnode = AFS_FS_I(mapping->host); + if (vnode->flags & AFS_VNODE_DELETED) { + _leave(" = -ESTALE"); + return -ESTALE; + } + + /* attempt to read as many of the pages as possible */ + ret = fscache_read_or_alloc_pages(vnode->cache, + mapping, + pages, + &nr_pages, + afs_file_readpage_read_complete, + NULL, + mapping_gfp_mask(mapping)); + + switch (ret) { + /* all pages are being read from the cache */ + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(nr_pages != 0); + _leave(" = 0 [reading all]"); + 
return 0; + + /* there were pages that couldn't be read from the cache */ + case -ENODATA: + case -ENOBUFS: + break; + + /* other error */ + default: + _leave(" = %d", ret); + return ret; + } + + /* load the missing pages from the network */ + ret = read_cache_pages(mapping, pages, + (void *) afs_file_readpage, NULL); + + _leave(" = %d [netting]", ret); return ret; -} /* end afs_cache_get_page_cookie() */ + +} /* end afs_file_readpages() */ #endif /*****************************************************************************/ @@ -214,35 +302,22 @@ #endif */ static void afs_file_invalidatepage(struct page *page, unsigned long offset) { - int ret = 1; - _enter("{%lu},%lu", page->index, offset); BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache,page); -#endif - /* We release buffers only if the entire page is being * invalidated. * The get_block cached value has been unconditionally * invalidated, so real IO is not possible anymore. 
*/ - if (offset == 0) { - BUG_ON(!PageLocked(page)); - - ret = 0; - if (!PageWriteback(page)) - ret = page->mapping->a_ops->releasepage(page, - 0); - /* possibly should BUG_ON(!ret); - neilb */ - } + if (offset == 0 && !PageWriteback(page)) + page->mapping->a_ops->releasepage(page, 0); } - _leave(" = %d", ret); + _leave(""); + } /* end afs_file_invalidatepage() */ /*****************************************************************************/ @@ -251,23 +326,30 @@ #endif */ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) { - struct cachefs_page *pageio; - _enter("{%lu},%x", page->index, gfp_flags); - if (PagePrivate(page)) { -#ifdef AFS_CACHING_SUPPORT - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - cachefs_uncache_page(vnode->cache, page); +#ifdef CONFIG_AFS_FSCACHE + wait_on_page_fs_misc(page); + fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page); + ClearPagePrivate(page); #endif - pageio = (struct cachefs_page *) page_private(page); - set_page_private(page, 0); - ClearPagePrivate(page); + /* indicate that the page can be released */ + _leave(" = 1"); + return 1; - kfree(pageio); - } +} /* end afs_file_releasepage() */ - _leave(" = 0"); +/*****************************************************************************/ +/* + * wait for the disc cache to finish writing before permitting modification of + * our page in the page cache + */ +#ifdef CONFIG_AFS_FSCACHE +static int afs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + wait_on_page_fs_misc(page); return 0; -} /* end afs_file_releasepage() */ + +} /* end afs_file_page_mkwrite() */ +#endif diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 61bc371..c88c41a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -398,6 +398,8 @@ int afs_rxfs_fetch_file_status(struct af bp++; /* spare6 */ } + _debug("Data Version %llx\n", vnode->status.version); + /* success */ ret = 0; @@ -408,7 +410,7 @@ int afs_rxfs_fetch_file_status(struct af 
out_put_conn: afs_server_release_callslot(server, &callslot); out: - _leave(""); + _leave(" = %d", ret); return ret; abort: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ebb30a..0a59eda 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -49,7 +49,7 @@ static int afs_inode_map_status(struct a case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; inode->i_op = &afs_file_inode_operations; - inode->i_fop = &generic_ro_fops; + inode->i_fop = &afs_file_file_operations; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | vnode->status.mode; @@ -65,6 +65,11 @@ static int afs_inode_map_status(struct a return -EBADMSG; } +#ifdef CONFIG_AFS_FSCACHE + if (vnode->status.size != inode->i_size) + fscache_set_i_size(vnode->cache, vnode->status.size); +#endif + inode->i_nlink = vnode->status.nlink; inode->i_uid = vnode->status.owner; inode->i_gid = 0; @@ -101,13 +106,33 @@ static int afs_inode_fetch_status(struct struct afs_vnode *vnode; int ret; + _enter(""); + vnode = AFS_FS_I(inode); ret = afs_vnode_fetch_status(vnode); - if (ret == 0) + if (ret == 0) { +#ifdef CONFIG_AFS_FSCACHE + if (!vnode->cache) { + vnode->cache = + fscache_acquire_cookie(vnode->volume->cache, + &afs_vnode_cache_index_def, + vnode); + if (!vnode->cache) + printk("Negative\n"); + } +#endif ret = afs_inode_map_status(vnode); +#ifdef CONFIG_AFS_FSCACHE + if (ret < 0) { + fscache_relinquish_cookie(vnode->cache, 0); + vnode->cache = NULL; + } +#endif + } + _leave(" = %d", ret); return ret; } /* end afs_inode_fetch_status() */ @@ -122,6 +147,7 @@ static int afs_iget5_test(struct inode * return inode->i_ino == data->fid.vnode && inode->i_version == data->fid.unique; + } /* end afs_iget5_test() */ /*****************************************************************************/ @@ -179,20 +205,11 @@ inline int afs_iget(struct super_block * return ret; } -#ifdef AFS_CACHING_SUPPORT - /* set up caching before reading the status, as fetch-status reads the - * first page of symlinks to see if they're 
really mntpts */ - cachefs_acquire_cookie(vnode->volume->cache, - NULL, - vnode, - &vnode->cache); -#endif - /* okay... it's a new inode */ inode->i_flags |= S_NOATIME; vnode->flags |= AFS_VNODE_CHANGED; ret = afs_inode_fetch_status(inode); - if (ret<0) + if (ret < 0) goto bad_inode; /* success */ @@ -278,8 +295,8 @@ void afs_clear_inode(struct inode *inode afs_vnode_give_up_callback(vnode); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vnode->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vnode->cache, 0); vnode->cache = NULL; #endif diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e88b3b6..482dbd1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -16,15 +16,17 @@ #include #include #include #include +#include /* * debug tracing */ -#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ## a) -#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ## a) -#define kdebug(FMT, a...) printk(FMT"\n" , ## a) -#define kproto(FMT, a...) printk("### "FMT"\n" , ## a) -#define knet(FMT, a...) printk(FMT"\n" , ## a) +#define __kdbg(FMT, a...) printk("[%05d] "FMT"\n", current->pid , ## a) +#define kenter(FMT, a...) __kdbg("==> %s("FMT")", __FUNCTION__ , ## a) +#define kleave(FMT, a...) __kdbg("<== %s()"FMT, __FUNCTION__ , ## a) +#define kdebug(FMT, a...) __kdbg(FMT , ## a) +#define kproto(FMT, a...) __kdbg("### "FMT , ## a) +#define knet(FMT, a...) __kdbg(FMT , ## a) #ifdef __KDEBUG #define _enter(FMT, a...) 
kenter(FMT , ## a) @@ -56,9 +58,6 @@ static inline void afs_discard_my_signal */ extern struct rw_semaphore afs_proc_cells_sem; extern struct list_head afs_proc_cells; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_cache_cell_index_def; -#endif /* * dir.c @@ -71,11 +70,7 @@ extern const struct file_operations afs_ */ extern const struct address_space_operations afs_fs_aops; extern struct inode_operations afs_file_inode_operations; - -#ifdef AFS_CACHING_SUPPORT -extern int afs_cache_get_page_cookie(struct page *page, - struct cachefs_page **_page_cookie); -#endif +extern const struct file_operations afs_file_file_operations; /* * inode.c @@ -97,8 +92,8 @@ #endif /* * main.c */ -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_netfs afs_cache_netfs; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_netfs afs_cache_netfs; #endif /* diff --git a/fs/afs/main.c b/fs/afs/main.c index 913c689..5840bb2 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -1,6 +1,6 @@ /* main.c: AFS client file system * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -14,11 +14,11 @@ #include #include #include #include +#include #include #include #include #include -#include "cache.h" #include "cell.h" #include "server.h" #include "fsclient.h" @@ -51,12 +51,11 @@ static struct rxrpc_peer_ops afs_peer_op struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT]; DEFINE_SPINLOCK(afs_cb_hash_lock); -#ifdef AFS_CACHING_SUPPORT -static struct cachefs_netfs_operations afs_cache_ops = { - .get_page_cookie = afs_cache_get_page_cookie, +#ifdef CONFIG_AFS_FSCACHE +static struct fscache_netfs_operations afs_cache_ops = { }; -struct cachefs_netfs afs_cache_netfs = { +struct fscache_netfs afs_cache_netfs = { .name = "afs", .version = 0, .ops = &afs_cache_ops, @@ -83,10 +82,9 @@ static int __init afs_init(void) if (ret < 0) return ret; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to be able to cache */ - ret = cachefs_register_netfs(&afs_cache_netfs, - &afs_cache_cell_index_def); + ret = fscache_register_netfs(&afs_cache_netfs); if (ret < 0) goto error; #endif @@ -137,8 +135,8 @@ #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); error_cache: #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); error: #endif afs_cell_purge(); @@ -167,8 +165,8 @@ static void __exit afs_exit(void) #ifdef CONFIG_KEYS_TURNED_OFF afs_key_unregister(); #endif -#ifdef AFS_CACHING_SUPPORT - cachefs_unregister_netfs(&afs_cache_netfs); +#ifdef CONFIG_AFS_FSCACHE + fscache_unregister_netfs(&afs_cache_netfs); #endif afs_proc_cleanup(); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 99785a7..2a53d51 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -78,7 +78,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = -EIO; wait_on_page_locked(page); - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); if (!PageUptodate(page)) goto 
out_free; if (PageError(page)) @@ -101,7 +101,7 @@ int afs_mntpt_check_symlink(struct afs_v ret = 0; out_free: - kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); out: _leave(" = %d", ret); @@ -188,9 +188,9 @@ static struct vfsmount *afs_mntpt_do_aut if (!PageUptodate(page) || PageError(page)) goto error; - buf = kmap(page); + buf = kmap_atomic(page, KM_USER0); memcpy(devname, buf, size); - kunmap(page); + kunmap_atomic(buf, KM_USER0); page_cache_release(page); page = NULL; @@ -269,12 +269,12 @@ static void *afs_mntpt_follow_link(struc */ static void afs_mntpt_expiry_timed_out(struct afs_timer *timer) { - kenter(""); +// kenter(""); mark_mounts_for_expiry(&afs_vfsmounts); afs_kafstimod_add_timer(&afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); - kleave(""); +// kleave(""); } /* end afs_mntpt_expiry_timed_out() */ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b..db58488 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -177,6 +177,7 @@ int afs_proc_init(void) */ void afs_proc_cleanup(void) { + remove_proc_entry("rootcell", proc_afs); remove_proc_entry("cells", proc_afs); remove_proc_entry("fs/afs", NULL); diff --git a/fs/afs/server.c b/fs/afs/server.c index 22afaae..e94628c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -375,7 +375,6 @@ int afs_server_request_callslot(struct a else if (list_empty(&server->fs_callq)) { /* no one waiting */ server->fs_conn_cnt[nconn]++; - spin_unlock(&server->fs_lock); } else { /* someone's waiting - dequeue them and wake them up */ @@ -393,9 +392,9 @@ int afs_server_request_callslot(struct a } pcallslot->ready = 1; wake_up_process(pcallslot->task); - spin_unlock(&server->fs_lock); } + spin_unlock(&server->fs_lock); rxrpc_put_connection(callslot->conn); callslot->conn = NULL; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 331f730..20148bc 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -59,17 +59,21 @@ static LIST_HEAD(afs_vlocation_update_pe static struct 
afs_vlocation *afs_vlocation_update; /* VL currently being updated */ static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry); -static void afs_vlocation_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vlocation_cache_index_def = { - .name = "vldb", - .data_size = sizeof(struct afs_cache_vlocation), - .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, - .match = afs_vlocation_cache_match, - .update = afs_vlocation_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); + +static struct fscache_cookie_def afs_vlocation_cache_index_def = { + .name = "AFS.vldb", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_vlocation_cache_get_key, + .get_aux = afs_vlocation_cache_get_aux, + .check_aux = afs_vlocation_cache_check_aux, }; #endif @@ -300,13 +304,12 @@ int afs_vlocation_lookup(struct afs_cell list_add_tail(&vlocation->link, &cell->vl_list); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* we want to store it in the cache, plus it might already be * encached */ - cachefs_acquire_cookie(cell->cache, - &afs_volume_cache_index_def, - vlocation, - &vlocation->cache); + vlocation->cache = fscache_acquire_cookie(cell->cache, + &afs_vlocation_cache_index_def, + vlocation); if (vlocation->valid) goto found_in_cache; @@ -340,7 +343,7 @@ #endif active: active = 1; -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE found_in_cache: #endif /* try to look up a cached volume in the cell VL databases by ID */ @@ -422,9 +425,9 @@ #endif afs_kafstimod_add_timer(&vlocation->upd_timer, 
10 * HZ); -#ifdef AFS_CACHING_SUPPORT +#ifdef CONFIG_AFS_FSCACHE /* update volume entry in local cache */ - cachefs_update_cookie(vlocation->cache); + fscache_update_cookie(vlocation->cache); #endif *_vlocation = vlocation; @@ -438,8 +441,8 @@ #endif } else { list_del(&vlocation->link); -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(vlocation->cell); kfree(vlocation); @@ -536,8 +539,8 @@ void afs_vlocation_do_timeout(struct afs } /* we can now destroy it properly */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(vlocation->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(vlocation->cache, 0); #endif afs_put_cell(cell); @@ -888,65 +891,103 @@ static void afs_vlocation_update_discard /*****************************************************************************/ /* - * match a VLDB record stored in the cache - * - may also load target from entry + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vlocation_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = target; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t klen; - _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 - ) { - if (!vlocation->valid || - vlocation->vldb.rtime == vldb->rtime - ) { - vlocation->vldb = *vldb; - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return CACHEFS_MATCH_SUCCESS; - } - /* need to update cache if cached info differs */ - else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { - /* delete if 
VIDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, - &vldb->vid, - sizeof(vldb->vid)) != 0) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; - } + klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); + if (klen > bufmax) + return 0; - _leave(" = UPDATE"); - return CACHEFS_MATCH_SUCCESS_UPDATE; - } - else { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - } + memcpy(buffer, vlocation->vldb.name, klen); + + _leave(" = %u", klen); + return klen; - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_vlocation_cache_match() */ +} /* end afs_vlocation_cache_get_key() */ #endif /*****************************************************************************/ /* - * update a VLDB record stored in the cache + * provide new auxiliary cache data */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vlocation_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - struct afs_cache_vlocation *vldb = entry; - struct afs_vlocation *vlocation = source; + const struct afs_vlocation *vlocation = cookie_netfs_data; + uint16_t dlen; - _enter(""); + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); + + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen > bufmax) + return 0; + + memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); + + _leave(" = %u", dlen); + return dlen; + +} /* end afs_vlocation_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxiliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vlocation_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + const struct afs_cache_vlocation *cvldb; + struct afs_vlocation *vlocation = 
cookie_netfs_data; + uint16_t dlen; + + _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(struct afs_cache_vlocation); + dlen -= offsetof(struct afs_cache_vlocation, nservers); + if (dlen != buflen) + return FSCACHE_CHECKAUX_OBSOLETE; + + cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); + + /* if what's on disk is more valid than what's in memory, then use the + * VL record from the cache */ + if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { + memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); + vlocation->valid = 1; + _leave(" = SUCCESS [c->m]"); + return FSCACHE_CHECKAUX_OKAY; + } + + /* need to update the cache if the cached info differs */ + if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { + /* delete if the volume IDs for this name differ */ + if (memcmp(&vlocation->vldb.vid, &cvldb->vid, + sizeof(cvldb->vid)) != 0 + ) { + _leave(" = OBSOLETE"); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + _leave(" = UPDATE"); + return FSCACHE_CHECKAUX_NEEDS_UPDATE; + } - *vldb = vlocation->vldb; + _leave(" = OKAY"); + return FSCACHE_CHECKAUX_OKAY; -} /* end afs_vlocation_cache_update() */ +} /* end afs_vlocation_cache_check_aux() */ #endif diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index cf62da5..b6cba1e 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -29,17 +29,30 @@ struct afs_timer_ops afs_vnode_cb_timed_ .timed_out = afs_vnode_cb_timed_out, }; -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry); -static void afs_vnode_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_vnode_cache_index_def = { - .name = "vnode", - .data_size = sizeof(struct afs_cache_vnode), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, - .match = afs_vnode_cache_match, - .update = afs_vnode_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void 
*cookie_netfs_data, + void *buffer, uint16_t buflen); +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size); +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen); +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); + +struct fscache_cookie_def afs_vnode_cache_index_def = { + .name = "AFS.vnode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = afs_vnode_cache_get_key, + .get_attr = afs_vnode_cache_get_attr, + .get_aux = afs_vnode_cache_get_aux, + .check_aux = afs_vnode_cache_check_aux, + .mark_pages_cached = afs_vnode_cache_mark_pages_cached, + .now_uncached = afs_vnode_cache_now_uncached, }; #endif @@ -188,6 +201,8 @@ int afs_vnode_fetch_status(struct afs_vn if (vnode->update_cnt > 0) { /* someone else started a fetch */ + _debug("conflict"); + set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&vnode->update_waitq, &myself); @@ -219,6 +234,7 @@ int afs_vnode_fetch_status(struct afs_vn spin_unlock(&vnode->lock); set_current_state(TASK_RUNNING); + _leave(" [conflicted, %d", !!(vnode->flags & AFS_VNODE_DELETED)); return vnode->flags & AFS_VNODE_DELETED ? 
-ENOENT : 0; } @@ -341,54 +357,198 @@ int afs_vnode_give_up_callback(struct af /*****************************************************************************/ /* - * match a vnode record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_vnode_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = target; + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t klen; - _enter("{%x,%x,%Lx},{%x,%x,%Lx}", - vnode->fid.vnode, - vnode->fid.unique, - vnode->status.version, - cvnode->vnode_id, - cvnode->vnode_unique, - cvnode->data_version); - - if (vnode->fid.vnode != cvnode->vnode_id) { - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + klen = sizeof(vnode->fid.vnode); + if (klen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); + + _leave(" = %u", klen); + return klen; + +} /* end afs_vnode_cache_get_key() */ +#endif + +/*****************************************************************************/ +/* + * provide updated file attributes + */ +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + + _enter("{%x,%x,%Lx},", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + *size = i_size_read((struct inode *) &vnode->vfs_inode); + +} /* end afs_vnode_cache_get_attr() */ +#endif + +/*****************************************************************************/ +/* + * provide new auxiliary cache data + */ +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, + void 
*buffer, uint16_t bufmax) +{ + const struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, bufmax); + + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version); + if (dlen > bufmax) + return 0; + + memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); + buffer += sizeof(vnode->fid.unique); + memcpy(buffer, &vnode->status.version, sizeof(vnode->status.version)); + + _leave(" = %u", dlen); + return dlen; + +} /* end afs_vnode_cache_get_aux() */ +#endif + +/*****************************************************************************/ +/* + * check that the auxiliary data indicates that the entry is still valid + */ +#ifdef CONFIG_AFS_FSCACHE +static fscache_checkaux_t afs_vnode_cache_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + struct afs_vnode *vnode = cookie_netfs_data; + uint16_t dlen; + + _enter("{%x,%x,%Lx},%p,%u", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version, + buffer, buflen); + + /* check the size of the data is what we're expecting */ + dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.version); + if (dlen != buflen) { + _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + return FSCACHE_CHECKAUX_OBSOLETE; } - if (vnode->fid.unique != cvnode->vnode_unique || - vnode->status.version != cvnode->data_version) { - _leave(" = DELETE"); - return CACHEFS_MATCH_SUCCESS_DELETE; + if (memcmp(buffer, + &vnode->fid.unique, + sizeof(vnode->fid.unique) + ) != 0 + ) { + unsigned unique; + + memcpy(&unique, buffer, sizeof(unique)); + + _leave(" = OBSOLETE [uniq %x != %x]", + unique, vnode->fid.unique); + return FSCACHE_CHECKAUX_OBSOLETE; + } + + if (memcmp(buffer + sizeof(vnode->fid.unique), + &vnode->status.version, + sizeof(vnode->status.version) + ) != 0 + ) { + afs_dataversion_t version; + + memcpy(&version, buffer + sizeof(vnode->fid.unique), + sizeof(version)); + + _leave(" = 
OBSOLETE [vers %llx != %llx]", + version, vnode->status.version); + return FSCACHE_CHECKAUX_OBSOLETE; } _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; -} /* end afs_vnode_cache_match() */ + return FSCACHE_CHECKAUX_OKAY; + +} /* end afs_vnode_cache_check_aux() */ #endif /*****************************************************************************/ /* - * update a vnode record stored in the cache + * indication of pages that now have cache metadata retained + * - this function should mark the specified pages as now being cached */ -#ifdef AFS_CACHING_SUPPORT -static void afs_vnode_cache_update(void *source, void *entry) +#ifdef CONFIG_AFS_FSCACHE +static void afs_vnode_cache_mark_pages_cached(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec) { - struct afs_cache_vnode *cvnode = entry; - struct afs_vnode *vnode = source; + unsigned long loop; - _enter(""); + for (loop = 0; loop < cached_pvec->nr; loop++) { + struct page *page = cached_pvec->pages[loop]; - cvnode->vnode_id = vnode->fid.vnode; - cvnode->vnode_unique = vnode->fid.unique; - cvnode->data_version = vnode->status.version; + _debug("- mark %p{%lx}", page, page->index); -} /* end afs_vnode_cache_update() */ + SetPagePrivate(page); + } + +} /* end afs_vnode_cache_mark_pages_cached() */ #endif + +/*****************************************************************************/ +/* + * indication the cookie is no longer cached + * - this function is called when the backing store currently caching a cookie + * is removed + * - the netfs should use this to clean up any markers indicating cached pages + * - this is mandatory for any object that may have data + */ +static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) +{ + struct afs_vnode *vnode = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + _enter("{%x,%x,%Lx}", + vnode->fid.vnode, vnode->fid.unique, vnode->status.version); + + pagevec_init(&pvec, 0); + first = 0; + + 
for (;;) { + /* grab a bunch of pages to clean */ + nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPagePrivate(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); + +} /* end afs_vnode_cache_now_uncached() */ diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h index b86a971..3f0602d 100644 --- a/fs/afs/vnode.h +++ b/fs/afs/vnode.h @@ -13,9 +13,9 @@ #ifndef _LINUX_AFS_VNODE_H #define _LINUX_AFS_VNODE_H #include +#include #include "server.h" #include "kafstimod.h" -#include "cache.h" #ifdef __KERNEL__ @@ -32,8 +32,8 @@ struct afs_cache_vnode afs_dataversion_t data_version; /* data version */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vnode_cache_index_def; +#ifdef CONFIG_AFS_FSCACHE +extern struct fscache_cookie_def afs_vnode_cache_index_def; #endif /*****************************************************************************/ @@ -47,8 +47,8 @@ struct afs_vnode struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif wait_queue_head_t update_waitq; /* status fetch waitqueue */ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 0ff4b86..0bd5578 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -15,10 +15,10 @@ #include #include #include #include +#include #include "volume.h" #include "vnode.h" #include "cell.h" -#include "cache.h" #include "cmservice.h" #include "fsclient.h" #include "vlclient.h" @@ -28,18 +28,14 @@ #ifdef __KDEBUG static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; #endif -#ifdef 
AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry); -static void afs_volume_cache_update(void *source, void *entry); - -struct cachefs_index_def afs_volume_cache_index_def = { - .name = "volume", - .data_size = sizeof(struct afs_cache_vhash), - .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, - .match = afs_volume_cache_match, - .update = afs_volume_cache_update, +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t buflen); + +static struct fscache_cookie_def afs_volume_cache_index_def = { + .name = "AFS.volume", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = afs_volume_cache_get_key, }; #endif @@ -214,11 +210,10 @@ int afs_volume_lookup(const char *name, } /* attach the cache and volume location */ -#ifdef AFS_CACHING_SUPPORT - cachefs_acquire_cookie(vlocation->cache, - &afs_vnode_cache_index_def, - volume, - &volume->cache); +#ifdef CONFIG_AFS_FSCACHE + volume->cache = fscache_acquire_cookie(vlocation->cache, + &afs_volume_cache_index_def, + volume); #endif afs_get_vlocation(vlocation); @@ -286,8 +281,8 @@ void afs_put_volume(struct afs_volume *v up_write(&vlocation->cell->vl_sem); /* finish cleaning up the volume */ -#ifdef AFS_CACHING_SUPPORT - cachefs_relinquish_cookie(volume->cache, 0); +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, 0); #endif afs_put_vlocation(vlocation); @@ -481,40 +476,25 @@ int afs_volume_release_fileserver(struct /*****************************************************************************/ /* - * match a volume hash record stored in the cache + * set the key for the index entry */ -#ifdef AFS_CACHING_SUPPORT -static cachefs_match_val_t afs_volume_cache_match(void *target, - const void *entry) +#ifdef CONFIG_AFS_FSCACHE +static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) { - const struct 
afs_cache_vhash *vhash = entry; - struct afs_volume *volume = target; - - _enter("{%u},{%u}", volume->type, vhash->vtype); + const struct afs_volume *volume = cookie_netfs_data; + uint16_t klen; - if (volume->type == vhash->vtype) { - _leave(" = SUCCESS"); - return CACHEFS_MATCH_SUCCESS; - } - - _leave(" = FAILED"); - return CACHEFS_MATCH_FAILED; -} /* end afs_volume_cache_match() */ -#endif + _enter("{%u},%p,%u", volume->type, buffer, bufmax); -/*****************************************************************************/ -/* - * update a volume hash record stored in the cache - */ -#ifdef AFS_CACHING_SUPPORT -static void afs_volume_cache_update(void *source, void *entry) -{ - struct afs_cache_vhash *vhash = entry; - struct afs_volume *volume = source; + klen = sizeof(volume->type); + if (klen > bufmax) + return 0; - _enter(""); + memcpy(buffer, &volume->type, sizeof(volume->type)); - vhash->vtype = volume->type; + _leave(" = %u", klen); + return klen; -} /* end afs_volume_cache_update() */ +} /* end afs_volume_cache_get_key() */ #endif diff --git a/fs/afs/volume.h b/fs/afs/volume.h index bfdcf19..fc9895a 100644 --- a/fs/afs/volume.h +++ b/fs/afs/volume.h @@ -12,11 +12,11 @@ #ifndef _LINUX_AFS_VOLUME_H #define _LINUX_AFS_VOLUME_H +#include #include "types.h" #include "fsclient.h" #include "kafstimod.h" #include "kafsasyncd.h" -#include "cache.h" typedef enum { AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ @@ -45,24 +45,6 @@ #define AFS_VOL_VTM_BAK 0x04 /* backup v time_t rtime; /* last retrieval time */ }; -#ifdef AFS_CACHING_SUPPORT -extern struct cachefs_index_def afs_vlocation_cache_index_def; -#endif - -/*****************************************************************************/ -/* - * volume -> vnode hash table entry - */ -struct afs_cache_vhash -{ - afs_voltype_t vtype; /* which volume variation */ - uint8_t hash_bucket; /* which hash bucket this represents */ -} __attribute__((packed)); - -#ifdef AFS_CACHING_SUPPORT -extern struct 
cachefs_index_def afs_volume_cache_index_def; -#endif - /*****************************************************************************/ /* * AFS volume location record @@ -73,8 +55,8 @@ struct afs_vlocation struct list_head link; /* link in cell volume location list */ struct afs_timer timeout; /* decaching timer */ struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ @@ -109,8 +91,8 @@ struct afs_volume atomic_t usage; struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ struct afs_vlocation *vlocation; /* volume location */ -#ifdef AFS_CACHING_SUPPORT - struct cachefs_cookie *cache; /* caching cookie */ +#ifdef CONFIG_AFS_FSCACHE + struct fscache_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ afs_voltype_t type; /* type of volume */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d6603d0..47e38f3 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -96,7 +96,6 @@ #define AUTOFS_TYPE_OFFSET 0x0004 struct autofs_sb_info { u32 magic; - struct dentry *root; int pipefd; struct file *pipe; pid_t oz_pgrp; @@ -231,4 +230,4 @@ out: } void autofs4_dentry_release(struct dentry *); - +extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 5d91933..723a1c5 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -24,7 +24,7 @@ static struct file_system_type autofs_fs .owner = THIS_MODULE, .name = "autofs", .get_sb = autofs_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = autofs4_kill_sb, }; static int __init init_autofs4_fs(void) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index fde78b1..1bf68c5 100644 --- a/fs/autofs4/inode.c 
+++ b/fs/autofs4/inode.c @@ -95,7 +95,7 @@ void autofs4_free_ino(struct autofs_info */ static void autofs4_force_release(struct autofs_sb_info *sbi) { - struct dentry *this_parent = sbi->root; + struct dentry *this_parent = sbi->sb->s_root; struct list_head *next; spin_lock(&dcache_lock); @@ -126,7 +126,7 @@ resume: spin_lock(&dcache_lock); } - if (this_parent != sbi->root) { + if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; @@ -139,15 +139,9 @@ resume: goto resume; } spin_unlock(&dcache_lock); - - dput(sbi->root); - sbi->root = NULL; - shrink_dcache_sb(sbi->sb); - - return; } -static void autofs4_put_super(struct super_block *sb) +void autofs4_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); @@ -162,6 +156,7 @@ static void autofs4_put_super(struct sup kfree(sbi); DPRINTK("shutting down"); + kill_anon_super(sb); } static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) @@ -188,7 +183,6 @@ static int autofs4_show_options(struct s } static struct super_operations autofs4_sops = { - .put_super = autofs4_put_super, .statfs = simple_statfs, .show_options = autofs4_show_options, }; @@ -314,7 +308,6 @@ int autofs4_fill_super(struct super_bloc s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; - sbi->root = NULL; sbi->pipefd = -1; sbi->catatonic = 0; sbi->exp_timeout = 0; @@ -396,13 +389,6 @@ int autofs4_fill_super(struct super_bloc sbi->pipefd = pipefd; /* - * Take a reference to the root dentry so we get a chance to - * clean up the dentry tree on umount. - * See autofs4_force_release. - */ - sbi->root = dget(root); - - /* * Success! Install the root dentry now to indicate completion. 
*/ s->s_root = root; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index ce103e7..c0a6c8d 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -45,7 +45,6 @@ void autofs4_catatonic_mode(struct autof fput(sbi->pipe); /* Close the pipe */ sbi->pipe = NULL; } - shrink_dcache_sb(sbi->sb); } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/fs/buffer.c b/fs/buffer.c index 3660dcb..31a01da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -184,6 +184,8 @@ int fsync_super(struct super_block *sb) return sync_blockdev(sb->s_bdev); } +EXPORT_SYMBOL(fsync_super); + /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 0000000..c1522d9 --- /dev/null +++ b/fs/cachefiles/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for caching in a mounted filesystem +# + +cachefiles-objs := \ + cf-bind.o \ + cf-interface.o \ + cf-key.o \ + cf-main.o \ + cf-namei.o \ + cf-proc.o \ + cf-xattr.o + +ifeq ($(CONFIG_SYSCTL),y) +cachefiles-objs += cf-sysctl.o +endif + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/cf-bind.c b/fs/cachefiles/cf-bind.c new file mode 100644 index 0000000..0c14c37 --- /dev/null +++ b/fs/cachefiles/cf-bind.c @@ -0,0 +1,283 @@ +/* cf-bind.c: bind and unbind a cache from the filesystem backing it + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt); + +/*****************************************************************************/ +/* + * bind a directory as a cache + */ +int cachefiles_proc_bind(struct cachefiles_cache *cache, char *args) +{ + _enter("{%u,%u,%u},%s", + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + /* start by checking things over */ + ASSERT(cache->bstop_percent >= 0 && + cache->bstop_percent < cache->bcull_percent && + cache->bcull_percent < cache->brun_percent && + cache->brun_percent < 100); + + if (*args) { + kerror("'bind' command doesn't take an argument"); + return -EINVAL; + } + + if (!cache->rootdirname) { + kerror("No cache directory specified"); + return -EINVAL; + } + + /* don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + kerror("Cache already bound"); + return -EBUSY; + } + + /* make sure we have copies of the tag and dirname strings */ + if (!cache->tag) { + /* the tag string is released by the fops->release() + * function, so we don't release it on error here */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + + /* add the cache */ + return cachefiles_proc_add_cache(cache, NULL); + +} /* end cachefiles_proc_bind() */ + +/*****************************************************************************/ +/* + * add a cache + */ +static int cachefiles_proc_add_cache(struct cachefiles_cache *cache, + struct vfsmount *mnt) +{ + struct cachefiles_object *fsdef; + struct nameidata nd; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + int ret; + + _enter(""); + + /* allocate the root index object */ + ret = -ENOMEM; + + fsdef = kmem_cache_alloc(cachefiles_object_jar, SLAB_KERNEL); + 
if (!fsdef) + goto error_root_object; + + atomic_set(&fsdef->usage, 1); + atomic_set(&fsdef->fscache_usage, 1); + fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; + + _debug("- fsdef %p", fsdef); + + /* look up the directory at the root of the cache */ + memset(&nd, 0, sizeof(nd)); + + ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); + if (ret < 0) + goto error_open_root; + + /* bind to the special mountpoint we've prepared */ + if (mnt) { + atomic_inc(&nd.mnt->mnt_sb->s_active); + mnt->mnt_sb = nd.mnt->mnt_sb; + mnt->mnt_flags = nd.mnt->mnt_flags; + mnt->mnt_flags |= MNT_NOSUID | MNT_NOEXEC | MNT_NODEV; + mnt->mnt_root = dget(nd.dentry); + mnt->mnt_mountpoint = mnt->mnt_root; + + /* copy the name, but ignore kstrdup() failing ENOMEM - we'll + * just end up with an devicenameless mountpoint */ + mnt->mnt_devname = kstrdup(nd.mnt->mnt_devname, GFP_KERNEL); + path_release(&nd); + + cache->mnt = mntget(mnt); + root = dget(mnt->mnt_root); + } + else { + cache->mnt = nd.mnt; + root = nd.dentry; + + nd.mnt = NULL; + nd.dentry = NULL; + path_release(&nd); + } + + /* check parameters */ + ret = -EOPNOTSUPP; + if (!root->d_inode || + !root->d_inode->i_op || + !root->d_inode->i_op->lookup || + !root->d_inode->i_op->mkdir || + !root->d_inode->i_op->setxattr || + !root->d_inode->i_op->getxattr || + !root->d_sb || + !root->d_sb->s_op || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs) + goto error_unsupported; + + ret = -EROFS; + if (root->d_sb->s_flags & MS_RDONLY) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = root->d_sb->s_op->statfs(root, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - long_log2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + 
cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", stats.f_blocks, stats.f_bavail); + + /* set up caching limits */ + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu}", + cache->brun, + cache->bcull, + cache->bstop); + + /* get the cache directory and check its type */ + cached