diff options
Diffstat (limited to 'fs')
262 files changed, 6275 insertions, 6364 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile index 1a940ec7af6..91fba025fcb 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -8,6 +8,8 @@ obj-$(CONFIG_9P_FS) := 9p.o vfs_dir.o \ vfs_dentry.o \ v9fs.o \ - fid.o + fid.o \ + xattr.o \ + xattr_user.o 9p-$(CONFIG_9P_FSCACHE) += cache.o diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 7317b39b281..35856368906 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -97,6 +97,34 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) return ret; } +/* + * We need to hold v9ses->rename_sem as long as we hold references + * to returned path array. Array element contain pointers to + * dentry names. + */ +static int build_path_from_dentry(struct v9fs_session_info *v9ses, + struct dentry *dentry, char ***names) +{ + int n = 0, i; + char **wnames; + struct dentry *ds; + + for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) + n++; + + wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL); + if (!wnames) + goto err_out; + + for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent) + wnames[i] = (char *)ds->d_name.name; + + *names = wnames; + return n; +err_out: + return -ENOMEM; +} + /** * v9fs_fid_lookup - lookup for a fid, try to walk if not found * @dentry: dentry to look for fid in @@ -112,7 +140,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) int i, n, l, clone, any, access; u32 uid; struct p9_fid *fid, *old_fid = NULL; - struct dentry *d, *ds; + struct dentry *ds; struct v9fs_session_info *v9ses; char **wnames, *uname; @@ -139,49 +167,62 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) fid = v9fs_fid_find(dentry, uid, any); if (fid) return fid; - + /* + * we don't have a matching fid. To do a TWALK we need + * parent fid. We need to prevent rename when we want to + * look at the parent. + */ + down_read(&v9ses->rename_sem); ds = dentry->d_parent; fid = v9fs_fid_find(ds, uid, any); - if (!fid) { /* walk from the root */ - n = 0; - for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) - n++; + if (fid) { + /* Found the parent fid do a lookup with that */ + fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1); + goto fid_out; + } + up_read(&v9ses->rename_sem); - fid = v9fs_fid_find(ds, uid, any); - if (!fid) { /* the user is not attached to the fs yet */ - if (access == V9FS_ACCESS_SINGLE) - return ERR_PTR(-EPERM); + /* start from the root and try to do a lookup */ + fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any); + if (!fid) { + /* the user is not attached to the fs yet */ + if (access == V9FS_ACCESS_SINGLE) + return ERR_PTR(-EPERM); - if (v9fs_proto_dotu(v9ses)) + if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) uname = NULL; - else - uname = v9ses->uname; + else + uname = v9ses->uname; - fid = p9_client_attach(v9ses->clnt, NULL, uname, uid, - v9ses->aname); - - if (IS_ERR(fid)) - return fid; - - v9fs_fid_add(ds, fid); - } - } else /* walk from the parent */ - n = 1; + fid = p9_client_attach(v9ses->clnt, NULL, uname, uid, + v9ses->aname); + if (IS_ERR(fid)) + return fid; - if (ds == dentry) + v9fs_fid_add(dentry->d_sb->s_root, fid); + } + /* If we are root ourself just return that */ + if (dentry->d_sb->s_root == dentry) return fid; - - wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL); - if (!wnames) - return ERR_PTR(-ENOMEM); - - for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent) - wnames[i] = (char *) d->d_name.name; - + /* + * Do a multipath walk with attached root. + * When walking parent we need to make sure we + * don't have a parallel rename happening + */ + down_read(&v9ses->rename_sem); + n = build_path_from_dentry(v9ses, dentry, &wnames); + if (n < 0) { + fid = ERR_PTR(n); + goto err_out; + } clone = 1; i = 0; while (i < n) { l = min(n - i, P9_MAXWELEM); + /* + * We need to hold rename lock when doing a multipath + * walk to ensure none of the patch component change + */ fid = p9_client_walk(fid, l, &wnames[i], clone); if (IS_ERR(fid)) { if (old_fid) { @@ -193,15 +234,17 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) p9_client_clunk(old_fid); } kfree(wnames); - return fid; + goto err_out; } old_fid = fid; i += l; clone = 0; } - kfree(wnames); +fid_out: v9fs_fid_add(dentry, fid); +err_out: + up_read(&v9ses->rename_sem); return fid; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index f8b86e92cd6..38dc0e06759 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -237,6 +237,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, __putname(v9ses->uname); return ERR_PTR(-ENOMEM); } + init_rwsem(&v9ses->rename_sem); rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); if (rc) { @@ -278,7 +279,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; /* for legacy mode, fall back to V9FS_ACCESS_ANY */ - if (!v9fs_proto_dotu(v9ses) && + if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) && ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { v9ses->flags &= ~V9FS_ACCESS_MASK; diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index bec4d0bcb45..4c963c9fc41 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -104,6 +104,7 @@ struct v9fs_session_info { struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct backing_dev_info bdi; + struct rw_semaphore rename_sem; }; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 32ef4009d03..f47c6bbb01b 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -55,6 +55,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode); void v9fs_clear_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); +void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index d61e3b28ce3..16c8a2a98c1 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf) } /** - * v9fs_dir_readdir - read a directory + * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure - * @dirent: directory structure ??? - * @filldir: function to populate directory structure ??? + * @buflen: Length in bytes of buffer to allocate * */ -static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) { - int over; - struct p9_wstat st; - int err = 0; - struct p9_fid *fid; - int buflen; - int reclen = 0; struct p9_rdir *rdir; + struct p9_fid *fid; + int err = 0; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); fid = filp->private_data; - - buflen = fid->clnt->msize - P9_IOHDRSZ; - - /* allocate rdir on demand */ if (!fid->rdir) { rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); @@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) spin_unlock(&filp->f_dentry->d_lock); kfree(rdir); } +exit: + return err; +} + +/** + * v9fs_dir_readdir - read a directory + * @filp: opened file structure + * @dirent: directory structure ??? + * @filldir: function to populate directory structure ??? + * + */ + +static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + int over; + struct p9_wstat st; + int err = 0; + struct p9_fid *fid; + int buflen; + int reclen = 0; + struct p9_rdir *rdir; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + fid = filp->private_data; + + buflen = fid->clnt->msize - P9_IOHDRSZ; + + err = v9fs_alloc_rdir_buf(filp, buflen); + if (err) + goto exit; rdir = (struct p9_rdir *) fid->rdir; err = mutex_lock_interruptible(&rdir->mutex); @@ -146,7 +166,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) while (rdir->head < rdir->tail) { p9stat_init(&st); err = p9stat_read(rdir->buf + rdir->head, - buflen - rdir->head, &st, + rdir->tail - rdir->head, &st, fid->clnt->proto_version); if (err) { P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); @@ -176,6 +196,88 @@ exit: return err; } +/** + * v9fs_dir_readdir_dotl - read a directory + * @filp: opened file structure + * @dirent: buffer to fill dirent structures + * @filldir: function to populate dirent structures + * + */ +static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, + filldir_t filldir) +{ + int over; + int err = 0; + struct p9_fid *fid; + int buflen; + struct p9_rdir *rdir; + struct p9_dirent curdirent; + u64 oldoffset = 0; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + fid = filp->private_data; + + buflen = fid->clnt->msize - P9_READDIRHDRSZ; + + err = v9fs_alloc_rdir_buf(filp, buflen); + if (err) + goto exit; + rdir = (struct p9_rdir *) fid->rdir; + + err = mutex_lock_interruptible(&rdir->mutex); + if (err) + return err; + + while (err == 0) { + if (rdir->tail == rdir->head) { + err = p9_client_readdir(fid, rdir->buf, buflen, + filp->f_pos); + if (err <= 0) + goto unlock_and_exit; + + rdir->head = 0; + rdir->tail = err; + } + + while (rdir->head < rdir->tail) { + + err = p9dirent_read(rdir->buf + rdir->head, + buflen - rdir->head, &curdirent, + fid->clnt->proto_version); + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); + err = -EIO; + goto unlock_and_exit; + } + + /* d_off in dirent structure tracks the offset into + * the next dirent in the dir. However, filldir() + * expects offset into the current dirent. Hence + * while calling filldir send the offset from the + * previous dirent structure. + */ + over = filldir(dirent, curdirent.d_name, + strlen(curdirent.d_name), + oldoffset, v9fs_qid2ino(&curdirent.qid), + curdirent.d_type); + oldoffset = curdirent.d_off; + + if (over) { + err = 0; + goto unlock_and_exit; + } + + filp->f_pos = curdirent.d_off; + rdir->head += err; + } + } + +unlock_and_exit: + mutex_unlock(&rdir->mutex); +exit: + return err; +} + /** * v9fs_dir_release - close a directory @@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = { const struct file_operations v9fs_dir_operations_dotl = { .read = generic_read_dir, .llseek = generic_file_llseek, - .readdir = v9fs_dir_readdir, + .readdir = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2bedc6c94fc..e97c92bd6f1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file) struct p9_fid *fid; int omode; - P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); v9ses = v9fs_inode2v9ses(inode); - omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); + if (v9fs_proto_dotl(v9ses)) + omode = file->f_flags; + else + omode = v9fs_uflags2omode(file->f_flags, + v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { fid = v9fs_fid_clone(file->f_path.dentry); @@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file) p9_client_clunk(fid); return err; } - if (omode & P9_OTRUNC) { + if (file->f_flags & O_TRUNC) { i_size_write(inode, 0); inode->i_blocks = 0; } - if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses))) + if ((file->f_flags & O_APPEND) && + (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) generic_file_llseek(file, 0, SEEK_END); } @@ -139,7 +144,7 @@ ssize_t v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, u64 offset) { - int n, total; + int n, total, size; struct p9_fid *fid = filp->private_data; P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, @@ -147,6 +152,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, n = 0; total = 0; + size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; do { n = p9_client_read(fid, data, udata, offset, count); if (n <= 0) @@ -160,7 +166,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, offset += n; count -= n; total += n; - } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ)); + } while (count > 0 && n == size); if (n < 0) total = n; @@ -183,11 +189,13 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count, { int ret; struct p9_fid *fid; + size_t size; P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); fid = filp->private_data; - if (count > (fid->clnt->msize - P9_IOHDRSZ)) + size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; + if (count > size) ret = v9fs_file_readn(filp, NULL, udata, count, *offset); else ret = p9_client_read(fid, NULL, udata, *offset, count); @@ -224,9 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data, fid = filp->private_data; clnt = fid->clnt; - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; + rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ; do { if (count < rsize) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4331b3b5ee1..6e94f3247ce 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -35,6 +35,7 @@ #include <linux/idr.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/xattr.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -42,6 +43,7 @@ #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" +#include "xattr.h" static const struct inode_operations v9fs_dir_inode_operations; static const struct inode_operations v9fs_dir_inode_operations_dotu; @@ -236,6 +238,41 @@ void v9fs_destroy_inode(struct inode *inode) #endif /** + * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a + * new file system object. This checks the S_ISGID to determine the owning + * group of the new file system object. + */ + +static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) +{ + BUG_ON(dir_inode == NULL); + + if (dir_inode->i_mode & S_ISGID) { + /* set_gid bit is set.*/ + return dir_inode->i_gid; + } + return current_fsgid(); +} + +/** + * v9fs_dentry_from_dir_inode - helper function to get the dentry from + * dir inode. + * + */ + +static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) +{ + struct dentry *dentry; + + spin_lock(&dcache_lock); + /* Directory should have only one entry. */ + BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); + dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + spin_unlock(&dcache_lock); + return dentry; +} + +/** * v9fs_get_inode - helper function to setup an inode * @sb: superblock * @mode: mode to setup inode with @@ -267,7 +304,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) case S_IFBLK: case S_IFCHR: case S_IFSOCK: - if (!v9fs_proto_dotu(v9ses)) { + if (v9fs_proto_dotl(v9ses)) { + inode->i_op = &v9fs_file_inode_operations_dotl; + inode->i_fop = &v9fs_file_operations_dotl; + } else if (v9fs_proto_dotu(v9ses)) { + inode->i_op = &v9fs_file_inode_operations; + inode->i_fop = &v9fs_file_operations; + } else { P9_DPRINTK(P9_DEBUG_ERROR, "special files without extended mode\n"); err = -EINVAL; @@ -396,23 +439,14 @@ void v9fs_clear_inode(struct inode *inode) #endif } -/** - * v9fs_inode_from_fid - populate an inode by issuing a attribute request - * @v9ses: session information - * @fid: fid to issue attribute request for - * @sb: superblock on which to create inode - * - */ - static struct inode * -v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, +v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { int err, umode; - struct inode *ret; + struct inode *ret = NULL; struct p9_wstat *st; - ret = NULL; st = p9_client_stat(fid); if (IS_ERR(st)) return ERR_CAST(st); @@ -433,15 +467,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, #endif p9stat_free(st); kfree(st); - return ret; - error: p9stat_free(st); kfree(st); return ERR_PTR(err); } +static struct inode * +v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, + struct super_block *sb) +{ + struct inode *ret = NULL; + int err; + struct p9_stat_dotl *st; + + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); + if (IS_ERR(st)) + return ERR_CAST(st); + + ret = v9fs_get_inode(sb, st->st_mode); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + goto error; + } + + v9fs_stat2inode_dotl(st, ret); + ret->i_ino = v9fs_qid2ino(&st->qid); +#ifdef CONFIG_9P_FSCACHE + v9fs_vcookie_set_qid(ret, &st->qid); + v9fs_cache_inode_get_cookie(ret); +#endif + kfree(st); + return ret; +error: + kfree(st); + return ERR_PTR(err); +} + +/** + * v9fs_inode_from_fid - Helper routine to populate an inode by + * issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ +static inline struct inode * +v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, + struct super_block *sb) +{ + if (v9fs_proto_dotl(v9ses)) + return v9fs_inode_dotl(v9ses, fid, sb); + else + return v9fs_inode(v9ses, fid, sb); +} + /** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted @@ -563,6 +644,118 @@ error: } /** + * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. + * @dir: directory inode that is being created + * @dentry: dentry that is being deleted + * @mode: create permissions + * @nd: path information + * + */ + +static int +v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + int err = 0; + char *name = NULL; + gid_t gid; + int flags; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL; + struct p9_fid *dfid, *ofid; + struct file *filp; + struct p9_qid qid; + struct inode *inode; + + v9ses = v9fs_inode2v9ses(dir); + if (nd && nd->flags & LOOKUP_OPEN) + flags = nd->intent.open.flags - 1; + else + flags = O_RDWR; + + name = (char *) dentry->d_name.name; + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " + "mode:0x%x\n", name, flags, mode); + + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + return err; + } + + /* clone a fid to use for creation */ + ofid = p9_client_walk(dfid, 0, NULL, 1); + if (IS_ERR(ofid)) { + err = PTR_ERR(ofid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + return err; + } + + gid = v9fs_get_fsgid_for_create(dir); + err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, + "p9_client_open_dotl failed in creat %d\n", + err); + goto error; + } + + /* No need to populate the inode if we are not opening the file AND + * not in cached mode. + */ + if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) { + /* Not in cached mode. No need to populate inode with stat */ + dentry->d_op = &v9fs_dentry_operations; + p9_client_clunk(ofid); + d_instantiate(dentry, NULL); + return 0; + } + + /* Now walk from the parent so we can get an unopened fid. */ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + fid = NULL; + goto error; + } + + /* instantiate inode and assign the unopened fid to dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + + /* if we are opening a file, assign the open fid to the file */ + if (nd && nd->flags & LOOKUP_OPEN) { + filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); + if (IS_ERR(filp)) { + p9_client_clunk(ofid); + return PTR_ERR(filp); + } + filp->private_data = ofid; + } else + p9_client_clunk(ofid); + + return 0; + +error: + if (ofid) + p9_client_clunk(ofid); + if (fid) + p9_client_clunk(fid); + return err; +} + +/** * v9fs_vfs_create - VFS hook to create files * @dir: directory inode that is being created * @dentry: dentry that is being deleted @@ -652,6 +845,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return err; } + +/** + * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory + * @dir: inode that is being unlinked + * @dentry: dentry that is being unlinked + * @mode: mode for new directory + * + */ + +static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry, + int mode) +{ + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL, *dfid = NULL; + gid_t gid; + char *name; + struct inode *inode; + struct p9_qid qid; + struct dentry *dir_dentry; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + err = 0; + v9ses = v9fs_inode2v9ses(dir); + + mode |= S_IFDIR; + dir_dentry = v9fs_dentry_from_dir_inode(dir); + dfid = v9fs_fid_lookup(dir_dentry); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + dfid = NULL; + goto error; + } + + gid = v9fs_get_fsgid_for_create(dir); + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); + goto error; + } + + name = (char *) dentry->d_name.name; + err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); + if (err < 0) + goto error; + + /* instantiate inode and assign the unopened fid to the dentry */ + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto error; + } + + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } +error: + if (fid) + p9_client_clunk(fid); + return err; +} + /** * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode * @dir: inode that is being walked from @@ -678,6 +948,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); + /* We can walk d_parent because we hold the dir->i_mutex */ dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) return ERR_CAST(dfid); @@ -785,27 +1056,33 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto clunk_olddir; } + down_write(&v9ses->rename_sem); if (v9fs_proto_dotl(v9ses)) { retval = p9_client_rename(oldfid, newdirfid, (char *) new_dentry->d_name.name); if (retval != -ENOSYS) goto clunk_newdir; } + if (old_dentry->d_parent != new_dentry->d_parent) { + /* + * 9P .u can only handle file rename in the same directory + */ - /* 9P can only handle file rename in the same directory */ - if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { P9_DPRINTK(P9_DEBUG_ERROR, "old dir and new dir are different\n"); retval = -EXDEV; goto clunk_newdir; } - v9fs_blank_wstat(&wstat); wstat.muid = v9ses->uname; wstat.name = (char *) new_dentry->d_name.name; retval = p9_client_wstat(oldfid, &wstat); clunk_newdir: + if (!retval) + /* successful rename */ + d_move(old_dentry, new_dentry); + up_write(&v9ses->rename_sem); p9_client_clunk(newdirfid); clunk_olddir: @@ -853,6 +1130,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } +static int +v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat_dotl *st; + + P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + err = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) + return simple_getattr(mnt, dentry, stat); + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + /* Ask for all the fields in stat structure. Server will return + * whatever it supports + */ + + st = p9_client_getattr_dotl(fid, P9_STATS_ALL); + if (IS_ERR(st)) + return PTR_ERR(st); + + v9fs_stat2inode_dotl(st, dentry->d_inode); + generic_fillattr(dentry->d_inode, stat); + /* Change block size to what the server returned */ + stat->blksize = st->st_blksize; + + kfree(st); + return 0; +} + /** * v9fs_vfs_setattr - set file metadata * @dentry: file whose metadata to set @@ -903,6 +1216,49 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) } /** + * v9fs_vfs_setattr_dotl - set file metadata + * @dentry: file whose metadata to set + * @iattr: metadata assignment structure + * + */ + +static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) +{ + int retval; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_iattr_dotl p9attr; + + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + + retval = inode_change_ok(dentry->d_inode, iattr); + if (retval) + return retval; + + p9attr.valid = iattr->ia_valid; + p9attr.mode = iattr->ia_mode; + p9attr.uid = iattr->ia_uid; + p9attr.gid = iattr->ia_gid; + p9attr.size = iattr->ia_size; + p9attr.atime_sec = iattr->ia_atime.tv_sec; + p9attr.atime_nsec = iattr->ia_atime.tv_nsec; + p9attr.mtime_sec = iattr->ia_mtime.tv_sec; + p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; + + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + retval = p9_client_setattr(fid, &p9attr); + if (retval >= 0) + retval = inode_setattr(dentry->d_inode, iattr); + + return retval; +} + +/** * v9fs_stat2inode - populate an inode structure with mistat info * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate @@ -980,6 +1336,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, } /** + * v9fs_stat2inode_dotl - populate an inode structure with stat info + * @stat: stat structure + * @inode: inode to populate + * @sb: superblock of filesystem + * + */ + +void +v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) +{ + + if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { + inode->i_atime.tv_sec = stat->st_atime_sec; + inode->i_atime.tv_nsec = stat->st_atime_nsec; + inode->i_mtime.tv_sec = stat->st_mtime_sec; + inode->i_mtime.tv_nsec = stat->st_mtime_nsec; + inode->i_ctime.tv_sec = stat->st_ctime_sec; + inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + inode->i_uid = stat->st_uid; + inode->i_gid = stat->st_gid; + inode->i_nlink = stat->st_nlink; + inode->i_mode = stat->st_mode; + inode->i_rdev = new_decode_dev(stat->st_rdev); + + if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) + init_special_inode(inode, inode->i_mode, inode->i_rdev); + + i_size_write(inode, stat->st_size); + inode->i_blocks = stat->st_blocks; + } else { + if (stat->st_result_mask & P9_STATS_ATIME) { + inode->i_atime.tv_sec = stat->st_atime_sec; + inode->i_atime.tv_nsec = stat->st_atime_nsec; + } + if (stat->st_result_mask & P9_STATS_MTIME) { + inode->i_mtime.tv_sec = stat->st_mtime_sec; + inode->i_mtime.tv_nsec = stat->st_mtime_nsec; + } + if (stat->st_result_mask & P9_STATS_CTIME) { + inode->i_ctime.tv_sec = stat->st_ctime_sec; + inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + } + if (stat->st_result_mask & P9_STATS_UID) + inode->i_uid = stat->st_uid; + if (stat->st_result_mask & P9_STATS_GID) + inode->i_gid = stat->st_gid; + if (stat->st_result_mask & P9_STATS_NLINK) + inode->i_nlink = stat->st_nlink; + if (stat->st_result_mask & P9_STATS_MODE) { + inode->i_mode = stat->st_mode; + if ((S_ISBLK(inode->i_mode)) || + (S_ISCHR(inode->i_mode))) + init_special_inode(inode, inode->i_mode, + inode->i_rdev); + } + if (stat->st_result_mask & P9_STATS_RDEV) + inode->i_rdev = new_decode_dev(stat->st_rdev); + if (stat->st_result_mask & P9_STATS_SIZE) + i_size_write(inode, stat->st_size); + if (stat->st_result_mask & P9_STATS_BLOCKS) + inode->i_blocks = stat->st_blocks; + } + if (stat->st_result_mask & P9_STATS_GEN) + inode->i_generation = stat->st_gen; + + /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION + * because the inode structure does not have fields for them. + */ +} + +/** * v9fs_qid2ino - convert qid into inode number * @qid: qid to hash * @@ -1022,7 +1449,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (IS_ERR(fid)) return PTR_ERR(fid); - if (!v9fs_proto_dotu(v9ses)) + if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) return -EBADF; st = p9_client_stat(fid); @@ -1128,6 +1555,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, } /** + * v9fs_vfs_symlink_dotl - helper function to create symlinks + * @dir: directory inode containing symlink + * @dentry: dentry for symlink + * @symname: symlink data + * + * See Also: 9P2000.L RFC for more information + * + */ + +static int +v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct v9fs_session_info *v9ses; + struct p9_fid *dfid; + struct p9_fid *fid = NULL; + struct inode *inode; + struct p9_qid qid; + char *name; + int err; + gid_t gid; + + name = (char *) dentry->d_name.name; + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", + dir->i_ino, name, symname); + v9ses = v9fs_inode2v9ses(dir); + + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + return err; + } + + gid = v9fs_get_fsgid_for_create(dir); + + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid); + goto error; + } + + /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ + err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); + + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); + goto error; + } + + if (v9ses->cache) { + /* Now walk from the parent so we can get an unopened fid. */ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto error; + } + + /* instantiate inode and assign the unopened fid to dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } else { + /* Not in cached mode. No need to populate inode with stat */ + inode = v9fs_get_inode(dir->i_sb, S_IFLNK); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto error; + } + dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + } + +error: + if (fid) + p9_client_clunk(fid); + + return err; +} + +/** * v9fs_vfs_symlink - helper function to create symlinks * @dir: directory inode containing symlink * @dentry: dentry for symlink @@ -1186,6 +1706,76 @@ clunk_fid: } /** + * v9fs_vfs_link_dotl - create a hardlink for dotl + * @old_dentry: dentry for file to link to + * @dir: inode destination for new link + * @dentry: dentry for link + * + */ + +static int +v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + int err; + struct p9_fid *dfid, *oldfid; + char *name; + struct v9fs_session_info *v9ses; + struct dentry *dir_dentry; + + P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", + dir->i_ino, old_dentry->d_name.name, + dentry->d_name.name); + + v9ses = v9fs_inode2v9ses(dir); + dir_dentry = v9fs_dentry_from_dir_inode(dir); + dfid = v9fs_fid_lookup(dir_dentry); + if (IS_ERR(dfid)) + return PTR_ERR(dfid); + + oldfid = v9fs_fid_lookup(old_dentry); + if (IS_ERR(oldfid)) + return PTR_ERR(oldfid); + + name = (char *) dentry->d_name.name; + + err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); + + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); + return err; + } + + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { + /* Get the latest stat info from server. */ + struct p9_fid *fid; + struct p9_stat_dotl *st; + + fid = v9fs_fid_lookup(old_dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); + if (IS_ERR(st)) + return PTR_ERR(st); + + v9fs_stat2inode_dotl(st, old_dentry->d_inode); + + kfree(st); + } else { + /* Caching disabled. No need to get upto date stat info. + * This dentry will be released immediately. So, just i_count++ + */ + atomic_inc(&old_dentry->d_inode->i_count); + } + + dentry->d_op = old_dentry->d_op; + d_instantiate(dentry, old_dentry->d_inode); + + return err; +} + +/** * v9fs_vfs_mknod - create a special file * @dir: inode destination for new link * @dentry: dentry for file @@ -1230,6 +1820,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) return retval; } +/** + * v9fs_vfs_mknod_dotl - create a special file + * @dir: inode destination for new link + * @dentry: dentry for file + * @mode: mode for creation + * @rdev: device associated with special file + * + */ +static int +v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, + dev_t rdev) +{ + int err; + char *name; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL, *dfid = NULL; + struct inode *inode; + gid_t gid; + struct p9_qid qid; + struct dentry *dir_dentry; + + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, + dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + v9ses = v9fs_inode2v9ses(dir); + dir_dentry = v9fs_dentry_from_dir_inode(dir); + dfid = v9fs_fid_lookup(dir_dentry); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + dfid = NULL; + goto error; + } + + gid = v9fs_get_fsgid_for_create(dir); + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); + goto error; + } + + name = (char *) dentry->d_name.name; + + err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); + if (err < 0) + goto error; + + /* instantiate inode and assign the unopened fid to the dentry */ + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto error; + } + + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } else { + /* + * Not in cached mode. No need to populate inode with stat. + * socket syscall returns a fd, so we need instantiate + */ + inode = v9fs_get_inode(dir->i_sb, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto error; + } + dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + } + +error: + if (fid) + p9_client_clunk(fid); + return err; +} + static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, @@ -1238,24 +1922,29 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod, + .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_dir_inode_operations_dotl = { - .create = v9fs_vfs_create, + .create = v9fs_vfs_create_dotl, .lookup = v9fs_vfs_lookup, - .symlink = v9fs_vfs_symlink, - .link = v9fs_vfs_link, + .link = v9fs_vfs_link_dotl, + .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, - .mkdir = v9fs_vfs_mkdir, + .mkdir = v9fs_vfs_mkdir_dotl, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod, + .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, - .getattr = v9fs_vfs_getattr, - .setattr = v9fs_vfs_setattr, + .getattr = v9fs_vfs_getattr_dotl, + .setattr = v9fs_vfs_setattr_dotl, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = v9fs_listxattr, + }; static const struct inode_operations v9fs_dir_inode_operations = { @@ -1276,8 +1965,12 @@ static const struct inode_operations v9fs_file_inode_operations = { }; static const struct inode_operations v9fs_file_inode_operations_dotl = { - .getattr = v9fs_vfs_getattr, - .setattr = v9fs_vfs_setattr, + .getattr = v9fs_vfs_getattr_dotl, + .setattr = v9fs_vfs_setattr_dotl, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = v9fs_listxattr, }; static const struct inode_operations v9fs_symlink_inode_operations = { @@ -1292,6 +1985,10 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = { .readlink = generic_readlink, .follow_link = v9fs_vfs_follow_link, .put_link = v9fs_vfs_put_link, - .getattr = v9fs_vfs_getattr, - .setattr = v9fs_vfs_setattr, + .getattr = v9fs_vfs_getattr_dotl, + .setattr = v9fs_vfs_setattr_dotl, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = v9fs_listxattr, }; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index be74d020436..4b9ede0b41b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -45,6 +45,7 @@ #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" +#include "xattr.h" static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl; @@ -77,9 +78,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_blocksize_bits = fls(v9ses->maxdata - 1); sb->s_blocksize = 1 << sb->s_blocksize_bits; sb->s_magic = V9FS_MAGIC; - if (v9fs_proto_dotl(v9ses)) + if (v9fs_proto_dotl(v9ses)) { sb->s_op = &v9fs_super_ops_dotl; - else + sb->s_xattr = v9fs_xattr_handlers; + } else sb->s_op = &v9fs_super_ops; sb->s_bdi = &v9ses->bdi; @@ -107,7 +109,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, struct inode *inode = NULL; struct dentry *root = NULL; struct v9fs_session_info *v9ses = NULL; - struct p9_wstat *st = NULL; int mode = S_IRWXUGO | S_ISVTX; struct p9_fid *fid; int retval = 0; @@ -124,16 +125,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, goto close_session; } - st = p9_client_stat(fid); - if (IS_ERR(st)) { - retval = PTR_ERR(st); - goto clunk_fid; - } - sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto free_stat; + goto clunk_fid; } v9fs_fill_super(sb, v9ses, flags, data); @@ -151,22 +146,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, } sb->s_root = root; - root->d_inode->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode(st, root->d_inode, sb); + if (v9fs_proto_dotl(v9ses)) { + struct p9_stat_dotl *st = NULL; + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto clunk_fid; + } + + v9fs_stat2inode_dotl(st, root->d_inode); + kfree(st); + } else { + struct p9_wstat *st = NULL; + st = p9_client_stat(fid); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto clunk_fid; + } + + root->d_inode->i_ino = v9fs_qid2ino(&st->qid); + v9fs_stat2inode(st, root->d_inode, sb); + + p9stat_free(st); + kfree(st); + } v9fs_fid_add(root, fid); - p9stat_free(st); - kfree(st); P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); simple_set_mnt(mnt, sb); return 0; -free_stat: - p9stat_free(st); - kfree(st); - clunk_fid: p9_client_clunk(fid); @@ -176,8 +187,6 @@ close_session: return retval; release_sb: - p9stat_free(st); - kfree(st); deactivate_locked_super(sb); return retval; } @@ -278,4 +287,5 @@ struct file_system_type v9fs_fs_type = { .get_sb = v9fs_get_sb, .kill_sb = v9fs_kill_super, .owner = THIS_MODULE, + .fs_flags = FS_RENAME_DOES_D_MOVE, }; diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c new file mode 100644 index 00000000000..f88e5c2dc87 --- /dev/null +++ b/fs/9p/xattr.c @@ -0,0 +1,160 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> + +#include "fid.h" +#include "xattr.h" + +/* + * v9fs_xattr_get() + * + * Copy an extended attribute into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, + void *buffer, size_t buffer_size) +{ + ssize_t retval; + int msize, read_count; + u64 offset = 0, attr_size; + struct p9_fid *fid, *attr_fid; + + P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n", + __func__, name, buffer_size); + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + attr_fid = p9_client_xattrwalk(fid, name, &attr_size); + if (IS_ERR(attr_fid)) { + retval = PTR_ERR(attr_fid); + P9_DPRINTK(P9_DEBUG_VFS, + "p9_client_attrwalk failed %zd\n", retval); + attr_fid = NULL; + goto error; + } + if (!buffer_size) { + /* request to get the attr_size */ + retval = attr_size; + goto error; + } + if (attr_size > buffer_size) { + retval = -ERANGE; + goto error; + } + msize = attr_fid->clnt->msize; + while (attr_size) { + if (attr_size > (msize - P9_IOHDRSZ)) + read_count = msize - P9_IOHDRSZ; + else + read_count = attr_size; + read_count = p9_client_read(attr_fid, ((char *)buffer)+offset, + NULL, offset, read_count); + if (read_count < 0) { + /* error in xattr read */ + retval = read_count; + goto error; + } + offset += read_count; + attr_size -= read_count; + } + /* Total read xattr bytes */ + retval = offset; +error: + if (attr_fid) + p9_client_clunk(attr_fid); + return retval; + +} + +/* + * v9fs_xattr_set() + * + * Create, replace or remove an extended attribute for this inode. Buffer + * is NULL to remove an existing extended attribute, and non-NULL to + * either replace an existing extended attribute, or create a new extended + * attribute. The flags XATTR_REPLACE and XATTR_CREATE + * specify that an extended attribute must exist and must not exist + * previous to the call, respectively. + * + * Returns 0, or a negative error number on failure. + */ +int v9fs_xattr_set(struct dentry *dentry, const char *name, + const void *value, size_t value_len, int flags) +{ + u64 offset = 0; + int retval, msize, write_count; + struct p9_fid *fid = NULL; + + P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n", + __func__, name, value_len, flags); + + fid = v9fs_fid_clone(dentry); + if (IS_ERR(fid)) { + retval = PTR_ERR(fid); + fid = NULL; + goto error; + } + /* + * On success fid points to xattr + */ + retval = p9_client_xattrcreate(fid, name, value_len, flags); + if (retval < 0) { + P9_DPRINTK(P9_DEBUG_VFS, + "p9_client_xattrcreate failed %d\n", retval); + goto error; + } + msize = fid->clnt->msize;; + while (value_len) { + if (value_len > (msize - P9_IOHDRSZ)) + write_count = msize - P9_IOHDRSZ; + else + write_count = value_len; + write_count = p9_client_write(fid, ((char *)value)+offset, + NULL, offset, write_count); + if (write_count < 0) { + /* error in xattr write */ + retval = write_count; + goto error; + } + offset += write_count; + value_len -= write_count; + } + /* Total read xattr bytes */ + retval = offset; +error: + if (fid) + retval = p9_client_clunk(fid); + return retval; +} + +ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + return v9fs_xattr_get(dentry, NULL, buffer, buffer_size); +} + +const struct xattr_handler *v9fs_xattr_handlers[] = { + &v9fs_xattr_user_handler, + NULL +}; diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h new file mode 100644 index 00000000000..9ddf672ae5c --- /dev/null +++ b/fs/9p/xattr.h @@ -0,0 +1,27 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ +#ifndef FS_9P_XATTR_H +#define FS_9P_XATTR_H + +#include <linux/xattr.h> + +extern const struct xattr_handler *v9fs_xattr_handlers[]; +extern struct xattr_handler v9fs_xattr_user_handler; + +extern ssize_t v9fs_xattr_get(struct dentry *, const char *, + void *, size_t); +extern int v9fs_xattr_set(struct dentry *, const char *, + const void *, size_t, int); +extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t); +#endif /* FS_9P_XATTR_H */ diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c new file mode 100644 index 00000000000..d0b701b7208 --- /dev/null +++ b/fs/9p/xattr_user.c @@ -0,0 +1,80 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include "xattr.h" + +static int v9fs_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_USER_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_USER_PREFIX, prefix_len); + memcpy(full_name+prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_get(dentry, full_name, buffer, size); + kfree(full_name); + return retval; +} + +static int v9fs_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_USER_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_USER_PREFIX, prefix_len); + memcpy(full_name + prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_set(dentry, full_name, value, size, flags); + kfree(full_name); + return retval; +} + +struct xattr_handler v9fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = v9fs_xattr_user_get, + .set = v9fs_xattr_user_set, +}; diff --git a/fs/Kconfig b/fs/Kconfig index 5f85b594761..3d185308ec8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -64,7 +64,7 @@ source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" config CUSE - tristate "Character device in Userpace support" + tristate "Character device in Userspace support" depends on FUSE_FS help This FUSE extension allows character devices to be diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index 5c4e61d3c77..8f975f25b48 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -2,6 +2,7 @@ config AFS_FS tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL select AF_RXRPC + select DNS_RESOLVER help If you say Y here, you will get an experimental Andrew File System driver. It currently only supports unsecured read-only AFS access. diff --git a/fs/afs/cell.c b/fs/afs/cell.c index e19c13f059e..ffea35c6387 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/key.h> #include <linux/ctype.h> +#include <linux/dns_resolver.h> #include <linux/sched.h> #include <keys/rxrpc-type.h> #include "internal.h" @@ -36,6 +37,8 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) struct key *key; size_t namelen; char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; + char *dvllist = NULL, *_vllist = NULL; + char delimiter = ':'; int ret; _enter("%s,%s", name, vllist); @@ -43,8 +46,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ namelen = strlen(name); - if (namelen > AFS_MAXCELLNAME) + if (namelen > AFS_MAXCELLNAME) { + _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); + } /* allocate and initialise a cell record */ cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); @@ -64,15 +69,31 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) INIT_LIST_HEAD(&cell->vl_list); spin_lock_init(&cell->vl_lock); + /* if the ip address is invalid, try dns query */ + if (!vllist || strlen(vllist) < 7) { + ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); + if (ret < 0) { + _leave(" = %d", ret); + return ERR_PTR(ret); + } + _vllist = dvllist; + + /* change the delimiter for user-space reply */ + delimiter = ','; + + } else { + _vllist = vllist; + } + /* fill in the VL server list from the rest of the string */ do { unsigned a, b, c, d; - next = strchr(vllist, ':'); + next = strchr(_vllist, delimiter); if (next) *next++ = 0; - if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) + if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) goto bad_address; if (a > 255 || b > 255 || c > 255 || d > 255) @@ -81,7 +102,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) cell->vl_addrs[cell->vl_naddrs++].s_addr = htonl((a << 24) | (b << 16) | (c << 8) | d); - } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next)); + } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); /* create a key to represent an anonymous user */ memcpy(keyname, "afs@", 4); @@ -110,6 +131,7 @@ bad_address: ret = -EINVAL; error: key_put(cell->anonymous_key); + kfree(dvllist); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -201,14 +223,12 @@ int afs_cell_init(char *rootcell) } cp = strchr(rootcell, ':'); - if (!cp) { - printk(KERN_ERR "kAFS: no VL server IP addresses specified\n"); - _leave(" = -EINVAL"); - return -EINVAL; - } + if (!cp) + _debug("kAFS: no VL server IP addresses specified"); + else + *cp++ = 0; /* allocate a cell record for the root cell */ - *cp++ = 0; new_root = afs_cell_create(rootcell, cp); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9948d..722743b152d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) { struct address_space *mapping = vnode->vfs_inode.i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_cyclic = 1, @@ -1277,7 +1277,7 @@ out: /* sys_io_destroy: * Destroy the aio_context specified. May cancel any outstanding * AIOs and block on completion. Will fail with -ENOSYS if not - * implemented. May fail with -EFAULT if the context pointed to + * implemented. May fail with -EINVAL if the context pointed to * is invalid. */ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) @@ -1795,15 +1795,16 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, /* io_getevents: * Attempts to read at least min_nr events and up to nr events from - * the completion queue for the aio_context specified by ctx_id. May - * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range, - * if nr is out of range, if when is out of range. May fail with - * -EFAULT if any of the memory specified to is invalid. May return - * 0 or < min_nr if no events are available and the timeout specified - * by when has elapsed, where when == NULL specifies an infinite - * timeout. Note that the timeout pointed to by when is relative and - * will be updated if not NULL and the operation blocks. Will fail - * with -ENOSYS if not implemented. + * the completion queue for the aio_context specified by ctx_id. If + * it succeeds, the number of read events is returned. May fail with + * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is + * out of range, if timeout is out of range. May fail with -EFAULT + * if any of the memory specified is invalid. May return 0 or + * < min_nr if the timeout specified by timeout has elapsed + * before sufficient events are available, where timeout == NULL + * specifies an infinite timeout. Note that the timeout pointed to by + * timeout is relative and will be updated if not NULL and the + * operation blocks. Will fail with -ENOSYS if not implemented. */ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, long, min_nr, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 34ddda888e6..dc39d282488 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -436,7 +436,7 @@ befs_init_inodecache(void) init_once); if (befs_inode_cachep == NULL) { printk(KERN_ERR "befs_init_inodecache: " - "Couldn't initalize inode slabcache\n"); + "Couldn't initialize inode slabcache\n"); return -ENOMEM; } diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index b6ab27ccf21..811384bec8d 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -68,11 +68,7 @@ * Here we can be a bit looser than the data sections since this * needs to only meet arch ABI requirements. */ -#ifdef ARCH_SLAB_MINALIGN -#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN) -#else -#define FLAT_STACK_ALIGN (sizeof(void *)) -#endif +#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN) #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 99d6af81174..b3171fb0dc9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -681,8 +681,8 @@ retry: if (!bd_may_claim(bdev, whole, holder)) return -EBUSY; - /* if someone else is claiming, wait for it to finish */ - if (whole->bd_claiming && whole->bd_claiming != holder) { + /* if claiming is already in progress, wait for it to finish */ + if (whole->bd_claiming) { wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); DEFINE_WAIT(wait); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d1d966b0fe..c3df14ce2cc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, return ret; } +/* + * min slot controls the lowest index we're willing to push to the + * right. We'll push up to and including min_slot, but no lower + */ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, - int free_space, u32 left_nritems) + int free_space, u32 left_nritems, + u32 min_slot) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; @@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (empty) nr = 0; else - nr = 1; + nr = max_t(u32, 1, min_slot); if (path->slots[0] >= left_nritems) push_space += data_size; @@ -2469,10 +2474,14 @@ out_unlock: * * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. + * + * this will push starting from min_slot to the end of the leaf. It won't + * push any slot lower than min_slot */ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) + *root, struct btrfs_path *path, + int min_data_size, int data_size, + int empty, u32 min_slot) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *right; @@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (left_nritems == 0) goto out_unlock; - return __push_leaf_right(trans, root, path, data_size, empty, - right, free_space, left_nritems); + return __push_leaf_right(trans, root, path, min_data_size, empty, + right, free_space, left_nritems, min_slot); out_unlock: btrfs_tree_unlock(right); free_extent_buffer(right); @@ -2525,12 +2534,17 @@ out_unlock: /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * max_slot can put a limit on how far into the leaf we'll push items. The + * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the + * items */ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *left, - int free_space, int right_nritems) + int free_space, u32 right_nritems, + u32 max_slot) { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; @@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, slot = path->slots[1]; if (empty) - nr = right_nritems; + nr = min(right_nritems, max_slot); else - nr = right_nritems - 1; + nr = min(right_nritems - 1, max_slot); for (i = 0; i < nr; i++) { item = btrfs_item_nr(right, i); @@ -2712,10 +2726,14 @@ out: /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * max_slot can put a limit on how far into the leaf we'll push items. The + * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the + * items */ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) + *root, struct btrfs_path *path, int min_data_size, + int data_size, int empty, u32 max_slot) { struct extent_buffer *right = path->nodes[0]; struct extent_buffer *left; @@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - return __push_leaf_left(trans, root, path, data_size, - empty, left, free_space, right_nritems); + return __push_leaf_left(trans, root, path, min_data_size, + empty, left, free_space, right_nritems, + max_slot); out: btrfs_tree_unlock(left); free_extent_buffer(left); @@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, } /* + * double splits happen when we need to insert a big item in the middle + * of a leaf. A double split can leave us with 3 mostly empty leaves: + * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] + * A B C + * + * We avoid this by trying to push the items on either side of our target + * into the adjacent leaves. If all goes well we can avoid the double split + * completely. + */ +static noinline int push_for_double_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int data_size) +{ + int ret; + int progress = 0; + int slot; + u32 nritems; + + slot = path->slots[0]; + + /* + * try to push all the items after our slot into the + * right leaf + */ + ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); + if (ret < 0) + return ret; + + if (ret == 0) + progress++; + + nritems = btrfs_header_nritems(path->nodes[0]); + /* + * our goal is to get our slot at the start or end of a leaf. If + * we've done so we're done + */ + if (path->slots[0] == 0 || path->slots[0] == nritems) + return 0; + + if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) + return 0; + + /* try to push all the items before our slot into the next leaf */ + slot = path->slots[0]; + ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); + if (ret < 0) + return ret; + + if (ret == 0) + progress++; + + if (progress) + return 0; + return 1; +} + +/* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. * @@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int wret; int split; int num_doubles = 0; + int tried_avoid_double = 0; l = path->nodes[0]; slot = path->slots[0]; @@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, return -EOVERFLOW; /* first try to make some room by pushing left and right */ - if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { - wret = push_leaf_right(trans, root, path, data_size, 0); + if (data_size) { + wret = push_leaf_right(trans, root, path, data_size, + data_size, 0, 0); if (wret < 0) return wret; if (wret) { - wret = push_leaf_left(trans, root, path, data_size, 0); + wret = push_leaf_left(trans, root, path, data_size, + data_size, 0, (u32)-1); if (wret < 0) return wret; } @@ -2923,6 +3003,8 @@ again: if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + if (data_size && !tried_avoid_double) + goto push_for_double; split = 2; } } @@ -2939,6 +3021,8 @@ again: if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { + if (data_size && !tried_avoid_double) + goto push_for_double; split = 2 ; } } @@ -3019,6 +3103,13 @@ again: } return ret; + +push_for_double: + push_for_double_split(trans, root, path, data_size); + tried_avoid_double = 1; + if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) + return 0; + goto again; } static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, @@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, extent_buffer_get(leaf); btrfs_set_path_blocking(path); - wret = push_leaf_left(trans, root, path, 1, 1); + wret = push_leaf_left(trans, root, path, 1, 1, + 1, (u32)-1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_right(trans, root, path, 1, 1); + wret = push_leaf_right(trans, root, path, 1, + 1, 1, 0); if (wret < 0 && wret != -ENOSPC) ret = wret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c21ec5..d74e6af9b53 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, @@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = inode->i_mapping->backing_dev_info, .sync_mode = mode, .older_than_this = NULL, .nr_to_write = nr_pages * 2, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4dbaf89b133..9254b3d58db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, */ /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE)) + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) return -EINVAL; ret = mnt_want_write(file->f_path.mnt); @@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* determine range to clone */ ret = -EINVAL; - if (off >= src->i_size || off + len > src->i_size) + if (off + len > src->i_size || off + len < off) goto out_unlock; if (len == 0) olen = len = src->i_size - off; @@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 disko = 0, diskl = 0; u64 datao = 0, datal = 0; u8 comp; + u64 endoff; size = btrfs_item_size_nr(leaf, slot); read_extent_buffer(leaf, buf, @@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_release_path(root, path); inode->i_mtime = inode->i_ctime = CURRENT_TIME; - if (new_key.offset + datal > inode->i_size) - btrfs_i_size_write(inode, - new_key.offset + datal); + + /* + * we round up to the block size at eof when + * determining which extents to clone above, + * but shouldn't round up the file size + */ + endoff = new_key.offset + datal; + if (endoff > off+olen) + endoff = off+olen; + if (endoff > inode->i_size) + btrfs_i_size_write(inode, endoff); + BTRFS_I(inode)->flags = BTRFS_I(src)->flags; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f4a7840bf42..42c7fafc8bf 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object, printk(KERN_ERR "%sobject: OBJ%x\n", prefix, object->fscache.debug_id); - printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n", + printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", prefix, fscache_object_states[object->fscache.state], - object->fscache.flags, object->fscache.work.flags, + object->fscache.flags, work_busy(&object->fscache.work), object->fscache.events, object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", @@ -212,7 +212,7 @@ wait_for_old_object: /* if the object we're waiting for is queued for processing, * then just put ourselves on the queue behind it */ - if (slow_work_is_queued(&xobject->fscache.work)) { + if (work_pending(&xobject->fscache.work)) { _debug("queue OBJ%x behind OBJ%x immediately", object->fscache.debug_id, xobject->fscache.debug_id); @@ -220,8 +220,7 @@ wait_for_old_object: } /* otherwise we sleep until either the object we're waiting for - * is done, or the slow-work facility wants the thread back to - * do other work */ + * is done, or the fscache_object is congested */ wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); init_wait(&wait); requeue = false; @@ -229,8 +228,8 @@ wait_for_old_object: prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) break; - requeue = slow_work_sleep_till_thread_needed( - &object->fscache.work, &timeout); + + requeue = fscache_object_sleep_till_congested(&timeout); } while (timeout > 0 && !requeue); finish_wait(wq, &wait); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 0f0d41fbb03..0e3c0924cc3 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_FAST; + op->op.flags |= FSCACHE_OP_ASYNC; op->op.processor = cachefiles_read_copier; pagevec_init(&pagevec, 0); @@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, pagevec_init(&pagevec, 0); op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_FAST; + op->op.flags |= FSCACHE_OP_ASYNC; op->op.processor = cachefiles_read_copier; INIT_LIST_HEAD(&backpages); diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 04b8280582a..bc87b9c1d27 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -2,7 +2,7 @@ config CEPH_FS tristate "Ceph distributed file system (EXPERIMENTAL)" depends on INET && EXPERIMENTAL select LIBCRC32C - select CONFIG_CRYPTO_AES + select CRYPTO_AES help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 83d4d2785ff..6d44053ecff 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, return -EAGAIN; } - op = le32_to_cpu(head->op); + op = le16_to_cpu(head->op); result = le32_to_cpu(head->result); dout("handle_reply op %d result %d\n", op, result); switch (op) { @@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) remove_ticket_handler(ac, th); } + if (xi->auth_authorizer.buf) + ceph_buffer_put(xi->auth_authorizer.buf); + kfree(ac->private); ac->private = NULL; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 619b61655ee..b81be9a5648 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ - if (!ctx) - return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (!ctx) { + cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); + if (cap) { + caps_use_count++; + caps_total_count++; + } + return cap; + } spin_lock(&caps_list_lock); dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", @@ -621,7 +627,7 @@ retry: if (fmode >= 0) __ceph_get_fmode(ci, fmode); spin_unlock(&inode->i_lock); - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); return 0; } @@ -1175,7 +1181,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, } if (wake) - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); return delayed; } @@ -2147,7 +2153,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) else if (flushsnaps) ceph_flush_snaps(ci); if (wake) - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); if (put) iput(inode); } @@ -2223,7 +2229,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, iput(inode); } else if (complete_capsnap) { ceph_flush_snaps(ci); - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); } if (drop_capsnap) iput(inode); @@ -2399,7 +2405,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if (queue_invalidate) ceph_queue_invalidate(inode); if (wake) - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); if (check_caps == 1) ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, @@ -2454,7 +2460,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, struct ceph_inode_info, i_flushing_item)->vfs_inode); mdsc->num_cap_flushing--; - wake_up(&mdsc->cap_flushing_wq); + wake_up_all(&mdsc->cap_flushing_wq); dout(" inode %p now !flushing\n", inode); if (ci->i_dirty_caps == 0) { @@ -2466,7 +2472,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, } } spin_unlock(&mdsc->cap_dirty_lock); - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); out: spin_unlock(&inode->i_lock); @@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; + int used, dirty; int ret = 0; - int used = 0; spin_lock(&inode->i_lock); used = __ceph_caps_used(ci); + dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, - mds, ceph_cap_string(used), ceph_cap_string(drop), + dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", + inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), ceph_cap_string(unless)); - /* only drop unused caps */ - drop &= ~used; + /* only drop unused, clean caps */ + drop &= ~(used | dirty); cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { @@ -2977,6 +2984,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, memcpy(*p, dentry->d_name.name, dentry->d_name.len); *p += dentry->d_name.len; rel->dname_seq = cpu_to_le32(di->lease_seq); + __ceph_mdsc_drop_dentry_lease(dentry); } spin_unlock(&dentry->d_lock); return ret; diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c index 9ba54efb654..a4eec133258 100644 --- a/fs/ceph/crush/mapper.c +++ b/fs/ceph/crush/mapper.c @@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, static int crush_bucket_choose(struct crush_bucket *in, int x, int r) { - dprintk("choose %d x=%d r=%d\n", in->id, x, r); + dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); switch (in->alg) { case CRUSH_BUCKET_UNIFORM: return bucket_uniform_choose((struct crush_bucket_uniform *)in, @@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) */ static int is_out(struct crush_map *map, __u32 *weight, int item, int x) { - if (weight[item] >= 0x1000) + if (weight[item] >= 0x10000) return 0; if (weight[item] == 0) return 1; @@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map, int itemtype; int collide, reject; const int orig_tries = 5; /* attempts before we fall back to search */ - dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos); + + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", + bucket->id, x, outpos, numrep); for (rep = outpos; rep < numrep; rep++) { /* keep trying until we get a non-out, non-colliding item */ @@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map, BUG_ON(item >= 0 || (-1-item) >= map->max_buckets); in = map->buckets[-1-item]; + retry_bucket = 1; continue; } @@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map, } } - if (recurse_to_leaf && - item < 0 && - crush_choose(map, map->buckets[-1-item], - weight, - x, outpos+1, 0, - out2, outpos, - firstn, 0, NULL) <= outpos) { - reject = 1; - } else { + reject = 0; + if (recurse_to_leaf) { + if (item < 0) { + if (crush_choose(map, + map->buckets[-1-item], + weight, + x, outpos+1, 0, + out2, outpos, + firstn, 0, + NULL) <= outpos) + /* didn't get leaf */ + reject = 1; + } else { + /* we already have a leaf! */ + out2[outpos] = item; + } + } + + if (!reject) { /* out? */ if (itemtype == 0) reject = is_out(map, weight, @@ -424,12 +437,12 @@ reject: continue; } - dprintk("choose got %d\n", item); + dprintk("CHOOSE got %d\n", item); out[outpos] = item; outpos++; } - dprintk("choose returns %d\n", outpos); + dprintk("CHOOSE returns %d\n", outpos); return outpos; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3be33fb066c..f2f5332ddbb 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp) static int caps_show(struct seq_file *s, void *p) { - struct ceph_client *client = p; + struct ceph_client *client = s->private; int total, avail, used, reserved, min; ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f85719310db..f94ed3c7f6a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -266,6 +266,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) spin_lock(&inode->i_lock); if ((filp->f_pos == 2 || fi->dentry) && !ceph_test_opt(client, NOASYNCREADDIR) && + ceph_snap(inode) != CEPH_SNAPDIR && (ci->i_ceph_flags & CEPH_I_COMPLETE) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { err = __dcache_readdir(filp, dirent, filldir); @@ -1013,18 +1014,22 @@ out_touch: /* * When a dentry is released, clear the dir I_COMPLETE if it was part - * of the current dir gen. + * of the current dir gen or if this is in the snapshot namespace. */ static void ceph_dentry_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); struct inode *parent_inode = dentry->d_parent->d_inode; + u64 snapid = ceph_snap(parent_inode); - if (parent_inode) { + dout("dentry_release %p parent %p\n", dentry, parent_inode); + + if (parent_inode && snapid != CEPH_SNAPDIR) { struct ceph_inode_info *ci = ceph_inode(parent_inode); spin_lock(&parent_inode->i_lock); - if (ci->i_shared_gen == di->lease_shared_gen) { + if (ci->i_shared_gen == di->lease_shared_gen || + snapid <= CEPH_MAXSNAP) { dout(" clearing %p complete (d_release)\n", parent_inode); ci->i_ceph_flags &= ~CEPH_I_COMPLETE; @@ -1241,7 +1246,9 @@ struct dentry_operations ceph_dentry_ops = { struct dentry_operations ceph_snapdir_dentry_ops = { .d_revalidate = ceph_snapdir_d_revalidate, + .d_release = ceph_dentry_release, }; struct dentry_operations ceph_snap_dentry_ops = { + .d_release = ceph_dentry_release, }; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6251a1574b9..7c08698fad3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file) kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); return 0; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab47f46ca28..389f9dbd994 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, d_drop(dn); realdn = d_materialise_unique(dn, in); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %p inode %p ino %llx.%llx\n", - dn, in, ceph_vinop(in)); + pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); if (prehash) *prehash = false; /* don't rehash on error */ dn = realdn; /* note realdn contains the error */ @@ -1199,8 +1199,10 @@ retry_lookup: goto out; } err = ceph_init_dentry(dn); - if (err < 0) + if (err < 0) { + dput(dn); goto out; + } } else if (dn->d_inode && (ceph_ino(dn->d_inode) != vino.ino || ceph_snap(dn->d_inode) != vino.snap)) { @@ -1234,18 +1236,23 @@ retry_lookup: goto out; } dn = splice_dentry(dn, in, NULL); + if (IS_ERR(dn)) + dn = NULL; } if (fill_inode(in, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); - dput(dn); - continue; + goto next_item; } - update_dentry_lease(dn, rinfo->dir_dlease[i], - req->r_session, req->r_request_started); - dput(dn); + if (dn) + update_dentry_lease(dn, rinfo->dir_dlease[i], + req->r_session, + req->r_request_started); +next_item: + if (dn) + dput(dn); } req->r_did_prepopulate = true; @@ -1494,7 +1501,7 @@ retry: if (wrbuffer_refs == 0) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); if (wake) - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1766947fc07..dd440bd438a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -868,7 +868,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, { struct ceph_inode_info *ci = ceph_inode(inode); - wake_up(&ci->i_cap_wq); + wake_up_all(&ci->i_cap_wq); if (arg) { spin_lock(&inode->i_lock); ci->i_wanted_max_size = 0; @@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ceph_encode_filepath(&p, end, ino1, path1); ceph_encode_filepath(&p, end, ino2, path2); + /* make note of release offset, in case we need to replay */ + req->r_request_release_offset = p - msg->front.iov_base; + /* cap releases */ releases = 0; if (req->r_inode_drop) @@ -1561,7 +1564,7 @@ static void complete_request(struct ceph_mds_client *mdsc, if (req->r_callback) req->r_callback(mdsc, req); else - complete(&req->r_completion); + complete_all(&req->r_completion); } /* @@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); + if (req->r_got_unsafe) { + /* + * Replay. Do not regenerate message (and rebuild + * paths, etc.); just use the original message. + * Rebuilding paths will break for renames because + * d_move mangles the src name. + */ + msg = req->r_request; + rhead = msg->front.iov_base; + + flags = le32_to_cpu(rhead->flags); + flags |= CEPH_MDS_FLAG_REPLAY; + rhead->flags = cpu_to_le32(flags); + + if (req->r_target_inode) + rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); + + rhead->num_retry = req->r_attempts - 1; + + /* remove cap/dentry releases from message */ + rhead->num_releases = 0; + msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset); + msg->front.iov_len = req->r_request_release_offset; + return 0; + } + if (req->r_request) { ceph_msg_put(req->r_request); req->r_request = NULL; @@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, rhead->flags = cpu_to_le32(flags); rhead->num_fwd = req->r_num_fwd; rhead->num_retry = req->r_attempts - 1; + rhead->ino = 0; dout(" r_locked_dir = %p\n", req->r_locked_dir); - - if (req->r_target_inode && req->r_got_unsafe) - rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); - else - rhead->ino = 0; return 0; } @@ -1907,7 +1932,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { req->r_got_safe = true; __unregister_request(mdsc, req); - complete(&req->r_safe_completion); + complete_all(&req->r_safe_completion); if (req->r_got_unsafe) { /* @@ -1922,7 +1947,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* last unsafe request during umount? */ if (mdsc->stopping && !__get_oldest_req(mdsc)) - complete(&mdsc->safe_umount_waiters); + complete_all(&mdsc->safe_umount_waiters); mutex_unlock(&mdsc->mutex); goto out; } @@ -2101,7 +2126,7 @@ static void handle_session(struct ceph_mds_session *session, pr_info("mds%d reconnect denied\n", session->s_mds); remove_session_caps(session); wake = 1; /* for good measure */ - complete(&mdsc->session_close_waiters); + complete_all(&mdsc->session_close_waiters); kick_requests(mdsc, mds); break; @@ -2783,6 +2808,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) drop_leases(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); + + /* + * wait for reply handlers to drop their request refs and + * their inode/dcache refs + */ + ceph_msgr_flush(); } /* diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index b292fa42a66..952410c60d0 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -188,6 +188,7 @@ struct ceph_mds_request { int r_old_inode_drop, r_old_inode_unless; struct ceph_msg *r_request; /* original request */ + int r_request_release_offset; struct ceph_msg *r_reply; struct ceph_mds_reply_info_parsed r_reply_info; int r_err; diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 64b8b1f7863..15167b2daa5 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con); * nicely render a sockaddr as a string. */ #define MAX_ADDR_STR 20 -static char addr_str[MAX_ADDR_STR][40]; +#define MAX_ADDR_STR_LEN 60 +static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; static DEFINE_SPINLOCK(addr_str_lock); static int last_addr_str; @@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss) int i; char *s; struct sockaddr_in *in4 = (void *)ss; - unsigned char *quad = (void *)&in4->sin_addr.s_addr; struct sockaddr_in6 *in6 = (void *)ss; spin_lock(&addr_str_lock); @@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss) switch (ss->ss_family) { case AF_INET: - sprintf(s, "%u.%u.%u.%u:%u", - (unsigned int)quad[0], - (unsigned int)quad[1], - (unsigned int)quad[2], - (unsigned int)quad[3], - (unsigned int)ntohs(in4->sin_port)); + snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, + (unsigned int)ntohs(in4->sin_port)); break; case AF_INET6: - sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u", - in6->sin6_addr.s6_addr16[0], - in6->sin6_addr.s6_addr16[1], - in6->sin6_addr.s6_addr16[2], - in6->sin6_addr.s6_addr16[3], - in6->sin6_addr.s6_addr16[4], - in6->sin6_addr.s6_addr16[5], - in6->sin6_addr.s6_addr16[6], - in6->sin6_addr.s6_addr16[7], - (unsigned int)ntohs(in6->sin6_port)); + snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, + (unsigned int)ntohs(in6->sin6_port)); break; default: @@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock, */ static struct socket *ceph_tcp_connect(struct ceph_connection *con) { - struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr; + struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; int ret; BUG_ON(con->sock); - ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, + IPPROTO_TCP, &sock); if (ret) return ERR_PTR(ret); con->sock = sock; @@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); - ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK); + ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), + O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", pr_addr(&con->peer_addr.in_addr), @@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; + con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end, struct sockaddr_in *in4 = (void *)ss; struct sockaddr_in6 *in6 = (void *)ss; int port; + char delim = ','; + + if (*p == '[') { + delim = ']'; + p++; + } memset(ss, 0, sizeof(*ss)); if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, - ',', &ipend)) { + delim, &ipend)) ss->ss_family = AF_INET; - } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, - ',', &ipend)) { + else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, + delim, &ipend)) ss->ss_family = AF_INET6; - } else { + else goto bad; - } p = ipend; + if (delim == ']') { + if (*p != ']') { + dout("missing matching ']'\n"); + goto bad; + } + p++; + } + /* port? */ if (p < end && *p == ':') { port = 0; @@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end, return 0; bad: - pr_err("parse_ips bad ip '%s'\n", c); + pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); return -EINVAL; } @@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con) if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, con->in_hdr.front_len, con->in_hdr.data_len); + skip = 0; con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); + BUG_ON(con->in_msg); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; @@ -2013,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) { mutex_lock(&con->mutex); if (!list_empty(&msg->list_head)) { - dout("con_revoke %p msg %p\n", con, msg); + dout("con_revoke %p msg %p - was on queue\n", con, msg); list_del_init(&msg->list_head); ceph_msg_put(msg); msg->hdr.seq = 0; - if (con->out_msg == msg) { - ceph_msg_put(con->out_msg); - con->out_msg = NULL; - } + } + if (con->out_msg == msg) { + dout("con_revoke %p msg %p - was sending\n", con, msg); + con->out_msg = NULL; if (con->out_kvec_is_msg) { con->out_skip = con->out_kvec_bytes; con->out_kvec_is_msg = false; } - } else { - dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg); + ceph_msg_put(msg); + msg->hdr.seq = 0; } mutex_unlock(&con->mutex); } diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 07a539906e6..54fe01c5070 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -345,7 +345,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, out: mutex_unlock(&monc->mutex); - wake_up(&client->auth_wq); + wake_up_all(&client->auth_wq); } /* @@ -462,7 +462,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, } mutex_unlock(&monc->mutex); if (req) { - complete(&req->completion); + complete_all(&req->completion); put_generic_request(req); } return; @@ -718,14 +718,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc, monc->m_auth->front_max); if (ret < 0) { monc->client->auth_err = ret; - wake_up(&monc->client->auth_wq); + wake_up_all(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = monc->auth->global_id; + monc->client->msgr->inst.name.num = + cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); __resend_generic_request(monc); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index d25b4add85b..e3852234789 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (req->r_callback) req->r_callback(req, msg); else - complete(&req->r_completion); + complete_all(&req->r_completion); if (flags & CEPH_OSD_FLAG_ONDISK) { if (req->r_safe_callback) req->r_safe_callback(req, msg); - complete(&req->r_safe_completion); /* fsync waiter */ + complete_all(&req->r_safe_completion); /* fsync waiter */ } done: @@ -1083,7 +1083,7 @@ done: if (newmap) kick_requests(osdc, NULL); up_read(&osdc->map_sem); - wake_up(&osdc->client->auth_wq); + wake_up_all(&osdc->client->auth_wq); return; bad: @@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) int type = le16_to_cpu(msg->hdr.type); if (!osd) - return; + goto out; osdc = osd->o_osdc; switch (type) { @@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } +out: ceph_msg_put(msg); } diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index ddc656fb5c0..416d46adbf8 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", ev, CEPH_PG_POOL_VERSION); + kfree(pi); goto bad; } __decode_pool(p, pi); @@ -707,6 +708,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) return ERR_CAST(newcrush); + *p += len; } /* new flags? */ @@ -829,12 +831,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* remove any? */ while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, node)->pgid, pgid) <= 0) { - struct rb_node *cur = rbp; + struct ceph_pg_mapping *cur = + rb_entry(rbp, struct ceph_pg_mapping, node); + rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", - *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, - node)->pgid); - rb_erase(cur, &map->pg_temp); + dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); + rb_erase(&cur->node, &map->pg_temp); + kfree(cur); } if (pglen) { @@ -850,19 +853,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, for (j = 0; j < pglen; j++) pg->osds[j] = ceph_decode_32(p); err = __insert_pg_mapping(pg, &map->pg_temp); - if (err) + if (err) { + kfree(pg); goto bad; + } dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, pglen); } } while (rbp) { - struct rb_node *cur = rbp; + struct ceph_pg_mapping *cur = + rb_entry(rbp, struct ceph_pg_mapping, node); + rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", - *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, - node)->pgid); - rb_erase(cur, &map->pg_temp); + dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); + rb_erase(&cur->node, &map->pg_temp); + kfree(cur); } /* ignore the rest */ diff --git a/fs/char_dev.c b/fs/char_dev.c index d6db933df2b..f80a4f25123 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -20,6 +20,7 @@ #include <linux/cdev.h> #include <linux/mutex.h> #include <linux/backing-dev.h> +#include <linux/tty.h> #include "internal.h" diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 80f35259680..917b7d449bb 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,7 +2,6 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS - select SLOW_WORK help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block @@ -71,14 +70,14 @@ config CIFS_WEAK_PW_HASH If unsure, say N. config CIFS_UPCALL - bool "Kerberos/SPNEGO advanced session setup" - depends on CIFS && KEYS - help - Enables an upcall mechanism for CIFS which accesses - userspace helper utilities to provide SPNEGO packaged (RFC 4178) - Kerberos tickets which are needed to mount to certain secure servers - (for which more secure Kerberos authentication is required). If - unsure, say N. + bool "Kerberos/SPNEGO advanced session setup" + depends on CIFS && KEYS + select DNS_RESOLVER + help + Enables an upcall mechanism for CIFS which accesses userspace helper + utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets + which are needed to mount to certain secure servers (for which more + secure Kerberos authentication is required). If unsure, say N. config CIFS_XATTR bool "CIFS extended attributes" @@ -122,6 +121,7 @@ config CIFS_DEBUG2 config CIFS_DFS_UPCALL bool "DFS feature support" depends on CIFS && KEYS + select DNS_RESOLVER help Distributed File System (DFS) support is used to access shares transparently in an enterprise name space, even if the share @@ -131,6 +131,15 @@ config CIFS_DFS_UPCALL IP addresses) which is needed for implicit mounts of DFS junction points. If unsure, say N. +config CIFS_FSCACHE + bool "Provide CIFS client caching support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y + help + Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data + to be cached locally on disk through the general filesystem cache + manager. If unsure, say N. + config CIFS_EXPERIMENTAL bool "CIFS Experimental Features (EXPERIMENTAL)" depends on CIFS && EXPERIMENTAL diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 9948c0030e8..adefa60a9bd 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -11,3 +11,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o + +cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o diff --git a/fs/cifs/README b/fs/cifs/README index a727b7cb075..a7081eeeb85 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -568,8 +568,9 @@ module can be displayed via modinfo. Misc /proc/fs/cifs Flags and Debug Info ======================================= Informational pseudo-files: -DebugData Displays information about active CIFS sessions - and shares, as well as the cifs.ko version. +DebugData Displays information about active CIFS sessions and + shares, features enabled as well as the cifs.ko + version. Stats Lists summary resource usage information as well as per share statistics, if CONFIG_CIFS_STATS in enabled in the kernel configuration. diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c new file mode 100644 index 00000000000..224d7bbd1fc --- /dev/null +++ b/fs/cifs/cache.c @@ -0,0 +1,331 @@ +/* + * fs/cifs/cache.c - CIFS filesystem cache index structure definitions + * + * Copyright (c) 2010 Novell, Inc. + * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "fscache.h" +#include "cifs_debug.h" + +/* + * CIFS filesystem definition for FS-Cache + */ +struct fscache_netfs cifs_fscache_netfs = { + .name = "cifs", + .version = 0, +}; + +/* + * Register CIFS for caching with FS-Cache + */ +int cifs_fscache_register(void) +{ + return fscache_register_netfs(&cifs_fscache_netfs); +} + +/* + * Unregister CIFS for caching + */ +void cifs_fscache_unregister(void) +{ + fscache_unregister_netfs(&cifs_fscache_netfs); +} + +/* + * Key layout of CIFS server cache index object + */ +struct cifs_server_key { + uint16_t family; /* address family */ + uint16_t port; /* IP port */ + union { + struct in_addr ipv4_addr; + struct in6_addr ipv6_addr; + } addr[0]; +}; + +/* + * Server object keyed by {IPaddress,port,family} tuple + */ +static uint16_t cifs_server_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t maxbuf) +{ + const struct TCP_Server_Info *server = cookie_netfs_data; + const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr; + struct cifs_server_key *key = buffer; + uint16_t key_len = sizeof(struct cifs_server_key); + + memset(key, 0, key_len); + + /* + * Should not be a problem as sin_family/sin6_family overlays + * sa_family field + */ + switch (sa->sa_family) { + case AF_INET: + key->family = server->addr.sockAddr.sin_family; + key->port = server->addr.sockAddr.sin_port; + key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr; + key_len += sizeof(key->addr[0].ipv4_addr); + break; + + case AF_INET6: + key->family = server->addr.sockAddr6.sin6_family; + key->port = server->addr.sockAddr6.sin6_port; + key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr; + key_len += sizeof(key->addr[0].ipv6_addr); + break; + + default: + cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family); + key_len = 0; + break; + } + + return key_len; +} + +/* + * Server object for FS-Cache + */ +const struct fscache_cookie_def cifs_fscache_server_index_def = { + .name = "CIFS.server", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = cifs_server_get_key, +}; + +/* + * Auxiliary data attached to CIFS superblock within the cache + */ +struct cifs_fscache_super_auxdata { + u64 resource_id; /* unique server resource id */ +}; + +static char *extract_sharename(const char *treename) +{ + const char *src; + char *delim, *dst; + int len; + + /* skip double chars at the beginning */ + src = treename + 2; + + /* share name is always preceded by '\\' now */ + delim = strchr(src, '\\'); + if (!delim) + return ERR_PTR(-EINVAL); + delim++; + len = strlen(delim); + + /* caller has to free the memory */ + dst = kstrndup(delim, len, GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + return dst; +} + +/* + * Superblock object currently keyed by share name + */ +static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, + uint16_t maxbuf) +{ + const struct cifsTconInfo *tcon = cookie_netfs_data; + char *sharename; + uint16_t len; + + sharename = extract_sharename(tcon->treeName); + if (IS_ERR(sharename)) { + cFYI(1, "CIFS: couldn't extract sharename\n"); + sharename = NULL; + return 0; + } + + len = strlen(sharename); + if (len > maxbuf) + return 0; + + memcpy(buffer, sharename, len); + + kfree(sharename); + + return len; +} + +static uint16_t +cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer, + uint16_t maxbuf) +{ + struct cifs_fscache_super_auxdata auxdata; + const struct cifsTconInfo *tcon = cookie_netfs_data; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.resource_id = tcon->resource_id; + + if (maxbuf > sizeof(auxdata)) + maxbuf = sizeof(auxdata); + + memcpy(buffer, &auxdata, maxbuf); + + return maxbuf; +} + +static enum +fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct cifs_fscache_super_auxdata auxdata; + const struct cifsTconInfo *tcon = cookie_netfs_data; + + if (datalen != sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.resource_id = tcon->resource_id; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +/* + * Superblock object for FS-Cache + */ +const struct fscache_cookie_def cifs_fscache_super_index_def = { + .name = "CIFS.super", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = cifs_super_get_key, + .get_aux = cifs_fscache_super_get_aux, + .check_aux = cifs_fscache_super_check_aux, +}; + +/* + * Auxiliary data attached to CIFS inode within the cache + */ +struct cifs_fscache_inode_auxdata { + struct timespec last_write_time; + struct timespec last_change_time; + u64 eof; +}; + +static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t maxbuf) +{ + const struct cifsInodeInfo *cifsi = cookie_netfs_data; + uint16_t keylen; + + /* use the UniqueId as the key */ + keylen = sizeof(cifsi->uniqueid); + if (keylen > maxbuf) + keylen = 0; + else + memcpy(buffer, &cifsi->uniqueid, keylen); + + return keylen; +} + +static void +cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size) +{ + const struct cifsInodeInfo *cifsi = cookie_netfs_data; + + *size = cifsi->vfs_inode.i_size; +} + +static uint16_t +cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer, + uint16_t maxbuf) +{ + struct cifs_fscache_inode_auxdata auxdata; + const struct cifsInodeInfo *cifsi = cookie_netfs_data; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.eof = cifsi->server_eof; + auxdata.last_write_time = cifsi->vfs_inode.i_mtime; + auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + + if (maxbuf > sizeof(auxdata)) + maxbuf = sizeof(auxdata); + + memcpy(buffer, &auxdata, maxbuf); + + return maxbuf; +} + +static enum +fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, + const void *data, + uint16_t datalen) +{ + struct cifs_fscache_inode_auxdata auxdata; + struct cifsInodeInfo *cifsi = cookie_netfs_data; + + if (datalen != sizeof(auxdata)) + return FSCACHE_CHECKAUX_OBSOLETE; + + memset(&auxdata, 0, sizeof(auxdata)); + auxdata.eof = cifsi->server_eof; + auxdata.last_write_time = cifsi->vfs_inode.i_mtime; + auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + + if (memcmp(data, &auxdata, datalen) != 0) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data) +{ + struct cifsInodeInfo *cifsi = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + cFYI(1, "cifs inode 0x%p now uncached", cifsi); + + for (;;) { + nr_pages = pagevec_lookup(&pvec, + cifsi->vfs_inode.i_mapping, first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPageFsCache(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +const struct fscache_cookie_def cifs_fscache_inode_object_def = { + .name = "CIFS.uniqueid", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = cifs_fscache_inode_get_key, + .get_attr = cifs_fscache_inode_get_attr, + .get_aux = cifs_fscache_inode_get_aux, + .check_aux = cifs_fscache_inode_check_aux, + .now_uncached = cifs_fscache_inode_now_uncached, +}; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 4fce6e61b34..eb1ba493489 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -119,6 +119,31 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) "Display Internal CIFS Data Structures for Debugging\n" "---------------------------------------------------\n"); seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); + seq_printf(m, "Features: "); +#ifdef CONFIG_CIFS_DFS_UPCALL + seq_printf(m, "dfs"); + seq_putc(m, ' '); +#endif +#ifdef CONFIG_CIFS_FSCACHE + seq_printf(m, "fscache"); + seq_putc(m, ' '); +#endif +#ifdef CONFIG_CIFS_WEAK_PW_HASH + seq_printf(m, "lanman"); + seq_putc(m, ' '); +#endif +#ifdef CONFIG_CIFS_POSIX + seq_printf(m, "posix"); + seq_putc(m, ' '); +#endif +#ifdef CONFIG_CIFS_UPCALL + seq_printf(m, "spnego"); + seq_putc(m, ' '); +#endif +#ifdef CONFIG_CIFS_XATTR + seq_printf(m, "xattr"); +#endif + seq_putc(m, '\n'); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); seq_printf(m, "Servers:"); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index ac19a6f3dae..d6ced7aa23c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -141,7 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, } rc = dns_resolve_server_name_to_ip(*devname, &srvIP); - if (rc != 0) { + if (rc < 0) { cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", __func__, *devname, rc); goto compose_mount_options_err; @@ -150,8 +150,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata */ - md_len = strlen(sb_mountdata) + strlen(srvIP) + - strlen(ref->node_name) + 12; + md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; mountdata = kzalloc(md_len+1, GFP_KERNEL); if (mountdata == NULL) { rc = -ENOMEM; @@ -230,28 +229,22 @@ compose_mount_options_err: goto compose_mount_options_out; } - -static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, - struct dentry *dentry, const struct dfs_info3_param *ref) +/** + * cifs_dfs_do_refmount - mounts specified path using provided refferal + * @cifs_sb: parent/root superblock + * @fullpath: full path in UNC format + * @ref: server's referral + */ +static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, + const char *fullpath, const struct dfs_info3_param *ref) { - struct cifs_sb_info *cifs_sb; struct vfsmount *mnt; char *mountdata; char *devname = NULL; - char *fullpath; - - cifs_sb = CIFS_SB(dentry->d_inode->i_sb); - /* - * this function gives us a path with a double backslash prefix. We - * require a single backslash for DFS. - */ - fullpath = build_path_from_dentry(dentry); - if (!fullpath) - return ERR_PTR(-ENOMEM); + /* strip first '\' from fullpath */ mountdata = cifs_compose_mount_options(cifs_sb->mountdata, fullpath + 1, ref, &devname); - kfree(fullpath); if (IS_ERR(mountdata)) return (struct vfsmount *)mountdata; @@ -357,8 +350,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) rc = -EINVAL; goto out_err; } - mnt = cifs_dfs_do_refmount(nd->path.mnt, - nd->path.dentry, referrals + i); + mnt = cifs_dfs_do_refmount(cifs_sb, + full_path, referrals + i); cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, referrals[i].node_name, mnt); diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 246a167cb91..9e771450c3b 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -35,6 +35,7 @@ #define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */ #define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */ #define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/ +#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ struct cifs_sb_info { struct cifsTconInfo *tcon; /* primary mount */ diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 379bd7d9c05..87044906cd1 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -84,6 +84,9 @@ struct key_type cifs_spnego_key_type = { /* strlen of ";uid=0x" */ #define UID_KEY_LEN 7 +/* strlen of ";creduid=0x" */ +#define CREDUID_KEY_LEN 11 + /* strlen of ";user=" */ #define USER_KEY_LEN 6 @@ -107,6 +110,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) IP_KEY_LEN + INET6_ADDRSTRLEN + MAX_MECH_STR_LEN + UID_KEY_LEN + (sizeof(uid_t) * 2) + + CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + USER_KEY_LEN + strlen(sesInfo->userName) + PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; @@ -144,6 +148,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); dp = description + strlen(description); + sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); + + dp = description + strlen(description); sprintf(dp, ";user=%s", sesInfo->userName); dp = description + strlen(description); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 78c02eb4cb1..a5ed10c9afe 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -45,8 +45,8 @@ #include "cifs_fs_sb.h" #include <linux/mm.h> #include <linux/key-type.h> -#include "dns_resolve.h" #include "cifs_spnego.h" +#include "fscache.h" #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ int cifsFYI = 0; @@ -329,6 +329,12 @@ cifs_destroy_inode(struct inode *inode) } static void +cifs_clear_inode(struct inode *inode) +{ + cifs_fscache_release_inode_cookie(inode); +} + +static void cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) { seq_printf(s, ",addr="); @@ -473,14 +479,25 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) return 0; } +void cifs_drop_inode(struct inode *inode) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + return generic_drop_inode(inode); + + return generic_delete_inode(inode); +} + static const struct super_operations cifs_super_ops = { .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, -/* .drop_inode = generic_delete_inode, - .delete_inode = cifs_delete_inode, */ /* Do not need above two - functions unless later we add lazy close of inodes or unless the + .drop_inode = cifs_drop_inode, + .clear_inode = cifs_clear_inode, +/* .delete_inode = cifs_delete_inode, */ /* Do not need above + function unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, @@ -892,6 +909,10 @@ init_cifs(void) cFYI(1, "cifs_max_pending set to max of 256"); } + rc = cifs_fscache_register(); + if (rc) + goto out; + rc = cifs_init_inodecache(); if (rc) goto out_clean_proc; @@ -912,27 +933,13 @@ init_cifs(void) if (rc) goto out_unregister_filesystem; #endif -#ifdef CONFIG_CIFS_DFS_UPCALL - rc = register_key_type(&key_type_dns_resolver); - if (rc) - goto out_unregister_key_type; -#endif - rc = slow_work_register_user(THIS_MODULE); - if (rc) - goto out_unregister_resolver_key; return 0; - out_unregister_resolver_key: -#ifdef CONFIG_CIFS_DFS_UPCALL - unregister_key_type(&key_type_dns_resolver); - out_unregister_key_type: -#endif #ifdef CONFIG_CIFS_UPCALL - unregister_key_type(&cifs_spnego_key_type); out_unregister_filesystem: -#endif unregister_filesystem(&cifs_fs_type); +#endif out_destroy_request_bufs: cifs_destroy_request_bufs(); out_destroy_mids: @@ -941,6 +948,8 @@ init_cifs(void) cifs_destroy_inodecache(); out_clean_proc: cifs_proc_clean(); + cifs_fscache_unregister(); + out: return rc; } @@ -949,9 +958,9 @@ exit_cifs(void) { cFYI(DBG2, "exit_cifs"); cifs_proc_clean(); + cifs_fscache_unregister(); #ifdef CONFIG_CIFS_DFS_UPCALL cifs_dfs_release_automount_timer(); - unregister_key_type(&key_type_dns_resolver); #endif #ifdef CONFIG_CIFS_UPCALL unregister_key_type(&cifs_spnego_key_type); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index a7eb65c84b1..d82f5fb4761 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.64" +#define CIFS_VERSION "1.65" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a88479ceaad..0cdfb8c32ac 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -16,10 +16,13 @@ * the GNU Lesser General Public License for more details. * */ +#ifndef _CIFS_GLOB_H +#define _CIFS_GLOB_H + #include <linux/in.h> #include <linux/in6.h> #include <linux/slab.h> -#include <linux/slow-work.h> +#include <linux/workqueue.h> #include "cifs_fs_sb.h" #include "cifsacl.h" /* @@ -34,7 +37,7 @@ #define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ #define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null termination then *2 for unicode versions */ -#define MAX_PASSWORD_SIZE 16 +#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ #define CIFS_MIN_RCV_POOL 4 @@ -80,8 +83,7 @@ enum statusEnum { }; enum securityEnum { - PLAINTXT = 0, /* Legacy with Plaintext passwords */ - LANMAN, /* Legacy LANMAN auth */ + LANMAN = 0, /* Legacy LANMAN auth */ NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ @@ -142,7 +144,6 @@ struct TCP_Server_Info { struct list_head pending_mid_q; void *Server_NlsInfo; /* BB - placeholder for future NLS info */ unsigned short server_codepage; /* codepage for the server */ - unsigned long ip_address; /* IP addr for the server if known */ enum protocolEnum protocolType; char versionMajor; char versionMinor; @@ -190,19 +191,9 @@ struct TCP_Server_Info { bool sec_mskerberos; /* supports legacy MS Kerberos */ bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_ntlmssp; /* supports NTLMSSP */ -}; - -/* - * The following is our shortcut to user information. We surface the uid, - * and name. We always get the password on the fly in case it - * has changed. We also hang a list of sessions owned by this user off here. - */ -struct cifsUidInfo { - struct list_head userList; - struct list_head sessionList; /* SMB sessions for this user */ - uid_t linux_uid; - char user[MAX_USERNAME_SIZE + 1]; /* ascii name of user */ - /* BB may need ptr or callback for PAM or WinBind info */ +#ifdef CONFIG_CIFS_FSCACHE + struct fscache_cookie *fscache; /* client index cache cookie */ +#endif }; /* @@ -212,9 +203,6 @@ struct cifsSesInfo { struct list_head smb_ses_list; struct list_head tcon_list; struct mutex session_mutex; -#if 0 - struct cifsUidInfo *uidInfo; /* pointer to user info */ -#endif struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum statusEnum status; @@ -226,7 +214,8 @@ struct cifsSesInfo { char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ int Suid; /* remote smb uid */ - uid_t linux_uid; /* local Linux uid */ + uid_t linux_uid; /* overriding owner of files on the mount */ + uid_t cred_uid; /* owner of credentials */ int capabilities; char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for TCP names - will ipv6 and sctp addresses fit? */ @@ -311,6 +300,10 @@ struct cifsTconInfo { bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool need_reconnect:1; /* connection reset, tid now invalid */ +#ifdef CONFIG_CIFS_FSCACHE + u64 resource_id; /* server resource id */ + struct fscache_cookie *fscache; /* cookie for share */ +#endif /* BB add field for back pointer to sb struct(s)? */ }; @@ -363,7 +356,7 @@ struct cifsFileInfo { atomic_t count; /* reference count */ struct mutex fh_mutex; /* prevents reopen race after dead ses*/ struct cifs_search_info srch_inf; - struct slow_work oplock_break; /* slow_work job for oplock breaks */ + struct work_struct oplock_break; /* work for oplock breaks */ }; /* Take a reference on the file private data */ @@ -398,6 +391,9 @@ struct cifsInodeInfo { bool invalid_mapping:1; /* pagecache is invalid */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ +#ifdef CONFIG_CIFS_FSCACHE + struct fscache_cookie *fscache; +#endif struct inode vfs_inode; }; @@ -732,4 +728,10 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ +void cifs_oplock_break(struct work_struct *work); +void cifs_oplock_break_get(struct cifsFileInfo *cfile); +void cifs_oplock_break_put(struct cifsFileInfo *cfile); + extern const struct slow_work_ops cifs_oplock_break_ops; + +#endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index fb1657e0fdb..1f545081408 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -86,7 +86,9 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); -extern int cifs_convert_address(char *src, void *dst); +extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); +extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, + unsigned short int port); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of @@ -106,7 +108,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, struct file *file, struct vfsmount *mnt, unsigned int oflags); extern int cifs_posix_open(char *full_path, struct inode **pinode, - struct vfsmount *mnt, struct super_block *sb, int mode, int oflags, __u32 *poplock, __u16 *pnetfid, int xid); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2208f06e4c4..95c2ea67edf 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -48,6 +48,7 @@ #include "nterr.h" #include "rfc1002pdu.h" #include "cn_cifs.h" +#include "fscache.h" #define CIFS_PORT 445 #define RFC1001_PORT 139 @@ -66,6 +67,7 @@ struct smb_vol { char *iocharset; /* local code page for mapping to and from Unicode */ char source_rfc1001_name[16]; /* netbios name of client */ char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ + uid_t cred_uid; uid_t linux_uid; gid_t linux_gid; mode_t file_mode; @@ -97,6 +99,7 @@ struct smb_vol { bool noblocksnd:1; bool noautotune:1; bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ + bool fsc:1; /* enable fscache */ unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -830,7 +833,8 @@ cifs_parse_mount_options(char *options, const char *devname, /* null target name indicates to use *SMBSERVR default called name if we end up sending RFC1001 session initialize */ vol->target_rfc1001_name[0] = 0; - vol->linux_uid = current_uid(); /* use current_euid() instead? */ + vol->cred_uid = current_uid(); + vol->linux_uid = current_uid(); vol->linux_gid = current_gid(); /* default to only allowing write access to owner of the mount */ @@ -1257,6 +1261,12 @@ cifs_parse_mount_options(char *options, const char *devname, } else if ((strnicmp(data, "nocase", 6) == 0) || (strnicmp(data, "ignorecase", 10) == 0)) { vol->nocase = 1; + } else if (strnicmp(data, "mand", 4) == 0) { + /* ignore */ + } else if (strnicmp(data, "nomand", 6) == 0) { + /* ignore */ + } else if (strnicmp(data, "_netdev", 7) == 0) { + /* ignore */ } else if (strnicmp(data, "brl", 3) == 0) { vol->nobrl = 0; } else if ((strnicmp(data, "nobrl", 5) == 0) || @@ -1331,6 +1341,8 @@ cifs_parse_mount_options(char *options, const char *devname, printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " "/proc/fs/cifs/LookupCacheEnabled to 0\n"); + } else if (strnicmp(data, "fsc", 3) == 0) { + vol->fsc = true; } else printk(KERN_WARNING "CIFS: Unknown mount option %s\n", data); @@ -1380,18 +1392,92 @@ cifs_parse_mount_options(char *options, const char *devname, return 0; } +static bool +match_address(struct TCP_Server_Info *server, struct sockaddr *addr) +{ + struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; + + switch (addr->sa_family) { + case AF_INET: + if (addr4->sin_addr.s_addr != + server->addr.sockAddr.sin_addr.s_addr) + return false; + if (addr4->sin_port && + addr4->sin_port != server->addr.sockAddr.sin_port) + return false; + break; + case AF_INET6: + if (!ipv6_addr_equal(&addr6->sin6_addr, + &server->addr.sockAddr6.sin6_addr)) + return false; + if (addr6->sin6_scope_id != + server->addr.sockAddr6.sin6_scope_id) + return false; + if (addr6->sin6_port && + addr6->sin6_port != server->addr.sockAddr6.sin6_port) + return false; + break; + } + + return true; +} + +static bool +match_security(struct TCP_Server_Info *server, struct smb_vol *vol) +{ + unsigned int secFlags; + + if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) + secFlags = vol->secFlg; + else + secFlags = global_secflags | vol->secFlg; + + switch (server->secType) { + case LANMAN: + if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT))) + return false; + break; + case NTLMv2: + if (!(secFlags & CIFSSEC_MAY_NTLMV2)) + return false; + break; + case NTLM: + if (!(secFlags & CIFSSEC_MAY_NTLM)) + return false; + break; + case Kerberos: + if (!(secFlags & CIFSSEC_MAY_KRB5)) + return false; + break; + case RawNTLMSSP: + if (!(secFlags & CIFSSEC_MAY_NTLMSSP)) + return false; + break; + default: + /* shouldn't happen */ + return false; + } + + /* now check if signing mode is acceptible */ + if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && + (server->secMode & SECMODE_SIGN_REQUIRED)) + return false; + else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && + (server->secMode & + (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) + return false; + + return true; +} + static struct TCP_Server_Info * -cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port) +cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) { - struct list_head *tmp; struct TCP_Server_Info *server; - struct sockaddr_in *addr4 = (struct sockaddr_in *) addr; - struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr; write_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &cifs_tcp_ses_list) { - server = list_entry(tmp, struct TCP_Server_Info, - tcp_ses_list); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { /* * the demux thread can exit on its own while still in CifsNew * so don't accept any sockets in that state. Since the @@ -1401,37 +1487,11 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port) if (server->tcpStatus == CifsNew) continue; - switch (addr->ss_family) { - case AF_INET: - if (addr4->sin_addr.s_addr == - server->addr.sockAddr.sin_addr.s_addr) { - addr4->sin_port = htons(port); - /* user overrode default port? */ - if (addr4->sin_port) { - if (addr4->sin_port != - server->addr.sockAddr.sin_port) - continue; - } - break; - } else - continue; + if (!match_address(server, addr)) + continue; - case AF_INET6: - if (ipv6_addr_equal(&addr6->sin6_addr, - &server->addr.sockAddr6.sin6_addr) && - (addr6->sin6_scope_id == - server->addr.sockAddr6.sin6_scope_id)) { - addr6->sin6_port = htons(port); - /* user overrode default port? */ - if (addr6->sin6_port) { - if (addr6->sin6_port != - server->addr.sockAddr6.sin6_port) - continue; - } - break; - } else - continue; - } + if (!match_security(server, vol)) + continue; ++server->srv_count; write_unlock(&cifs_tcp_ses_lock); @@ -1460,6 +1520,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); + cifs_fscache_release_client_cookie(server); + task = xchg(&server->tsk, NULL); if (task) force_sig(SIGKILL, task); @@ -1479,7 +1541,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip); if (volume_info->UNCip && volume_info->UNC) { - rc = cifs_convert_address(volume_info->UNCip, &addr); + rc = cifs_fill_sockaddr((struct sockaddr *)&addr, + volume_info->UNCip, + strlen(volume_info->UNCip), + volume_info->port); if (!rc) { /* we failed translating address */ rc = -EINVAL; @@ -1499,7 +1564,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) } /* see if we already have a matching tcp_ses */ - tcp_ses = cifs_find_tcp_session(&addr, volume_info->port); + tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info); if (tcp_ses) return tcp_ses; @@ -1543,12 +1608,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, "attempting ipv6 connect"); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - sin_server6->sin6_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr6, sin_server6, sizeof(struct sockaddr_in6)); rc = ipv6_connect(tcp_ses); } else { - sin_server->sin_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr, sin_server, sizeof(struct sockaddr_in)); rc = ipv4_connect(tcp_ses); @@ -1577,6 +1640,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); write_unlock(&cifs_tcp_ses_lock); + cifs_fscache_get_client_cookie(tcp_ses); + return tcp_ses; out_err: @@ -1591,17 +1656,27 @@ out_err: } static struct cifsSesInfo * -cifs_find_smb_ses(struct TCP_Server_Info *server, char *username) +cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) { - struct list_head *tmp; struct cifsSesInfo *ses; write_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); - if (strncmp(ses->userName, username, MAX_USERNAME_SIZE)) - continue; - + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + switch (server->secType) { + case Kerberos: + if (vol->cred_uid != ses->cred_uid) + continue; + break; + default: + /* anything else takes username/password */ + if (strncmp(ses->userName, vol->username, + MAX_USERNAME_SIZE)) + continue; + if (strlen(vol->username) != 0 && + strncmp(ses->password, vol->password, + MAX_PASSWORD_SIZE)) + continue; + } ++ses->ses_count; write_unlock(&cifs_tcp_ses_lock); return ses; @@ -1643,7 +1718,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) xid = GetXid(); - ses = cifs_find_smb_ses(server, volume_info->username); + ses = cifs_find_smb_ses(server, volume_info); if (ses) { cFYI(1, "Existing smb sess found (status=%d)", ses->status); @@ -1706,6 +1781,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) if (ses->domainName) strcpy(ses->domainName, volume_info->domainname); } + ses->cred_uid = volume_info->cred_uid; ses->linux_uid = volume_info->linux_uid; ses->overrideSecFlg = volume_info->secFlg; @@ -1773,6 +1849,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon) CIFSSMBTDis(xid, tcon); _FreeXid(xid); + cifs_fscache_release_super_cookie(tcon); tconInfoFree(tcon); cifs_put_smb_ses(ses); } @@ -1843,6 +1920,8 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info) list_add(&tcon->tcon_list, &ses->tcon_list); write_unlock(&cifs_tcp_ses_lock); + cifs_fscache_get_super_cookie(tcon); + return tcon; out_fail: @@ -2397,6 +2476,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; if (pvolume_info->dynperm) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; + if (pvolume_info->fsc) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; if (pvolume_info->direct_io) { cFYI(1, "mounting share using direct i/o"); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 391816b461c..578d88c5b46 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/file.h> #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" @@ -129,12 +130,6 @@ cifs_bp_rename_retry: return full_path; } -/* - * When called with struct file pointer set to NULL, there is no way we could - * update file->private_data, but getting it stuck on openFileList provides a - * way to access it from cifs_fill_filedata and thereby set file->private_data - * from cifs_open. - */ struct cifsFileInfo * cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, struct file *file, struct vfsmount *mnt, unsigned int oflags) @@ -162,7 +157,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); atomic_set(&pCifsFile->count, 1); - slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops); + INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); write_lock(&GlobalSMBSeslock); list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); @@ -184,12 +179,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, } write_unlock(&GlobalSMBSeslock); + file->private_data = pCifsFile; + return pCifsFile; } int cifs_posix_open(char *full_path, struct inode **pinode, - struct vfsmount *mnt, struct super_block *sb, - int mode, int oflags, + struct super_block *sb, int mode, int oflags, __u32 *poplock, __u16 *pnetfid, int xid) { int rc; @@ -258,19 +254,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode, cifs_fattr_to_inode(*pinode, &fattr); } - /* - * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to - * file->private_data. - */ - if (mnt) { - struct cifsFileInfo *pfile_info; - - pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, - oflags); - if (pfile_info == NULL) - rc = -ENOMEM; - } - posix_open_ret: kfree(presp_data); return rc; @@ -298,7 +281,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int create_options = CREATE_NOT_DIR; __u32 oplock = 0; int oflags; - bool posix_create = false; /* * BB below access is probably too much for mknod to request * but we have to do query and setpathinfo so requesting @@ -339,7 +321,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_open(full_path, &newinode, - nd ? nd->path.mnt : NULL, inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); /* EIO could indicate that (posix open) operation is not supported, despite what server claimed in capability @@ -347,7 +328,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, handled in posix open */ if (rc == 0) { - posix_create = true; if (newinode == NULL) /* query inode info */ goto cifs_create_get_file_info; else /* success, no need to query */ @@ -478,21 +458,28 @@ cifs_create_set_dentry: else cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); - /* nfsd case - nfs srv does not set nd */ - if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { - /* mknod case - do not leave file open */ - CIFSSMBClose(xid, tcon, fileHandle); - } else if (!(posix_create) && (newinode)) { + if (newinode && nd && (nd->flags & LOOKUP_OPEN)) { struct cifsFileInfo *pfile_info; - /* - * cifs_fill_filedata() takes care of setting cifsFileInfo - * pointer to file->private_data. - */ - pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL, + struct file *filp; + + filp = lookup_instantiate_filp(nd, direntry, generic_file_open); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + CIFSSMBClose(xid, tcon, fileHandle); + goto cifs_create_out; + } + + pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp, nd->path.mnt, oflags); - if (pfile_info == NULL) + if (pfile_info == NULL) { + fput(filp); + CIFSSMBClose(xid, tcon, fileHandle); rc = -ENOMEM; + } + } else { + CIFSSMBClose(xid, tcon, fileHandle); } + cifs_create_out: kfree(buf); kfree(full_path); @@ -636,6 +623,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, bool posix_open = false; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; + struct cifsFileInfo *cfile; struct inode *newInode = NULL; char *full_path = NULL; struct file *filp; @@ -703,7 +691,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && (nd->intent.open.flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, + rc = cifs_posix_open(full_path, &newInode, parent_dir_inode->i_sb, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, @@ -733,8 +721,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, else direntry->d_op = &cifs_dentry_ops; d_add(direntry, newInode); - if (posix_open) - filp = lookup_instantiate_filp(nd, direntry, NULL); + if (posix_open) { + filp = lookup_instantiate_filp(nd, direntry, + generic_file_open); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + CIFSSMBClose(xid, pTcon, fileHandle); + goto lookup_out; + } + + cfile = cifs_new_fileinfo(newInode, fileHandle, filp, + nd->path.mnt, + nd->intent.open.flags); + if (cfile == NULL) { + fput(filp); + CIFSSMBClose(xid, pTcon, fileHandle); + rc = -ENOMEM; + goto lookup_out; + } + } /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); @@ -755,6 +760,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, is a common return code */ } +lookup_out: kfree(full_path); FreeXid(xid); return ERR_PTR(rc); diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 4db2c5e7283..0eb87026cad 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -4,6 +4,8 @@ * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) * * Contains the CIFS DFS upcall routines used for hostname to * IP address translation. @@ -24,145 +26,73 @@ */ #include <linux/slab.h> -#include <keys/user-type.h> +#include <linux/dns_resolver.h> #include "dns_resolve.h" #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" -/* Checks if supplied name is IP address - * returns: - * 1 - name is IP - * 0 - name is not IP - */ -static int -is_ip(char *name) -{ - struct sockaddr_storage ss; - - return cifs_convert_address(name, &ss); -} - -static int -dns_resolver_instantiate(struct key *key, const void *data, - size_t datalen) -{ - int rc = 0; - char *ip; - - ip = kmalloc(datalen + 1, GFP_KERNEL); - if (!ip) - return -ENOMEM; - - memcpy(ip, data, datalen); - ip[datalen] = '\0'; - - /* make sure this looks like an address */ - if (!is_ip(ip)) { - kfree(ip); - return -EINVAL; - } - - key->type_data.x[0] = datalen; - key->payload.data = ip; - - return rc; -} - -static void -dns_resolver_destroy(struct key *key) -{ - kfree(key->payload.data); -} - -struct key_type key_type_dns_resolver = { - .name = "dns_resolver", - .def_datalen = sizeof(struct in_addr), - .describe = user_describe, - .instantiate = dns_resolver_instantiate, - .destroy = dns_resolver_destroy, - .match = user_match, -}; - -/* Resolves server name to ip address. - * input: - * unc - server UNC - * output: - * *ip_addr - pointer to server ip, caller responcible for freeing it. - * return 0 on success +/** + * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. + * @unc: UNC path specifying the server + * @ip_addr: Where to return the IP address. + * + * The IP address will be returned in string form, and the caller is + * responsible for freeing it. + * + * Returns length of result on success, -ve on error. */ int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) { - int rc = -EAGAIN; - struct key *rkey = ERR_PTR(-EAGAIN); + struct sockaddr_storage ss; + const char *hostname, *sep; char *name; - char *data = NULL; - int len; + int len, rc; if (!ip_addr || !unc) return -EINVAL; - /* search for server name delimiter */ len = strlen(unc); if (len < 3) { cFYI(1, "%s: unc is too short: %s", __func__, unc); return -EINVAL; } + + /* Discount leading slashes for cifs */ len -= 2; - name = memchr(unc+2, '\\', len); - if (!name) { + hostname = unc + 2; + + /* Search for server name delimiter */ + sep = memchr(hostname, '\\', len); + if (sep) + len = sep - unc; + else cFYI(1, "%s: probably server name is whole unc: %s", - __func__, unc); - } else { - len = (name - unc) - 2/* leading // */; - } + __func__, unc); + + /* Try to interpret hostname as an IPv4 or IPv6 address */ + rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len); + if (rc > 0) + goto name_is_IP_address; + + /* Perform the upcall */ + rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); + if (rc < 0) + cERROR(1, "%s: unable to resolve: %*.*s", + __func__, len, len, hostname); + else + cFYI(1, "%s: resolved: %*.*s to %s", + __func__, len, len, hostname, *ip_addr); + return rc; - name = kmalloc(len+1, GFP_KERNEL); - if (!name) { - rc = -ENOMEM; - return rc; - } - memcpy(name, unc+2, len); +name_is_IP_address: + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + memcpy(name, hostname, len); name[len] = 0; - - if (is_ip(name)) { - cFYI(1, "%s: it is IP, skipping dns upcall: %s", - __func__, name); - data = name; - goto skip_upcall; - } - - rkey = request_key(&key_type_dns_resolver, name, ""); - if (!IS_ERR(rkey)) { - len = rkey->type_data.x[0]; - data = rkey->payload.data; - } else { - cERROR(1, "%s: unable to resolve: %s", __func__, name); - goto out; - } - -skip_upcall: - if (data) { - *ip_addr = kmalloc(len + 1, GFP_KERNEL); - if (*ip_addr) { - memcpy(*ip_addr, data, len + 1); - if (!IS_ERR(rkey)) - cFYI(1, "%s: resolved: %s to %s", __func__, - name, - *ip_addr - ); - rc = 0; - } else { - rc = -ENOMEM; - } - if (!IS_ERR(rkey)) - key_put(rkey); - } - -out: - kfree(name); - return rc; + cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name); + *ip_addr = name; + return 0; } - - diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 966e9288930..d3f5d27f4d0 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -24,8 +24,6 @@ #define _DNS_RESOLVE_H #ifdef __KERNEL__ -#include <linux/key-type.h> -extern struct key_type key_type_dns_resolver; extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); #endif /* KERNEL */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 75541af4b3d..db11fdef0e9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -40,6 +40,7 @@ #include "cifs_unicode.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "fscache.h" static inline int cifs_convert_flags(unsigned int flags) { @@ -162,44 +163,12 @@ psx_client_can_cache: return 0; } -static struct cifsFileInfo * -cifs_fill_filedata(struct file *file) -{ - struct list_head *tmp; - struct cifsFileInfo *pCifsFile = NULL; - struct cifsInodeInfo *pCifsInode = NULL; - - /* search inode for this file and fill in file->private_data */ - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp, struct cifsFileInfo, flist); - if ((pCifsFile->pfile == NULL) && - (pCifsFile->pid == current->tgid)) { - /* mode set in cifs_create */ - - /* needed for writepage */ - pCifsFile->pfile = file; - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - - if (file->private_data != NULL) { - return pCifsFile; - } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) - cERROR(1, "could not find file instance for " - "new file %p", file); - return NULL; -} - /* all arguments to this function must be checked for validity in caller */ -static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, - struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, +static inline int cifs_open_inode_helper(struct inode *inode, struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, char *full_path, int xid) { + struct cifsInodeInfo *pCifsInode = CIFS_I(inode); struct timespec temp; int rc; @@ -213,36 +182,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, /* if not oplocked, invalidate inode pages if mtime or file size changed */ temp = cifs_NTtimeToUnix(buf->LastWriteTime); - if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && - (file->f_path.dentry->d_inode->i_size == + if (timespec_equal(&inode->i_mtime, &temp) && + (inode->i_size == (loff_t)le64_to_cpu(buf->EndOfFile))) { cFYI(1, "inode unchanged on server"); } else { - if (file->f_path.dentry->d_inode->i_mapping) { + if (inode->i_mapping) { /* BB no need to lock inode until after invalidate since namei code should already have it locked? */ - rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); + rc = filemap_write_and_wait(inode->i_mapping); if (rc != 0) - CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; + pCifsInode->write_behind_rc = rc; } cFYI(1, "invalidating remote inode since open detected it " "changed"); - invalidate_remote_inode(file->f_path.dentry->d_inode); + invalidate_remote_inode(inode); } client_can_cache: if (pTcon->unix_ext) - rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode, - full_path, inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, + xid); else - rc = cifs_get_inode_info(&file->f_path.dentry->d_inode, - full_path, buf, inode->i_sb, xid, NULL); + rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, + xid, NULL); if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = true; pCifsInode->clientCanCacheRead = true; - cFYI(1, "Exclusive Oplock granted on inode %p", - file->f_path.dentry->d_inode); + cFYI(1, "Exclusive Oplock granted on inode %p", inode); } else if ((*oplock & 0xF) == OPLOCK_READ) pCifsInode->clientCanCacheRead = true; @@ -256,7 +224,7 @@ int cifs_open(struct inode *inode, struct file *file) __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; - struct cifsFileInfo *pCifsFile; + struct cifsFileInfo *pCifsFile = NULL; struct cifsInodeInfo *pCifsInode; char *full_path = NULL; int desiredAccess; @@ -270,12 +238,6 @@ int cifs_open(struct inode *inode, struct file *file) tcon = cifs_sb->tcon; pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - pCifsFile = cifs_fill_filedata(file); - if (pCifsFile) { - rc = 0; - FreeXid(xid); - return rc; - } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -299,8 +261,7 @@ int cifs_open(struct inode *inode, struct file *file) int oflags = (int) cifs_posix_convert_flags(file->f_flags); oflags |= SMB_O_CREAT; /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, - inode->i_sb, + rc = cifs_posix_open(full_path, &inode, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -308,9 +269,23 @@ int cifs_open(struct inode *inode, struct file *file) /* no need for special case handling of setting mode on read only files needed here */ - pCifsFile = cifs_fill_filedata(file); - cifs_posix_open_inode_helper(inode, file, pCifsInode, - oplock, netfid); + rc = cifs_posix_open_inode_helper(inode, file, + pCifsInode, oplock, netfid); + if (rc != 0) { + CIFSSMBClose(xid, tcon, netfid); + goto out; + } + + pCifsFile = cifs_new_fileinfo(inode, netfid, file, + file->f_path.mnt, + oflags); + if (pCifsFile == NULL) { + CIFSSMBClose(xid, tcon, netfid); + rc = -ENOMEM; + } + + cifs_fscache_set_inode_cookie(inode, file); + goto out; } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { if (tcon->ses->serverNOS) @@ -391,16 +366,18 @@ int cifs_open(struct inode *inode, struct file *file) goto out; } + rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid); + if (rc != 0) + goto out; + pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, file->f_flags); - file->private_data = pCifsFile; - if (file->private_data == NULL) { + if (pCifsFile == NULL) { rc = -ENOMEM; goto out; } - rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon, - &oplock, buf, full_path, xid); + cifs_fscache_set_inode_cookie(inode, file); if (oplock & CIFS_CREATE_ACTION) { /* time to set mode which we can not set earlier due to @@ -456,7 +433,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush) __u16 netfid; if (file->private_data) - pCifsFile = (struct cifsFileInfo *)file->private_data; + pCifsFile = file->private_data; else return -EBADF; @@ -513,8 +490,7 @@ reopen_error_exit: le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, - inode->i_sb, + rc = cifs_posix_open(full_path, NULL, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -595,8 +571,7 @@ int cifs_close(struct inode *inode, struct file *file) int xid, timeout; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - struct cifsFileInfo *pSMBFile = - (struct cifsFileInfo *)file->private_data; + struct cifsFileInfo *pSMBFile = file->private_data; xid = GetXid(); @@ -671,8 +646,7 @@ int cifs_closedir(struct inode *inode, struct file *file) { int rc = 0; int xid; - struct cifsFileInfo *pCFileStruct = - (struct cifsFileInfo *)file->private_data; + struct cifsFileInfo *pCFileStruct = file->private_data; char *ptmp; cFYI(1, "Closedir inode = 0x%p", inode); @@ -893,8 +867,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) length, pfLock, posix_lock_type, wait_flag); } else { - struct cifsFileInfo *fid = - (struct cifsFileInfo *)file->private_data; + struct cifsFileInfo *fid = file->private_data; if (numLock) { rc = CIFSSMBLock(xid, tcon, netfid, length, @@ -995,7 +968,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, if (file->private_data == NULL) return -EBADF; - open_file = (struct cifsFileInfo *) file->private_data; + open_file = file->private_data; rc = generic_write_checks(file, poffset, &write_size, 0); if (rc) @@ -1097,7 +1070,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, if (file->private_data == NULL) return -EBADF; - open_file = (struct cifsFileInfo *)file->private_data; + open_file = file->private_data; xid = GetXid(); @@ -1681,8 +1654,7 @@ int cifs_fsync(struct file *file, int datasync) int xid; int rc = 0; struct cifsTconInfo *tcon; - struct cifsFileInfo *smbfile = - (struct cifsFileInfo *)file->private_data; + struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file->f_path.dentry->d_inode; xid = GetXid(); @@ -1786,7 +1758,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, FreeXid(xid); return rc; } - open_file = (struct cifsFileInfo *)file->private_data; + open_file = file->private_data; if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, "attempting read on write only file instance"); @@ -1867,7 +1839,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, FreeXid(xid); return rc; } - open_file = (struct cifsFileInfo *)file->private_data; + open_file = file->private_data; if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, "attempting read on write only file instance"); @@ -1972,6 +1944,9 @@ static void cifs_copy_cache_pages(struct address_space *mapping, SetPageUptodate(page); unlock_page(page); data += PAGE_CACHE_SIZE; + + /* add page to FS-Cache */ + cifs_readpage_to_fscache(mapping->host, page); } return; } @@ -1998,10 +1973,19 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, FreeXid(xid); return rc; } - open_file = (struct cifsFileInfo *)file->private_data; + open_file = file->private_data; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); pTcon = cifs_sb->tcon; + /* + * Reads as many pages as possible from fscache. Returns -ENOBUFS + * immediately if the cookie is negative + */ + rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, + &num_pages); + if (rc == 0) + goto read_complete; + cFYI(DBG2, "rpages: num pages %d", num_pages); for (i = 0; i < num_pages; ) { unsigned contig_pages; @@ -2112,6 +2096,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, smb_read_data = NULL; } +read_complete: FreeXid(xid); return rc; } @@ -2122,6 +2107,11 @@ static int cifs_readpage_worker(struct file *file, struct page *page, char *read_data; int rc; + /* Is the page cached? */ + rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page); + if (rc == 0) + goto read_complete; + page_cache_get(page); read_data = kmap(page); /* for reads over a certain size could initiate async read ahead */ @@ -2141,11 +2131,17 @@ static int cifs_readpage_worker(struct file *file, struct page *page, flush_dcache_page(page); SetPageUptodate(page); + + /* send this page to the cache */ + cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page); + rc = 0; io_error: kunmap(page); page_cache_release(page); + +read_complete: return rc; } @@ -2295,8 +2291,23 @@ out: return rc; } -static void -cifs_oplock_break(struct slow_work *work) +static int cifs_release_page(struct page *page, gfp_t gfp) +{ + if (PagePrivate(page)) + return 0; + + return cifs_fscache_release_page(page, gfp); +} + +static void cifs_invalidate_page(struct page *page, unsigned long offset) +{ + struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); + + if (offset == 0) + cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); +} + +void cifs_oplock_break(struct work_struct *work) { struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, oplock_break); @@ -2333,33 +2344,30 @@ cifs_oplock_break(struct slow_work *work) LOCKING_ANDX_OPLOCK_RELEASE, false); cFYI(1, "Oplock release rc = %d", rc); } + + /* + * We might have kicked in before is_valid_oplock_break() + * finished grabbing reference for us. Make sure it's done by + * waiting for GlobalSMSSeslock. + */ + write_lock(&GlobalSMBSeslock); + write_unlock(&GlobalSMBSeslock); + + cifs_oplock_break_put(cfile); } -static int -cifs_oplock_break_get(struct slow_work *work) +void cifs_oplock_break_get(struct cifsFileInfo *cfile) { - struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, - oplock_break); mntget(cfile->mnt); cifsFileInfo_get(cfile); - return 0; } -static void -cifs_oplock_break_put(struct slow_work *work) +void cifs_oplock_break_put(struct cifsFileInfo *cfile) { - struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, - oplock_break); mntput(cfile->mnt); cifsFileInfo_put(cfile); } -const struct slow_work_ops cifs_oplock_break_ops = { - .get_ref = cifs_oplock_break_get, - .put_ref = cifs_oplock_break_put, - .execute = cifs_oplock_break, -}; - const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, @@ -2368,6 +2376,8 @@ const struct address_space_operations cifs_addr_ops = { .write_begin = cifs_write_begin, .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, + .releasepage = cifs_release_page, + .invalidatepage = cifs_invalidate_page, /* .sync_page = cifs_sync_page, */ /* .direct_IO = */ }; @@ -2384,6 +2394,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .write_begin = cifs_write_begin, .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, + .releasepage = cifs_release_page, + .invalidatepage = cifs_invalidate_page, /* .sync_page = cifs_sync_page, */ /* .direct_IO = */ }; diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c new file mode 100644 index 00000000000..9f3f5c4be16 --- /dev/null +++ b/fs/cifs/fscache.c @@ -0,0 +1,236 @@ +/* + * fs/cifs/fscache.c - CIFS filesystem cache interface + * + * Copyright (c) 2010 Novell, Inc. + * Author(s): Suresh Jayaraman (sjayaraman@suse.de> + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "fscache.h" +#include "cifsglob.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" + +void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) +{ + server->fscache = + fscache_acquire_cookie(cifs_fscache_netfs.primary_index, + &cifs_fscache_server_index_def, server); + cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server, + server->fscache); +} + +void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) +{ + cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server, + server->fscache); + fscache_relinquish_cookie(server->fscache, 0); + server->fscache = NULL; +} + +void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) +{ + struct TCP_Server_Info *server = tcon->ses->server; + + tcon->fscache = + fscache_acquire_cookie(server->fscache, + &cifs_fscache_super_index_def, tcon); + cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)", + server->fscache, tcon->fscache); +} + +void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) +{ + cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache); + fscache_relinquish_cookie(tcon->fscache, 0); + tcon->fscache = NULL; +} + +static void cifs_fscache_enable_inode_cookie(struct inode *inode) +{ + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + if (cifsi->fscache) + return; + + cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache, + &cifs_fscache_inode_object_def, + cifsi); + cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", + cifs_sb->tcon->fscache, cifsi->fscache); +} + +void cifs_fscache_release_inode_cookie(struct inode *inode) +{ + struct cifsInodeInfo *cifsi = CIFS_I(inode); + + if (cifsi->fscache) { + cFYI(1, "CIFS releasing inode cookie (0x%p)", + cifsi->fscache); + fscache_relinquish_cookie(cifsi->fscache, 0); + cifsi->fscache = NULL; + } +} + +static void cifs_fscache_disable_inode_cookie(struct inode *inode) +{ + struct cifsInodeInfo *cifsi = CIFS_I(inode); + + if (cifsi->fscache) { + cFYI(1, "CIFS disabling inode cookie (0x%p)", + cifsi->fscache); + fscache_relinquish_cookie(cifsi->fscache, 1); + cifsi->fscache = NULL; + } +} + +void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) +{ + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + cifs_fscache_disable_inode_cookie(inode); + else { + cifs_fscache_enable_inode_cookie(inode); + cFYI(1, "CIFS: fscache inode cookie set"); + } +} + +void cifs_fscache_reset_inode_cookie(struct inode *inode) +{ + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct fscache_cookie *old = cifsi->fscache; + + if (cifsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(cifsi->fscache, 1); + + cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache, + &cifs_fscache_inode_object_def, + cifsi); + cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", + cifsi->fscache, old); + } +} + +int cifs_fscache_release_page(struct page *page, gfp_t gfp) +{ + if (PageFsCache(page)) { + struct inode *inode = page->mapping->host; + struct cifsInodeInfo *cifsi = CIFS_I(inode); + + cFYI(1, "CIFS: fscache release page (0x%p/0x%p)", + page, cifsi->fscache); + if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) + return 0; + } + + return 1; +} + +static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, + int error) +{ + cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)", + page, error); + if (!error) + SetPageUptodate(page); + unlock_page(page); +} + +/* + * Retrieve a page from FS-Cache + */ +int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) +{ + int ret; + + cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p", + CIFS_I(inode)->fscache, page, inode); + ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, + cifs_readpage_from_fscache_complete, + NULL, + GFP_KERNEL); + switch (ret) { + + case 0: /* page found in fscache, read submitted */ + cFYI(1, "CIFS: readpage_from_fscache: submitted"); + return ret; + case -ENOBUFS: /* page won't be cached */ + case -ENODATA: /* page not in cache */ + cFYI(1, "CIFS: readpage_from_fscache %d", ret); + return 1; + + default: + cERROR(1, "unknown error ret = %d", ret); + } + return ret; +} + +/* + * Retrieve a set of pages from FS-Cache + */ +int __cifs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret; + + cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)", + CIFS_I(inode)->fscache, *nr_pages, inode); + ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, + pages, nr_pages, + cifs_readpage_from_fscache_complete, + NULL, + mapping_gfp_mask(mapping)); + switch (ret) { + case 0: /* read submitted to the cache for all pages */ + cFYI(1, "CIFS: readpages_from_fscache: submitted"); + return ret; + + case -ENOBUFS: /* some pages are not cached and can't be */ + case -ENODATA: /* some pages are not cached */ + cFYI(1, "CIFS: readpages_from_fscache: no page"); + return 1; + + default: + cFYI(1, "unknown error ret = %d", ret); + } + + return ret; +} + +void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) +{ + int ret; + + cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p", + CIFS_I(inode)->fscache, page, inode); + ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); + if (ret != 0) + fscache_uncache_page(CIFS_I(inode)->fscache, page); +} + +void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) +{ + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct fscache_cookie *cookie = cifsi->fscache; + + cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie); + fscache_wait_on_page_write(cookie, page); + fscache_uncache_page(cookie, page); +} + diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h new file mode 100644 index 00000000000..31b88ec2341 --- /dev/null +++ b/fs/cifs/fscache.h @@ -0,0 +1,136 @@ +/* + * fs/cifs/fscache.h - CIFS filesystem cache interface definitions + * + * Copyright (c) 2010 Novell, Inc. + * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _CIFS_FSCACHE_H +#define _CIFS_FSCACHE_H + +#include <linux/fscache.h> + +#include "cifsglob.h" + +#ifdef CONFIG_CIFS_FSCACHE + +extern struct fscache_netfs cifs_fscache_netfs; +extern const struct fscache_cookie_def cifs_fscache_server_index_def; +extern const struct fscache_cookie_def cifs_fscache_super_index_def; +extern const struct fscache_cookie_def cifs_fscache_inode_object_def; + +extern int cifs_fscache_register(void); +extern void cifs_fscache_unregister(void); + +/* + * fscache.c + */ +extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *); +extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *); +extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *); +extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *); + +extern void cifs_fscache_release_inode_cookie(struct inode *); +extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *); +extern void cifs_fscache_reset_inode_cookie(struct inode *); + +extern void __cifs_fscache_invalidate_page(struct page *, struct inode *); +extern int cifs_fscache_release_page(struct page *page, gfp_t gfp); +extern int __cifs_readpage_from_fscache(struct inode *, struct page *); +extern int __cifs_readpages_from_fscache(struct inode *, + struct address_space *, + struct list_head *, + unsigned *); + +extern void __cifs_readpage_to_fscache(struct inode *, struct page *); + +static inline void cifs_fscache_invalidate_page(struct page *page, + struct inode *inode) +{ + if (PageFsCache(page)) + __cifs_fscache_invalidate_page(page, inode); +} + +static inline int cifs_readpage_from_fscache(struct inode *inode, + struct page *page) +{ + if (CIFS_I(inode)->fscache) + return __cifs_readpage_from_fscache(inode, page); + + return -ENOBUFS; +} + +static inline int cifs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + if (CIFS_I(inode)->fscache) + return __cifs_readpages_from_fscache(inode, mapping, pages, + nr_pages); + return -ENOBUFS; +} + +static inline void cifs_readpage_to_fscache(struct inode *inode, + struct page *page) +{ + if (PageFsCache(page)) + __cifs_readpage_to_fscache(inode, page); +} + +#else /* CONFIG_CIFS_FSCACHE */ +static inline int cifs_fscache_register(void) { return 0; } +static inline void cifs_fscache_unregister(void) {} + +static inline void +cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {} +static inline void +cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {} +static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {} +static inline void +cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {} + +static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} +static inline void cifs_fscache_set_inode_cookie(struct inode *inode, + struct file *filp) {} +static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {} +static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp) +{ + return 1; /* May release page */ +} + +static inline void cifs_fscache_invalidate_page(struct page *page, + struct inode *inode) {} +static inline int +cifs_readpage_from_fscache(struct inode *inode, struct page *page) +{ + return -ENOBUFS; +} + +static inline int cifs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} + +static inline void cifs_readpage_to_fscache(struct inode *inode, + struct page *page) {} + +#endif /* CONFIG_CIFS_FSCACHE */ + +#endif /* _CIFS_FSCACHE_H */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 62b324f26a5..dc4c47ab958 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -29,6 +29,7 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "fscache.h" static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) @@ -288,7 +289,7 @@ int cifs_get_file_info_unix(struct file *filp) struct inode *inode = filp->f_path.dentry->d_inode; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsTconInfo *tcon = cifs_sb->tcon; - struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; + struct cifsFileInfo *cfile = filp->private_data; xid = GetXid(); rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); @@ -515,7 +516,7 @@ int cifs_get_file_info(struct file *filp) struct inode *inode = filp->f_path.dentry->d_inode; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsTconInfo *tcon = cifs_sb->tcon; - struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; + struct cifsFileInfo *cfile = filp->private_data; xid = GetXid(); rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); @@ -723,18 +724,17 @@ cifs_find_inode(struct inode *inode, void *opaque) { struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; + /* don't match inode with different uniqueid */ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; - /* - * uh oh -- it's a directory. We can't use it since hardlinked dirs are - * verboten. Disable serverino and return it as if it were found, the - * caller can discard it, generate a uniqueid and retry the find - */ - if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { + /* don't match inode of different type */ + if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) + return 0; + + /* if it's not a directory or has no dentries, then flag it */ + if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; - cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); - } return 1; } @@ -748,6 +748,27 @@ cifs_init_inode(struct inode *inode, void *opaque) return 0; } +/* + * walk dentry list for an inode and report whether it has aliases that + * are hashed. We use this to determine if a directory inode can actually + * be used. + */ +static bool +inode_has_hashed_dentries(struct inode *inode) +{ + struct dentry *dentry; + + spin_lock(&dcache_lock); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + if (!d_unhashed(dentry) || IS_ROOT(dentry)) { + spin_unlock(&dcache_lock); + return true; + } + } + spin_unlock(&dcache_lock); + return false; +} + /* Given fattrs, get a corresponding inode */ struct inode * cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) @@ -763,12 +784,16 @@ retry_iget5_locked: inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); if (inode) { - /* was there a problematic inode number collision? */ + /* was there a potentially problematic inode collision? */ if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { - iput(inode); - fattr->cf_uniqueid = iunique(sb, ROOT_I); fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; - goto retry_iget5_locked; + + if (inode_has_hashed_dentries(inode)) { + cifs_autodisable_serverino(CIFS_SB(sb)); + iput(inode); + fattr->cf_uniqueid = iunique(sb, ROOT_I); + goto retry_iget5_locked; + } } cifs_fattr_to_inode(inode, fattr); @@ -776,6 +801,10 @@ retry_iget5_locked: inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { inode->i_ino = hash; +#ifdef CONFIG_CIFS_FSCACHE + /* initialize per-inode cache cookie pointer */ + CIFS_I(inode)->fscache = NULL; +#endif unlock_new_inode(inode); } } @@ -807,6 +836,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) if (!inode) return ERR_PTR(-ENOMEM); +#ifdef CONFIG_CIFS_FSCACHE + /* populate tcon->resource_id */ + cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid; +#endif + if (rc && cifs_sb->tcon->ipc) { cFYI(1, "ipc connection - fake read inode"); inode->i_mode |= S_IFDIR; @@ -1401,6 +1435,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, if (rc == 0 || rc != -ETXTBSY) return rc; + /* open-file renames don't work across directories */ + if (to_dentry->d_parent != from_dentry->d_parent) + return rc; + /* open the file to be renamed -- we need DELETE perms */ rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, CREATE_NOT_DIR, &srcfid, &oplock, NULL, @@ -1564,6 +1602,7 @@ cifs_invalidate_mapping(struct inode *inode) cifs_i->write_behind_rc = rc; } invalidate_remote_inode(inode); + cifs_fscache_reset_inode_cookie(inode); } int cifs_revalidate_file(struct file *filp) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 505926f1ee6..9d38a71c8e1 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -41,8 +41,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) __u64 ExtAttrMask = 0; __u64 caps; struct cifsTconInfo *tcon; - struct cifsFileInfo *pSMBFile = - (struct cifsFileInfo *)filep->private_data; + struct cifsFileInfo *pSMBFile = filep->private_data; #endif /* CONFIG_CIFS_POSIX */ xid = GetXid(); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1394aa37f26..3ccadc1326d 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -498,7 +498,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) struct cifsTconInfo *tcon; struct cifsInodeInfo *pCifsInode; struct cifsFileInfo *netfile; - int rc; cFYI(1, "Checking for oplock break or dnotify response"); if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && @@ -583,13 +582,18 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pCifsInode->clientCanCacheAll = false; if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead = false; - rc = slow_work_enqueue(&netfile->oplock_break); - if (rc) { - cERROR(1, "failed to enqueue oplock " - "break: %d\n", rc); - } else { - netfile->oplock_break_cancelled = false; - } + + /* + * cifs_oplock_break_put() can't be called + * from here. Get reference after queueing + * succeeded. cifs_oplock_break() will + * synchronize using GlobalSMSSeslock. + */ + if (queue_work(system_nrt_wq, + &netfile->oplock_break)) + cifs_oplock_break_get(netfile); + netfile->oplock_break_cancelled = false; + read_unlock(&GlobalSMBSeslock); read_unlock(&cifs_tcp_ses_lock); return true; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index d35d52889cb..f97851119e6 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -61,6 +61,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRremcd, -EACCES}, {ERRdiffdevice, -EXDEV}, {ERRnofiles, -ENOENT}, + {ERRwriteprot, -EROFS}, {ERRbadshare, -ETXTBSY}, {ERRlock, -EACCES}, {ERRunsup, -EINVAL}, @@ -139,17 +140,18 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { * Returns 0 on failure. */ static int -cifs_inet_pton(const int address_family, const char *cp, void *dst) +cifs_inet_pton(const int address_family, const char *cp, int len, void *dst) { int ret = 0; /* calculate length by finding first slash or NULL */ if (address_family == AF_INET) - ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); + ret = in4_pton(cp, len, dst, '\\', NULL); else if (address_family == AF_INET6) - ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); + ret = in6_pton(cp, len, dst , '\\', NULL); - cFYI(DBG2, "address conversion returned %d for %s", ret, cp); + cFYI(DBG2, "address conversion returned %d for %*.*s", + ret, len, len, cp); if (ret > 0) ret = 1; return ret; @@ -164,43 +166,66 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) * Returns 0 on failure. */ int -cifs_convert_address(char *src, void *dst) +cifs_convert_address(struct sockaddr *dst, const char *src, int len) { - int rc; - char *pct, *endp; + int rc, alen, slen; + const char *pct; + char *endp, scope_id[13]; struct sockaddr_in *s4 = (struct sockaddr_in *) dst; struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; /* IPv4 address */ - if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { + if (cifs_inet_pton(AF_INET, src, len, &s4->sin_addr.s_addr)) { s4->sin_family = AF_INET; return 1; } - /* temporarily terminate string */ - pct = strchr(src, '%'); - if (pct) - *pct = '\0'; - - rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr); - - /* repair temp termination (if any) and make pct point to scopeid */ - if (pct) - *pct++ = '%'; + /* attempt to exclude the scope ID from the address part */ + pct = memchr(src, '%', len); + alen = pct ? pct - src : len; + rc = cifs_inet_pton(AF_INET6, src, alen, &s6->sin6_addr.s6_addr); if (!rc) return rc; s6->sin6_family = AF_INET6; if (pct) { + /* grab the scope ID */ + slen = len - (alen + 1); + if (slen <= 0 || slen > 12) + return 0; + memcpy(scope_id, pct + 1, slen); + scope_id[slen] = '\0'; + s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); - if (!*pct || *endp) + if (endp != scope_id + slen) return 0; } return rc; } +int +cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, + const unsigned short int port) +{ + if (!cifs_convert_address(dst, src, len)) + return 0; + + switch (dst->sa_family) { + case AF_INET: + ((struct sockaddr_in *)dst)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); + break; + default: + return 0; + } + + return 1; +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index daf1753af67..d5e591fab47 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -847,6 +847,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); + if (tmp_buf == NULL) { + rc = -ENOMEM; + break; + } + for (i = 0; (i < num_to_fill) && (rc == 0); i++) { if (current_entry == NULL) { /* evaluate whether this case is an error */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7707389bdf2..0a57cb7db5d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate: /* calculate session key */ setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); - if (first_time) /* should this be moved into common code - with similar ntlmv2 path? */ - /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key, - response BB FIXME, v2_sess_key); */ - - /* copy session key */ - - /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE); - bcc_ptr += LM2_SESS_KEY_SIZE; */ + /* FIXME: calculate MAC key */ memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index c5084d27db7..7f16cb825fe 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -76,6 +76,7 @@ #define ERRnofiles 18 /* A File Search command can find no more files matching the specified criteria. */ +#define ERRwriteprot 19 /* media is write protected */ #define ERRgeneral 31 #define ERRbadshare 32 /* The sharing mode specified for an Open conflicts with existing FIDs on diff --git a/fs/compat.c b/fs/compat.c index 6490d2134ff..c6fda9aeb86 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -8,7 +8,7 @@ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) + * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 641640dc7ae..63ae8583146 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -4,7 +4,7 @@ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) + * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) * * These routines maintain argument size conversion between 32bit and 64bit * ioctls. @@ -601,8 +601,11 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd, } /* Bluetooth ioctls */ -#define HCIUARTSETPROTO _IOW('U', 200, int) -#define HCIUARTGETPROTO _IOR('U', 201, int) +#define HCIUARTSETPROTO _IOW('U', 200, int) +#define HCIUARTGETPROTO _IOR('U', 201, int) +#define HCIUARTGETDEVICE _IOR('U', 202, int) +#define HCIUARTSETFLAGS _IOW('U', 203, int) +#define HCIUARTGETFLAGS _IOR('U', 204, int) #define BNEPCONNADD _IOW('B', 200, int) #define BNEPCONNDEL _IOW('B', 201, int) @@ -1328,6 +1331,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL) COMPATIBLE_IOCTL(HCISETLINKMODE) COMPATIBLE_IOCTL(HCISETACLMTU) COMPATIBLE_IOCTL(HCISETSCOMTU) +COMPATIBLE_IOCTL(HCIBLOCKADDR) +COMPATIBLE_IOCTL(HCIUNBLOCKADDR) COMPATIBLE_IOCTL(HCIINQUIRY) COMPATIBLE_IOCTL(HCIUARTSETPROTO) COMPATIBLE_IOCTL(HCIUARTGETPROTO) diff --git a/fs/dcache.c b/fs/dcache.c index d96047b4a63..86d4db15473 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -590,6 +590,8 @@ static void prune_dcache(int count) up_read(&sb->s_umount); } spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); count -= pruned; __put_super(sb); /* more work left to do? */ @@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index c0d35c62052..37a34c2c622 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -248,7 +248,7 @@ static struct connection *assoc2con(int assoc_id) for (i = 0 ; i < CONN_HASH_SIZE; i++) { hlist_for_each_entry(con, h, &connection_hash[i], list) { - if (con && con->sctp_assoc == assoc_id) { + if (con->sctp_assoc == assoc_id) { mutex_unlock(&connections_lock); return con; } diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 2c6ad518100..ef17e0169da 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -81,24 +81,11 @@ static struct genl_ops dlm_nl_ops = { int __init dlm_netlink_init(void) { - int rv; - - rv = genl_register_family(&family); - if (rv) - return rv; - - rv = genl_register_ops(&family, &dlm_nl_ops); - if (rv < 0) - goto err; - return 0; - err: - genl_unregister_family(&family); - return rv; + return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); } void dlm_netlink_exit(void) { - genl_unregister_ops(&family, &dlm_nl_ops); genl_unregister_family(&family); } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 1cc087635a5..a2e3b562e65 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -762,7 +762,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, /** * ecryptfs_init_crypt_ctx - * @crypt_stat: Uninitilized crypt stats structure + * @crypt_stat: Uninitialized crypt stats structure * * Initialize the crypto context. * diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 2d8dbce9d48..46c4dd8dfcc 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux; static struct hlist_head *ecryptfs_daemon_hash; struct mutex ecryptfs_daemon_hash_mux; -static int ecryptfs_hash_buckets; +static int ecryptfs_hash_bits; #define ecryptfs_uid_hash(uid) \ - hash_long((unsigned long)uid, ecryptfs_hash_buckets) + hash_long((unsigned long)uid, ecryptfs_hash_bits) static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; @@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void) } mutex_init(&ecryptfs_daemon_hash_mux); mutex_lock(&ecryptfs_daemon_hash_mux); - ecryptfs_hash_buckets = 1; - while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) - ecryptfs_hash_buckets++; + ecryptfs_hash_bits = 1; + while (ecryptfs_number_of_users >> ecryptfs_hash_bits) + ecryptfs_hash_bits++; ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head) - * ecryptfs_hash_buckets), GFP_KERNEL); + * (1 << ecryptfs_hash_bits)), + GFP_KERNEL); if (!ecryptfs_daemon_hash) { rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); mutex_unlock(&ecryptfs_daemon_hash_mux); goto out; } - for (i = 0; i < ecryptfs_hash_buckets; i++) + for (i = 0; i < (1 << ecryptfs_hash_bits); i++) INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]); mutex_unlock(&ecryptfs_daemon_hash_mux); ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) @@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void) int i; mutex_lock(&ecryptfs_daemon_hash_mux); - for (i = 0; i < ecryptfs_hash_buckets; i++) { + for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; hlist_for_each_entry(daemon, elem, diff --git a/fs/exec.c b/fs/exec.c index e19de6a8033..97d91a03fb1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm, else stack_base = vma->vm_start - stack_expand; #endif + current->mm->start_stack = bprm->p; ret = expand_stack(vma, stack_base); if (ret) ret = -EFAULT; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index ca7e2a0ed98..2bcc0431bad 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; else { inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) acl = NULL; diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 522b15498f4..e8c6ba0e4a3 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig @@ -31,6 +31,7 @@ config EXT3_FS config EXT3_DEFAULTS_TO_ORDERED bool "Default to 'data=ordered' in ext3" depends on EXT3_FS + default y help The journal mode options for ext3 have different tradeoffs between when data is guaranteed to be on disk and diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 01552abbca3..8a11fe21218 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return error; else { inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); if (error == 0) acl = NULL; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 735f0190ec2..001eb0e2d48 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1149,9 +1149,25 @@ static int walk_page_buffers( handle_t *handle, static int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh) { + int dirty = buffer_dirty(bh); + int ret; + if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; - return ext3_journal_get_write_access(handle, bh); + /* + * __block_prepare_write() could have dirtied some buffers. Clean + * the dirty bit as jbd2_journal_get_write_access() could complain + * otherwise about fs integrity issues. Setting of the dirty bit + * by __block_prepare_write() isn't a real problem here as we clear + * the bit before releasing a page lock and thus writeback cannot + * ever write the buffer. + */ + if (dirty) + clear_buffer_dirty(bh); + ret = ext3_journal_get_write_access(handle, bh); + if (!ret && dirty) + ret = ext3_journal_dirty_metadata(handle, bh); + return ret; } /* @@ -1625,10 +1641,7 @@ static int ext3_writeback_writepage(struct page *page, goto out_fail; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_writepage(page, ext3_get_block, wbc); - else - ret = block_write_full_page(page, ext3_get_block, wbc); + ret = block_write_full_page(page, ext3_get_block, wbc); err = ext3_journal_stop(handle); if (!ret) @@ -1922,17 +1935,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, length = blocksize - (offset & (blocksize - 1)); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - /* - * For "nobh" option, we can only work if we don't need to - * read-in the page - otherwise we create buffers to do the IO. - */ - if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && - ext3_should_writeback_data(inode) && PageUptodate(page)) { - zero_user(page, offset, length); - set_page_dirty(page); - goto unlock; - } - if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -2284,27 +2286,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, depth); /* - * We've probably journalled the indirect block several - * times during the truncate. But it's no longer - * needed and we now drop it from the transaction via - * journal_revoke(). - * - * That's easy if it's exclusively part of this - * transaction. But if it's part of the committing - * transaction then journal_forget() will simply - * brelse() it. That means that if the underlying - * block is reallocated in ext3_get_block(), - * unmap_underlying_metadata() will find this block - * and will try to get rid of it. damn, damn. - * - * If this block has already been committed to the - * journal, a revoke record will be written. And - * revoke records must be emitted *before* clearing - * this block's bit in the bitmaps. - */ - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - - /* * Everything below this this pointer has been * released. Now let this top-of-subtree go. * @@ -2327,6 +2308,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, truncate_restart_transaction(handle, inode); } + /* + * We've probably journalled the indirect block several + * times during the truncate. But it's no longer + * needed and we now drop it from the transaction via + * journal_revoke(). + * + * That's easy if it's exclusively part of this + * transaction. But if it's part of the committing + * transaction then journal_forget() will simply + * brelse() it. That means that if the underlying + * block is reallocated in ext3_get_block(), + * unmap_underlying_metadata() will find this block + * and will try to get rid of it. damn, damn. Thus + * we don't allow a block to be reallocated until + * a transaction freeing it has fully committed. + * + * We also have to make sure journal replay after a + * crash does not overwrite non-journaled data blocks + * with old metadata when the block got reallocated for + * data. Thus we have to store a revoke record for a + * block in the same transaction in which we free the + * block. + */ + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + ext3_free_blocks(handle, inode, nr, 1); if (parent_bh) { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ee184084ca4..2b35ddb70d6 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1447,7 +1447,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; - unsigned long offset; struct buffer_head * bh; struct ext3_dir_entry_2 *de; struct super_block * sb; @@ -1469,7 +1468,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, ext3_mark_inode_dirty(handle, dir); } blocks = dir->i_size >> sb->s_blocksize_bits; - for (block = 0, offset = 0; block < blocks; block++) { + for (block = 0; block < blocks; block++) { bh = ext3_bread(handle, dir, block, 0, &retval); if(!bh) return retval; diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 54351ac7cef..0ccd7b12b73 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -964,7 +964,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, ext3_fsblk_t n_blocks_count) { ext3_fsblk_t o_blocks_count; - unsigned long o_groups_count; ext3_grpblk_t last; ext3_grpblk_t add; struct buffer_head * bh; @@ -976,7 +975,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, * yet: we're going to revalidate es->s_blocks_count after * taking the s_resize_lock below. */ o_blocks_count = le32_to_cpu(es->s_blocks_count); - o_groups_count = EXT3_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6c953bb255e..9650a956fd0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -661,9 +661,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) */ seq_puts(seq, ",barrier="); seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); - if (test_opt(sb, NOBH)) - seq_puts(seq, ",nobh"); - seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); if (test_opt(sb, DATA_ERR_ABORT)) seq_puts(seq, ",data_err=abort"); @@ -1255,10 +1252,12 @@ set_qf_format: *n_blocks_count = option; break; case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); + ext3_msg(sb, KERN_WARNING, + "warning: ignoring deprecated nobh option"); break; case Opt_bh: - clear_opt(sbi->s_mount_opt, NOBH); + ext3_msg(sb, KERN_WARNING, + "warning: ignoring deprecated bh option"); break; default: ext3_msg(sb, KERN_ERR, @@ -2001,14 +2000,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) break; } - if (test_opt(sb, NOBH)) { - if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { - ext3_msg(sb, KERN_WARNING, - "warning: ignoring nobh option - " - "it is supported only with writeback mode"); - clear_opt(sbi->s_mount_opt, NOBH); - } - } /* * The journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now. diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2c01d7391f1..06328d3e571 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2936,7 +2936,7 @@ fix_extent_len: * One of more index blocks maybe needed if the extent tree grow after * the unintialized extent split. To prevent ENOSPC occur at the IO * complete, we need to split the uninitialized extent before DIO submit - * the IO. The uninitilized extent called at this time will be split + * the IO. The uninitialized extent called at this time will be split * into three uninitialized extent(at most). After IO complete, the part * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1f360f07cb4..4b4ad4b7ce5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2704,7 +2704,7 @@ void exit_ext4_mballoc(void) /* - * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps + * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps * Returns 0 if success or error code */ static noinline_for_stack int diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 81cb3fc1218..8d65575f8c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3124,7 +3124,7 @@ no_journal: ext4_ext_init(sb); err = ext4_mb_init(sb, needs_recovery); if (err) { - ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", + ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", err); goto failed_mount4; } diff --git a/fs/fcntl.c b/fs/fcntl.c index 51e11bf5708..9d175d623aa 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags; + if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - spin_lock(&fa->fa_lock); + spin_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - spin_unlock(&fa->fa_lock); + spin_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } diff --git a/fs/file.c b/fs/file.c index 34bb7f71d99..cccaead962c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -178,7 +178,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) fdt->open_fds = (fd_set *)data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = (fd_set *)data; - INIT_RCU_HEAD(&fdt->rcu); fdt->next = NULL; return fdt; @@ -312,7 +311,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; new_fdt->open_fds = (fd_set *)&newf->open_fds_init; new_fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&new_fdt->rcu); new_fdt->next = NULL; spin_lock(&oldf->file_lock); @@ -430,7 +428,6 @@ struct files_struct init_files = { .fd = &init_files.fd_array[0], .close_on_exec = (fd_set *)&init_files.close_on_exec_init, .open_fds = (fd_set *)&init_files.open_fds_init, - .rcu = RCU_HEAD_INIT, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1e8af939b3e..5132c99b1ca 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -135,7 +135,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data) } /** - * vxfs_read_super - read superblock into memory and initalize filesystem + * vxfs_read_super - read superblock into memory and initialize filesystem * @sbp: VFS superblock (to fill) * @dp: fs private mount data * @silent: do not complain loudly when sth is wrong diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1d1088f48bc..d5be1693ac9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -38,51 +38,18 @@ int nr_pdflush_threads; /* * Passed into wb_writeback(), essentially a subset of writeback_control */ -struct wb_writeback_args { +struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; -}; -/* - * Work items for the bdi_writeback threads - */ -struct bdi_work { struct list_head list; /* pending work list */ - struct rcu_head rcu_head; /* for RCU free/clear of work */ - - unsigned long seen; /* threads that have seen this work */ - atomic_t pending; /* number of threads still to do work */ - - struct wb_writeback_args args; /* writeback arguments */ - - unsigned long state; /* flag bits, see WS_* */ + struct completion *done; /* set if the caller waits */ }; -enum { - WS_USED_B = 0, - WS_ONSTACK_B, -}; - -#define WS_USED (1 << WS_USED_B) -#define WS_ONSTACK (1 << WS_ONSTACK_B) - -static inline bool bdi_work_on_stack(struct bdi_work *work) -{ - return test_bit(WS_ONSTACK_B, &work->state); -} - -static inline void bdi_work_init(struct bdi_work *work, - struct wb_writeback_args *args) -{ - INIT_RCU_HEAD(&work->rcu_head); - work->args = *args; - work->state = WS_USED; -} - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) return !list_empty(&bdi->work_list); } -static void bdi_work_clear(struct bdi_work *work) -{ - clear_bit(WS_USED_B, &work->state); - smp_mb__after_clear_bit(); - /* - * work can have disappeared at this point. bit waitq functions - * should be able to tolerate this, provided bdi_sched_wait does - * not dereference it's pointer argument. - */ - wake_up_bit(&work->state, WS_USED_B); -} - -static void bdi_work_free(struct rcu_head *head) -{ - struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); - - if (!bdi_work_on_stack(work)) - kfree(work); - else - bdi_work_clear(work); -} - -static void wb_work_complete(struct bdi_work *work) -{ - const enum writeback_sync_modes sync_mode = work->args.sync_mode; - int onstack = bdi_work_on_stack(work); - - /* - * For allocated work, we can clear the done/seen bit right here. - * For on-stack work, we need to postpone both the clear and free - * to after the RCU grace period, since the stack could be invalidated - * as soon as bdi_work_clear() has done the wakeup. - */ - if (!onstack) - bdi_work_clear(work); - if (sync_mode == WB_SYNC_NONE || onstack) - call_rcu(&work->rcu_head, bdi_work_free); -} - -static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) -{ - /* - * The caller has retrieved the work arguments from this work, - * drop our reference. If this is the last ref, delete and free it - */ - if (atomic_dec_and_test(&work->pending)) { - struct backing_dev_info *bdi = wb->bdi; - - spin_lock(&bdi->wb_lock); - list_del_rcu(&work->list); - spin_unlock(&bdi->wb_lock); - - wb_work_complete(work); - } -} - -static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) +static void bdi_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_work *work) { - work->seen = bdi->wb_mask; - BUG_ON(!work->seen); - atomic_set(&work->pending, bdi->wb_cnt); - BUG_ON(!bdi->wb_cnt); - - /* - * list_add_tail_rcu() contains the necessary barriers to - * make sure the above stores are seen before the item is - * noticed on the list - */ spin_lock(&bdi->wb_lock); - list_add_tail_rcu(&work->list, &bdi->work_list); + list_add_tail(&work->list, &bdi->work_list); spin_unlock(&bdi->wb_lock); /* @@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) } } -/* - * Used for on-stack allocated work items. The caller needs to wait until - * the wb threads have acked the work before it's safe to continue. - */ -static void bdi_wait_on_work_clear(struct bdi_work *work) -{ - wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); -} - -static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args) +static void +__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, + bool range_cyclic, bool for_background) { - struct bdi_work *work; + struct wb_writeback_work *work; /* * This is WB_SYNC_NONE writeback, so if allocation fails just * wakeup the thread for old dirty data writeback */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - bdi_work_init(work, args); - bdi_queue_work(bdi, work); - } else { - struct bdi_writeback *wb = &bdi->wb; - - if (wb->task) - wake_up_process(wb->task); + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + if (bdi->wb.task) + wake_up_process(bdi->wb.task); + return; } + + work->sync_mode = WB_SYNC_NONE; + work->nr_pages = nr_pages; + work->range_cyclic = range_cyclic; + work->for_background = for_background; + + bdi_queue_work(bdi, work); } /** - * bdi_sync_writeback - start and wait for writeback + * bdi_start_writeback - start writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block + * @nr_pages: the number of pages to write * * Description: - * This does WB_SYNC_ALL data integrity writeback and waits for the - * IO to complete. Callers must hold the sb s_umount semaphore for - * reading, to avoid having the super disappear before we are done. + * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * started when this function returns, we make no guarentees on + * completion. Caller need not hold sb s_umount semaphore. + * */ -static void bdi_sync_writeback(struct backing_dev_info *bdi, - struct super_block *sb) +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { - struct wb_writeback_args args = { - .sb = sb, - .sync_mode = WB_SYNC_ALL, - .nr_pages = LONG_MAX, - .range_cyclic = 0, - }; - struct bdi_work work; - - bdi_work_init(&work, &args); - work.state |= WS_ONSTACK; - - bdi_queue_work(bdi, &work); - bdi_wait_on_work_clear(&work); + __bdi_start_writeback(bdi, nr_pages, true, false); } /** - * bdi_start_writeback - start writeback + * bdi_start_background_writeback - start background writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block - * @nr_pages: the number of pages to write * * Description: - * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * This does WB_SYNC_NONE background writeback. The IO is only * started when this function returns, we make no guarentees on * completion. Caller need not hold sb s_umount semaphore. - * */ -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) +void bdi_start_background_writeback(struct backing_dev_info *bdi) { - struct wb_writeback_args args = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .nr_pages = nr_pages, - .range_cyclic = 1, - }; - - /* - * We treat @nr_pages=0 as the special case to do background writeback, - * ie. to sync pages until the background dirty threshold is reached. - */ - if (!nr_pages) { - args.nr_pages = LONG_MAX; - args.for_background = 1; - } - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, LONG_MAX, true, true); } /* @@ -561,75 +425,69 @@ select_queue: return ret; } -static void unpin_sb_for_writeback(struct super_block *sb) -{ - up_read(&sb->s_umount); - put_super(sb); -} - -enum sb_pin_state { - SB_PINNED, - SB_NOT_PINNED, - SB_PIN_FAILED -}; - /* - * For WB_SYNC_NONE writeback, the caller does not have the sb pinned + * For background writeback the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't * go away while we are writing inodes from it. */ -static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, - struct super_block *sb) +static bool pin_sb_for_writeback(struct super_block *sb) { - /* - * Caller must already hold the ref for this - */ - if (wbc->sync_mode == WB_SYNC_ALL) { - WARN_ON(!rwsem_is_locked(&sb->s_umount)); - return SB_NOT_PINNED; - } spin_lock(&sb_lock); + if (list_empty(&sb->s_instances)) { + spin_unlock(&sb_lock); + return false; + } + sb->s_count++; + spin_unlock(&sb_lock); + if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { - spin_unlock(&sb_lock); - return SB_PINNED; - } - /* - * umounted, drop rwsem again and fall through to failure - */ + if (sb->s_root) + return true; up_read(&sb->s_umount); } - sb->s_count--; - spin_unlock(&sb_lock); - return SB_PIN_FAILED; + + put_super(sb); + return false; } /* * Write a portion of b_io inodes which belong to @sb. - * If @wbc->sb != NULL, then find and write all such + * + * If @only_this_sb is true, then find and write all such * inodes. Otherwise write only ones which go sequentially * in reverse order. + * * Return 1, if the caller writeback routine should be * interrupted. Otherwise return 0. */ -static int writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, + struct writeback_control *wbc, bool only_this_sb) { while (!list_empty(&wb->b_io)) { long pages_skipped; struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); - if (wbc->sb && sb != inode->i_sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - if (sb != inode->i_sb) - /* finish with this superblock */ + + if (inode->i_sb != sb) { + if (only_this_sb) { + /* + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. + */ + redirty_tail(inode); + continue; + } + + /* + * The inode belongs to a different superblock. + * Bounce back to the caller to unpin this and + * pin the next superblock. + */ return 0; + } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; @@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb, return 1; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) { int ret = 0; @@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); struct super_block *sb = inode->i_sb; - enum sb_pin_state state; - if (wbc->sb && sb != wbc->sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - state = pin_sb_for_writeback(wbc, sb); - - if (state == SB_PIN_FAILED) { + if (!pin_sb_for_writeback(sb)) { requeue_io(inode); continue; } - ret = writeback_sb_inodes(sb, wb, wbc); + ret = writeback_sb_inodes(sb, wb, wbc, false); + drop_super(sb); - if (state == SB_PINNED) - unpin_sb_for_writeback(sb); if (ret) break; } @@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } -void writeback_inodes_wbc(struct writeback_control *wbc) +static void __writeback_inodes_sb(struct super_block *sb, + struct bdi_writeback *wb, struct writeback_control *wbc) { - struct backing_dev_info *bdi = wbc->bdi; + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - writeback_inodes_wb(&bdi->wb, wbc); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + writeback_sb_inodes(sb, wb, wbc, true); + spin_unlock(&inode_lock); } /* @@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void) * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, - struct wb_writeback_args *args) + struct wb_writeback_work *work) { struct writeback_control wbc = { - .bdi = wb->bdi, - .sb = args->sb, - .sync_mode = args->sync_mode, + .sync_mode = work->sync_mode, .older_than_this = NULL, - .for_kupdate = args->for_kupdate, - .for_background = args->for_background, - .range_cyclic = args->range_cyclic, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, + .range_cyclic = work->range_cyclic, }; unsigned long oldest_jif; long wrote = 0; @@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, /* * Stop writeback when nr_pages has been consumed */ - if (args->nr_pages <= 0) + if (work->nr_pages <= 0) break; /* * For background writeout, stop when we are below the * background dirty threshold */ - if (args->for_background && !over_bground_thresh()) + if (work->for_background && !over_bground_thresh()) break; wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - writeback_inodes_wb(wb, &wbc); - args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; + if (work->sb) + __writeback_inodes_sb(work->sb, wb, &wbc); + else + writeback_inodes_wb(wb, &wbc); + work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* @@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, } /* - * Return the next bdi_work struct that hasn't been processed by this - * wb thread yet. ->seen is initially set for each thread that exists - * for this device, when a thread first notices a piece of work it - * clears its bit. Depending on writeback type, the thread will notify - * completion on either receiving the work (WB_SYNC_NONE) or after - * it is done (WB_SYNC_ALL). + * Return the next wb_writeback_work struct that hasn't been processed yet. */ -static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, - struct bdi_writeback *wb) +static struct wb_writeback_work * +get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) { - struct bdi_work *work, *ret = NULL; + struct wb_writeback_work *work = NULL; - rcu_read_lock(); - - list_for_each_entry_rcu(work, &bdi->work_list, list) { - if (!test_bit(wb->nr, &work->seen)) - continue; - clear_bit(wb->nr, &work->seen); - - ret = work; - break; + spin_lock(&bdi->wb_lock); + if (!list_empty(&bdi->work_list)) { + work = list_entry(bdi->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); } - - rcu_read_unlock(); - return ret; + spin_unlock(&bdi->wb_lock); + return work; } static long wb_check_old_data_flush(struct bdi_writeback *wb) @@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) (inodes_stat.nr_inodes - inodes_stat.nr_unused); if (nr_pages) { - struct wb_writeback_args args = { + struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, }; - return wb_writeback(wb, &args); + return wb_writeback(wb, &work); } return 0; @@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; - struct bdi_work *work; + struct wb_writeback_work *work; long wrote = 0; while ((work = get_next_work_item(bdi, wb)) != NULL) { - struct wb_writeback_args args = work->args; - /* * Override sync mode, in case we must wait for completion + * because this thread is exiting now. */ if (force_wait) - work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; - - /* - * If this isn't a data integrity operation, just notify - * that we have seen this work and we are now starting it. - */ - if (args.sync_mode == WB_SYNC_NONE) - wb_clear_pending(wb, work); + work->sync_mode = WB_SYNC_ALL; - wrote += wb_writeback(wb, &args); + wrote += wb_writeback(wb, work); /* - * This is a data integrity writeback, so only do the - * notification when we have completed the work. + * Notify the caller of completion if this is a synchronous + * work item, otherwise just free it. */ - if (args.sync_mode == WB_SYNC_ALL) - wb_clear_pending(wb, work); + if (work->done) + complete(work->done); + else + kfree(work); } /* @@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb) } /* - * Schedule writeback for all backing devices. This does WB_SYNC_NONE - * writeback, for integrity writeback see bdi_sync_writeback(). + * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back + * the whole world. */ -static void bdi_writeback_all(struct super_block *sb, long nr_pages) +void wakeup_flusher_threads(long nr_pages) { - struct wb_writeback_args args = { - .sb = sb, - .nr_pages = nr_pages, - .sync_mode = WB_SYNC_NONE, - }; struct backing_dev_info *bdi; - rcu_read_lock(); + if (!nr_pages) { + nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + } + rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, false, false); } - rcu_read_unlock(); } -/* - * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back - * the whole world. - */ -void wakeup_flusher_threads(long nr_pages) -{ - if (nr_pages == 0) - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - bdi_writeback_all(NULL, nr_pages); -} - static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) { unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { + .sb = sb, + .sync_mode = WB_SYNC_NONE, + .done = &done, + }; + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - nr_to_write = nr_dirty + nr_unstable + + work.nr_pages = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb); int writeback_inodes_sb_if_idle(struct super_block *sb) { if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); writeback_inodes_sb(sb); + up_read(&sb->s_umount); return 1; } else return 0; @@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); */ void sync_inodes_sb(struct super_block *sb) { - bdi_sync_writeback(sb->s_bdi, sb); + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { + .sb = sb, + .sync_mode = WB_SYNC_ALL, + .nr_pages = LONG_MAX, + .range_cyclic = 0, + .done = &done, + }; + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index cc94bb9563f..3f6dfa98988 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -1,7 +1,6 @@ config FSCACHE tristate "General filesystem local caching manager" - select SLOW_WORK help This option enables a generic filesystem caching manager that can be used by various network and other filesystems to cache data locally. diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index edd7434ab6e..6a026441c5a 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -82,6 +82,14 @@ extern unsigned fscache_defer_lookup; extern unsigned fscache_defer_create; extern unsigned fscache_debug; extern struct kobject *fscache_root; +extern struct workqueue_struct *fscache_object_wq; +extern struct workqueue_struct *fscache_op_wq; +DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); + +static inline bool fscache_object_congested(void) +{ + return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); +} extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); diff --git a/fs/fscache/main.c b/fs/fscache/main.c index add6bdb53f0..f9d856773f7 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -15,6 +15,7 @@ #include <linux/sched.h> #include <linux/completion.h> #include <linux/slab.h> +#include <linux/seq_file.h> #include "internal.h" MODULE_DESCRIPTION("FS Cache Manager"); @@ -40,22 +41,105 @@ MODULE_PARM_DESC(fscache_debug, "FS-Cache debugging mask"); struct kobject *fscache_root; +struct workqueue_struct *fscache_object_wq; +struct workqueue_struct *fscache_op_wq; + +DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); + +/* these values serve as lower bounds, will be adjusted in fscache_init() */ +static unsigned fscache_object_max_active = 4; +static unsigned fscache_op_max_active = 2; + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *fscache_sysctl_header; + +static int fscache_max_active_sysctl(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct workqueue_struct **wqp = table->extra1; + unsigned int *datap = table->data; + int ret; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (ret == 0) + workqueue_set_max_active(*wqp, *datap); + return ret; +} + +ctl_table fscache_sysctls[] = { + { + .procname = "object_max_active", + .data = &fscache_object_max_active, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = fscache_max_active_sysctl, + .extra1 = &fscache_object_wq, + }, + { + .procname = "operation_max_active", + .data = &fscache_op_max_active, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = fscache_max_active_sysctl, + .extra1 = &fscache_op_wq, + }, + {} +}; + +ctl_table fscache_sysctls_root[] = { + { + .procname = "fscache", + .mode = 0555, + .child = fscache_sysctls, + }, + {} +}; +#endif /* * initialise the fs caching module */ static int __init fscache_init(void) { + unsigned int nr_cpus = num_possible_cpus(); + unsigned int cpu; int ret; - ret = slow_work_register_user(THIS_MODULE); - if (ret < 0) - goto error_slow_work; + fscache_object_max_active = + clamp_val(nr_cpus, + fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE); + + ret = -ENOMEM; + fscache_object_wq = alloc_workqueue("fscache_object", WQ_UNBOUND, + fscache_object_max_active); + if (!fscache_object_wq) + goto error_object_wq; + + fscache_op_max_active = + clamp_val(fscache_object_max_active / 2, + fscache_op_max_active, WQ_UNBOUND_MAX_ACTIVE); + + ret = -ENOMEM; + fscache_op_wq = alloc_workqueue("fscache_operation", WQ_UNBOUND, + fscache_op_max_active); + if (!fscache_op_wq) + goto error_op_wq; + + for_each_possible_cpu(cpu) + init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu)); ret = fscache_proc_init(); if (ret < 0) goto error_proc; +#ifdef CONFIG_SYSCTL + ret = -ENOMEM; + fscache_sysctl_header = register_sysctl_table(fscache_sysctls_root); + if (!fscache_sysctl_header) + goto error_sysctl; +#endif + fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", sizeof(struct fscache_cookie), 0, @@ -78,10 +162,16 @@ static int __init fscache_init(void) error_kobj: kmem_cache_destroy(fscache_cookie_jar); error_cookie_jar: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(fscache_sysctl_header); +error_sysctl: +#endif fscache_proc_cleanup(); error_proc: - slow_work_unregister_user(THIS_MODULE); -error_slow_work: + destroy_workqueue(fscache_op_wq); +error_op_wq: + destroy_workqueue(fscache_object_wq); +error_object_wq: return ret; } @@ -96,8 +186,12 @@ static void __exit fscache_exit(void) kobject_put(fscache_root); kmem_cache_destroy(fscache_cookie_jar); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(fscache_sysctl_header); +#endif fscache_proc_cleanup(); - slow_work_unregister_user(THIS_MODULE); + destroy_workqueue(fscache_op_wq); + destroy_workqueue(fscache_object_wq); printk(KERN_NOTICE "FS-Cache: Unloaded\n"); } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 4a8eb31c533..ebe29c58138 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -34,8 +34,8 @@ struct fscache_objlist_data { #define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ #define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ #define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ -#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */ -#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */ +#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */ +#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */ u8 buf[512]; /* key and aux data buffer */ }; @@ -231,12 +231,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) READS, NOREADS); FILTER(obj->events & obj->event_mask, EVENTS, NOEVENTS); - FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW), - WORK, NOWORK); + FILTER(work_busy(&obj->work), WORK, NOWORK); } seq_printf(m, - "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ", + "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ", obj->debug_id, obj->parent ? obj->parent->debug_id : -1, fscache_object_states_short[obj->state], @@ -249,7 +248,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, obj->events, obj->flags, - obj->work.flags); + work_busy(&obj->work)); no_cookie = true; keylen = auxlen = 0; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 0b589a9b4ff..b6b897c550a 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,7 +14,6 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include <linux/module.h> -#include <linux/seq_file.h> #include "internal.h" const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -50,12 +49,8 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_DEAD] = "DEAD", }; -static void fscache_object_slow_work_put_ref(struct slow_work *); -static int fscache_object_slow_work_get_ref(struct slow_work *); -static void fscache_object_slow_work_execute(struct slow_work *); -#ifdef CONFIG_SLOW_WORK_DEBUG -static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); -#endif +static int fscache_get_object(struct fscache_object *); +static void fscache_put_object(struct fscache_object *); static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); @@ -64,17 +59,6 @@ static void fscache_withdraw_object(struct fscache_object *); static void fscache_enqueue_dependents(struct fscache_object *); static void fscache_dequeue_object(struct fscache_object *); -const struct slow_work_ops fscache_object_slow_work_ops = { - .owner = THIS_MODULE, - .get_ref = fscache_object_slow_work_get_ref, - .put_ref = fscache_object_slow_work_put_ref, - .execute = fscache_object_slow_work_execute, -#ifdef CONFIG_SLOW_WORK_DEBUG - .desc = fscache_object_slow_work_desc, -#endif -}; -EXPORT_SYMBOL(fscache_object_slow_work_ops); - /* * we need to notify the parent when an op completes that we had outstanding * upon it @@ -345,7 +329,7 @@ unsupported_event: /* * execute an object */ -static void fscache_object_slow_work_execute(struct slow_work *work) +void fscache_object_work_func(struct work_struct *work) { struct fscache_object *object = container_of(work, struct fscache_object, work); @@ -359,23 +343,9 @@ static void fscache_object_slow_work_execute(struct slow_work *work) if (object->events & object->event_mask) fscache_enqueue_object(object); clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); + fscache_put_object(object); } - -/* - * describe an object for slow-work debugging - */ -#ifdef CONFIG_SLOW_WORK_DEBUG -static void fscache_object_slow_work_desc(struct slow_work *work, - struct seq_file *m) -{ - struct fscache_object *object = - container_of(work, struct fscache_object, work); - - seq_printf(m, "FSC: OBJ%x: %s", - object->debug_id, - fscache_object_states_short[object->state]); -} -#endif +EXPORT_SYMBOL(fscache_object_work_func); /* * initialise an object @@ -393,7 +363,6 @@ static void fscache_initialise_object(struct fscache_object *object) _enter(""); ASSERT(object->cookie != NULL); ASSERT(object->cookie->parent != NULL); - ASSERT(list_empty(&object->work.link)); if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | (1 << FSCACHE_OBJECT_EV_RELEASE) | @@ -671,10 +640,8 @@ static void fscache_drop_object(struct fscache_object *object) object->parent = NULL; } - /* this just shifts the object release to the slow work processor */ - fscache_stat(&fscache_n_cop_put_object); - object->cache->ops->put_object(object); - fscache_stat_d(&fscache_n_cop_put_object); + /* this just shifts the object release to the work processor */ + fscache_put_object(object); _leave(""); } @@ -758,12 +725,10 @@ void fscache_withdrawing_object(struct fscache_cache *cache, } /* - * allow the slow work item processor to get a ref on an object + * get a ref on an object */ -static int fscache_object_slow_work_get_ref(struct slow_work *work) +static int fscache_get_object(struct fscache_object *object) { - struct fscache_object *object = - container_of(work, struct fscache_object, work); int ret; fscache_stat(&fscache_n_cop_grab_object); @@ -773,13 +738,10 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work) } /* - * allow the slow work item processor to discard a ref on a work item + * discard a ref on a work item */ -static void fscache_object_slow_work_put_ref(struct slow_work *work) +static void fscache_put_object(struct fscache_object *object) { - struct fscache_object *object = - container_of(work, struct fscache_object, work); - fscache_stat(&fscache_n_cop_put_object); object->cache->ops->put_object(object); fscache_stat_d(&fscache_n_cop_put_object); @@ -792,8 +754,48 @@ void fscache_enqueue_object(struct fscache_object *object) { _enter("{OBJ%x}", object->debug_id); - slow_work_enqueue(&object->work); + if (fscache_get_object(object) >= 0) { + wait_queue_head_t *cong_wq = + &get_cpu_var(fscache_object_cong_wait); + + if (queue_work(fscache_object_wq, &object->work)) { + if (fscache_object_congested()) + wake_up(cong_wq); + } else + fscache_put_object(object); + + put_cpu_var(fscache_object_cong_wait); + } +} + +/** + * fscache_object_sleep_till_congested - Sleep until object wq is congested + * @timoutp: Scheduler sleep timeout + * + * Allow an object handler to sleep until the object workqueue is congested. + * + * The caller must set up a wake up event before calling this and must have set + * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own + * condition before calling this function as no test is made here. + * + * %true is returned if the object wq is congested, %false otherwise. + */ +bool fscache_object_sleep_till_congested(signed long *timeoutp) +{ + wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait); + DEFINE_WAIT(wait); + + if (fscache_object_congested()) + return true; + + add_wait_queue_exclusive(cong_wq, &wait); + if (!fscache_object_congested()) + *timeoutp = schedule_timeout(*timeoutp); + finish_wait(cong_wq, &wait); + + return fscache_object_congested(); } +EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); /* * enqueue the dependents of an object for metadata-type processing @@ -819,9 +821,7 @@ static void fscache_enqueue_dependents(struct fscache_object *object) /* sort onto appropriate lists */ fscache_enqueue_object(dep); - fscache_stat(&fscache_n_cop_put_object); - dep->cache->ops->put_object(dep); - fscache_stat_d(&fscache_n_cop_put_object); + fscache_put_object(dep); if (!list_empty(&object->dependents)) cond_resched_lock(&object->lock); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index f17cecafae4..b9f34eaede0 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -42,16 +42,12 @@ void fscache_enqueue_operation(struct fscache_operation *op) fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { - case FSCACHE_OP_FAST: - _debug("queue fast"); + case FSCACHE_OP_ASYNC: + _debug("queue async"); atomic_inc(&op->usage); - if (!schedule_work(&op->fast_work)) + if (!queue_work(fscache_op_wq, &op->work)) fscache_put_operation(op); break; - case FSCACHE_OP_SLOW: - _debug("queue slow"); - slow_work_enqueue(&op->slow_work); - break; case FSCACHE_OP_MYTHREAD: _debug("queue for caller's attention"); break; @@ -455,36 +451,13 @@ void fscache_operation_gc(struct work_struct *work) } /* - * allow the slow work item processor to get a ref on an operation - */ -static int fscache_op_get_ref(struct slow_work *work) -{ - struct fscache_operation *op = - container_of(work, struct fscache_operation, slow_work); - - atomic_inc(&op->usage); - return 0; -} - -/* - * allow the slow work item processor to discard a ref on an operation - */ -static void fscache_op_put_ref(struct slow_work *work) -{ - struct fscache_operation *op = - container_of(work, struct fscache_operation, slow_work); - - fscache_put_operation(op); -} - -/* - * execute an operation using the slow thread pool to provide processing context - * - the caller holds a ref to this object, so we don't need to hold one + * execute an operation using fs_op_wq to provide processing context - + * the caller holds a ref to this object, so we don't need to hold one */ -static void fscache_op_execute(struct slow_work *work) +void fscache_op_work_func(struct work_struct *work) { struct fscache_operation *op = - container_of(work, struct fscache_operation, slow_work); + container_of(work, struct fscache_operation, work); unsigned long start; _enter("{OBJ%x OP%x,%d}", @@ -494,31 +467,7 @@ static void fscache_op_execute(struct slow_work *work) start = jiffies; op->processor(op); fscache_hist(fscache_ops_histogram, start); + fscache_put_operation(op); _leave(""); } - -/* - * describe an operation for slow-work debugging - */ -#ifdef CONFIG_SLOW_WORK_DEBUG -static void fscache_op_desc(struct slow_work *work, struct seq_file *m) -{ - struct fscache_operation *op = - container_of(work, struct fscache_operation, slow_work); - - seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx", - op->object->debug_id, op->debug_id, - op->name, op->state, op->flags); -} -#endif - -const struct slow_work_ops fscache_op_slow_work_ops = { - .owner = THIS_MODULE, - .get_ref = fscache_op_get_ref, - .put_ref = fscache_op_put_ref, - .execute = fscache_op_execute, -#ifdef CONFIG_SLOW_WORK_DEBUG - .desc = fscache_op_desc, -#endif -}; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 723b889fd21..41c441c2058 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -105,7 +105,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, page_busy: /* we might want to wait here, but that could deadlock the allocator as - * the slow-work threads writing to the cache may all end up sleeping + * the work threads writing to the cache may all end up sleeping * on memory allocation */ fscache_stat(&fscache_n_store_vmscan_busy); return false; @@ -188,9 +188,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return -ENOMEM; } - fscache_operation_init(op, NULL); - fscache_operation_init_slow(op, fscache_attr_changed_op); - op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); + fscache_operation_init(op, fscache_attr_changed_op, NULL); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); fscache_set_op_name(op, "Attr"); spin_lock(&cookie->lock); @@ -218,24 +217,6 @@ nobufs: EXPORT_SYMBOL(__fscache_attr_changed); /* - * handle secondary execution given to a retrieval op on behalf of the - * cache - */ -static void fscache_retrieval_work(struct work_struct *work) -{ - struct fscache_retrieval *op = - container_of(work, struct fscache_retrieval, op.fast_work); - unsigned long start; - - _enter("{OP%x}", op->op.debug_id); - - start = jiffies; - op->op.processor(&op->op); - fscache_hist(fscache_ops_histogram, start); - fscache_put_operation(&op->op); -} - -/* * release a retrieval op reference */ static void fscache_release_retrieval_op(struct fscache_operation *_op) @@ -269,13 +250,12 @@ static struct fscache_retrieval *fscache_alloc_retrieval( return NULL; } - fscache_operation_init(&op->op, fscache_release_retrieval_op); + fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); op->mapping = mapping; op->end_io_func = end_io_func; op->context = context; op->start_time = jiffies; - INIT_WORK(&op->op.fast_work, fscache_retrieval_work); INIT_LIST_HEAD(&op->to_do); fscache_set_op_name(&op->op, "Retr"); return op; @@ -795,9 +775,9 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (!op) goto nomem; - fscache_operation_init(&op->op, fscache_release_write_op); - fscache_operation_init_slow(&op->op, fscache_write_op); - op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); + fscache_operation_init(&op->op, fscache_write_op, + fscache_release_write_op); + op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); fscache_set_op_name(&op->op, "Write1"); ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); @@ -852,7 +832,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_store_ops); fscache_stat(&fscache_n_stores_ok); - /* the slow work queue now carries its own ref on the object */ + /* the work queue now carries its own ref on the object */ fscache_put_operation(&op->op); _leave(" = 0"); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3cdc5f78a40..431be0795b6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1016,7 +1016,7 @@ static int fuse_permission(struct inode *inode, int mask) exist. So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - } else if (mask & MAY_ACCESS) { + } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index a47b4310711..cc966552214 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -7,7 +7,6 @@ config GFS2_FS select IP_SCTP if DLM_SCTP select FS_POSIX_ACL select CRC32 - select SLOW_WORK select QUOTACTL help A cluster filesystem. diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9f8b52500d6..5e96cbd8a45 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -136,10 +136,7 @@ static int gfs2_writeback_writepage(struct page *page, if (ret <= 0) return ret; - ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); - if (ret == -EAGAIN) - ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); - return ret; + return nobh_writepage(page, gfs2_get_block_noalloc, wbc); } /** @@ -637,9 +634,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, } } - error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); - if (error) - goto out_unlock; + alloc_required = gfs2_write_alloc_required(ip, pos, len); if (alloc_required || gfs2_is_jdata(ip)) gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4a48c0f4b40..6f482809d1a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1040,7 +1040,8 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) goto out; if (gfs2_is_stuffed(ip)) { - u64 dsize = size + sizeof(struct gfs2_inode); + u64 dsize = size + sizeof(struct gfs2_dinode); + ip->i_disksize = size; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1243,13 +1244,12 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) * @ip: the file being written to * @offset: the offset to write to * @len: the number of bytes being written - * @alloc_required: set to 1 if an alloc is required, 0 otherwise * - * Returns: errno + * Returns: 1 if an alloc is required, 0 otherwise */ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len, int *alloc_required) + unsigned int len) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head bh; @@ -1257,26 +1257,23 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, u64 lblock, lblock_stop, size; u64 end_of_file; - *alloc_required = 0; - if (!len) return 0; if (gfs2_is_stuffed(ip)) { if (offset + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) - *alloc_required = 1; + return 1; return 0; } - *alloc_required = 1; shift = sdp->sd_sb.sb_bsize_shift; BUG_ON(gfs2_is_dir(ip)); end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; if (lblock_stop > end_of_file) - return 0; + return 1; size = (lblock_stop - lblock) << shift; do { @@ -1284,12 +1281,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, bh.b_size = size; gfs2_block_map(&ip->i_inode, lblock, &bh, 0); if (!buffer_mapped(&bh)) - return 0; + return 1; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); - *alloc_required = 0; return 0; } diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index c983177e05a..a20a5213135 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -52,6 +52,6 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size); int gfs2_truncatei_resume(struct gfs2_inode *ip); int gfs2_file_dealloc(struct gfs2_inode *ip); int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len, int *alloc_required); + unsigned int len); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8295c5b5d4a..b9dd88a78dd 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, unsigned totlen = be16_to_cpu(dent->de_rec_len); if (gfs2_dirent_sentinel(dent)) - actual = GFS2_DIRENT_SIZE(0); + actual = 0; if (totlen - actual >= required) return 1; return 0; @@ -955,7 +955,12 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) /* Change the pointers. Don't bother distinguishing stuffed from non-stuffed. This code is complicated enough already. */ - lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL); + lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS); + if (!lp) { + error = -ENOMEM; + goto fail_brelse; + } + /* Change the pointers */ for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); @@ -1063,7 +1068,9 @@ static int dir_double_exhash(struct gfs2_inode *dip) /* Allocate both the "from" and "to" buffers in one big chunk */ - buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL); + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); + if (!buf) + return -ENOMEM; for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { error = gfs2_dir_read_data(dip, (char *)buf, @@ -1231,6 +1238,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, return 0; } +static void *gfs2_alloc_sort_buffer(unsigned size) +{ + void *ptr = NULL; + + if (size < KMALLOC_MAX_SIZE) + ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); + if (!ptr) + ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL); + return ptr; +} + +static void gfs2_free_sort_buffer(void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vfree(ptr); + else + kfree(ptr); +} + static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir, int *copied, unsigned *depth, u64 leaf_no) @@ -1271,7 +1297,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, * 99 is the maximum number of entries that can fit in a single * leaf block. */ - larr = vmalloc((leaves + entries + 99) * sizeof(void *)); + larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out; darr = (const struct gfs2_dirent **)(larr + leaves); @@ -1282,7 +1308,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, do { error = get_leaf(ip, lfn, &bh); if (error) - goto out_kfree; + goto out_free; lf = (struct gfs2_leaf *)bh->b_data; lfn = be64_to_cpu(lf->lf_next); if (lf->lf_entries) { @@ -1291,7 +1317,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, gfs2_dirent_gather, NULL, &g); error = PTR_ERR(dent); if (IS_ERR(dent)) - goto out_kfree; + goto out_free; if (entries2 != g.offset) { fs_warn(sdp, "Number of entries corrupt in dir " "leaf %llu, entries2 (%u) != " @@ -1300,7 +1326,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, entries2, g.offset); error = -EIO; - goto out_kfree; + goto out_free; } error = 0; larr[leaf++] = bh; @@ -1312,10 +1338,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, BUG_ON(entries2 != entries); error = do_filldir_main(ip, offset, opaque, filldir, darr, entries, copied); -out_kfree: +out_free: for(i = 0; i < leaf; i++) brelse(larr[i]); - vfree(larr); + gfs2_free_sort_buffer(larr); out: return error; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ed9a94f0ef1..4edd662c823 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -351,7 +351,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; - int alloc_required = 0; struct gfs2_holder gh; struct gfs2_alloc *al; int ret; @@ -364,8 +363,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); - if (ret || !alloc_required) + if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) goto out_unlock; ret = -ENOMEM; al = gfs2_alloc_get(ip); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ddcdbf49353..9adf8f924e0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -328,6 +328,30 @@ static void gfs2_holder_wake(struct gfs2_holder *gh) } /** + * do_error - Something unexpected has happened during a lock request + * + */ + +static inline void do_error(struct gfs2_glock *gl, const int ret) +{ + struct gfs2_holder *gh, *tmp; + + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (ret & LM_OUT_ERROR) + gh->gh_error = -EIO; + else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + gh->gh_error = GLR_TRYFAILED; + else + continue; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + } +} + +/** * do_promote - promote as many requests as possible on the current queue * @gl: The glock * @@ -375,36 +399,13 @@ restart: } if (gh->gh_list.prev == &gl->gl_holders) return 1; + do_error(gl, 0); break; } return 0; } /** - * do_error - Something unexpected has happened during a lock request - * - */ - -static inline void do_error(struct gfs2_glock *gl, const int ret) -{ - struct gfs2_holder *gh, *tmp; - - list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) - continue; - if (ret & LM_OUT_ERROR) - gh->gh_error = -EIO; - else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) - gh->gh_error = GLR_TRYFAILED; - else - continue; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - } -} - -/** * find_first_waiter - find the first gh that's waiting for the glock * @gl: the glock */ @@ -1062,6 +1063,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh) spin_lock(&gl->gl_spin); add_to_queue(gh); + if ((LM_FLAG_NOEXP & gh->gh_flags) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); run_queue(gl, 1); spin_unlock(&gl->gl_spin); @@ -1319,6 +1323,36 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) } /** + * gfs2_should_freeze - Figure out if glock should be frozen + * @gl: The glock in question + * + * Glocks are not frozen if (a) the result of the dlm operation is + * an error, (b) the locking operation was an unlock operation or + * (c) if there is a "noexp" flagged request anywhere in the queue + * + * Returns: 1 if freezing should occur, 0 otherwise + */ + +static int gfs2_should_freeze(const struct gfs2_glock *gl) +{ + const struct gfs2_holder *gh; + + if (gl->gl_reply & ~LM_OUT_ST_MASK) + return 0; + if (gl->gl_target == LM_ST_UNLOCKED) + return 0; + + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (LM_FLAG_NOEXP & gh->gh_flags) + return 0; + } + + return 1; +} + +/** * gfs2_glock_complete - Callback used by locking * @gl: Pointer to the glock * @ret: The return value from the dlm @@ -1328,18 +1362,17 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + gl->gl_reply = ret; + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { - struct gfs2_holder *gh; spin_lock(&gl->gl_spin); - gh = find_first_waiter(gl); - if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) && - (gl->gl_target != LM_ST_UNLOCKED)) || - ((ret & ~LM_OUT_ST_MASK) != 0)) + if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - if (test_bit(GLF_FROZEN, &gl->gl_flags)) + spin_unlock(&gl->gl_spin); return; + } + spin_unlock(&gl->gl_spin); } set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_hold(gl); @@ -1348,7 +1381,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { struct gfs2_glock *gl; int may_demote; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b5d7363b22d..fdbf4b366fa 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -12,7 +12,6 @@ #include <linux/fs.h> #include <linux/workqueue.h> -#include <linux/slow-work.h> #include <linux/dlm.h> #include <linux/buffer_head.h> @@ -383,7 +382,7 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; - struct slow_work jd_work; + struct work_struct jd_work; struct inode *jd_inode; unsigned long jd_flags; #define JDF_RECOVERY 1 @@ -460,6 +459,7 @@ enum { SDF_NOBARRIERS = 3, SDF_NORECOVERY = 4, SDF_DEMOTE = 5, + SDF_NOJOURNALID = 6, }; #define GFS2_FSNAME_LEN 256 diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b5612cbb62a..f03afd9c44b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, { struct inode *inode; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; inode = gfs2_iget(sb, no_addr); @@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) goto gfs2_nfsbypass; @@ -228,7 +229,8 @@ gfs2_nfsbypass: fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: if (inode->i_state & I_NEW) ip->i_gl->gl_object = NULL; @@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) { struct gfs2_sbd *sdp; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; struct gfs2_holder gh; struct inode *inode; @@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; inode->i_mode = DT2IF(DT_UNKNOWN); @@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index fb2a5f93b7c..b1e9630eb46 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/gfs2_ondisk.h> #include <asm/atomic.h> -#include <linux/slow-work.h> #include "gfs2.h" #include "incore.h" @@ -24,6 +23,7 @@ #include "util.h" #include "glock.h" #include "quota.h" +#include "recovery.h" static struct shrinker qd_shrinker = { .shrink = gfs2_shrink_qd_memory, @@ -138,9 +138,11 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; - error = slow_work_register_user(THIS_MODULE); - if (error) - goto fail_slow; + error = -ENOMEM; + gfs_recovery_wq = alloc_workqueue("gfs_recovery", + WQ_NON_REENTRANT | WQ_RESCUER, 0); + if (!gfs_recovery_wq) + goto fail_wq; gfs2_register_debugfs(); @@ -148,7 +150,7 @@ static int __init init_gfs2_fs(void) return 0; -fail_slow: +fail_wq: unregister_filesystem(&gfs2meta_fs_type); fail_unregister: unregister_filesystem(&gfs2_fs_type); @@ -190,7 +192,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); - slow_work_unregister_user(THIS_MODULE); + destroy_workqueue(gfs_recovery_wq); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3593b3a7290..4f44bdeb2f0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -17,7 +17,6 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/gfs2_ondisk.h> -#include <linux/slow-work.h> #include <linux/quotaops.h> #include "gfs2.h" @@ -76,7 +75,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) sb->s_fs_info = sdp; sdp->sd_vfs = sb; - + set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); init_waitqueue_head(&sdp->sd_glock_wait); @@ -673,7 +672,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); - slow_work_init(&jd->jd_work, &gfs2_recover_ops); + INIT_WORK(&jd->jd_work, gfs2_recover_func); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) @@ -782,7 +781,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (sdp->sd_lockstruct.ls_first) { unsigned int x; for (x = 0; x < sdp->sd_journals; x++) { - error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x)); + error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), + true); if (error) { fs_err(sdp, "error recovering journal %u: %d\n", x, error); @@ -792,7 +792,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) gfs2_others_may_mount(sdp); } else if (!sdp->sd_args.ar_spectator) { - error = gfs2_recover_journal(sdp->sd_jdesc); + error = gfs2_recover_journal(sdp->sd_jdesc, true); if (error) { fs_err(sdp, "error recovering my journal: %d\n", error); goto fail_jinode_gh; @@ -1050,7 +1050,8 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) ret = match_int(&tmp[0], &option); if (ret || option < 0) goto hostdata_error; - ls->ls_jid = option; + if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags)) + ls->ls_jid = option; break; case Opt_id: /* Obsolete, but left for backward compat purposes */ @@ -1102,6 +1103,24 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp) lm->lm_unmount(sdp); } +static int gfs2_journalid_wait(void *word) +{ + if (signal_pending(current)) + return -EINTR; + schedule(); + return 0; +} + +static int wait_on_journal(struct gfs2_sbd *sdp) +{ + if (sdp->sd_args.ar_spectator) + return 0; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + return 0; + + return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE); +} + void gfs2_online_uevent(struct gfs2_sbd *sdp) { struct super_block *sb = sdp->sd_vfs; @@ -1194,6 +1213,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) goto fail_locking; + error = wait_on_journal(sdp); + if (error) + goto fail_sb; + error = init_inodes(sdp, DO); if (error) goto fail_sb; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 49667d68769..1bc6b5695e6 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; @@ -694,10 +694,8 @@ get_a_page: if (!buffer_mapped(bh)) goto unlock_out; /* If it's a newly allocated disk block for quota, zero it */ - if (buffer_new(bh)) { - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - } + if (buffer_new(bh)) + zero_user(page, pos - blocksize, bh->b_size); } if (PageUptodate(page)) @@ -723,7 +721,7 @@ get_a_page: /* If quota straddles page boundary, we need to update the rest of the * quota at the beginning of the next page */ - if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ + if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) { ptr = ptr + nbytes; nbytes = sizeof(struct gfs2_quota) - nbytes; offset = 0; @@ -789,15 +787,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) goto out; for (x = 0; x < num_qd; x++) { - int alloc_required; - offset = qd2offset(qda[x]); - error = gfs2_write_alloc_required(ip, offset, - sizeof(struct gfs2_quota), - &alloc_required); - if (error) - goto out_gunlock; - if (alloc_required) + if (gfs2_write_alloc_required(ip, offset, + sizeof(struct gfs2_quota))) nalloc++; } @@ -1457,10 +1449,10 @@ static int gfs2_quota_get_xstate(struct super_block *sb, switch (sdp->sd_args.ar_quota) { case GFS2_QUOTA_ON: - fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); + fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD); /*FALLTHRU*/ case GFS2_QUOTA_ACCOUNT: - fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); + fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT); break; case GFS2_QUOTA_OFF: break; @@ -1506,7 +1498,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; fdq->d_version = FS_DQUOT_VERSION; - fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; + fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; fdq->d_id = id; fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); @@ -1541,12 +1533,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, switch(type) { case USRQUOTA: type = QUOTA_USER; - if (fdq->d_flags != XFS_USER_QUOTA) + if (fdq->d_flags != FS_USER_QUOTA) return -EINVAL; break; case GRPQUOTA: type = QUOTA_GROUP; - if (fdq->d_flags != XFS_GROUP_QUOTA) + if (fdq->d_flags != FS_GROUP_QUOTA) return -EINVAL; break; default: @@ -1586,10 +1578,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, goto out_i; offset = qd2offset(qd); - error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), - &alloc_required); - if (error) - goto out_i; + alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); if (alloc_required) { al = gfs2_alloc_get(ip); if (al == NULL) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 195f60c8bd1..e7d236ca48b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 4b9bece3d43..f7f89a94a5a 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -14,7 +14,6 @@ #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> -#include <linux/slow-work.h> #include "gfs2.h" #include "incore.h" @@ -28,6 +27,8 @@ #include "util.h" #include "dir.h" +struct workqueue_struct *gfs_recovery_wq; + int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh) { @@ -443,23 +444,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } -static int gfs2_recover_get_ref(struct slow_work *work) -{ - struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); - if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) - return -EBUSY; - return 0; -} - -static void gfs2_recover_put_ref(struct slow_work *work) -{ - struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); - clear_bit(JDF_RECOVERY, &jd->jd_flags); - smp_mb__after_clear_bit(); - wake_up_bit(&jd->jd_flags, JDF_RECOVERY); -} - -static void gfs2_recover_work(struct slow_work *work) +void gfs2_recover_func(struct work_struct *work) { struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); struct gfs2_inode *ip = GFS2_I(jd->jd_inode); @@ -578,7 +563,7 @@ static void gfs2_recover_work(struct slow_work *work) gfs2_glock_dq_uninit(&j_gh); fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); - return; + goto done; fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); @@ -590,32 +575,35 @@ fail_gunlock_j: } fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); - fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); +done: + clear_bit(JDF_RECOVERY, &jd->jd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } -struct slow_work_ops gfs2_recover_ops = { - .owner = THIS_MODULE, - .get_ref = gfs2_recover_get_ref, - .put_ref = gfs2_recover_put_ref, - .execute = gfs2_recover_work, -}; - - static int gfs2_recovery_wait(void *word) { schedule(); return 0; } -int gfs2_recover_journal(struct gfs2_jdesc *jd) +int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) { int rv; - rv = slow_work_enqueue(&jd->jd_work); - if (rv) - return rv; - wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); + + if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) + return -EBUSY; + + /* we have JDF_RECOVERY, queue should always succeed */ + rv = queue_work(gfs_recovery_wq, &jd->jd_work); + BUG_ON(!rv); + + if (wait) + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, + TASK_UNINTERRUPTIBLE); + return 0; } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 1616ac22569..2226136c764 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -12,6 +12,8 @@ #include "incore.h" +extern struct workqueue_struct *gfs_recovery_wq; + static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) { if (++*blk == sdp->sd_jdesc->jd_blocks) @@ -27,8 +29,8 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); -extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); -extern struct slow_work_ops gfs2_recover_ops; +extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); +extern void gfs2_recover_func(struct work_struct *work); #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4d1aad38f1b..4140811a921 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -342,8 +342,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - int ar; - int error; if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { @@ -352,13 +350,12 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) } jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; - error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar); - if (!error && ar) { + if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { gfs2_consist_inode(ip); - error = -EIO; + return -EIO; } - return error; + return 0; } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 37f5393e68e..ccacffd2faa 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -25,6 +25,7 @@ #include "quota.h" #include "util.h" #include "glops.h" +#include "recovery.h" struct gfs2_attr { struct attribute attr; @@ -325,6 +326,30 @@ static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_first); } +static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned first; + int rv; + + rv = sscanf(buf, "%u", &first); + if (rv != 1 || first > 1) + return -EINVAL; + spin_lock(&sdp->sd_jindex_spin); + rv = -EBUSY; + if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) + goto out; + rv = -EINVAL; + if (sdp->sd_args.ar_spectator) + goto out; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + goto out; + sdp->sd_lockstruct.ls_first = first; + rv = 0; +out: + spin_unlock(&sdp->sd_jindex_spin); + return rv ? rv : len; +} + static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -352,7 +377,7 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { if (jd->jd_jid != jid) continue; - rv = slow_work_enqueue(&jd->jd_work); + rv = gfs2_recover_journal(jd, false); break; } out: @@ -377,14 +402,41 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); } +static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + unsigned jid; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + spin_lock(&sdp->sd_jindex_spin); + rv = -EINVAL; + if (sdp->sd_args.ar_spectator) + goto out; + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + goto out; + rv = -EBUSY; + if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) + goto out; + sdp->sd_lockstruct.ls_jid = jid; + smp_mb__after_clear_bit(); + wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); + rv = 0; +out: + spin_unlock(&sdp->sd_jindex_spin); + return rv ? rv : len; +} + #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(jid, 0444, jid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(jid, 0644, jid_show, jid_store); +GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0600, NULL, recover_store); GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); @@ -564,7 +616,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); - if (!sdp->sd_args.ar_spectator) + if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); if (gfs2_uuid_valid(uuid)) add_uevent_var(env, "UUID=%pUB", uuid); diff --git a/fs/inode.c b/fs/inode.c index 2bee20ae3d6..722860b323a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ -static int shrink_icache_memory(int nr, gfp_t gfp_mask) +static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { /* diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 93d1e47647b..f19ce94693d 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1281,13 +1281,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, int journal_check_available_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { - journal_superblock_t *sb; - if (!compat && !ro && !incompat) return 1; - sb = journal->j_superblock; - /* We can support any known requested features iff the * superblock is in version 2. Otherwise we fail to support any * extended sb features. */ @@ -1481,7 +1477,6 @@ int journal_flush(journal_t *journal) int journal_wipe(journal_t *journal, int write) { - journal_superblock_t *sb; int err = 0; J_ASSERT (!(journal->j_flags & JFS_LOADED)); @@ -1490,8 +1485,6 @@ int journal_wipe(journal_t *journal, int write) if (err) return err; - sb = journal->j_superblock; - if (!journal->j_tail) goto no_recovery; diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 54c9bc9e1b1..81051dafebf 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -283,12 +283,9 @@ int journal_recover(journal_t *journal) int journal_skip_recovery(journal_t *journal) { int err; - journal_superblock_t * sb; - struct recovery_info info; memset (&info, 0, sizeof(info)); - sb = journal->j_superblock; err = do_one_pass(journal, &info, PASS_SCAN); @@ -297,7 +294,8 @@ int journal_skip_recovery(journal_t *journal) ++journal->j_transaction_sequence; } else { #ifdef CONFIG_JBD_DEBUG - int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); + int dropped = info.end_transaction - + be32_to_cpu(journal->j_superblock->s_sequence); #endif jbd_debug(1, "JBD: ignoring %d transaction%s from the journal.\n", @@ -321,11 +319,6 @@ static int do_one_pass(journal_t *journal, unsigned int sequence; int blocktype; - /* Precompute the maximum metadata descriptors in a descriptor block */ - int MAX_BLOCKS_PER_DESC; - MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / sizeof(journal_block_tag_t)); - /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e7bf0fd9cec..ad5866aaf0f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -296,7 +296,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); - struct jbd2_buffer_trigger_type *triggers; journal_t *journal = transaction->t_journal; /* @@ -337,21 +336,21 @@ repeat: done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); new_offset = offset_in_page(jh_in->b_frozen_data); - triggers = jh_in->b_frozen_triggers; } else { new_page = jh2bh(jh_in)->b_page; new_offset = offset_in_page(jh2bh(jh_in)->b_data); - triggers = jh_in->b_triggers; } mapped_data = kmap_atomic(new_page, KM_USER0); /* - * Fire any commit trigger. Do this before checking for escaping, - * as the trigger may modify the magic offset. If a copy-out - * happens afterwards, it will have the correct data in the buffer. + * Fire data frozen trigger if data already wasn't frozen. Do this + * before checking for escaping, as the trigger may modify the magic + * offset. If a copy-out happens afterwards, it will have the correct + * data in the buffer. */ - jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, - triggers); + if (!done_copy_out) + jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, + jh_in->b_triggers); /* * Check for escaping diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 0752bcda535..d95cc9d0401 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -767,6 +767,9 @@ done: page = jh2bh(jh)->b_page; offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; source = kmap_atomic(page, KM_USER0); + /* Fire data frozen trigger just before we copy the data */ + jbd2_buffer_frozen_trigger(jh, source + offset, + jh->b_triggers); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); kunmap_atomic(source, KM_USER0); @@ -1005,15 +1008,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh, jh->b_triggers = type; } -void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, +void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers) { struct buffer_head *bh = jh2bh(jh); - if (!triggers || !triggers->t_commit) + if (!triggers || !triggers->t_frozen) return; - triggers->t_commit(triggers, bh, mapped_data, bh->b_size); + triggers->t_frozen(triggers, bh, mapped_data, bh->b_size); } void jbd2_buffer_abort_trigger(struct journal_head *jh, diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index a2d58c96f1b..d258e261bdc 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) { - /* success of check_xattr_ref_inode() means taht inode (ic) dose not have + /* success of check_xattr_ref_inode() means that inode (ic) dose not have * duplicate name/value pairs. If duplicate name/value pair would be found, * one will be removed. */ diff --git a/fs/mbcache.c b/fs/mbcache.c index ec88ff3d04a..e28f21b9534 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache) * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); +static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); static struct shrinker mb_cache_shrinker = { .shrink = mb_cache_shrink_fn, @@ -191,13 +191,14 @@ forget: * This function is called by the kernel memory management when memory * gets low. * + * @shrink: (ignored) * @nr_to_scan: Number of objects to scan * @gfp_mask: (ignored) * * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) +mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free_list); struct list_head *l, *ltmp; diff --git a/fs/namei.c b/fs/namei.c index 868d0cb9d47..42d2d28fb82 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -282,8 +282,7 @@ int inode_permission(struct inode *inode, int mask) if (retval) return retval; - return security_inode_permission(inode, - mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); + return security_inode_permission(inode, mask); } /** @@ -1484,8 +1483,7 @@ static int handle_truncate(struct path *path) */ error = locks_verify_locked(inode); if (!error) - error = security_path_truncate(path, 0, - ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + error = security_path_truncate(path); if (!error) { error = do_truncate(path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index fa338515402..1e634deff94 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -728,8 +728,8 @@ out_fput: out_bdi: /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: * - * The previously used put_filp(ncp_filp); was bogous, since - * it doesn't proper unlocking. + * The previously used put_filp(ncp_filp); was bogus, since + * it doesn't perform proper unlocking. */ fput(ncp_filp); out: diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7ec9b34a59f..d25b5257b7a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) #endif /* CONFIG_NFS_V4_1 */ } +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server); + if (error < 0) + goto out; + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_get_rootfh(server, mntfh); + if (error < 0) + goto out; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, fattr); + if (error < 0) + goto out; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; +out: + nfs_free_fattr(fattr); + return error; +} + /* * Create a version 4 volume record */ @@ -1346,7 +1395,6 @@ error: struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, struct nfs_fh *mntfh) { - struct nfs_fattr *fattr; struct nfs_server *server; int error; @@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, if (!server) return ERR_PTR(-ENOMEM); - error = -ENOMEM; - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - goto error; - /* set up the general RPC client */ error = nfs4_init_server(server, data); if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - error = nfs4_init_session(server); - if (error < 0) - goto error; - - /* Probe the root fh to retrieve its FSID */ - error = nfs4_get_rootfh(server, mntfh); + error = nfs4_server_common_setup(server, mntfh); if (error < 0) goto error; - dprintk("Server FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); - - nfs4_session_set_rwsize(server); - - error = nfs_probe_fsinfo(server, mntfh, fattr); - if (error < 0) - goto error; - - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - - server->mount_time = jiffies; dprintk("<-- nfs4_create_server() = %p\n", server); - nfs_free_fattr(fattr); return server; error: - nfs_free_fattr(fattr); nfs_free_server(server); dprintk("<-- nfs4_create_server() = error %d\n", error); return ERR_PTR(error); @@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; - struct nfs_fattr *fattr; int error; dprintk("--> nfs4_create_referral_server()\n"); @@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (!server) return ERR_PTR(-ENOMEM); - error = -ENOMEM; - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - goto error; - parent_server = NFS_SB(data->sb); parent_client = parent_server->nfs_client; @@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); - if (error < 0) - goto error; - - /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs4_server_common_setup(server, mntfh); if (error < 0) goto error; - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - dprintk("Referral FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - - spin_lock(&nfs_client_lock); - list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); - spin_unlock(&nfs_client_lock); - - server->mount_time = jiffies; - - nfs_free_fattr(fattr); dprintk("<-- nfs_create_referral_server() = %p\n", server); return server; error: - nfs_free_fattr(fattr); nfs_free_server(server); dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 782b431ef91..832e9e23932 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(head); struct nfs_inode *nfsi; @@ -1953,7 +1953,7 @@ int nfs_permission(struct inode *inode, int mask) if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) goto out; /* Is this sys_access() ? */ - if (mask & MAY_ACCESS) + if (mask & (MAY_ACCESS | MAY_CHDIR)) goto force_lookup; switch (inode->i_mode & S_IFMT) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 36a5e74f51b..f036153d9f5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,6 +27,7 @@ #include <linux/pagemap.h> #include <linux/aio.h> #include <linux/gfp.h> +#include <linux/swap.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -493,11 +494,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) */ static int nfs_release_page(struct page *page, gfp_t gfp) { + struct address_space *mapping = page->mapping; + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if ((gfp & GFP_KERNEL) == GFP_KERNEL) - nfs_wb_page(page->mapping->host, page); + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + int how = FLUSH_SYNC; + + /* Don't let kswapd deadlock waiting for OOM RPC calls */ + if (current_is_kswapd()) + how = 0; + nfs_commit_inode(mapping->host, how); + } /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 7428f7d6273..a70e446e160 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) goto out; } - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE) + if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) || !S_ISDIR(fsinfo.fattr->mode)) { printk(KERN_ERR "nfs4_get_rootfh:" " getroot encountered non-directory\n"); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d8bd619e386..e70f44b9b3f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[]; void nfs_close_context(struct nfs_open_context *ctx, int is_sync); /* dir.c */ -extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); +extern int nfs_access_cache_shrinker(struct shrinker *shrink, + int nr_to_scan, gfp_t gfp_mask); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6bdef28efa3..65c8dae4b26 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + *p++ = cpu_to_be32(iap->ia_atime.tv_sec); + *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); } else if (iap->ia_valid & ATTR_ATIME) { bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 6bd19d843af..df101d9f546 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -105,7 +105,7 @@ static char nfs_root_name[256] __initdata = ""; static __be32 servaddr __initdata = 0; /* Name of directory to mount */ -static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, }; +static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; /* NFS-related data */ static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 04214fc5c30..f9df16de4a5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } +#ifdef CONFIG_NFS_V4 +static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct nfs_client *clp = nfss->nfs_client; + + seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); + seq_printf(m, ",minorversion=%u", clp->cl_minorversion); +} +#else +static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ +} +#endif + /* * Describe the mount options in force on this server representation */ @@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (version != 4) nfs_show_mountd_options(m, nfss, showdefaults); + else + nfs_show_nfsv4_options(m, nfss, showdefaults); -#ifdef CONFIG_NFS_V4 - if (clp->rpc_ops->version == 4) - seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); -#endif if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 91679e2631e..9f81bdd91c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -222,7 +222,7 @@ static void nfs_end_page_writeback(struct page *page) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -static struct nfs_page *nfs_find_and_lock_request(struct page *page) +static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) { struct inode *inode = page->mapping->host; struct nfs_page *req; @@ -241,7 +241,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page) * request as dirty (in which case we don't care). */ spin_unlock(&inode->i_lock); - ret = nfs_wait_on_request(req); + if (!nonblock) + ret = nfs_wait_on_request(req); + else + ret = -EAGAIN; nfs_release_request(req); if (ret != 0) return ERR_PTR(ret); @@ -256,12 +259,12 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page) + struct page *page, bool nonblock) { struct nfs_page *req; int ret = 0; - req = nfs_find_and_lock_request(page); + req = nfs_find_and_lock_request(page, nonblock); if (!req) goto out; ret = PTR_ERR(req); @@ -283,12 +286,20 @@ out: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { struct inode *inode = page->mapping->host; + int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); nfs_pageio_cond_complete(pgio, page->index); - return nfs_page_async_flush(pgio, page); + ret = nfs_page_async_flush(pgio, page, + wbc->sync_mode == WB_SYNC_NONE || + wbc->nonblocking != 0); + if (ret == -EAGAIN) { + redirty_page_for_writepage(wbc, page); + ret = 0; + } + return ret; } /* @@ -1379,7 +1390,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -static int nfs_commit_inode(struct inode *inode, int how) +int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int may_wait = how & FLUSH_SYNC; @@ -1443,11 +1454,6 @@ out_mark_dirty: return ret; } #else -static int nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} - static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { return 0; @@ -1546,7 +1552,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, nfs_fscache_release_page(page, GFP_KERNEL); - req = nfs_find_and_lock_request(page); + req = nfs_find_and_lock_request(page, false); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1d2b1f156bc..96337a4fbbd 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, dump_stack(); goto bail; } - - past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); - mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, - (unsigned long long)past_eof); - - if (create && (iblock >= past_eof)) - set_buffer_new(bh_result); } + past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); + mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, + (unsigned long long)past_eof); + if (create && (iblock >= past_eof)) + set_buffer_new(bh_result); + bail: if (err < 0) err = -EIO; @@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle, return ret; } -handle_t *ocfs2_start_walk_page_trans(struct inode *inode, - struct page *page, - unsigned from, - unsigned to) -{ - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - handle_t *handle; - int ret = 0; - - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } - - if (ocfs2_should_order_data(inode)) { - ret = ocfs2_jbd2_file_inode(handle, inode); - if (ret < 0) - mlog_errno(ret); - } -out: - if (ret) { - if (!IS_ERR(handle)) - ocfs2_commit_trans(osb, handle); - handle = ERR_PTR(ret); - } - return handle; -} - static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) { sector_t status; @@ -1136,23 +1105,37 @@ out: */ static int ocfs2_grab_pages_for_write(struct address_space *mapping, struct ocfs2_write_ctxt *wc, - u32 cpos, loff_t user_pos, int new, + u32 cpos, loff_t user_pos, + unsigned user_len, int new, struct page *mmap_page) { int ret = 0, i; - unsigned long start, target_index, index; + unsigned long start, target_index, end_index, index; struct inode *inode = mapping->host; + loff_t last_byte; target_index = user_pos >> PAGE_CACHE_SHIFT; /* * Figure out how many pages we'll be manipulating here. For * non allocating write, we just change the one - * page. Otherwise, we'll need a whole clusters worth. + * page. Otherwise, we'll need a whole clusters worth. If we're + * writing past i_size, we only need enough pages to cover the + * last page of the write. */ if (new) { wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); + /* + * We need the index *past* the last page we could possibly + * touch. This is the page past the end of the write or + * i_size, whichever is greater. + */ + last_byte = max(user_pos + user_len, i_size_read(inode)); + BUG_ON(last_byte < 1); + end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1; + if ((start + wc->w_num_pages) > end_index) + wc->w_num_pages = end_index - start; } else { wc->w_num_pages = 1; start = target_index; @@ -1625,21 +1608,20 @@ out: * write path can treat it as an non-allocating write, which has no * special case code for sparse/nonsparse files. */ -static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, - unsigned len, +static int ocfs2_expand_nonsparse_inode(struct inode *inode, + struct buffer_head *di_bh, + loff_t pos, unsigned len, struct ocfs2_write_ctxt *wc) { int ret; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); loff_t newsize = pos + len; - if (ocfs2_sparse_alloc(osb)) - return 0; + BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); if (newsize <= i_size_read(inode)) return 0; - ret = ocfs2_extend_no_holes(inode, newsize, pos); + ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos); if (ret) mlog_errno(ret); @@ -1649,6 +1631,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, return ret; } +static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, + loff_t pos) +{ + int ret = 0; + + BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); + if (pos > i_size_read(inode)) + ret = ocfs2_zero_extend(inode, di_bh, pos); + + return ret; +} + int ocfs2_write_begin_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, @@ -1684,7 +1678,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, } } - ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); + if (ocfs2_sparse_alloc(osb)) + ret = ocfs2_zero_tail(inode, di_bh, pos); + else + ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len, + wc); if (ret) { mlog_errno(ret); goto out; @@ -1794,7 +1792,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * that we can zero and flush if we error after adding the * extent. */ - ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, + ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, cluster_of_pages, mmap_page); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0cd24cf5439..5efdd37dfe4 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -419,7 +419,7 @@ static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) static int debug_buffer_release(struct inode *inode, struct file *file) { - struct debug_buffer *db = (struct debug_buffer *)file->private_data; + struct debug_buffer *db = file->private_data; if (db) kfree(db->buf); @@ -715,7 +715,7 @@ static int debug_lockres_open(struct inode *inode, struct file *file) goto bail; } - seq = (struct seq_file *) file->private_data; + seq = file->private_data; seq->private = dl; dlm_grab(dlm); @@ -731,7 +731,7 @@ bail: static int debug_lockres_release(struct inode *inode, struct file *file) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct debug_lockres *dl = (struct debug_lockres *)seq->private; if (dl->dl_res) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6b5a492e174..153abb5abef 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, struct dlm_ctxt *dlm = NULL; struct dlm_ctxt *new_ctxt = NULL; - if (strlen(domain) > O2NM_MAX_NAME_LEN) { + if (strlen(domain) >= O2NM_MAX_NAME_LEN) { ret = -ENAMETOOLONG; mlog(ML_ERROR, "domain name length too long\n"); goto leave; @@ -1709,6 +1709,7 @@ retry: } if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { + spin_unlock(&dlm_domain_lock); mlog(ML_ERROR, "Requested locking protocol version is not " "compatible with already registered domain " diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 4a7506a4e31..94b97fc6a88 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2808,14 +2808,8 @@ again: mlog(0, "trying again...\n"); goto again; } - /* now that we are sure the MIGRATING state is there, drop - * the unneded state which blocked threads trying to DIRTY */ - spin_lock(&res->spinlock); - BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); - BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); - res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; - spin_unlock(&res->spinlock); + ret = 0; /* did the target go down or die? */ spin_lock(&dlm->spinlock); if (!test_bit(target, dlm->domain_map)) { @@ -2826,9 +2820,21 @@ again: spin_unlock(&dlm->spinlock); /* + * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for + * another try; otherwise, we are sure the MIGRATING state is there, + * drop the unneded state which blocked threads trying to DIRTY + */ + spin_lock(&res->spinlock); + BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); + res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; + if (!ret) + BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); + spin_unlock(&res->spinlock); + + /* * at this point: * - * o the DLM_LOCK_RES_MIGRATING flag is set + * o the DLM_LOCK_RES_MIGRATING flag is set if target not down * o there are no pending asts on this lockres * o all processes trying to reserve an ast on this * lockres must wait for the MIGRATING flag to clear diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce4be7..9dfaac73b36 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { int bit; - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); + bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit >= O2NM_MAX_NODES || bit < 0) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); else diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b83d6107a1f..bef34d0528d 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -182,8 +182,7 @@ static int dlmfs_file_release(struct inode *inode, { int level, status; struct dlmfs_inode_private *ip = DLMFS_I(inode); - struct dlmfs_filp_private *fp = - (struct dlmfs_filp_private *) file->private_data; + struct dlmfs_filp_private *fp = file->private_data; if (S_ISDIR(inode->i_mode)) BUG(); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 39eb16ac5f9..5e02a893f46 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2966,7 +2966,7 @@ static const struct seq_operations ocfs2_dlm_seq_ops = { static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) { - struct seq_file *seq = (struct seq_file *) file->private_data; + struct seq_file *seq = file->private_data; struct ocfs2_dlm_seq_priv *priv = seq->private; struct ocfs2_lock_res *res = &priv->p_iter_res; @@ -3000,7 +3000,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) goto out; } - seq = (struct seq_file *) file->private_data; + seq = file->private_data; seq->private = priv; ocfs2_add_lockres_tracking(&priv->p_iter_res, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6a13ea64c44..2b10b36d157 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -724,28 +724,55 @@ leave: return status; } +/* + * While a write will already be ordering the data, a truncate will not. + * Thus, we need to explicitly order the zeroed pages. + */ +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + handle_t *handle = NULL; + int ret = 0; + + if (!ocfs2_should_order_data(inode)) + goto out; + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_jbd2_file_inode(handle, inode); + if (ret < 0) + mlog_errno(ret); + +out: + if (ret) { + if (!IS_ERR(handle)) + ocfs2_commit_trans(osb, handle); + handle = ERR_PTR(ret); + } + return handle; +} + /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to * worry about recursive locking in ->write_begin() and ->write_end(). */ -static int ocfs2_write_zero_page(struct inode *inode, - u64 size) +static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, + u64 abs_to) { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index; - unsigned int offset; + unsigned long index = abs_from >> PAGE_CACHE_SHIFT; handle_t *handle = NULL; - int ret; + int ret = 0; + unsigned zero_from, zero_to, block_start, block_end; - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - offset++; - } - index = size >> PAGE_CACHE_SHIFT; + BUG_ON(abs_from >= abs_to); + BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); + BUG_ON(abs_from & (inode->i_blkbits - 1)); page = grab_cache_page(mapping, index); if (!page) { @@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode, goto out; } - ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + /* Get the offsets within the page that we want to zero */ + zero_from = abs_from & (PAGE_CACHE_SIZE - 1); + zero_to = abs_to & (PAGE_CACHE_SIZE - 1); + if (!zero_to) + zero_to = PAGE_CACHE_SIZE; - if (ocfs2_should_order_data(inode)) { - handle = ocfs2_start_walk_page_trans(inode, page, offset, - offset); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; + mlog(0, + "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", + (unsigned long long)abs_from, (unsigned long long)abs_to, + index, zero_from, zero_to); + + /* We know that zero_from is block aligned */ + for (block_start = zero_from; block_start < zero_to; + block_start = block_end) { + block_end = block_start + (1 << inode->i_blkbits); + + /* + * block_start is block-aligned. Bump it by one to + * force ocfs2_{prepare,commit}_write() to zero the + * whole block. + */ + ret = ocfs2_prepare_write_nolock(inode, page, + block_start + 1, + block_start + 1); + if (ret < 0) { + mlog_errno(ret); goto out_unlock; } - } - /* must not update i_size! */ - ret = block_commit_write(page, offset, offset); - if (ret < 0) - mlog_errno(ret); - else - ret = 0; + if (!handle) { + handle = ocfs2_zero_start_ordered_transaction(inode); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + break; + } + } + + /* must not update i_size! */ + ret = block_commit_write(page, block_start + 1, + block_start + 1); + if (ret < 0) + mlog_errno(ret); + else + ret = 0; + } if (handle) ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + out_unlock: unlock_page(page); page_cache_release(page); @@ -786,22 +838,114 @@ out: return ret; } -static int ocfs2_zero_extend(struct inode *inode, - u64 zero_to_size) +/* + * Find the next range to zero. We do this in terms of bytes because + * that's what ocfs2_zero_extend() wants, and it is dealing with the + * pagecache. We may return multiple extents. + * + * zero_start and zero_end are ocfs2_zero_extend()s current idea of what + * needs to be zeroed. range_start and range_end return the next zeroing + * range. A subsequent call should pass the previous range_end as its + * zero_start. If range_end is 0, there's nothing to do. + * + * Unwritten extents are skipped over. Refcounted extents are CoWd. + */ +static int ocfs2_zero_extend_get_range(struct inode *inode, + struct buffer_head *di_bh, + u64 zero_start, u64 zero_end, + u64 *range_start, u64 *range_end) { - int ret = 0; - u64 start_off; - struct super_block *sb = inode->i_sb; + int rc = 0, needs_cow = 0; + u32 p_cpos, zero_clusters = 0; + u32 zero_cpos = + zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; + u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; - start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); - while (start_off < zero_to_size) { - ret = ocfs2_write_zero_page(inode, start_off); - if (ret < 0) { - mlog_errno(ret); + while (zero_cpos < last_cpos) { + rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos, + &num_clusters, &ext_flags); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + zero_clusters = num_clusters; + if (ext_flags & OCFS2_EXT_REFCOUNTED) + needs_cow = 1; + break; + } + + zero_cpos += num_clusters; + } + if (!zero_clusters) { + *range_end = 0; + goto out; + } + + while ((zero_cpos + zero_clusters) < last_cpos) { + rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters, + &p_cpos, &num_clusters, + &ext_flags); + if (rc) { + mlog_errno(rc); goto out; } - start_off += sb->s_blocksize; + if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)) + break; + if (ext_flags & OCFS2_EXT_REFCOUNTED) + needs_cow = 1; + zero_clusters += num_clusters; + } + if ((zero_cpos + zero_clusters) > last_cpos) + zero_clusters = last_cpos - zero_cpos; + + if (needs_cow) { + rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, + UINT_MAX); + if (rc) { + mlog_errno(rc); + goto out; + } + } + + *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos); + *range_end = ocfs2_clusters_to_bytes(inode->i_sb, + zero_cpos + zero_clusters); + +out: + return rc; +} + +/* + * Zero one range returned from ocfs2_zero_extend_get_range(). The caller + * has made sure that the entire range needs zeroing. + */ +static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, + u64 range_end) +{ + int rc = 0; + u64 next_pos; + u64 zero_pos = range_start; + + mlog(0, "range_start = %llu, range_end = %llu\n", + (unsigned long long)range_start, + (unsigned long long)range_end); + BUG_ON(range_start >= range_end); + + while (zero_pos < range_end) { + next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; + if (next_pos > range_end) + next_pos = range_end; + rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); + if (rc < 0) { + mlog_errno(rc); + break; + } + zero_pos = next_pos; /* * Very large extends have the potential to lock up @@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode, cond_resched(); } -out: + return rc; +} + +int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, + loff_t zero_to_size) +{ + int ret = 0; + u64 zero_start, range_start = 0, range_end = 0; + struct super_block *sb = inode->i_sb; + + zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); + mlog(0, "zero_start %llu for i_size %llu\n", + (unsigned long long)zero_start, + (unsigned long long)i_size_read(inode)); + while (zero_start < zero_to_size) { + ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, + zero_to_size, + &range_start, + &range_end); + if (ret) { + mlog_errno(ret); + break; + } + if (!range_end) + break; + /* Trim the ends */ + if (range_start < zero_start) + range_start = zero_start; + if (range_end > zero_to_size) + range_end = zero_to_size; + + ret = ocfs2_zero_extend_range(inode, range_start, + range_end); + if (ret) { + mlog_errno(ret); + break; + } + zero_start = range_end; + } + return ret; } -int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) +int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, + u64 new_i_size, u64 zero_to) { int ret; u32 clusters_to_add; struct ocfs2_inode_info *oi = OCFS2_I(inode); + /* + * Only quota files call this without a bh, and they can't be + * refcounted. + */ + BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); + clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); if (clusters_to_add < oi->ip_clusters) clusters_to_add = 0; @@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) * still need to zero the area between the old i_size and the * new i_size. */ - ret = ocfs2_zero_extend(inode, zero_to); + ret = ocfs2_zero_extend(inode, di_bh, zero_to); if (ret < 0) mlog_errno(ret); @@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode, goto out; if (i_size_read(inode) == new_i_size) - goto out; + goto out; BUG_ON(new_i_size < i_size_read(inode)); /* - * Fall through for converting inline data, even if the fs - * supports sparse files. - * - * The check for inline data here is legal - nobody can add - * the feature since we have i_mutex. We must check it again - * after acquiring ip_alloc_sem though, as paths like mmap - * might have raced us to converting the inode to extents. - */ - if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - goto out_update_size; - - /* * The alloc sem blocks people in read/write from reading our * allocation until we're done changing it. We depend on * i_mutex to block other extend/truncate calls while we're - * here. + * here. We even have to hold it for sparse files because there + * might be some tail zeroing. */ down_write(&oi->ip_alloc_sem); @@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode, ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); if (ret) { up_write(&oi->ip_alloc_sem); - mlog_errno(ret); goto out; } } - if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); + if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) + ret = ocfs2_zero_extend(inode, di_bh, new_i_size); + else + ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size, + new_i_size); up_write(&oi->ip_alloc_sem); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index d66cf4f7c70..97bf761c9e7 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, int ocfs2_simple_size_update(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size); -int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, - u64 zero_to); +int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, + u64 new_i_size, u64 zero_to); +int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, + loff_t zero_to); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9c1b92ebeb9..9b57c0350ff 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger return container_of(triggers, struct ocfs2_triggers, ot_triggers); } -static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Quota blocks have their own trigger because the struct ocfs2_block_check * offset depends on the blocksize. */ -static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Directory blocks also have their own trigger because the * struct ocfs2_block_check offset depends on the blocksize. */ -static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, static struct ocfs2_triggers di_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dinode, i_check), @@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = { static struct ocfs2_triggers eb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_extent_block, h_check), @@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = { static struct ocfs2_triggers rb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), @@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = { static struct ocfs2_triggers gd_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), @@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = { static struct ocfs2_triggers db_triggers = { .ot_triggers = { - .t_commit = ocfs2_db_commit_trigger, + .t_frozen = ocfs2_db_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers xb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), @@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = { static struct ocfs2_triggers dq_triggers = { .ot_triggers = { - .t_commit = ocfs2_dq_commit_trigger, + .t_frozen = ocfs2_dq_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers dr_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), @@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = { static struct ocfs2_triggers dl_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), @@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) - schedule_delayed_work(&os->os_orphan_scan_work, + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); } } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 3d7419682dc..ec6adbf8f55 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) { unsigned int la_mb; unsigned int gd_mb; + unsigned int la_max_mb; unsigned int megs_per_slot; struct super_block *sb = osb->sb; @@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) if (megs_per_slot < la_mb) la_mb = megs_per_slot; + /* We can't store more bits than we can in a block. */ + la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, + ocfs2_local_alloc_size(sb) * 8); + if (la_mb > la_max_mb) + la_mb = la_max_mb; + return la_mb; } diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 2bb35fe0051..4607923eb24 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) * locking allocators ranks above a transaction start */ WARN_ON(journal_current_handle()); - status = ocfs2_extend_no_holes(gqinode, + status = ocfs2_extend_no_holes(gqinode, NULL, gqinode->i_size + (need_alloc << sb->s_blocksize_bits), gqinode->i_size); if (status < 0) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 8bd70d4d184..dc78764ccc4 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( u64 p_blkno; /* We are protected by dqio_sem so no locking needed */ - status = ocfs2_extend_no_holes(lqinode, + status = ocfs2_extend_no_holes(lqinode, NULL, lqinode->i_size + 2 * sb->s_blocksize, lqinode->i_size); if (status < 0) { @@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( return ocfs2_local_quota_add_chunk(sb, type, offset); /* We are protected by dqio_sem so no locking needed */ - status = ocfs2_extend_no_holes(lqinode, + status = ocfs2_extend_no_holes(lqinode, NULL, lqinode->i_size + sb->s_blocksize, lqinode->i_size); if (status < 0) { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 4793f36f651..3ac5aa733e9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); + /* + * We only duplicate pages until we reach the page contains i_size - 1. + * So trim 'end' to i_size. + */ + if (end > i_size_read(context->inode)) + end = i_size_read(context->inode); while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; @@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; struct buffer_head *new_bh = NULL; + if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + ret = filemap_fdatawrite(inode->i_mapping); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 40650021fc2..d8b6e4259b8 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -26,7 +26,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/bitops.h> #include <linux/list.h> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f4c2a9eb8c4..a8e6a95a353 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); - cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); + cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) le16_add_cpu(&cl->cl_next_free_rec, 1); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0eaa929a4db..03a799fdd74 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2472,7 +2472,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to - * allocate osb->journal at the start of ocfs2_initalize_osb(), + * allocate osb->journal at the start of ocfs2_initialize_osb(), * we free it here. */ kfree(osb->journal); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e97b34842cf..d03469f6180 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { - int status = 0; + int status = 0, credits; handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); @@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); - status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto leave; - } + while (clusters_to_add) { + status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + break; + } - prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); - status = ocfs2_add_clusters_in_btree(handle, - &et, - &logical_start, - clusters_to_add, - 0, - ctxt->data_ac, - ctxt->meta_ac, - &why); - if (status < 0) { - mlog_errno(status); - goto leave; - } + prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); + status = ocfs2_add_clusters_in_btree(handle, + &et, + &logical_start, + clusters_to_add, + 0, + ctxt->data_ac, + ctxt->meta_ac, + &why); + if ((status < 0) && (status != -EAGAIN)) { + if (status != -ENOSPC) + mlog_errno(status); + break; + } - ocfs2_journal_dirty(handle, vb->vb_bh); + ocfs2_journal_dirty(handle, vb->vb_bh); - clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; + clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - + prev_clusters; - /* - * We should have already allocated enough space before the transaction, - * so no need to restart. - */ - BUG_ON(why != RESTART_NONE || clusters_to_add); - -leave: + if (why != RESTART_NONE && clusters_to_add) { + /* + * We can only fail in case the alloc file doesn't give + * up enough clusters. + */ + BUG_ON(why == RESTART_META); + + mlog(0, "restarting xattr value extension for %u" + " clusters,.\n", clusters_to_add); + credits = ocfs2_calc_extend_credits(inode->i_sb, + &vb->vb_xv->xr_list, + clusters_to_add); + status = ocfs2_extend_trans(handle, credits); + if (status < 0) { + status = -ENOMEM; + mlog_errno(status); + break; + } + } + } return status; } @@ -6788,16 +6804,15 @@ out: return ret; } -static int ocfs2_reflink_xattr_buckets(handle_t *handle, +static int ocfs2_reflink_xattr_bucket(handle_t *handle, u64 blkno, u64 new_blkno, u32 clusters, + u32 *cpos, int num_buckets, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac, struct ocfs2_reflink_xattr_tree_args *args) { int i, j, ret = 0; struct super_block *sb = args->reflink->old_inode->i_sb; - u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); - u32 num_buckets = clusters * bpc; int bpb = args->old_bucket->bu_blocks; struct ocfs2_xattr_value_buf vb = { .vb_access = ocfs2_journal_access, @@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, break; } - /* - * The real bucket num in this series of blocks is stored - * in the 1st bucket. - */ - if (i == 0) - num_buckets = le16_to_cpu( - bucket_xh(args->old_bucket)->xh_num_buckets); - ret = ocfs2_xattr_bucket_journal_access(handle, args->new_bucket, OCFS2_JOURNAL_ACCESS_CREATE); @@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, bucket_block(args->old_bucket, j), sb->s_blocksize); + /* + * Record the start cpos so that we can use it to initialize + * our xattr tree we also set the xh_num_bucket for the new + * bucket. + */ + if (i == 0) { + *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> + xh_entries[0].xe_name_hash); + bucket_xh(args->new_bucket)->xh_num_buckets = + cpu_to_le16(num_buckets); + } + ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); ret = ocfs2_reflink_xattr_header(handle, args->reflink, @@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, } ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); + ocfs2_xattr_bucket_relse(args->old_bucket); ocfs2_xattr_bucket_relse(args->new_bucket); } @@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, ocfs2_xattr_bucket_relse(args->new_bucket); return ret; } + +static int ocfs2_reflink_xattr_buckets(handle_t *handle, + struct inode *inode, + struct ocfs2_reflink_xattr_tree_args *args, + struct ocfs2_extent_tree *et, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac, + u64 blkno, u32 cpos, u32 len) +{ + int ret, first_inserted = 0; + u32 p_cluster, num_clusters, reflink_cpos = 0; + u64 new_blkno; + unsigned int num_buckets, reflink_buckets; + unsigned int bpc = + ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); + + ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); + ocfs2_xattr_bucket_relse(args->old_bucket); + + while (len && num_buckets) { + ret = ocfs2_claim_clusters(handle, data_ac, + 1, &p_cluster, &num_clusters); + if (ret) { + mlog_errno(ret); + goto out; + } + + new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); + reflink_buckets = min(num_buckets, bpc * num_clusters); + + ret = ocfs2_reflink_xattr_bucket(handle, blkno, + new_blkno, num_clusters, + &reflink_cpos, reflink_buckets, + meta_ac, data_ac, args); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * For the 1st allocated cluster, we make it use the same cpos + * so that the xattr tree looks the same as the original one + * in the most case. + */ + if (!first_inserted) { + reflink_cpos = cpos; + first_inserted = 1; + } + ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, + num_clusters, 0, meta_ac); + if (ret) + mlog_errno(ret); + + mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", + (unsigned long long)new_blkno, num_clusters, reflink_cpos); + + len -= num_clusters; + blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); + num_buckets -= reflink_buckets; + } +out: + return ret; +} + /* * Create the same xattr extent record in the new inode's xattr tree. */ @@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, void *para) { int ret, credits = 0; - u32 p_cluster, num_clusters; - u64 new_blkno; handle_t *handle; struct ocfs2_reflink_xattr_tree_args *args = (struct ocfs2_reflink_xattr_tree_args *)para; @@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_extent_tree et; + mlog(0, "reflink xattr buckets %llu len %u\n", + (unsigned long long)blkno, len); + ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(args->reflink->new_inode), args->new_blk_bh); @@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, goto out; } - ret = ocfs2_claim_clusters(handle, data_ac, - len, &p_cluster, &num_clusters); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster); - - mlog(0, "reflink xattr buckets %llu to %llu, len %u\n", - (unsigned long long)blkno, (unsigned long long)new_blkno, len); - ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len, - meta_ac, data_ac, args); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", - (unsigned long long)new_blkno, len, cpos); - ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno, - len, 0, meta_ac); + ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, + meta_ac, data_ac, + blkno, cpos, len); if (ret) mlog_errno(ret); -out_commit: ocfs2_commit_trans(osb, handle); out: diff --git a/fs/open.c b/fs/open.c index 5463266db9e..0d1fa3dc0ef 100644 --- a/fs/open.c +++ b/fs/open.c @@ -110,7 +110,7 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) - error = security_path_truncate(&path, length, 0); + error = security_path_truncate(&path); if (!error) error = do_truncate(path.dentry, length, 0, NULL); @@ -165,8 +165,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) - error = security_path_truncate(&file->f_path, length, - ATTR_MTIME|ATTR_CTIME); + error = security_path_truncate(&file->f_path); if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); out_putf: @@ -367,7 +366,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -396,7 +395,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); + error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: @@ -414,7 +413,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 5dcd4b0c553..72c52656dc2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -459,7 +459,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, } /* everything is up and running, commence */ - INIT_RCU_HEAD(&p->rcu_head); rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 3e73de5967f..fc8497643fd 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state) } *label; unsigned char *data; Sector sect; + sector_t labelsect; res = 0; blocksize = bdev_logical_block_size(bdev); @@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state) goto out_freeall; /* + * Special case for FBA disks: label sector does not depend on + * blocksize. + */ + if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) || + (info->cu_type == 0x3880 && info->dev_type == 0x3370)) + labelsect = info->label_block; + else + labelsect = info->label_block * (blocksize >> 9); + + /* * Get volume label, extract name and type. */ - data = read_part_sector(state, info->label_block*(blocksize/512), - §); + data = read_part_sector(state, labelsect, §); if (data == NULL) goto out_readerr; diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b58d38bc91..fff6572676a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } - cred = get_cred((struct cred *) __task_cred(p)); + cred = get_task_cred(p); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index ce94801f48c..d9396a4fc7f 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np, for (pp = np->properties; pp != NULL; pp = pp->next) { p = pp->name; + if (strchr(p, '/')) + continue; + if (duplicate_name(de, p)) p = fixup_name(np, de, p); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 46d4b5d72bd..cb6306e6384 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } +static void pad_len_spaces(struct seq_file *m, int len) +{ + len = 25 + sizeof(void*) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); +} + /* * display a single VMA to a sequenced file */ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) { + struct mm_struct *mm = vma->vm_mm; unsigned long ino = 0; struct file *file; dev_t dev = 0; @@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) MAJOR(dev), MINOR(dev), ino, &len); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + pad_len_spaces(m, len); seq_path(m, &file->f_path, ""); + } else if (mm) { + if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { + pad_len_spaces(m, len); + seq_puts(m, "[stack]"); + } } seq_putc(m, '\n'); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 12c233da1b6..ef72b169942 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -132,6 +132,22 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); +void __quota_error(struct super_block *sb, const char *func, + const char *fmt, ...) +{ + va_list args; + + if (printk_ratelimit()) { + va_start(args, fmt); + printk(KERN_ERR "Quota error (device %s): %s: ", + sb->s_id, func); + vprintk(fmt, args); + printk("\n"); + va_end(args); + } +} +EXPORT_SYMBOL(__quota_error); + #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; #endif @@ -676,7 +692,7 @@ static void prune_dqcache(int count) * This is called from kswapd when we think we need some * more memory */ -static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) +static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { spin_lock(&dq_list_lock); @@ -705,11 +721,8 @@ void dqput(struct dquot *dquot) return; #ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { - printk("VFS: dqput: trying to free free dquot\n"); - printk("VFS: device %s, dquot of %s %d\n", - dquot->dq_sb->s_id, - quotatypes[dquot->dq_type], - dquot->dq_id); + quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", + quotatypes[dquot->dq_type], dquot->dq_id); BUG(); } #endif @@ -732,9 +745,9 @@ we_slept: /* Commit dquot before releasing */ ret = dquot->dq_sb->dq_op->write_dquot(dquot); if (ret < 0) { - printk(KERN_ERR "VFS: cannot write quota structure on " - "device %s (error %d). Quota may get out of " - "sync!\n", dquot->dq_sb->s_id, ret); + quota_error(dquot->dq_sb, "Can't write quota structure" + " (error %d). Quota may get out of sync!", + ret); /* * We clear dirty bit anyway, so that we avoid * infinite loop here @@ -914,9 +927,9 @@ static void add_dquot_ref(struct super_block *sb, int type) #ifdef CONFIG_QUOTA_DEBUG if (reserved) { - printk(KERN_WARNING "VFS (%s): Writes happened before quota" - " was turned on thus quota information is probably " - "inconsistent. Please run quotacheck(8).\n", sb->s_id); + quota_error(sb, "Writes happened before quota was turned on " + "thus quota information is probably inconsistent. " + "Please run quotacheck(8)"); } #endif } @@ -947,7 +960,9 @@ static int remove_inode_dquot_ref(struct inode *inode, int type, if (dqput_blocks(dquot)) { #ifdef CONFIG_QUOTA_DEBUG if (atomic_read(&dquot->dq_count) != 1) - printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); + quota_error(inode->i_sb, "Adding dquot with " + "dq_count %d to dispose list", + atomic_read(&dquot->dq_count)); #endif spin_lock(&dq_list_lock); /* As dquot must have currently users it can't be on @@ -986,6 +1001,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { struct inode *inode; + int reserved = 0; spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -995,10 +1011,20 @@ static void remove_dquot_ref(struct super_block *sb, int type, * only quota pointers and these have separate locking * (dqptr_sem). */ - if (!IS_NOQUOTA(inode)) + if (!IS_NOQUOTA(inode)) { + if (unlikely(inode_get_rsv_space(inode) > 0)) + reserved = 1; remove_inode_dquot_ref(inode, type, tofree_head); + } } spin_unlock(&inode_lock); +#ifdef CONFIG_QUOTA_DEBUG + if (reserved) { + printk(KERN_WARNING "VFS (%s): Writes happened after quota" + " was disabled thus quota information is probably " + "inconsistent. Please run quotacheck(8).\n", sb->s_id); + } +#endif } /* Gather all references from inodes and drop them */ @@ -1304,6 +1330,15 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) return QUOTA_NL_NOWARN; } +static int dquot_active(const struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (IS_NOQUOTA(inode)) + return 0; + return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); +} + /* * Initialize quota pointers in inode * @@ -1323,7 +1358,7 @@ static void __dquot_initialize(struct inode *inode, int type) /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + if (!dquot_active(inode)) return; /* First get references to structures we might need. */ @@ -1507,7 +1542,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) * First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { + if (!dquot_active(inode)) { inode_incr_space(inode, number, reserve); goto out; } @@ -1559,7 +1594,7 @@ int dquot_alloc_inode(const struct inode *inode) /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + if (!dquot_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; @@ -1596,7 +1631,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { int cnt; - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { + if (!dquot_active(inode)) { inode_claim_rsv_space(inode, number); return 0; } @@ -1629,7 +1664,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { + if (!dquot_active(inode)) { inode_decr_space(inode, number, reserve); return; } @@ -1667,7 +1702,7 @@ void dquot_free_inode(const struct inode *inode) /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + if (!dquot_active(inode)) return; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1790,7 +1825,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) struct super_block *sb = inode->i_sb; int ret; - if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode)) + if (!dquot_active(inode)) return 0; if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) @@ -1957,7 +1992,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) truncate_inode_pages(&toputinode[cnt]->i_data, 0); mutex_unlock(&toputinode[cnt]->i_mutex); - mark_inode_dirty(toputinode[cnt]); + mark_inode_dirty_sync(toputinode[cnt]); } mutex_unlock(&dqopt->dqonoff_mutex); } @@ -2270,7 +2305,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) memset(di, 0, sizeof(*di)); di->d_version = FS_DQUOT_VERSION; di->d_flags = dquot->dq_type == USRQUOTA ? - XFS_USER_QUOTA : XFS_GROUP_QUOTA; + FS_USER_QUOTA : FS_GROUP_QUOTA; di->d_id = dquot->dq_id; spin_lock(&dq_data_lock); diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 24f03407eeb..9e48874eabc 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -65,8 +65,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) ret = sb->s_op->quota_write(sb, info->dqi_type, buf, info->dqi_usable_bs, blk << info->dqi_blocksize_bits); if (ret != info->dqi_usable_bs) { - q_warn(KERN_WARNING "VFS: dquota write failed on " - "dev %s\n", sb->s_id); + quota_error(sb, "dquota write failed"); if (ret >= 0) ret = -EIO; } @@ -160,9 +159,8 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); /* No matter whether write succeeds block is out of list */ if (write_blk(info, blk, buf) < 0) - q_warn(KERN_ERR - "VFS: Can't write block (%u) with free entries.\n", - blk); + quota_error(info->dqi_sb, "Can't write block (%u) " + "with free entries", blk); return 0; out_buf: kfree(tmpbuf); @@ -252,9 +250,8 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { *err = remove_free_dqentry(info, buf, blk); if (*err < 0) { - q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't " - "remove block (%u) from entry free list.\n", - blk); + quota_error(dquot->dq_sb, "Can't remove block (%u) " + "from entry free list", blk); goto out_buf; } } @@ -268,16 +265,15 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, } #ifdef __QUOTA_QT_PARANOIA if (i == qtree_dqstr_in_blk(info)) { - printk(KERN_ERR "VFS: find_free_dqentry(): Data block full " - "but it shouldn't.\n"); + quota_error(dquot->dq_sb, "Data block full but it shouldn't"); *err = -EIO; goto out_buf; } #endif *err = write_blk(info, blk, buf); if (*err < 0) { - q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " - "data block %u.\n", blk); + quota_error(dquot->dq_sb, "Can't write quota data block %u", + blk); goto out_buf; } dquot->dq_off = (blk << info->dqi_blocksize_bits) + @@ -311,8 +307,8 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, } else { ret = read_blk(info, *treeblk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't read tree quota block " - "%u.\n", *treeblk); + quota_error(dquot->dq_sb, "Can't read tree quota " + "block %u", *treeblk); goto out_buf; } } @@ -323,9 +319,9 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, if (depth == info->dqi_qtree_depth - 1) { #ifdef __QUOTA_QT_PARANOIA if (newblk) { - printk(KERN_ERR "VFS: Inserting already present quota " - "entry (block %u).\n", - le32_to_cpu(ref[get_index(info, + quota_error(dquot->dq_sb, "Inserting already present " + "quota entry (block %u)", + le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)])); ret = -EIO; goto out_buf; @@ -373,8 +369,8 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) if (!dquot->dq_off) { ret = dq_insert_tree(info, dquot); if (ret < 0) { - q_warn(KERN_ERR "VFS: Error %zd occurred while " - "creating quota.\n", ret); + quota_error(sb, "Error %zd occurred while creating " + "quota", ret); kfree(ddquot); return ret; } @@ -385,8 +381,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); if (ret != info->dqi_entry_size) { - q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n", - sb->s_id); + quota_error(sb, "dquota write failed"); if (ret >= 0) ret = -ENOSPC; } else { @@ -410,14 +405,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, if (!buf) return -ENOMEM; if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { - q_warn(KERN_ERR "VFS: Quota structure has offset to other " - "block (%u) than it should (%u).\n", blk, - (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); + quota_error(dquot->dq_sb, "Quota structure has offset to " + "other block (%u) than it should (%u)", blk, + (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); goto out_buf; } ret = read_blk(info, blk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk); + quota_error(dquot->dq_sb, "Can't read quota data block %u", + blk); goto out_buf; } dh = (struct qt_disk_dqdbheader *)buf; @@ -427,8 +423,8 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, if (ret >= 0) ret = put_free_dqblk(info, buf, blk); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't move quota data block (%u) " - "to free list.\n", blk); + quota_error(dquot->dq_sb, "Can't move quota data block " + "(%u) to free list", blk); goto out_buf; } } else { @@ -440,15 +436,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, /* Insert will write block itself */ ret = insert_free_dqentry(info, buf, blk); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't insert quota data " - "block (%u) to free entry list.\n", blk); + quota_error(dquot->dq_sb, "Can't insert quota " + "data block (%u) to free entry list", blk); goto out_buf; } } else { ret = write_blk(info, blk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't write quota data " - "block %u\n", blk); + quota_error(dquot->dq_sb, "Can't write quota " + "data block %u", blk); goto out_buf; } } @@ -472,7 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, return -ENOMEM; ret = read_blk(info, *blk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); + quota_error(dquot->dq_sb, "Can't read quota data " + "block %u", blk); goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); @@ -496,8 +493,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, } else { ret = write_blk(info, *blk, buf); if (ret < 0) - q_warn(KERN_ERR "VFS: Can't write quota tree " - "block %u.\n", *blk); + quota_error(dquot->dq_sb, "Can't write quota " + "tree block %u", blk); } } out_buf: @@ -529,7 +526,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, return -ENOMEM; ret = read_blk(info, blk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + quota_error(dquot->dq_sb, "Can't read quota tree " + "block %u", blk); goto out_buf; } ddquot = buf + sizeof(struct qt_disk_dqdbheader); @@ -539,8 +537,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, ddquot += info->dqi_entry_size; } if (i == qtree_dqstr_in_blk(info)) { - q_warn(KERN_ERR "VFS: Quota for id %u referenced " - "but not present.\n", dquot->dq_id); + quota_error(dquot->dq_sb, "Quota for id %u referenced " + "but not present", dquot->dq_id); ret = -EIO; goto out_buf; } else { @@ -564,7 +562,8 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, return -ENOMEM; ret = read_blk(info, blk, buf); if (ret < 0) { - q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + quota_error(dquot->dq_sb, "Can't read quota tree block %u", + blk); goto out_buf; } ret = 0; @@ -598,7 +597,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) #ifdef __QUOTA_QT_PARANOIA /* Invalidated quota? */ if (!sb_dqopt(dquot->dq_sb)->files[type]) { - printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); + quota_error(sb, "Quota invalidated while reading!"); return -EIO; } #endif @@ -607,8 +606,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) offset = find_dqentry(info, dquot); if (offset <= 0) { /* Entry not present? */ if (offset < 0) - q_warn(KERN_ERR "VFS: Can't read quota " - "structure for id %u.\n", dquot->dq_id); + quota_error(sb, "Can't read quota structure " + "for id %u", dquot->dq_id); dquot->dq_off = 0; set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); @@ -625,8 +624,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) if (ret != info->dqi_entry_size) { if (ret >= 0) ret = -EIO; - q_warn(KERN_ERR "VFS: Error while reading quota " - "structure for id %u.\n", dquot->dq_id); + quota_error(sb, "Error while reading quota structure for id %u", + dquot->dq_id); set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); kfree(ddquot); diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h index ccc3e71fb1d..a1ab8db81a5 100644 --- a/fs/quota/quota_tree.h +++ b/fs/quota/quota_tree.h @@ -22,10 +22,4 @@ struct qt_disk_dqdbheader { #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ -#define q_warn(fmt, args...) \ -do { \ - if (printk_ratelimit()) \ - printk(fmt, ## args); \ -} while(0) - #endif /* _LINUX_QUOTAIO_TREE_H */ diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 4af344c5852..34b37a67bb1 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -95,8 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot) (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); if (ret != sizeof(struct v1_disk_dqblk)) { - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", - dquot->dq_sb->s_id); + quota_error(dquot->dq_sb, "dquota write failed"); if (ret >= 0) ret = -EIO; goto out; diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 135206af145..65444d29406 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -63,9 +63,8 @@ static int v2_read_header(struct super_block *sb, int type, size = sb->s_op->quota_read(sb, type, (char *)dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) { - q_warn(KERN_WARNING "quota_v2: Failed header read:" - " expected=%zd got=%zd\n", - sizeof(struct v2_disk_dqheader), size); + quota_error(sb, "Failed header read: expected=%zd got=%zd", + sizeof(struct v2_disk_dqheader), size); return 0; } return 1; @@ -106,8 +105,7 @@ static int v2_read_file_info(struct super_block *sb, int type) size = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { - q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", - sb->s_id); + quota_error(sb, "Can't read info structure"); return -1; } info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS); @@ -167,8 +165,7 @@ static int v2_write_file_info(struct super_block *sb, int type) size = sb->s_op->quota_write(sb, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { - q_warn(KERN_WARNING "Can't write info structure on device %s.\n", - sb->s_id); + quota_error(sb, "Can't write info structure"); return -1; } return 0; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0f22fdaf54a..29db72203bd 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1221,7 +1221,7 @@ static void init_inode(struct inode *inode, struct treepath *path) inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, SD_V2_SIZE)); - /* read persistent inode attributes from sd and initalise + /* read persistent inode attributes from sd and initialise generic inode flags from them */ REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); diff --git a/fs/splice.c b/fs/splice.c index 740e6b9faf7..efdbfece993 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, { struct file *file = sd->u.file; - return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); + return do_splice_from(pipe, file, &file->f_pos, sd->total_len, + sd->flags); } /** @@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_in) return -ESPIPE; if (off_out) { - if (!out->f_op || !out->f_op->llseek || - out->f_op->llseek == no_llseek) + if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; @@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, if (off_out) return -ESPIPE; if (off_in) { - if (!in->f_op || !in->f_op->llseek || - in->f_op->llseek == no_llseek) + if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; diff --git a/fs/super.c b/fs/super.c index 5c35bc7a499..938119ab8dc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -374,6 +374,8 @@ void sync_supers(void) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } } @@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); @@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work) } up_write(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1beaa739d0a..1b27b5688f6 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -593,7 +593,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); * @mode: file permissions. * */ -int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) +int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, + mode_t mode) { struct sysfs_dirent *sd; struct iattr newattrs; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index f71246bebfe..a7ac78f8e67 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -28,6 +28,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, struct sysfs_dirent *target_sd = NULL; struct sysfs_dirent *sd = NULL; struct sysfs_addrm_cxt acxt; + enum kobj_ns_type ns_type; int error; BUG_ON(!name); @@ -58,16 +59,29 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, if (!sd) goto out_put; - if (sysfs_ns_type(parent_sd)) + ns_type = sysfs_ns_type(parent_sd); + if (ns_type) sd->s_ns = target->ktype->namespace(target); sd->s_symlink.target_sd = target_sd; target_sd = NULL; /* reference is now owned by the symlink */ sysfs_addrm_start(&acxt, parent_sd); - if (warn) - error = sysfs_add_one(&acxt, sd); - else - error = __sysfs_add_one(&acxt, sd); + /* Symlinks must be between directories with the same ns_type */ + if (!ns_type || + (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { + if (warn) + error = sysfs_add_one(&acxt, sd); + else + error = __sysfs_add_one(&acxt, sd); + } else { + error = -EINVAL; + WARN(1, KERN_WARNING + "sysfs: symlink across ns_types %s/%s -> %s/%s\n", + parent_sd->s_name, + sd->s_name, + sd->s_symlink.target_sd->s_parent->s_name, + sd->s_symlink.target_sd->s_name); + } sysfs_addrm_finish(&acxt); if (error) @@ -122,7 +136,7 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, { const void *ns = NULL; spin_lock(&sysfs_assoc_lock); - if (targ->sd) + if (targ->sd && sysfs_ns_type(kobj->sd)) ns = targ->sd->s_ns; spin_unlock(&sysfs_assoc_lock); sysfs_hash_and_remove(kobj->sd, ns, name); diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index bbd69bdb0fa..fcc498ec9b3 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -25,6 +25,7 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> #include "sysv.h" /* We don't trust the value of @@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) struct inode *inode; sysv_ino_t ino; unsigned count; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE + }; inode = new_inode(sb); if (!inode) @@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) insert_inode_hash(inode); mark_inode_dirty(inode); - sysv_write_inode(inode, 0); /* ensure inode not allocated again */ + sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */ mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ /* That's it. */ unlock_super(sb); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 076ca50e993..c8ff0d1ae5d 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -62,7 +62,9 @@ */ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { + down_read(&c->vfs_sb->s_umount); writeback_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); } /** diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ad7f67b827e..0084a33c4c6 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1457,13 +1457,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, @@ -1586,7 +1586,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) nnode = c->nroot; nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); i = lnum - c->main_first; shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; for (h = 1; h < c->lpt_hght; h++) { @@ -1594,19 +1594,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); pnode = dirty_cow_pnode(c, pnode); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 13cb7a4237b..d12535b7fc7 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -646,7 +646,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); return ubifs_get_pnode(c, nnode, iip); diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 109c6ea03bb..daae9e1f538 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -24,7 +24,7 @@ * This file implements functions needed to recover from unclean un-mounts. * When UBIFS is mounted, it checks a flag on the master node to determine if * an un-mount was completed successfully. If not, the process of mounting - * incorparates additional checking and fixing of on-flash data structures. + * incorporates additional checking and fixing of on-flash data structures. * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted * read-only, and the flash is not modified in that case. @@ -1063,8 +1063,21 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { - if (err == -ENOSPC) - dbg_err("could not find a dirty LEB"); + /* + * There are no dirty or empty LEBs subject to here being + * enough for the index. Try to use + * 'ubifs_find_free_leb_for_idx()', which will return any empty + * LEBs (ignoring index requirements). If the index then + * doesn't have enough LEBs the recovery commit will fail - + * which is the same result anyway i.e. recovery fails. So + * there is no problem ignoring index requirements and just + * grabbing a free LEB since we have already established there + * is not a dirty LEB we could have used instead. + */ + if (err == -ENOSPC) { + dbg_rcvry("could not find a dirty LEB"); + goto find_free; + } return err; } ubifs_assert(!(lp.flags & LPROPS_INDEX)); @@ -1139,8 +1152,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) find_free: /* * There is no GC head LEB or the free space in the GC head LEB is too - * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so - * GC is not run. + * small, or there are not dirty LEBs. Allocate gc_lnum by calling + * 'ubifs_find_free_leb_for_idx()' so GC is not run. */ lnum = ubifs_find_free_leb_for_idx(c); if (lnum < 0) { diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 02feb59cefc..0b201114a5a 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,7 +277,7 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) { int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 4d2f2157dd3..5fc5a098897 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1307,6 +1307,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_orphans; err = ubifs_rcvry_gc_commit(c); + if (err) + goto out_orphans; } else { err = take_gc_lnum(c); if (err) @@ -1318,7 +1320,7 @@ static int mount_ubifs(struct ubifs_info *c) */ err = ubifs_leb_unmap(c, c->gc_lnum); if (err) - return err; + goto out_orphans; } err = dbg_check_lprops(c); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2eef553d50c..04310878f44 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); /* commit.c */ int ubifs_bg_thread(void *info); diff --git a/fs/udf/file.c b/fs/udf/file.c index 94e06d6bddb..6e450e01a1b 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -36,7 +36,6 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/aio.h> -#include <linux/smp_lock.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/super.c b/fs/udf/super.c index 612d1e2e285..12bb651e540 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1579,9 +1579,7 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, { struct anchorVolDescPtr *anchor; long main_s, main_e, reserve_s, reserve_e; - struct udf_sb_info *sbi; - sbi = UDF_SB(sb); anchor = (struct anchorVolDescPtr *)bh->b_data; /* Locate the main sequence */ diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c8fb13f83b3..0dce969d6ca 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ xfs_trans_inode.o \ - xfs_trans_item.o \ xfs_utils.o \ xfs_vnodeops.o \ - xfs_rw.o \ - xfs_dmops.o + xfs_rw.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 9f769b5b38f..b2771862fd3 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask) struct posix_acl *acl; int error = -EAGAIN; - xfs_itrace_entry(ip); + trace_xfs_check_acl(ip); /* * If there is no attribute fork no ACL exists on this inode and diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 5895aaf62ac..d24e78f32f3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -21,19 +21,12 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_trans.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_btree.h" #include "xfs_error.h" #include "xfs_rw.h" #include "xfs_iomap.h" @@ -92,18 +85,15 @@ void xfs_count_page_state( struct page *page, int *delalloc, - int *unmapped, int *unwritten) { struct buffer_head *bh, *head; - *delalloc = *unmapped = *unwritten = 0; + *delalloc = *unwritten = 0; bh = head = page_buffers(page); do { - if (buffer_uptodate(bh) && !buffer_mapped(bh)) - (*unmapped) = 1; - else if (buffer_unwritten(bh)) + if (buffer_unwritten(bh)) (*unwritten) = 1; else if (buffer_delay(bh)) (*delalloc) = 1; @@ -212,23 +202,17 @@ xfs_setfilesize( } /* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. + * Schedule IO completion handling on the final put of an ioend. */ STATIC void xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) + struct xfs_ioend *ioend) { if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq; - - wq = (ioend->io_type == IO_UNWRITTEN) ? - xfsconvertd_workqueue : xfsdatad_workqueue; - queue_work(wq, &ioend->io_work); - if (wait) - flush_workqueue(wq); + if (ioend->io_type == IO_UNWRITTEN) + queue_work(xfsconvertd_workqueue, &ioend->io_work); + else + queue_work(xfsdatad_workqueue, &ioend->io_work); } } @@ -272,11 +256,25 @@ xfs_end_io( */ if (error == EAGAIN) { atomic_inc(&ioend->io_remaining); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); /* ensure we don't spin on blocked ioends */ delay(1); - } else + } else { + if (ioend->io_iocb) + aio_complete(ioend->io_iocb, ioend->io_result, 0); xfs_destroy_ioend(ioend); + } +} + +/* + * Call IO completion handling in caller context on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend_sync( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) + xfs_end_io(&ioend->io_work); } /* @@ -309,6 +307,8 @@ xfs_alloc_ioend( atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); ioend->io_offset = 0; ioend->io_size = 0; + ioend->io_iocb = NULL; + ioend->io_result = 0; INIT_WORK(&ioend->io_work, xfs_end_io); return ioend; @@ -358,7 +358,7 @@ xfs_end_bio( bio->bi_end_io = NULL; bio_put(bio); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); } STATIC void @@ -500,7 +500,7 @@ xfs_submit_ioend( } if (bio) xfs_submit_ioend_bio(wbc, ioend, bio); - xfs_finish_ioend(ioend, 0); + xfs_finish_ioend(ioend); } while ((ioend = next) != NULL); } @@ -614,31 +614,30 @@ xfs_map_at_offset( STATIC unsigned int xfs_probe_page( struct page *page, - unsigned int pg_offset, - int mapped) + unsigned int pg_offset) { + struct buffer_head *bh, *head; int ret = 0; if (PageWriteback(page)) return 0; + if (!PageDirty(page)) + return 0; + if (!page->mapping) + return 0; + if (!page_has_buffers(page)) + return 0; - if (page->mapping && PageDirty(page)) { - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; - - bh = head = page_buffers(page); - do { - if (!buffer_uptodate(bh)) - break; - if (mapped != buffer_mapped(bh)) - break; - ret += bh->b_size; - if (ret >= pg_offset) - break; - } while ((bh = bh->b_this_page) != head); - } else - ret = mapped ? 0 : PAGE_CACHE_SIZE; - } + bh = head = page_buffers(page); + do { + if (!buffer_uptodate(bh)) + break; + if (!buffer_mapped(bh)) + break; + ret += bh->b_size; + if (ret >= pg_offset) + break; + } while ((bh = bh->b_this_page) != head); return ret; } @@ -648,8 +647,7 @@ xfs_probe_cluster( struct inode *inode, struct page *startpage, struct buffer_head *bh, - struct buffer_head *head, - int mapped) + struct buffer_head *head) { struct pagevec pvec; pgoff_t tindex, tlast, tloff; @@ -658,7 +656,7 @@ xfs_probe_cluster( /* First sum forwards in this page */ do { - if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) + if (!buffer_uptodate(bh) || !buffer_mapped(bh)) return total; total += bh->b_size; } while ((bh = bh->b_this_page) != head); @@ -692,7 +690,7 @@ xfs_probe_cluster( pg_offset = PAGE_CACHE_SIZE; if (page->index == tindex && trylock_page(page)) { - pg_len = xfs_probe_page(page, pg_offset, mapped); + pg_len = xfs_probe_page(page, pg_offset); unlock_page(page); } @@ -761,7 +759,6 @@ xfs_convert_page( struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, - int startio, int all_bh) { struct buffer_head *bh, *head; @@ -832,19 +829,14 @@ xfs_convert_page( ASSERT(imap->br_startblock != DELAYSTARTBLOCK); xfs_map_at_offset(inode, bh, imap, offset); - if (startio) { - xfs_add_to_ioend(inode, bh, offset, - type, ioendp, done); - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } + xfs_add_to_ioend(inode, bh, offset, type, + ioendp, done); + page_dirty--; count++; } else { type = IO_NEW; - if (buffer_mapped(bh) && all_bh && startio) { + if (buffer_mapped(bh) && all_bh) { lock_buffer(bh); xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -859,14 +851,12 @@ xfs_convert_page( if (uptodate && bh == head) SetPageUptodate(page); - if (startio) { - if (count) { - wbc->nr_to_write--; - if (wbc->nr_to_write <= 0) - done = 1; - } - xfs_start_page_writeback(page, !page_dirty, count); + if (count) { + wbc->nr_to_write--; + if (wbc->nr_to_write <= 0) + done = 1; } + xfs_start_page_writeback(page, !page_dirty, count); return done; fail_unlock_page: @@ -886,7 +876,6 @@ xfs_cluster_write( struct xfs_bmbt_irec *imap, xfs_ioend_t **ioendp, struct writeback_control *wbc, - int startio, int all_bh, pgoff_t tlast) { @@ -902,7 +891,7 @@ xfs_cluster_write( for (i = 0; i < pagevec_count(&pvec); i++) { done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc, startio, all_bh); + imap, ioendp, wbc, all_bh); if (done) break; } @@ -981,7 +970,7 @@ xfs_aops_discard_page( */ error = xfs_bmapi(NULL, ip, offset_fsb, 1, XFS_BMAPI_ENTIRE, NULL, 0, &imap, - &nimaps, NULL, NULL); + &nimaps, NULL); if (error) { /* something screwed, just bail */ @@ -1009,7 +998,7 @@ xfs_aops_discard_page( */ xfs_bmap_init(&flist, &firstblock); error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, - &flist, NULL, &done); + &flist, &done); ASSERT(!flist.xbf_count && !flist.xbf_first); if (error) { @@ -1032,50 +1021,66 @@ out_invalidate: } /* - * Calling this without startio set means we are being asked to make a dirty - * page ready for freeing it's buffers. When called with startio set then - * we are coming from writepage. + * Write out a dirty page. * - * When called with startio set it is important that we write the WHOLE - * page if possible. - * The bh->b_state's cannot know if any of the blocks or which block for - * that matter are dirty due to mmap writes, and therefore bh uptodate is - * only valid if the page itself isn't completely uptodate. Some layers - * may clear the page dirty flag prior to calling write page, under the - * assumption the entire page will be written out; by not writing out the - * whole page the page can be reused before all valid dirty data is - * written out. Note: in the case of a page that has been dirty'd by - * mapwrite and but partially setup by block_prepare_write the - * bh->b_states's will not agree and only ones setup by BPW/BCW will have - * valid state, thus the whole page must be written out thing. + * For delalloc space on the page we need to allocate space and flush it. + * For unwritten space on the page we need to start the conversion to + * regular allocated space. + * For any other dirty buffer heads on the page we should flush them. + * + * If we detect that a transaction would be required to flush the page, we + * have to check the process flags first, if we are already in a transaction + * or disk I/O during allocations is off, we need to fail the writepage and + * redirty the page. */ - STATIC int -xfs_page_state_convert( - struct inode *inode, - struct page *page, - struct writeback_control *wbc, - int startio, - int unmapped) /* also implies page uptodate */ +xfs_vm_writepage( + struct page *page, + struct writeback_control *wbc) { + struct inode *inode = page->mapping->host; + int delalloc, unwritten; struct buffer_head *bh, *head; struct xfs_bmbt_irec imap; xfs_ioend_t *ioend = NULL, *iohead = NULL; loff_t offset; - unsigned long p_offset = 0; unsigned int type; __uint64_t end_offset; pgoff_t end_index, last_index; ssize_t size, len; int flags, err, imap_valid = 0, uptodate = 1; - int page_dirty, count = 0; - int trylock = 0; - int all_bh = unmapped; + int count = 0; + int all_bh = 0; - if (startio) { - if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) - trylock |= BMAPI_TRYLOCK; - } + trace_xfs_writepage(inode, page, 0); + + ASSERT(page_has_buffers(page)); + + /* + * Refuse to write the page out if we are called from reclaim context. + * + * This avoids stack overflows when called from deeply used stacks in + * random callers for direct reclaim or memcg reclaim. We explicitly + * allow reclaim from kswapd as the stack usage there is relatively low. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) + goto out_fail; + + /* + * We need a transaction if there are delalloc or unwritten buffers + * on the page. + * + * If we need a transaction and the process flags say we are already + * in a transaction, or no IO is allowed then mark the page dirty + * again and leave the page as is. + */ + xfs_count_page_state(page, &delalloc, &unwritten); + if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) + goto out_fail; /* Is this page beyond the end of the file? */ offset = i_size_read(inode); @@ -1084,50 +1089,33 @@ xfs_page_state_convert( if (page->index >= end_index) { if ((page->index >= end_index + 1) || !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { - if (startio) - unlock_page(page); + unlock_page(page); return 0; } } - /* - * page_dirty is initially a count of buffers on the page before - * EOF and is decremented as we move each into a cleanable state. - * - * Derivation: - * - * End offset is the highest offset that this page should represent. - * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) - * will evaluate non-zero and be less than PAGE_CACHE_SIZE and - * hence give us the correct page_dirty count. On any other page, - * it will be zero and in that case we need page_dirty to be the - * count of buffers on the page. - */ end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + offset); len = 1 << inode->i_blkbits; - p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), - PAGE_CACHE_SIZE); - p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; - page_dirty = p_offset / len; bh = head = page_buffers(page); offset = page_offset(page); flags = BMAPI_READ; type = IO_NEW; - /* TODO: cleanup count and page_dirty */ - do { if (offset >= end_offset) break; if (!buffer_uptodate(bh)) uptodate = 0; - if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { - /* - * the iomap is actually still valid, but the ioend - * isn't. shouldn't happen too often. - */ + + /* + * A hole may still be marked uptodate because discard_buffer + * leaves the flag set. + */ + if (!buffer_mapped(bh) && buffer_uptodate(bh)) { + ASSERT(!buffer_dirty(bh)); imap_valid = 0; continue; } @@ -1135,19 +1123,7 @@ xfs_page_state_convert( if (imap_valid) imap_valid = xfs_imap_valid(inode, &imap, offset); - /* - * First case, map an unwritten extent and prepare for - * extent state conversion transaction on completion. - * - * Second case, allocate space for a delalloc buffer. - * We can return EAGAIN here in the release page case. - * - * Third case, an unmapped buffer was found, and we are - * in a path where we need to write the whole page out. - */ - if (buffer_unwritten(bh) || buffer_delay(bh) || - ((buffer_uptodate(bh) || PageUptodate(page)) && - !buffer_mapped(bh) && (unmapped || startio))) { + if (buffer_unwritten(bh) || buffer_delay(bh)) { int new_ioend = 0; /* @@ -1161,15 +1137,16 @@ xfs_page_state_convert( flags = BMAPI_WRITE | BMAPI_IGNSTATE; } else if (buffer_delay(bh)) { type = IO_DELAY; - flags = BMAPI_ALLOCATE | trylock; - } else { - type = IO_NEW; - flags = BMAPI_WRITE | BMAPI_MMAP; + flags = BMAPI_ALLOCATE; + + if (wbc->sync_mode == WB_SYNC_NONE && + wbc->nonblocking) + flags |= BMAPI_TRYLOCK; } if (!imap_valid) { /* - * if we didn't have a valid mapping then we + * If we didn't have a valid mapping then we * need to ensure that we put the new mapping * in a new ioend structure. This needs to be * done to ensure that the ioends correctly @@ -1177,14 +1154,7 @@ xfs_page_state_convert( * for unwritten extent conversion. */ new_ioend = 1; - if (type == IO_NEW) { - size = xfs_probe_cluster(inode, - page, bh, head, 0); - } else { - size = len; - } - - err = xfs_map_blocks(inode, offset, size, + err = xfs_map_blocks(inode, offset, len, &imap, flags); if (err) goto error; @@ -1193,19 +1163,11 @@ xfs_page_state_convert( } if (imap_valid) { xfs_map_at_offset(inode, bh, &imap, offset); - if (startio) { - xfs_add_to_ioend(inode, bh, offset, - type, &ioend, - new_ioend); - } else { - set_buffer_dirty(bh); - unlock_buffer(bh); - mark_buffer_dirty(bh); - } - page_dirty--; + xfs_add_to_ioend(inode, bh, offset, type, + &ioend, new_ioend); count++; } - } else if (buffer_uptodate(bh) && startio) { + } else if (buffer_uptodate(bh)) { /* * we got here because the buffer is already mapped. * That means it must already have extents allocated @@ -1213,8 +1175,7 @@ xfs_page_state_convert( */ if (!imap_valid || flags != BMAPI_READ) { flags = BMAPI_READ; - size = xfs_probe_cluster(inode, page, bh, - head, 1); + size = xfs_probe_cluster(inode, page, bh, head); err = xfs_map_blocks(inode, offset, size, &imap, flags); if (err) @@ -1233,18 +1194,16 @@ xfs_page_state_convert( */ type = IO_NEW; if (trylock_buffer(bh)) { - ASSERT(buffer_mapped(bh)); if (imap_valid) all_bh = 1; xfs_add_to_ioend(inode, bh, offset, type, &ioend, !imap_valid); - page_dirty--; count++; } else { imap_valid = 0; } - } else if ((buffer_uptodate(bh) || PageUptodate(page)) && - (unmapped || startio)) { + } else if (PageUptodate(page)) { + ASSERT(buffer_mapped(bh)); imap_valid = 0; } @@ -1256,8 +1215,7 @@ xfs_page_state_convert( if (uptodate && bh == head) SetPageUptodate(page); - if (startio) - xfs_start_page_writeback(page, 1, count); + xfs_start_page_writeback(page, 1, count); if (ioend && imap_valid) { xfs_off_t end_index; @@ -1275,131 +1233,27 @@ xfs_page_state_convert( end_index = last_index; xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, startio, all_bh, end_index); + wbc, all_bh, end_index); } if (iohead) xfs_submit_ioend(wbc, iohead); - return page_dirty; + return 0; error: if (iohead) xfs_cancel_ioend(iohead); - /* - * If it's delalloc and we have nowhere to put it, - * throw it away, unless the lower layers told - * us to try again. - */ - if (err != -EAGAIN) { - if (!unmapped) - xfs_aops_discard_page(page); - ClearPageUptodate(page); - } + xfs_aops_discard_page(page); + ClearPageUptodate(page); + unlock_page(page); return err; -} - -/* - * writepage: Called from one of two places: - * - * 1. we are flushing a delalloc buffer head. - * - * 2. we are writing out a dirty page. Typically the page dirty - * state is cleared before we get here. In this case is it - * conceivable we have no buffer heads. - * - * For delalloc space on the page we need to allocate space and - * flush it. For unmapped buffer heads on the page we should - * allocate space if the page is uptodate. For any other dirty - * buffer heads on the page we should flush them. - * - * If we detect that a transaction would be required to flush - * the page, we have to check the process flags first, if we - * are already in a transaction or disk I/O during allocations - * is off, we need to fail the writepage and redirty the page. - */ - -STATIC int -xfs_vm_writepage( - struct page *page, - struct writeback_control *wbc) -{ - int error; - int need_trans; - int delalloc, unmapped, unwritten; - struct inode *inode = page->mapping->host; - - trace_xfs_writepage(inode, page, 0); - - /* - * Refuse to write the page out if we are called from reclaim context. - * - * This is primarily to avoid stack overflows when called from deep - * used stacks in random callers for direct reclaim, but disabling - * reclaim for kswap is a nice side-effect as kswapd causes rather - * suboptimal I/O patters, too. - * - * This should really be done by the core VM, but until that happens - * filesystems like XFS, btrfs and ext4 have to take care of this - * by themselves. - */ - if (current->flags & PF_MEMALLOC) - goto out_fail; - - /* - * We need a transaction if: - * 1. There are delalloc buffers on the page - * 2. The page is uptodate and we have unmapped buffers - * 3. The page is uptodate and we have no buffers - * 4. There are unwritten buffers on the page - */ - - if (!page_has_buffers(page)) { - unmapped = 1; - need_trans = 1; - } else { - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!PageUptodate(page)) - unmapped = 0; - need_trans = delalloc + unmapped + unwritten; - } - - /* - * If we need a transaction and the process flags say - * we are already in a transaction, or no IO is allowed - * then mark the page dirty again and leave the page - * as is. - */ - if (current_test_flags(PF_FSTRANS) && need_trans) - goto out_fail; - - /* - * Delay hooking up buffer heads until we have - * made our go/no-go decision. - */ - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); - - /* - * Convert delayed allocate, unwritten or unmapped space - * to real space and flush out to disk. - */ - error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); - if (error == -EAGAIN) - goto out_fail; - if (unlikely(error < 0)) - goto out_unlock; - - return 0; out_fail: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; -out_unlock: - unlock_page(page); - return error; } STATIC int @@ -1413,65 +1267,27 @@ xfs_vm_writepages( /* * Called to move a page into cleanable state - and from there - * to be released. Possibly the page is already clean. We always + * to be released. The page should already be clean. We always * have buffer heads in this call. * - * Returns 0 if the page is ok to release, 1 otherwise. - * - * Possible scenarios are: - * - * 1. We are being called to release a page which has been written - * to via regular I/O. buffer heads will be dirty and possibly - * delalloc. If no delalloc buffer heads in this case then we - * can just return zero. - * - * 2. We are called to release a page which has been written via - * mmap, all we need to do is ensure there is no delalloc - * state in the buffer heads, if not we can let the caller - * free them and we should come back later via writepage. + * Returns 1 if the page is ok to release, 0 otherwise. */ STATIC int xfs_vm_releasepage( struct page *page, gfp_t gfp_mask) { - struct inode *inode = page->mapping->host; - int dirty, delalloc, unmapped, unwritten; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; - - trace_xfs_releasepage(inode, page, 0); + int delalloc, unwritten; - if (!page_has_buffers(page)) - return 0; + trace_xfs_releasepage(page->mapping->host, page, 0); - xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - if (!delalloc && !unwritten) - goto free_buffers; + xfs_count_page_state(page, &delalloc, &unwritten); - if (!(gfp_mask & __GFP_FS)) + if (WARN_ON(delalloc)) return 0; - - /* If we are already inside a transaction or the thread cannot - * do I/O, we cannot release this page. - */ - if (current_test_flags(PF_FSTRANS)) + if (WARN_ON(unwritten)) return 0; - /* - * Convert delalloc space to real space, do not flush the - * data out to disk, that will be done by the caller. - * Never need to allocate space here - we will always - * come back to writepage in that case. - */ - dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); - if (dirty == 0 && !unwritten) - goto free_buffers; - return 0; - -free_buffers: return try_to_free_buffers(page); } @@ -1481,9 +1297,9 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - int direct, - bmapi_flags_t flags) + int direct) { + int flags = create ? BMAPI_WRITE : BMAPI_READ; struct xfs_bmbt_irec imap; xfs_off_t offset; ssize_t size; @@ -1498,8 +1314,11 @@ __xfs_get_blocks( if (!create && direct && offset >= i_size_read(inode)) return 0; - error = xfs_iomap(XFS_I(inode), offset, size, - create ? flags : BMAPI_READ, &imap, &nimap, &new); + if (direct && create) + flags |= BMAPI_DIRECT; + + error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, + &new); if (error) return -error; if (nimap == 0) @@ -1579,8 +1398,7 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, - bh_result, create, 0, BMAPI_WRITE); + return __xfs_get_blocks(inode, iblock, bh_result, create, 0); } STATIC int @@ -1590,56 +1408,30 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, - bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); + return __xfs_get_blocks(inode, iblock, bh_result, create, 1); } +/* + * Complete a direct I/O write request. + * + * If the private argument is non-NULL __xfs_get_blocks signals us that we + * need to issue a transaction to convert the range from unwritten to written + * extents. In case this is regular synchronous I/O we just call xfs_end_io + * to do this and we are done. But in case this was a successfull AIO + * request this handler is called from interrupt context, from which we + * can't start transactions. In that case offload the I/O completion to + * the workqueues we also use for buffered I/O completion. + */ STATIC void -xfs_end_io_direct( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private, - int ret, - bool is_async) +xfs_end_io_direct_write( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private, + int ret, + bool is_async) { - xfs_ioend_t *ioend = iocb->private; - - /* - * Non-NULL private data means we need to issue a transaction to - * convert a range from unwritten to written extents. This needs - * to happen from process context but aio+dio I/O completion - * happens from irq context so we need to defer it to a workqueue. - * This is not necessary for synchronous direct I/O, but we do - * it anyway to keep the code uniform and simpler. - * - * Well, if only it were that simple. Because synchronous direct I/O - * requires extent conversion to occur *before* we return to userspace, - * we have to wait for extent conversion to complete. Look at the - * iocb that has been passed to us to determine if this is AIO or - * not. If it is synchronous, tell xfs_finish_ioend() to kick the - * workqueue and wait for it to complete. - * - * The core direct I/O code might be changed to always call the - * completion handler in the future, in which case all this can - * go away. - */ - ioend->io_offset = offset; - ioend->io_size = size; - if (ioend->io_type == IO_READ) { - xfs_finish_ioend(ioend, 0); - } else if (private && size > 0) { - xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); - } else { - /* - * A direct I/O write ioend starts it's life in unwritten - * state in case they map an unwritten extent. This write - * didn't map an unwritten extent so switch it's completion - * handler. - */ - ioend->io_type = IO_NEW; - xfs_finish_ioend(ioend, 0); - } + struct xfs_ioend *ioend = iocb->private; /* * blockdev_direct_IO can return an error even after the I/O @@ -1648,8 +1440,27 @@ xfs_end_io_direct( */ iocb->private = NULL; - if (is_async) - aio_complete(iocb, ret, 0); + ioend->io_offset = offset; + ioend->io_size = size; + if (private && size > 0) + ioend->io_type = IO_UNWRITTEN; + + if (is_async) { + /* + * If we are converting an unwritten extent we need to delay + * the AIO completion until after the unwrittent extent + * conversion has completed, otherwise do it ASAP. + */ + if (ioend->io_type == IO_UNWRITTEN) { + ioend->io_iocb = iocb; + ioend->io_result = ret; + } else { + aio_complete(iocb, ret, 0); + } + xfs_finish_ioend(ioend); + } else { + xfs_finish_ioend_sync(ioend); + } } STATIC ssize_t @@ -1660,23 +1471,26 @@ xfs_vm_direct_IO( loff_t offset, unsigned long nr_segs) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct block_device *bdev; - ssize_t ret; - - bdev = xfs_find_bdev_for_inode(inode); - - iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? - IO_UNWRITTEN : IO_READ); - - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct block_device *bdev = xfs_find_bdev_for_inode(inode); + ssize_t ret; + + if (rw & WRITE) { + iocb->private = xfs_alloc_ioend(inode, IO_NEW); + + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct_write); + if (ret != -EIOCBQUEUED && iocb->private) + xfs_destroy_ioend(iocb->private); + } else { + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + NULL); + } - if (unlikely(ret != -EIOCBQUEUED && iocb->private)) - xfs_destroy_ioend(iocb->private); return ret; } @@ -1691,8 +1505,8 @@ xfs_vm_write_begin( void **fsdata) { *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - xfs_get_blocks); + return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, fsdata, xfs_get_blocks); } STATIC sector_t @@ -1703,7 +1517,7 @@ xfs_vm_bmap( struct inode *inode = (struct inode *)mapping->host; struct xfs_inode *ip = XFS_I(inode); - xfs_itrace_entry(XFS_I(inode)); + trace_xfs_vm_bmap(XFS_I(inode)); xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); xfs_iunlock(ip, XFS_IOLOCK_SHARED); diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 9f566d92ae3..c5057fb6237 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -47,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); extern void xfs_ioend_init(void); extern void xfs_ioend_wait(struct xfs_inode *); -extern void xfs_count_page_state(struct page *, int *, int *, int *); +extern void xfs_count_page_state(struct page *, int *, int *); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 649ade8ef59..ea79072f521 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -39,13 +39,12 @@ #include "xfs_inum.h" #include "xfs_log.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_trace.h" static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); -STATIC int xfsbufd_wakeup(int, gfp_t); +STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); static struct shrinker xfs_buf_shake = { .shrink = xfsbufd_wakeup, @@ -340,7 +339,7 @@ _xfs_buf_lookup_pages( __func__, gfp_mask); XFS_STATS_INC(xb_page_retries); - xfsbufd_wakeup(0, gfp_mask); + xfsbufd_wakeup(NULL, 0, gfp_mask); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } @@ -579,9 +578,9 @@ _xfs_buf_read( XBF_READ_AHEAD | _XBF_RUN_QUEUES); status = xfs_buf_iorequest(bp); - if (!status && !(flags & XBF_ASYNC)) - status = xfs_buf_iowait(bp); - return status; + if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) + return status; + return xfs_buf_iowait(bp); } xfs_buf_t * @@ -897,36 +896,6 @@ xfs_buf_unlock( trace_xfs_buf_unlock(bp, _RET_IP_); } - -/* - * Pinning Buffer Storage in Memory - * Ensure that no attempt to force a buffer to disk will succeed. - */ -void -xfs_buf_pin( - xfs_buf_t *bp) -{ - trace_xfs_buf_pin(bp, _RET_IP_); - atomic_inc(&bp->b_pin_count); -} - -void -xfs_buf_unpin( - xfs_buf_t *bp) -{ - trace_xfs_buf_unpin(bp, _RET_IP_); - - if (atomic_dec_and_test(&bp->b_pin_count)) - wake_up_all(&bp->b_waiters); -} - -int -xfs_buf_ispin( - xfs_buf_t *bp) -{ - return atomic_read(&bp->b_pin_count); -} - STATIC void xfs_buf_wait_unpin( xfs_buf_t *bp) @@ -1018,13 +987,12 @@ xfs_bwrite( { int error; - bp->b_strat = xfs_bdstrat_cb; bp->b_mount = mp; bp->b_flags |= XBF_WRITE; bp->b_flags &= ~(XBF_ASYNC | XBF_READ); xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); error = xfs_buf_iowait(bp); if (error) @@ -1040,7 +1008,6 @@ xfs_bdwrite( { trace_xfs_buf_bdwrite(bp, _RET_IP_); - bp->b_strat = xfs_bdstrat_cb; bp->b_mount = mp; bp->b_flags &= ~XBF_READ; @@ -1075,7 +1042,6 @@ xfs_bioerror( XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); xfs_biodone(bp); return EIO; @@ -1105,7 +1071,6 @@ xfs_bioerror_relse( XFS_BUF_DONE(bp); XFS_BUF_STALE(bp); XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. @@ -1311,8 +1276,19 @@ submit_io: if (size) goto next_chunk; } else { - bio_put(bio); + /* + * if we get here, no pages were added to the bio. However, + * we can't just error out here - if the pages are locked then + * we have to unlock them otherwise we can hang on a later + * access to the page. + */ xfs_buf_ioerror(bp, EIO); + if (bp->b_flags & _XBF_PAGE_LOCKED) { + int i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + bio_put(bio); } } @@ -1762,6 +1738,7 @@ xfs_buf_runall_queues( STATIC int xfsbufd_wakeup( + struct shrinker *shrink, int priority, gfp_t mask) { @@ -1803,7 +1780,7 @@ xfs_buf_delwri_split( trace_xfs_buf_delwri_split(bp, _RET_IP_); ASSERT(bp->b_flags & XBF_DELWRI); - if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { + if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); @@ -1888,7 +1865,7 @@ xfsbufd( struct xfs_buf *bp; bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); count++; } if (count) @@ -1935,7 +1912,7 @@ xfs_flush_buftarg( bp->b_flags &= ~XBF_ASYNC; list_add(&bp->b_list, &wait_list); } - xfs_buf_iostrategy(bp); + xfs_bdstrat_cb(bp); } if (wait) { diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 5fbecefa5df..d072e5ff923 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -44,57 +44,57 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -typedef enum { - XBF_READ = (1 << 0), /* buffer intended for reading from device */ - XBF_WRITE = (1 << 1), /* buffer intended for writing to device */ - XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */ - XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ - XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ - XBF_DELWRI = (1 << 6), /* buffer has dirty pages */ - XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ - XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ - XBF_ORDERED = (1 << 11), /* use ordered writes */ - XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ - XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ - - /* flags used only as arguments to access routines */ - XBF_LOCK = (1 << 14), /* lock requested */ - XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ - XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ - - /* flags used only internally */ - _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ - _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ - _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ - _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ - - /* - * Special flag for supporting metadata blocks smaller than a FSB. - * - * In this case we can have multiple xfs_buf_t on a single page and - * need to lock out concurrent xfs_buf_t readers as they only - * serialise access to the buffer. - * - * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation - * between reads of the page. Hence we can have one thread read the - * page and modify it, but then race with another thread that thinks - * the page is not up-to-date and hence reads it again. - * - * The result is that the first modifcation to the page is lost. - * This sort of AGF/AGI reading race can happen when unlinking inodes - * that require truncation and results in the AGI unlinked list - * modifications being lost. - */ - _XBF_PAGE_LOCKED = (1 << 22), - - /* - * If we try a barrier write, but it fails we have to communicate - * this to the upper layers. Unfortunately b_error gets overwritten - * when the buffer is re-issued so we have to add another flag to - * keep this information. - */ - _XFS_BARRIER_FAILED = (1 << 23), -} xfs_buf_flags_t; +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ +#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ +#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */ +#define XBF_ORDERED (1 << 11)/* use ordered writes */ +#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ +#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ + +/* flags used only as arguments to access routines */ +#define XBF_LOCK (1 << 14)/* lock requested */ +#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ + +/* flags used only internally */ +#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ +#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ +#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ +#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ + +/* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ +#define _XBF_PAGE_LOCKED (1 << 22) + +/* + * If we try a barrier write, but it fails we have to communicate + * this to the upper layers. Unfortunately b_error gets overwritten + * when the buffer is re-issued so we have to add another flag to + * keep this information. + */ +#define _XFS_BARRIER_FAILED (1 << 23) + +typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ { XBF_READ, "READ" }, \ @@ -187,7 +187,6 @@ typedef struct xfs_buf { atomic_t b_io_remaining; /* #outstanding I/O requests */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ - xfs_buf_bdstrat_t b_strat; /* pre-write function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; @@ -245,11 +244,6 @@ extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); -static inline int xfs_buf_iostrategy(xfs_buf_t *bp) -{ - return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp); -} - static inline int xfs_buf_geterror(xfs_buf_t *bp) { return bp ? bp->b_error : ENOMEM; @@ -258,11 +252,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp) /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); -/* Pinning Buffer Storage in Memory */ -extern void xfs_buf_pin(xfs_buf_t *); -extern void xfs_buf_unpin(xfs_buf_t *); -extern int xfs_buf_ispin(xfs_buf_t *); - /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); extern void xfs_buf_delwri_promote(xfs_buf_t *); @@ -326,8 +315,6 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone) #define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func)) #define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL) -#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func)) -#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL) #define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv) #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) @@ -351,7 +338,7 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) #define XFS_BUF_SET_REF(bp, ref) do { } while (0) -#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp) +#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) #define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp) #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) @@ -370,8 +357,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) xfs_buf_rele(bp); } -#define xfs_bpin(bp) xfs_buf_pin(bp) -#define xfs_bunpin(bp) xfs_buf_unpin(bp) #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) #define xfs_biomove(bp, off, len, data, rw) \ diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h deleted file mode 100644 index a8b0b1685ee..00000000000 --- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DMAPI_PRIV_H__ -#define __XFS_DMAPI_PRIV_H__ - -/* - * Based on IO_ISDIRECT, decide which i_ flag is set. - */ -#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IMUX : 0) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) - -#endif /*__XFS_DMAPI_PRIV_H__*/ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 846b75aeb2a..3764d74790e 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -23,13 +23,13 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_export.h" #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_inode_item.h" +#include "xfs_trace.h" /* * Note that we only accept fileids which are long enough rather than allow @@ -128,13 +128,11 @@ xfs_nfs_get_inode( return ERR_PTR(-ESTALE); /* - * The XFS_IGET_BULKSTAT means that an invalid inode number is just - * fine and not an indication of a corrupted filesystem. Because - * clients can send any kind of invalid file handle, e.g. after - * a restore on the server we have to deal with this case gracefully. + * The XFS_IGET_UNTRUSTED means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem as clients can + * send invalid file handles and we have to handle it gracefully.. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, - XFS_ILOCK_SHARED, &ip, 0); + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); if (error) { /* * EINVAL means the inode cluster doesn't exist anymore. @@ -149,11 +147,10 @@ xfs_nfs_get_inode( } if (ip->i_d.di_gen != generation) { - xfs_iput_new(ip, XFS_ILOCK_SHARED); + IRELE(ip); return ERR_PTR(-ENOENT); } - xfs_iunlock(ip, XFS_ILOCK_SHARED); return VFS_I(ip); } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 257a56b127c..ba8ad422a16 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -22,23 +22,15 @@ #include "xfs_inum.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_trans.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_vnodeops.h" #include "xfs_da_btree.h" #include "xfs_ioctl.h" @@ -108,7 +100,7 @@ xfs_file_fsync( int error = 0; int log_flushed = 0; - xfs_itrace_entry(ip); + trace_xfs_file_fsync(ip); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -XFS_ERROR(EIO); @@ -166,8 +158,7 @@ xfs_file_fsync( * transaction. So we play it safe and fire off the * transaction anyway. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); @@ -275,20 +266,6 @@ xfs_file_aio_read( mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { - int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); - int iolock = XFS_IOLOCK_SHARED; - - ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size, - dmflags, &iolock); - if (ret) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - if (unlikely(ioflags & IO_ISDIRECT)) - mutex_unlock(&inode->i_mutex); - return ret; - } - } - if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) { ret = -xfs_flushinval_pages(ip, @@ -321,7 +298,6 @@ xfs_file_splice_read( unsigned int flags) { struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - struct xfs_mount *mp = ip->i_mount; int ioflags = 0; ssize_t ret; @@ -335,18 +311,6 @@ xfs_file_splice_read( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { - int iolock = XFS_IOLOCK_SHARED; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, - FILP_DELAY_FLAG(infilp), &iolock); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return -error; - } - } - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); @@ -367,7 +331,6 @@ xfs_file_splice_write( { struct inode *inode = outfilp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; xfs_fsize_t isize, new_size; int ioflags = 0; ssize_t ret; @@ -382,18 +345,6 @@ xfs_file_splice_write( xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { - int iolock = XFS_IOLOCK_EXCL; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, - FILP_DELAY_FLAG(outfilp), &iolock); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; - } - } - new_size = *ppos + count; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -463,7 +414,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, - &nimaps, NULL, NULL); + &nimaps, NULL); if (error) { return error; } @@ -558,7 +509,7 @@ xfs_zero_eof( nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, - 0, NULL, 0, &imap, &nimaps, NULL, NULL); + 0, NULL, 0, &imap, &nimaps, NULL); if (error) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; @@ -627,7 +578,6 @@ xfs_file_aio_write( int ioflags = 0; xfs_fsize_t isize, new_size; int iolock; - int eventsent = 0; size_t ocount = 0, count; int need_i_mutex; @@ -673,33 +623,6 @@ start: goto out_unlock_mutex; } - if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && - !(ioflags & IO_INVIS) && !eventsent)) { - int dmflags = FILP_DELAY_FLAG(file); - - if (need_i_mutex) - dmflags |= DM_FLAGS_IMUX; - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip, - pos, count, dmflags, &iolock); - if (error) { - goto out_unlock_internal; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - eventsent = 1; - - /* - * The iolock was dropped and reacquired in XFS_SEND_DATA - * so we have to recheck the size when appending. - * We will only "goto start;" once, since having sent the - * event prevents another call to XFS_SEND_DATA, which is - * what allows the size to change in the first place. - */ - if ((file->f_flags & O_APPEND) && pos != ip->i_size) - goto start; - } - if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? @@ -830,22 +753,6 @@ write_retry: xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if (ret == -ENOSPC && - DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { - xfs_iunlock(ip, iolock); - if (need_i_mutex) - mutex_unlock(&inode->i_mutex); - error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip, - DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL, - 0, 0, 0); /* Delay flag intentionally unused */ - if (need_i_mutex) - mutex_lock(&inode->i_mutex); - xfs_ilock(ip, iolock); - if (error) - goto out_unlock_internal; - goto start; - } - error = -ret; if (ret <= 0) goto out_unlock_internal; @@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = { .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, -#ifdef HAVE_FOP_OPEN_EXEC - .open_exec = xfs_file_open_exec, -#endif }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index b6918d76bc7..1f279b012f9 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -21,10 +21,6 @@ #include "xfs_inode.h" #include "xfs_trace.h" -int fs_noerr(void) { return 0; } -int fs_nosys(void) { return ENOSYS; } -void fs_noval(void) { return; } - /* * note: all filemap functions return negative error codes. These * need to be inverted before returning to the xfs core functions. diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h deleted file mode 100644 index 82bb19b2599..00000000000 --- a/fs/xfs/linux-2.6/xfs_fs_subr.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_FS_SUBR_H__ -#define __XFS_FS_SUBR_H__ - -extern int fs_noerr(void); -extern int fs_nosys(void); -extern void fs_noval(void); - -#endif /* __XFS_FS_SUBR_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 699b60cbab9..237f5ffb2ee 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -23,24 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ioctl.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_itable.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -679,10 +670,9 @@ xfs_ioc_bulkstat( error = xfs_bulkstat_single(mp, &inlast, bulkreq.ubuffer, &done); else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, - (bulkstat_one_pf)xfs_bulkstat_one, NULL, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + &done); if (error) return -error; @@ -909,7 +899,7 @@ xfs_ioctl_setattr( struct xfs_dquot *olddquot = NULL; int code; - xfs_itrace_entry(ip); + trace_xfs_ioctl_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); @@ -1044,8 +1034,7 @@ xfs_ioctl_setattr( } } - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); /* * Change file ownership. Must be the owner or privileged. @@ -1117,16 +1106,7 @@ xfs_ioctl_setattr( xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); - if (code) - return code; - - if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) { - XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0, - (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0); - } - - return 0; + return code; error_return: xfs_qm_dqrele(udqp); @@ -1302,7 +1282,7 @@ xfs_file_ioctl( if (filp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - xfs_itrace_entry(ip); + trace_xfs_file_ioctl(ip); switch (cmd) { case XFS_IOC_ALLOCSP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 9287135e9bf..6c83f7f62dc 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -28,12 +28,8 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" #include "xfs_vnode.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -237,15 +233,12 @@ xfs_bulkstat_one_compat( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ - xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ int *stat) /* BULKSTAT_RV_... */ { return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, bno, - ubused, dibuff, stat); + xfs_bulkstat_one_fmt_compat, + ubused, stat); } /* copied from xfs_ioctl.c */ @@ -298,13 +291,11 @@ xfs_compat_ioc_bulkstat( int res; error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), - NULL, 0, NULL, NULL, &res); + sizeof(compat_xfs_bstat_t), 0, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, NULL, - sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bulkreq.ubuffer, &done); } else error = XFS_ERROR(EINVAL); if (error) @@ -549,7 +540,7 @@ xfs_file_compat_ioctl( if (filp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - xfs_itrace_entry(ip); + trace_xfs_file_compat_ioctl(ip); switch (cmd) { /* No size or alignment issues on any arch */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 44f0b2de153..536b81e63a3 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -24,21 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" @@ -496,7 +488,7 @@ xfs_vn_getattr( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - xfs_itrace_entry(ip); + trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index facfb323a70..998a9d7fb9c 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -87,7 +87,6 @@ #include <xfs_aops.h> #include <xfs_super.h> #include <xfs_globals.h> -#include <xfs_fs_subr.h> #include <xfs_buf.h> /* diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 067cafbfc63..29b9d642e93 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_dmapi.h" #include "xfs_sb.h" #include "xfs_inum.h" #include "xfs_log.h" @@ -69,15 +68,15 @@ xfs_fs_set_xstate( if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - if (uflags & XFS_QUOTA_UDQ_ACCT) + if (uflags & FS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; - if (uflags & XFS_QUOTA_PDQ_ACCT) + if (uflags & FS_QUOTA_PDQ_ACCT) flags |= XFS_PQUOTA_ACCT; - if (uflags & XFS_QUOTA_GDQ_ACCT) + if (uflags & FS_QUOTA_GDQ_ACCT) flags |= XFS_GQUOTA_ACCT; - if (uflags & XFS_QUOTA_UDQ_ENFD) + if (uflags & FS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; - if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) + if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) flags |= XFS_OQUOTA_ENFD; switch (op) { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f2d1718c916..758df94690e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -25,14 +25,11 @@ #include "xfs_ag.h" #include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -43,7 +40,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_fsops.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -94,7 +90,6 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and * unwritten extent conversion */ #define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ -#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ #define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ #define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ @@ -116,9 +111,6 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ -#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ @@ -172,15 +164,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base) STATIC int xfs_parseargs( struct xfs_mount *mp, - char *options, - char **mtpt) + char *options) { struct super_block *sb = mp->m_super; char *this_char, *value, *eov; int dsunit = 0; int dswidth = 0; int iosize = 0; - int dmapi_implies_ikeep = 1; __uint8_t iosizelog = 0; /* @@ -243,15 +233,10 @@ xfs_parseargs( if (!mp->m_logname) return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { - if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", - this_char); - return EINVAL; - } - *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!*mtpt) - return ENOMEM; + cmn_err(CE_WARN, + "XFS: %s option not allowed on this system", + this_char); + return EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -288,8 +273,6 @@ xfs_parseargs( mp->m_flags &= ~XFS_MOUNT_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { mp->m_flags |= XFS_MOUNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { mp->m_flags |= XFS_MOUNT_NORECOVERY; } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { @@ -329,7 +312,6 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_IKEEP)) { mp->m_flags |= XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - dmapi_implies_ikeep = 0; mp->m_flags &= ~XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; @@ -370,12 +352,6 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - mp->m_flags |= XFS_MOUNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_XDSM)) { - mp->m_flags |= XFS_MOUNT_DMAPI; - } else if (!strcmp(this_char, MNTOPT_DMI)) { - mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { mp->m_flags |= XFS_MOUNT_DELAYLOG; cmn_err(CE_WARN, @@ -387,9 +363,11 @@ xfs_parseargs( cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); } else if (!strcmp(this_char, "osyncisdsync")) { - /* no-op, this is now the default */ cmn_err(CE_WARN, - "XFS: osyncisdsync is now the default, option is deprecated."); + "XFS: osyncisdsync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "osyncisosync")) { + cmn_err(CE_WARN, + "XFS: osyncisosync has no effect, option is deprecated."); } else if (!strcmp(this_char, "irixsgid")) { cmn_err(CE_WARN, "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); @@ -430,12 +408,6 @@ xfs_parseargs( return EINVAL; } - if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) { - printk("XFS: %s option needs the mount point option as well\n", - MNTOPT_DMAPI); - return EINVAL; - } - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { cmn_err(CE_WARN, "XFS: sunit and swidth must be specified together"); @@ -449,18 +421,6 @@ xfs_parseargs( return EINVAL; } - /* - * Applications using DMI filesystems often expect the - * inode generation number to be monotonically increasing. - * If we delete inode chunks we break this assumption, so - * keep unused inode chunks on disk for DMI filesystems - * until we come up with a better solution. - * Note that if "ikeep" or "noikeep" mount options are - * supplied, then they are honored. - */ - if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep) - mp->m_flags |= XFS_MOUNT_IKEEP; - done: if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { /* @@ -539,10 +499,8 @@ xfs_showargs( { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, - { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, { 0, NULL } @@ -947,7 +905,7 @@ xfs_fs_destroy_inode( { struct xfs_inode *ip = XFS_I(inode); - xfs_itrace_entry(ip); + trace_xfs_destroy_inode(ip); XFS_STATS_INC(vn_reclaim); @@ -1063,10 +1021,8 @@ xfs_log_inode( * an inode in another recent transaction. So we play it safe and * fire off the transaction anyway. */ - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); xfs_ilock_demote(ip, XFS_ILOCK_EXCL); @@ -1082,27 +1038,18 @@ xfs_fs_write_inode( struct xfs_mount *mp = ip->i_mount; int error = EAGAIN; - xfs_itrace_entry(ip); + trace_xfs_write_inode(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); if (wbc->sync_mode == WB_SYNC_ALL) { /* - * Make sure the inode has hit stable storage. By using the - * log and the fsync transactions we reduce the IOs we have - * to do here from two (log and inode) to just the log. - * - * Note: We still need to do a delwri write of the inode after - * this to flush it to the backing buffer so that bulkstat - * works properly if this is the first time the inode has been - * written. Because we hold the ilock atomically over the - * transaction commit and the inode flush we are guaranteed - * that the inode is not pinned when it returns. If the flush - * lock is already held, then the inode has already been - * flushed once and we don't need to flush it again. Hence - * the code will only flush the inode if it isn't already - * being flushed. + * Make sure the inode has made it it into the log. Instead + * of forcing it all the way to stable storage using a + * synchronous transaction we let the log force inside the + * ->sync_fs call do that for thus, which reduces the number + * of synchronous log foces dramatically. */ xfs_ioend_wait(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -1116,27 +1063,29 @@ xfs_fs_write_inode( * We make this non-blocking if the inode is contended, return * EAGAIN to indicate to the caller that they did not succeed. * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. + * another operation right now, they get caught later by + * xfs_sync. */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - } - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - /* - * Now we have the flush lock and the inode is not pinned, we can check - * if the inode is really clean as we know that there are no pending - * transaction completions, it is not waiting on the delayed write - * queue and there is no IO in progress. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - error = 0; - goto out_unlock; + /* + * Now we have the flush lock and the inode is not pinned, we + * can check if the inode is really clean as we know that + * there are no pending transaction completions, it is not + * waiting on the delayed write queue and there is no IO in + * progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; + } + error = xfs_iflush(ip, 0); } - error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1156,7 +1105,8 @@ xfs_fs_clear_inode( { xfs_inode_t *ip = XFS_I(inode); - xfs_itrace_entry(ip); + trace_xfs_clear_inode(ip); + XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); @@ -1193,22 +1143,13 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); + /* + * Unregister the memory shrinker before we tear down the mount + * structure so we don't have memory reclaim racing with us here. + */ + xfs_inode_shrinker_unregister(mp); xfs_syncd_stop(mp); - if (!(sb->s_flags & MS_RDONLY)) { - /* - * XXX(hch): this should be SYNC_WAIT. - * - * Or more likely not needed at all because the VFS is already - * calling ->sync_fs after shutting down all filestem - * operations and just before calling ->put_super. - */ - xfs_sync_data(mp, 0); - xfs_sync_attr(mp, 0); - } - - XFS_SEND_PREUNMOUNT(mp); - /* * Blow away any referenced inode in the filestreams cache. * This can and will cause log traffic as inodes go inactive @@ -1218,14 +1159,10 @@ xfs_fs_put_super( XFS_bflush(mp->m_ddev_targp); - XFS_SEND_UNMOUNT(mp); - xfs_unmountfs(mp); xfs_freesb(mp); - xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); } @@ -1543,7 +1480,6 @@ xfs_fs_fill_super( struct inode *root; struct xfs_mount *mp = NULL; int flags = 0, error = ENOMEM; - char *mtpt = NULL; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) @@ -1559,7 +1495,7 @@ xfs_fs_fill_super( mp->m_super = sb; sb->s_fs_info = mp; - error = xfs_parseargs(mp, (char *)data, &mtpt); + error = xfs_parseargs(mp, (char *)data); if (error) goto out_free_fsname; @@ -1571,16 +1507,12 @@ xfs_fs_fill_super( #endif sb->s_op = &xfs_super_operations; - error = xfs_dmops_get(mp); - if (error) - goto out_free_fsname; - if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_dmops; + goto out_free_fsname; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; @@ -1608,8 +1540,6 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); - sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1638,7 +1568,6 @@ xfs_fs_fill_super( xfs_inode_shrinker_register(mp); - kfree(mtpt); return 0; out_filestream_unmount: @@ -1648,11 +1577,8 @@ xfs_fs_fill_super( out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - out_put_dmops: - xfs_dmops_put(mp); out_free_fsname: xfs_free_fsname(mp); - kfree(mtpt); kfree(mp); out: return -error; @@ -1759,6 +1685,12 @@ xfs_init_zones(void) if (!xfs_trans_zone) goto out_destroy_ifork_zone; + xfs_log_item_desc_zone = + kmem_zone_init(sizeof(struct xfs_log_item_desc), + "xfs_log_item_desc"); + if (!xfs_log_item_desc_zone) + goto out_destroy_trans_zone; + /* * The size of the zone allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, @@ -1768,7 +1700,7 @@ xfs_init_zones(void) (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) * sizeof(int))), "xfs_buf_item"); if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + goto out_destroy_log_item_desc_zone; xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * @@ -1805,6 +1737,8 @@ xfs_init_zones(void) kmem_zone_destroy(xfs_efd_zone); out_destroy_buf_item_zone: kmem_zone_destroy(xfs_buf_item_zone); + out_destroy_log_item_desc_zone: + kmem_zone_destroy(xfs_log_item_desc_zone); out_destroy_trans_zone: kmem_zone_destroy(xfs_trans_zone); out_destroy_ifork_zone: @@ -1835,6 +1769,7 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_efi_zone); kmem_zone_destroy(xfs_efd_zone); kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_log_item_desc_zone); kmem_zone_destroy(xfs_trans_zone); kmem_zone_destroy(xfs_ifork_zone); kmem_zone_destroy(xfs_dabuf_zone); @@ -1883,7 +1818,6 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); - xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1911,7 +1845,6 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); - xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 519618e9279..1ef4a4d2d99 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -56,12 +56,6 @@ extern void xfs_qm_exit(void); # define XFS_BIGFS_STRING #endif -#ifdef CONFIG_XFS_DMAPI -# define XFS_DMAPI_STRING "dmapi support, " -#else -# define XFS_DMAPI_STRING -#endif - #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -72,7 +66,6 @@ extern void xfs_qm_exit(void); XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_BIGFS_STRING \ - XFS_DMAPI_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ef7f0218bcc..dfcbd98d159 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -24,25 +24,14 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_inode.h" #include "xfs_dinode.h" #include "xfs_error.h" -#include "xfs_mru_cache.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" -#include "xfs_utils.h" -#include "xfs_buf_item.h" #include "xfs_inode_item.h" -#include "xfs_rw.h" #include "xfs_quota.h" #include "xfs_trace.h" @@ -144,6 +133,41 @@ restart: return last_error; } +/* + * Select the next per-ag structure to iterate during the walk. The reclaim + * walk is optimised only to walk AGs with reclaimable inodes in them. + */ +static struct xfs_perag * +xfs_inode_ag_iter_next_pag( + struct xfs_mount *mp, + xfs_agnumber_t *first, + int tag) +{ + struct xfs_perag *pag = NULL; + + if (tag == XFS_ICI_RECLAIM_TAG) { + int found; + int ref; + + spin_lock(&mp->m_perag_lock); + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, + (void **)&pag, *first, 1, tag); + if (found <= 0) { + spin_unlock(&mp->m_perag_lock); + return NULL; + } + *first = pag->pag_agno + 1; + /* open coded pag reference increment */ + ref = atomic_inc_return(&pag->pag_ref); + spin_unlock(&mp->m_perag_lock); + trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); + } else { + pag = xfs_perag_get(mp, *first); + (*first)++; + } + return pag; +} + int xfs_inode_ag_iterator( struct xfs_mount *mp, @@ -154,16 +178,15 @@ xfs_inode_ag_iterator( int exclusive, int *nr_to_scan) { + struct xfs_perag *pag; int error = 0; int last_error = 0; xfs_agnumber_t ag; int nr; nr = nr_to_scan ? *nr_to_scan : INT_MAX; - for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, ag); + ag = 0; + while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, exclusive, &nr); xfs_perag_put(pag); @@ -285,7 +308,7 @@ xfs_sync_inode_attr( /* * Write out pagecache data for the whole filesystem. */ -int +STATIC int xfs_sync_data( struct xfs_mount *mp, int flags) @@ -306,7 +329,7 @@ xfs_sync_data( /* * Write out inode metadata (attributes) for the whole filesystem. */ -int +STATIC int xfs_sync_attr( struct xfs_mount *mp, int flags) @@ -339,8 +362,7 @@ xfs_commit_dummy_trans( xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); + xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -640,6 +662,17 @@ __xfs_inode_set_reclaim_tag( radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); + + if (!pag->pag_ici_reclaimable) { + /* propagate the reclaim tag up into the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_set(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } pag->pag_ici_reclaimable++; } @@ -674,6 +707,16 @@ __xfs_inode_clear_reclaim_tag( radix_tree_tag_clear(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); pag->pag_ici_reclaimable--; + if (!pag->pag_ici_reclaimable) { + /* clear the reclaim tag from the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_clear(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } } /* @@ -812,7 +855,36 @@ out: reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); + + XFS_STATS_INC(xs_ig_reclaims); + /* + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. + */ + write_lock(&pag->pag_ici_lock); + if (!radix_tree_delete(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) + ASSERT(0); + write_unlock(&pag->pag_ici_lock); + + /* + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_qm_dqdetach(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + xfs_inode_free(ip); return error; } @@ -828,83 +900,52 @@ xfs_reclaim_inodes( /* * Shrinker infrastructure. - * - * This is all far more complex than it needs to be. It adds a global list of - * mounts because the shrinkers can only call a global context. We need to make - * the shrinkers pass a context to avoid the need for global state. */ -static LIST_HEAD(xfs_mount_list); -static struct rw_semaphore xfs_mount_list_lock; - static int xfs_reclaim_inode_shrink( + struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { struct xfs_mount *mp; struct xfs_perag *pag; xfs_agnumber_t ag; - int reclaimable = 0; + int reclaimable; + mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { if (!(gfp_mask & __GFP_FS)) return -1; - down_read(&xfs_mount_list_lock); - list_for_each_entry(mp, &xfs_mount_list, m_mplist) { - xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, + xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); - if (nr_to_scan <= 0) - break; - } - up_read(&xfs_mount_list_lock); - } + /* if we don't exhaust the scan, don't bother coming back */ + if (nr_to_scan > 0) + return -1; + } - down_read(&xfs_mount_list_lock); - list_for_each_entry(mp, &xfs_mount_list, m_mplist) { - for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - pag = xfs_perag_get(mp, ag); - reclaimable += pag->pag_ici_reclaimable; - xfs_perag_put(pag); - } + reclaimable = 0; + ag = 0; + while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, + XFS_ICI_RECLAIM_TAG))) { + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); } - up_read(&xfs_mount_list_lock); return reclaimable; } -static struct shrinker xfs_inode_shrinker = { - .shrink = xfs_reclaim_inode_shrink, - .seeks = DEFAULT_SEEKS, -}; - -void __init -xfs_inode_shrinker_init(void) -{ - init_rwsem(&xfs_mount_list_lock); - register_shrinker(&xfs_inode_shrinker); -} - -void -xfs_inode_shrinker_destroy(void) -{ - ASSERT(list_empty(&xfs_mount_list)); - unregister_shrinker(&xfs_inode_shrinker); -} - void xfs_inode_shrinker_register( struct xfs_mount *mp) { - down_write(&xfs_mount_list_lock); - list_add_tail(&mp->m_mplist, &xfs_mount_list); - up_write(&xfs_mount_list_lock); + mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink; + mp->m_inode_shrink.seeks = DEFAULT_SEEKS; + register_shrinker(&mp->m_inode_shrink); } void xfs_inode_shrinker_unregister( struct xfs_mount *mp) { - down_write(&xfs_mount_list_lock); - list_del(&mp->m_mplist); - up_write(&xfs_mount_list_lock); + unregister_shrinker(&mp->m_inode_shrink); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index cdcbaaca988..fe78726196f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -35,9 +35,6 @@ typedef struct xfs_sync_work { int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_attr(struct xfs_mount *mp, int flags); -int xfs_sync_data(struct xfs_mount *mp, int flags); - int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); @@ -55,8 +52,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, int tag, int write_lock, int *nr_to_scan); -void xfs_inode_shrinker_init(void); -void xfs_inode_shrinker_destroy(void); void xfs_inode_shrinker_register(struct xfs_mount *mp); void xfs_inode_shrinker_unregister(struct xfs_mount *mp); diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index d12be8470cb..88d25d4aa56 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -24,17 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_da_btree.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_itable.h" diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 73d5aa11738..c657cdca2cd 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \ unsigned long caller_ip), \ TP_ARGS(mp, agno, refcount, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); +DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_put); +DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); +DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, @@ -314,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init); DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); -DEFINE_BUF_EVENT(xfs_buf_pin); -DEFINE_BUF_EVENT(xfs_buf_unpin); DEFINE_BUF_EVENT(xfs_buf_iodone); DEFINE_BUF_EVENT(xfs_buf_iorequest); DEFINE_BUF_EVENT(xfs_buf_bawrite); @@ -538,7 +539,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait); DEFINE_LOCK_EVENT(xfs_ilock_demote); DEFINE_LOCK_EVENT(xfs_iunlock); -DECLARE_EVENT_CLASS(xfs_iget_class, +DECLARE_EVENT_CLASS(xfs_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( @@ -554,16 +555,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class, __entry->ino) ) -#define DEFINE_IGET_EVENT(name) \ -DEFINE_EVENT(xfs_iget_class, name, \ +#define DEFINE_INODE_EVENT(name) \ +DEFINE_EVENT(xfs_inode_class, name, \ TP_PROTO(struct xfs_inode *ip), \ TP_ARGS(ip)) -DEFINE_IGET_EVENT(xfs_iget_skip); -DEFINE_IGET_EVENT(xfs_iget_reclaim); -DEFINE_IGET_EVENT(xfs_iget_found); -DEFINE_IGET_EVENT(xfs_iget_alloc); - -DECLARE_EVENT_CLASS(xfs_inode_class, +DEFINE_INODE_EVENT(xfs_iget_skip); +DEFINE_INODE_EVENT(xfs_iget_reclaim); +DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); +DEFINE_INODE_EVENT(xfs_iget_hit); +DEFINE_INODE_EVENT(xfs_iget_miss); + +DEFINE_INODE_EVENT(xfs_getattr); +DEFINE_INODE_EVENT(xfs_setattr); +DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_alloc_file_space); +DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_readdir); +#ifdef CONFIG_XFS_POSIX_ACL +DEFINE_INODE_EVENT(xfs_check_acl); +#endif +DEFINE_INODE_EVENT(xfs_vm_bmap); +DEFINE_INODE_EVENT(xfs_file_ioctl); +DEFINE_INODE_EVENT(xfs_file_compat_ioctl); +DEFINE_INODE_EVENT(xfs_ioctl_setattr); +DEFINE_INODE_EVENT(xfs_file_fsync); +DEFINE_INODE_EVENT(xfs_destroy_inode); +DEFINE_INODE_EVENT(xfs_write_inode); +DEFINE_INODE_EVENT(xfs_clear_inode); + +DEFINE_INODE_EVENT(xfs_dquot_dqalloc); +DEFINE_INODE_EVENT(xfs_dquot_dqdetach); + +DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), TP_ARGS(ip, caller_ip), TP_STRUCT__entry( @@ -588,20 +611,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class, (char *)__entry->caller_ip) ) -#define DEFINE_INODE_EVENT(name) \ -DEFINE_EVENT(xfs_inode_class, name, \ +#define DEFINE_IREF_EVENT(name) \ +DEFINE_EVENT(xfs_iref_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ TP_ARGS(ip, caller_ip)) -DEFINE_INODE_EVENT(xfs_ihold); -DEFINE_INODE_EVENT(xfs_irele); -DEFINE_INODE_EVENT(xfs_inode_pin); -DEFINE_INODE_EVENT(xfs_inode_unpin); -DEFINE_INODE_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_ihold); +DEFINE_IREF_EVENT(xfs_irele); +DEFINE_IREF_EVENT(xfs_inode_pin); +DEFINE_IREF_EVENT(xfs_inode_unpin); +DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); + +DECLARE_EVENT_CLASS(xfs_namespace_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), + TP_ARGS(dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dp ino 0x%llx name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(name)) +) -/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ -DEFINE_INODE_EVENT(xfs_inode); -#define xfs_itrace_entry(ip) \ - trace_xfs_inode(ip, _THIS_IP_) +#define DEFINE_NAMESPACE_EVENT(name) \ +DEFINE_EVENT(xfs_namespace_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ + TP_ARGS(dp, name)) +DEFINE_NAMESPACE_EVENT(xfs_remove); +DEFINE_NAMESPACE_EVENT(xfs_link); +DEFINE_NAMESPACE_EVENT(xfs_lookup); +DEFINE_NAMESPACE_EVENT(xfs_create); +DEFINE_NAMESPACE_EVENT(xfs_symlink); + +TRACE_EVENT(xfs_rename, + TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, + struct xfs_name *src_name, struct xfs_name *target_name), + TP_ARGS(src_dp, target_dp, src_name, target_name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_dp_ino) + __field(xfs_ino_t, target_dp_ino) + __dynamic_array(char, src_name, src_name->len) + __dynamic_array(char, target_name, target_name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(src_dp)->i_sb->s_dev; + __entry->src_dp_ino = src_dp->i_ino; + __entry->target_dp_ino = target_dp->i_ino; + memcpy(__get_str(src_name), src_name->name, src_name->len); + memcpy(__get_str(target_name), target_name->name, target_name->len); + ), + TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" + " src name %s target name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->src_dp_ino, + __entry->target_dp_ino, + __get_str(src_name), + __get_str(target_name)) +) DECLARE_EVENT_CLASS(xfs_dquot_class, TP_PROTO(struct xfs_dquot *dqp), @@ -681,9 +755,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele); DEFINE_DQUOT_EVENT(xfs_dqflush); DEFINE_DQUOT_EVENT(xfs_dqflush_force); DEFINE_DQUOT_EVENT(xfs_dqflush_done); -/* not really iget events, but we re-use the format */ -DEFINE_IGET_EVENT(xfs_dquot_dqalloc); -DEFINE_IGET_EVENT(xfs_dquot_dqdetach); DECLARE_EVENT_CLASS(xfs_loggrant_class, TP_PROTO(struct log *log, struct xlog_ticket *tic), @@ -831,33 +902,29 @@ DECLARE_EVENT_CLASS(xfs_page_class, __field(loff_t, size) __field(unsigned long, offset) __field(int, delalloc) - __field(int, unmapped) __field(int, unwritten) ), TP_fast_assign( - int delalloc = -1, unmapped = -1, unwritten = -1; + int delalloc = -1, unwritten = -1; if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, - &unmapped, &unwritten); + xfs_count_page_state(page, &delalloc, &unwritten); __entry->dev = inode->i_sb->s_dev; __entry->ino = XFS_I(inode)->i_ino; __entry->pgoff = page_offset(page); __entry->size = i_size_read(inode); __entry->offset = off; __entry->delalloc = delalloc; - __entry->unmapped = unmapped; __entry->unwritten = unwritten; ), TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " - "delalloc %d unmapped %d unwritten %d", + "delalloc %d unwritten %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pgoff, __entry->size, __entry->offset, __entry->delalloc, - __entry->unmapped, __entry->unwritten) ) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 585e7633dfc..e1a2f6800e0 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -64,8 +54,6 @@ flush lock - ditto. */ -STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *); - #ifdef DEBUG xfs_buftarg_t *xfs_dqerror_target; int xfs_do_dqerror; @@ -390,21 +378,14 @@ xfs_qm_dqalloc( return (ESRCH); } - /* - * xfs_trans_commit normally decrements the vnode ref count - * when it unlocks the inode. Since we want to keep the quota - * inode around, we bump the vnode ref count now. - */ - IHOLD(quotip); - - xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; if ((error = xfs_bmapi(tp, quotip, offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &flist, NULL))) { + &map, &nmaps, &flist))) { goto error0; } ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); @@ -520,7 +501,7 @@ xfs_qm_dqtobp( error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL, NULL); + NULL, 0, &map, &nmaps, NULL); xfs_iunlock(quotip, XFS_ILOCK_SHARED); if (error) @@ -1141,6 +1122,46 @@ xfs_qm_dqrele( xfs_qm_dqput(dqp); } +/* + * This is the dquot flushing I/O completion routine. It is called + * from interrupt level when the buffer containing the dquot is + * flushed to disk. It is responsible for removing the dquot logitem + * from the AIL if it has not been re-logged, and unlocking the dquot's + * flush lock. This behavior is very similar to that of inodes.. + */ +STATIC void +xfs_qm_dqflush_done( + struct xfs_buf *bp, + struct xfs_log_item *lip) +{ + xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; + xfs_dquot_t *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; + + /* + * We only want to pull the item from the AIL if its + * location in the log has not changed since we started the flush. + * Thus, we only bother if the dquot's lsn has + * not changed. First we check the lsn outside the lock + * since it's cheaper, and then we recheck while + * holding the lock before removing the dquot from the AIL. + */ + if ((lip->li_flags & XFS_LI_IN_AIL) && + lip->li_lsn == qip->qli_flush_lsn) { + + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); + if (lip->li_lsn == qip->qli_flush_lsn) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + + /* + * Release the dq's flush lock since we're done with it. + */ + xfs_dqfunlock(dqp); +} /* * Write a modified dquot to disk. @@ -1222,8 +1243,9 @@ xfs_qm_dqflush( * Attach an iodone routine so that we can remove this dquot from the * AIL and release the flush lock once the dquot is synced to disk. */ - xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *)) - xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item)); + xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, + &dqp->q_logitem.qli_item); + /* * If the buffer is pinned then push on the log so we won't * get stuck waiting in the write for too long. @@ -1247,50 +1269,6 @@ xfs_qm_dqflush( } -/* - * This is the dquot flushing I/O completion routine. It is called - * from interrupt level when the buffer containing the dquot is - * flushed to disk. It is responsible for removing the dquot logitem - * from the AIL if it has not been re-logged, and unlocking the dquot's - * flush lock. This behavior is very similar to that of inodes.. - */ -/*ARGSUSED*/ -STATIC void -xfs_qm_dqflush_done( - xfs_buf_t *bp, - xfs_dq_logitem_t *qip) -{ - xfs_dquot_t *dqp; - struct xfs_ail *ailp; - - dqp = qip->qli_dquot; - ailp = qip->qli_item.li_ailp; - - /* - * We only want to pull the item from the AIL if its - * location in the log has not changed since we started the flush. - * Thus, we only bother if the dquot's lsn has - * not changed. First we check the lsn outside the lock - * since it's cheaper, and then we recheck while - * holding the lock before removing the dquot from the AIL. - */ - if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - qip->qli_item.li_lsn == qip->qli_flush_lsn) { - - /* xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); - if (qip->qli_item.li_lsn == qip->qli_flush_lsn) - xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip); - else - spin_unlock(&ailp->xa_lock); - } - - /* - * Release the dq's flush lock since we're done with it. - */ - xfs_dqfunlock(dqp); -} - int xfs_qm_dqlock_nowait( xfs_dquot_t *dqp) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 8d89a24ae32..2a1f3dc10a0 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -23,42 +23,36 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_itable.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" +static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_dq_logitem, qli_item); +} + /* * returns the number of iovecs needed to log the given dquot item. */ -/* ARGSUSED */ STATIC uint xfs_qm_dquot_logitem_size( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { /* * we need only two iovecs, one for the format, one for the real thing */ - return (2); + return 2; } /* @@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size( */ STATIC void xfs_qm_dquot_logitem_format( - xfs_dq_logitem_t *logitem, - xfs_log_iovec_t *logvec) + struct xfs_log_item *lip, + struct xfs_log_iovec *logvec) { - ASSERT(logitem); - ASSERT(logitem->qli_dquot); + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; + logvec->i_addr = &qlip->qli_format; logvec->i_len = sizeof(xfs_dq_logformat_t); logvec->i_type = XLOG_REG_TYPE_QFORMAT; logvec++; - logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; + logvec->i_addr = &qlip->qli_dquot->q_core; logvec->i_len = sizeof(xfs_disk_dquot_t); logvec->i_type = XLOG_REG_TYPE_DQUOT; - ASSERT(2 == logitem->qli_item.li_desc->lid_size); - logitem->qli_format.qlf_size = 2; + ASSERT(2 == lip->li_desc->lid_size); + qlip->qli_format.qlf_size = 2; } @@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format( */ STATIC void xfs_qm_dquot_logitem_pin( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); atomic_inc(&dqp->q_pincount); @@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin( * dquot must have been previously pinned with a call to * xfs_qm_dquot_logitem_pin(). */ -/* ARGSUSED */ STATIC void xfs_qm_dquot_logitem_unpin( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip, + int remove) { - xfs_dquot_t *dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; ASSERT(atomic_read(&dqp->q_pincount) > 0); if (atomic_dec_and_test(&dqp->q_pincount)) wake_up(&dqp->q_pinwait); } -/* ARGSUSED */ -STATIC void -xfs_qm_dquot_logitem_unpin_remove( - xfs_dq_logitem_t *logitem, - xfs_trans_t *tp) -{ - xfs_qm_dquot_logitem_unpin(logitem); -} - /* * Given the logitem, this writes the corresponding dquot entry to disk * asynchronously. This is called with the dquot entry securely locked; @@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove( */ STATIC void xfs_qm_dquot_logitem_push( - xfs_dq_logitem_t *logitem) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; - int error; - - dqp = logitem->qli_dquot; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); @@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push( xfs_dqunlock(dqp); } -/*ARGSUSED*/ STATIC xfs_lsn_t xfs_qm_dquot_logitem_committed( - xfs_dq_logitem_t *l, + struct xfs_log_item *lip, xfs_lsn_t lsn) { /* * We always re-log the entire dquot when it becomes dirty, * so, the latest copy _is_ the only one that matters. */ - return (lsn); + return lsn; } - /* * This is called to wait for the given dquot to be unpinned. * Most of these pin/unpin routines are plagiarized from inode code. */ void xfs_qm_dqunpin_wait( - xfs_dquot_t *dqp) + struct xfs_dquot *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); if (atomic_read(&dqp->q_pincount) == 0) @@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait( */ STATIC void xfs_qm_dquot_logitem_pushbuf( - xfs_dq_logitem_t *qip) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; - xfs_mount_t *mp; - xfs_buf_t *bp; + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; - dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* @@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if (completion_done(&dqp->q_flush) || - ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { + if (completion_done(&dqp->q_flush) || + !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_dqunlock(dqp); return; } - mp = dqp->q_mount; - bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + + bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, + dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); xfs_dqunlock(dqp); if (!bp) return; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); - return; - } /* @@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf( */ STATIC uint xfs_qm_dquot_logitem_trylock( - xfs_dq_logitem_t *qip) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - dqp = qip->qli_dquot; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; - if (! xfs_qm_dqlock_nowait(dqp)) + if (!xfs_qm_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; if (!xfs_dqflock_nowait(dqp)) { @@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock( return XFS_ITEM_PUSHBUF; } - ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); + ASSERT(lip->li_flags & XFS_LI_IN_AIL); return XFS_ITEM_SUCCESS; } - /* * Unlock the dquot associated with the log item. * Clear the fields of the dquot and dquot log item that @@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock( */ STATIC void xfs_qm_dquot_logitem_unlock( - xfs_dq_logitem_t *ql) + struct xfs_log_item *lip) { - xfs_dquot_t *dqp; + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - ASSERT(ql != NULL); - dqp = ql->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* @@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock( xfs_dqunlock(dqp); } - /* * this needs to stamp an lsn into the dquot, I think. * rpc's that look at user dquot's would then have to * push on the dependency recorded in the dquot */ -/* ARGSUSED */ STATIC void xfs_qm_dquot_logitem_committing( - xfs_dq_logitem_t *l, + struct xfs_log_item *lip, xfs_lsn_t lsn) { - return; } - /* * This is the ops vector for dquots */ static struct xfs_item_ops xfs_dquot_item_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_dquot_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) - xfs_qm_dquot_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*)) - xfs_qm_dquot_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_dquot_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push, - .iop_pushbuf = (void(*)(xfs_log_item_t*)) - xfs_qm_dquot_logitem_pushbuf, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_dquot_logitem_committing + .iop_size = xfs_qm_dquot_logitem_size, + .iop_format = xfs_qm_dquot_logitem_format, + .iop_pin = xfs_qm_dquot_logitem_pin, + .iop_unpin = xfs_qm_dquot_logitem_unpin, + .iop_trylock = xfs_qm_dquot_logitem_trylock, + .iop_unlock = xfs_qm_dquot_logitem_unlock, + .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_push = xfs_qm_dquot_logitem_push, + .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, + .iop_committing = xfs_qm_dquot_logitem_committing }; /* @@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = { */ void xfs_qm_dquot_logitem_init( - struct xfs_dquot *dqp) + struct xfs_dquot *dqp) { - xfs_dq_logitem_t *lp; - lp = &dqp->q_logitem; + struct xfs_dq_logitem *lp = &dqp->q_logitem; xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); @@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init( /*------------------ QUOTAOFF LOG ITEMS -------------------*/ +static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_qoff_logitem, qql_item); +} + + /* * This returns the number of iovecs needed to log the given quotaoff item. * We only need 1 iovec for an quotaoff item. It just logs the * quotaoff_log_format structure. */ -/*ARGSUSED*/ STATIC uint -xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_size( + struct xfs_log_item *lip) { - return (1); + return 1; } /* @@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf) * slots in the quotaoff item have been filled. */ STATIC void -xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, - xfs_log_iovec_t *log_vector) +xfs_qm_qoff_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) { - ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF); + struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); + + ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); + log_vector->i_addr = &qflip->qql_format; log_vector->i_len = sizeof(xfs_qoff_logitem_t); log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; - qf->qql_format.qf_size = 1; + qflip->qql_format.qf_size = 1; } - /* * Pinning has no meaning for an quotaoff item, so just return. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_pin( + struct xfs_log_item *lip) { - return; } - /* * Since pinning has no meaning for an quotaoff item, unpinning does * not either. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_unpin( + struct xfs_log_item *lip, + int remove) { - return; -} - -/*ARGSUSED*/ -STATIC void -xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp) -{ - return; } /* * Quotaoff items have no locking, so just return success. */ -/*ARGSUSED*/ STATIC uint -xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_trylock( + struct xfs_log_item *lip) { return XFS_ITEM_LOCKED; } @@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf) * Quotaoff items have no locking or pushing, so return failure * so that the caller doesn't bother with us. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_unlock( + struct xfs_log_item *lip) { - return; } /* * The quotaoff-start-item is logged only once and cannot be moved in the log, * so simply return the lsn at which it's been logged. */ -/*ARGSUSED*/ STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn) +xfs_qm_qoff_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - return (lsn); + return lsn; } /* * There isn't much you can do to push on an quotaoff item. It is simply * stuck waiting for the log to be flushed to disk. */ -/*ARGSUSED*/ STATIC void -xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf) +xfs_qm_qoff_logitem_push( + struct xfs_log_item *lip) { - return; } -/*ARGSUSED*/ STATIC xfs_lsn_t xfs_qm_qoffend_logitem_committed( - xfs_qoff_logitem_t *qfe, - xfs_lsn_t lsn) + struct xfs_log_item *lip, + xfs_lsn_t lsn) { - xfs_qoff_logitem_t *qfs; - struct xfs_ail *ailp; + struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); + struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; + struct xfs_ail *ailp = qfs->qql_item.li_ailp; - qfs = qfe->qql_start_lip; - ailp = qfs->qql_item.li_ailp; - spin_lock(&ailp->xa_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); + kmem_free(qfs); kmem_free(qfe); return (xfs_lsn_t)-1; @@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed( * (truly makes the quotaoff irrevocable). If we do something else, * then maybe we don't need two. */ -/* ARGSUSED */ -STATIC void -xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) -{ - return; -} - -/* ARGSUSED */ STATIC void -xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) +xfs_qm_qoff_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) { - return; } static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_qoff_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) - xfs_qm_qoff_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoffend_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoffend_logitem_committing + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoffend_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing }; /* * This is the ops vector shared by all quotaoff-start log items. */ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { - .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, - .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) - xfs_qm_qoff_logitem_format, - .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, - .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin, - .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) - xfs_qm_qoff_logitem_unpin_remove, - .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, - .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock, - .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoff_logitem_committed, - .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push, - .iop_pushbuf = NULL, - .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) - xfs_qm_qoff_logitem_committing + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoff_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing }; /* * Allocate and initialize an quotaoff item of the correct quota type(s). */ -xfs_qoff_logitem_t * +struct xfs_qoff_logitem * xfs_qm_qoff_logitem_init( - struct xfs_mount *mp, - xfs_qoff_logitem_t *start, - uint flags) + struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags) { - xfs_qoff_logitem_t *qf; + struct xfs_qoff_logitem *qf; - qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); + qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); @@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init( qf->qql_format.qf_type = XFS_LI_QUOTAOFF; qf->qql_format.qf_flags = flags; qf->qql_start_lip = start; - return (qf); + return qf; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 2d8b7bc792c..9a92407109a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -23,25 +23,18 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -69,7 +62,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -1497,7 +1490,7 @@ xfs_qm_dqiterate( maxlblkcnt - lblkno, XFS_BMAPI_METADATA, NULL, - 0, map, &nmaps, NULL, NULL); + 0, map, &nmaps, NULL); xfs_iunlock(qip, XFS_ILOCK_SHARED); if (error) break; @@ -1632,10 +1625,7 @@ xfs_qm_dqusage_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* on-disk inode pointer (not used) */ int *res) /* result code value */ { xfs_inode_t *ip; @@ -1660,7 +1650,7 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { *res = BULKSTAT_RV_NOTHING; return error; } @@ -1672,7 +1662,8 @@ xfs_qm_dqusage_adjust( * making us disable quotas for the file system. */ if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { - xfs_iput(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); *res = BULKSTAT_RV_GIVEUP; return error; } @@ -1685,7 +1676,8 @@ xfs_qm_dqusage_adjust( * Walk thru the extent list and count the realtime blocks. */ if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { - xfs_iput(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); if (udqp) xfs_qm_dqput(udqp); if (gdqp) @@ -1796,12 +1788,13 @@ xfs_qm_quotacheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters in core. */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, NULL, - structsz, NULL, BULKSTAT_FG_IGET, &done))) + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, + structsz, NULL, &done); + if (error) break; - } while (! done); + } while (!done); /* * We've made all the changes that we need to make incore. @@ -1889,14 +1882,14 @@ xfs_qm_init_quotainos( mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0))) + 0, 0, &uip))) return XFS_ERROR(error); } if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0))) { + 0, 0, &gip))) { if (uip) IRELE(uip); return XFS_ERROR(error); @@ -2119,7 +2112,10 @@ xfs_qm_shake_freelist( */ /* ARGSUSED */ STATIC int -xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) +xfs_qm_shake( + struct shrinker *shrink, + int nr_to_scan, + gfp_t gfp_mask) { int ndqused, nfree, n; diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 97b410c1279..bea02d786c5 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 3d1fc79532e..8671a0b3264 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" -#include "xfs_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 92b002f1805..45e5849df23 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -26,25 +26,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" -#include "xfs_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -248,40 +238,74 @@ out_unlock: return error; } +STATIC int +xfs_qm_scall_trunc_qfile( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + struct xfs_trans *tp; + int error; + + if (ino == NULLFSINO) + return 0; + + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); + if (error) + return error; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + goto out_put; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip); + + error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + goto out_unlock; + } + + xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +out_put: + IRELE(ip); + return error; +} + int xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { int error = 0, error2 = 0; - xfs_inode_t *qip; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); return XFS_ERROR(EINVAL); } - if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); - if (!error) { - error = xfs_truncate_file(mp, qip); - IRELE(qip); - } - } - - if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && - mp->m_sb.sb_gquotino != NULLFSINO) { - error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); - if (!error2) { - error2 = xfs_truncate_file(mp, qip); - IRELE(qip); - } - } + if (flags & XFS_DQ_USER) + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) + error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); return error ? error : error2; } - /* * Switch on (a given) quota enforcement for a filesystem. This takes * effect immediately. @@ -417,12 +441,12 @@ xfs_qm_scall_getqstat( } if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0) == 0) + 0, 0, &uip) == 0) tempuqip = B_TRUE; } if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0) == 0) + 0, 0, &gip) == 0) tempgqip = B_TRUE; } if (uip) { @@ -786,9 +810,9 @@ xfs_qm_export_dquot( } #ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || (XFS_IS_OQUOTA_ENFORCED(mp) && - (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && + (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && dst->d_id != 0) { if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { @@ -809,17 +833,17 @@ xfs_qm_export_qtype_flags( /* * Can't be more than one, or none. */ - ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != - (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); - ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != - (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); - ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != - (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); - ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); + ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != + (FS_PROJ_QUOTA | FS_USER_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != + (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != + (FS_USER_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); return (flags & XFS_DQ_USER) ? - XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; + FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? + FS_PROJ_QUOTA : FS_GROUP_QUOTA; } STATIC uint @@ -830,16 +854,16 @@ xfs_qm_export_flags( uflags = 0; if (flags & XFS_UQUOTA_ACCT) - uflags |= XFS_QUOTA_UDQ_ACCT; + uflags |= FS_QUOTA_UDQ_ACCT; if (flags & XFS_PQUOTA_ACCT) - uflags |= XFS_QUOTA_PDQ_ACCT; + uflags |= FS_QUOTA_PDQ_ACCT; if (flags & XFS_GQUOTA_ACCT) - uflags |= XFS_QUOTA_GDQ_ACCT; + uflags |= FS_QUOTA_GDQ_ACCT; if (flags & XFS_UQUOTA_ENFD) - uflags |= XFS_QUOTA_UDQ_ENFD; + uflags |= FS_QUOTA_UDQ_ENFD; if (flags & (XFS_OQUOTA_ENFD)) { uflags |= (flags & XFS_GQUOTA_ACCT) ? - XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; + FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; } return (uflags); } @@ -875,8 +899,9 @@ xfs_dqrele_inode( xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } - xfs_iput(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); return 0; } @@ -1109,10 +1134,7 @@ xfs_qm_internalqcheck_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* not used */ int *res) /* bulkstat result code */ { xfs_inode_t *ip; @@ -1134,7 +1156,7 @@ xfs_qm_internalqcheck_adjust( ipreleased = B_FALSE; again: lock_flags = XFS_ILOCK_SHARED; - if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { *res = BULKSTAT_RV_NOTHING; return (error); } @@ -1146,7 +1168,8 @@ xfs_qm_internalqcheck_adjust( * of those now. */ if (! ipreleased) { - xfs_iput(ip, lock_flags); + xfs_iunlock(ip, lock_flags); + IRELE(ip); ipreleased = B_TRUE; goto again; } @@ -1163,7 +1186,8 @@ xfs_qm_internalqcheck_adjust( ASSERT(gd); xfs_qm_internalqcheck_dqadjust(ip, gd); } - xfs_iput(ip, lock_flags); + xfs_iunlock(ip, lock_flags); + IRELE(ip); *res = BULKSTAT_RV_DIDONE; return (0); } @@ -1205,15 +1229,15 @@ xfs_qm_internalqcheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_internalqcheck_adjust, NULL, - 0, NULL, BULKSTAT_FG_IGET, &done))) { + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, + 0, NULL, &done); + if (error) { + cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); break; } - } while (! done); - if (error) { - cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); - } + } while (!done); + cmn_err(CE_DEBUG, "Checking results against system dquots"); for (i = 0; i < qmtest_hashmask; i++) { xfs_dqtest_t *d, *n; diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 061d827da33..7de91d1b75c 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -23,25 +23,15 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" #include "xfs_alloc.h" -#include "xfs_dmapi.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_rw.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -59,16 +49,14 @@ xfs_trans_dqjoin( xfs_trans_t *tp, xfs_dquot_t *dqp) { - xfs_dq_logitem_t *lp = &dqp->q_logitem; - ASSERT(dqp->q_transp != tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(lp->qli_dquot == dqp); + ASSERT(dqp->q_logitem.qli_dquot == dqp); /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp)); + xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); /* * Initialize i_transp so we can later determine if this dquot is @@ -93,16 +81,11 @@ xfs_trans_log_dquot( xfs_trans_t *tp, xfs_dquot_t *dqp) { - xfs_log_item_desc_t *lidp; - ASSERT(dqp->q_transp == tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); - lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; } /* @@ -874,9 +857,8 @@ xfs_trans_get_qoff_item( /* * Get a log_item_desc to point at the new item. */ - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q); - - return (q); + xfs_trans_add_item(tp, &q->qql_item); + return q; } @@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item( xfs_trans_t *tp, xfs_qoff_logitem_t *qlp) { - xfs_log_item_desc_t *lidp; - - lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp); - ASSERT(lidp != NULL); - tp->t_flags |= XFS_TRANS_DIRTY; - lidp->lid_flags |= XFS_LID_DIRTY; + qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; } STATIC void diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 3f3610a7ee0..975aa10e1a4 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -22,7 +22,6 @@ #include "xfs_sb.h" #include "xfs_inum.h" #include "xfs_ag.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a7fbe8a99b1..af168faccc7 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -24,18 +24,13 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -688,8 +683,6 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbno; /* start bno of left side entry */ xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ - /*REFERENCED*/ - xfs_agblock_t ltend; /* end bno of left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ @@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - ltend = ltbno + ltlen; - ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; if (!xfs_alloc_fix_minleft(args)) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); @@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near( */ args->agbno = bnew; ASSERT(bnew >= ltbno); - ASSERT(bnew + blen <= ltend); + ASSERT(bnew + blen <= ltbno + ltlen); /* * Set up a cursor for the by-bno tree. */ @@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near( /* * Fix up the length and compute the useful address. */ - ltend = ltbno + ltlen; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) { @@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near( (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, ltlen, <new); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltend); + ASSERT(ltnew + rlen <= ltbno + ltlen); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 6d05199b667..895009a9727 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -27,16 +27,16 @@ struct xfs_busy_extent; /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ -typedef enum xfs_alloctype -{ - XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ - XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ - XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ - XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. |