aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Makefile4
-rw-r--r--fs/9p/fid.c111
-rw-r--r--fs/9p/v9fs.c3
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_dir.c136
-rw-r--r--fs/9p/vfs_file.c26
-rw-r--r--fs/9p/vfs_inode.c757
-rw-r--r--fs/9p/vfs_super.c50
-rw-r--r--fs/9p/xattr.c160
-rw-r--r--fs/9p/xattr.h27
-rw-r--r--fs/9p/xattr_user.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/cell.c40
-rw-r--r--fs/afs/main.c9
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c21
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/block_dev.c76
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c24
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/cachefiles/namei.c13
-rw-r--r--fs/cachefiles/rdwr.c4
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/ceph/auth_x.c5
-rw-r--r--fs/ceph/caps.c129
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c13
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c27
-rw-r--r--fs/ceph/mds_client.c79
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/messenger.c75
-rw-r--r--fs/ceph/mon_client.c11
-rw-r--r--fs/ceph/osd_client.c9
-rw-r--r--fs/ceph/osdmap.c28
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/Kconfig27
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cache.c331
-rw-r--r--fs/cifs/cifs_debug.c25
-rw-r--r--fs/cifs/cifs_dfs_ref.c33
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_spnego.c7
-rw-r--r--fs/cifs/cifsfs.c49
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h48
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c181
-rw-r--r--fs/cifs/dir.c84
-rw-r--r--fs/cifs/dns_resolve.c166
-rw-r--r--fs/cifs/dns_resolve.h2
-rw-r--r--fs/cifs/file.c203
-rw-r--r--fs/cifs/fscache.c236
-rw-r--r--fs/cifs/fscache.h136
-rw-r--r--fs/cifs/inode.c67
-rw-r--r--fs/cifs/ioctl.c3
-rw-r--r--fs/cifs/misc.c20
-rw-r--r--fs/cifs/netmisc.c63
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/cifs/smberr.h1
-rw-r--r--fs/compat.c4
-rw-r--r--fs/compat_ioctl.c11
-rw-r--r--fs/configfs/inode.c9
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/direct-io.c26
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/netlink.c15
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/messaging.c17
-rw-r--r--fs/exec.c1
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/Kconfig1
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext3/inode.c80
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c17
-rw-r--r--fs/ext4/acl.c1
-rw-r--r--fs/ext4/balloc.c6
-rw-r--r--fs/ext4/block_validity.c8
-rw-r--r--fs/ext4/dir.c23
-rw-r--r--fs/ext4/ext4.h152
-rw-r--r--fs/ext4/ext4_jbd2.c71
-rw-r--r--fs/ext4/ext4_jbd2.h56
-rw-r--r--fs/ext4/extents.c20
-rw-r--r--fs/ext4/file.c5
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c245
-rw-r--r--fs/ext4/mballoc.c155
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c13
-rw-r--r--fs/ext4/namei.c40
-rw-r--r--fs/ext4/resize.c8
-rw-r--r--fs/ext4/super.c336
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/fcntl.c13
-rw-r--r--fs/file.c3
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fs-writeback.c509
-rw-r--r--fs/fscache/Kconfig1
-rw-r--r--fs/fscache/internal.h8
-rw-r--r--fs/fscache/main.c106
-rw-r--r--fs/fscache/object-list.c11
-rw-r--r--fs/fscache/object.c106
-rw-r--r--fs/fscache/operation.c67
-rw-r--r--fs/fscache/page.c72
-rw-r--r--fs/fuse/dev.c229
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/aops.c9
-rw-r--r--fs/gfs2/bmap.c18
-rw-r--r--fs/gfs2/bmap.h2
-rw-r--r--fs/gfs2/dir.c44
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c97
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c12
-rw-r--r--fs/gfs2/main.c14
-rw-r--r--fs/gfs2/ops_fstype.c35
-rw-r--r--fs/gfs2/quota.c35
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/recovery.c54
-rw-r--r--fs/gfs2/recovery.h6
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/sys.c60
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd/journal.c7
-rw-r--r--fs/jbd/recovery.c11
-rw-r--r--fs/jbd2/checkpoint.c18
-rw-r--r--fs/jbd2/commit.c50
-rw-r--r--fs/jbd2/journal.c136
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/transaction.c242
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/dir.c127
-rw-r--r--fs/jffs2/fs.c7
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/libfs.c3
-rw-r--r--fs/mbcache.c5
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/namei.c6
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/Kconfig10
-rw-r--r--fs/nfs/callback_proc.c19
-rw-r--r--fs/nfs/client.c143
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h4
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/nfs/direct.c29
-rw-r--r--fs/nfs/file.c64
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c74
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/nfs2xdr.c7
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h57
-rw-r--r--fs/nfs/nfs4proc.c474
-rw-r--r--fs/nfs/nfs4renewd.c4
-rw-r--r--fs/nfs/nfs4state.c82
-rw-r--r--fs/nfs/nfs4xdr.c111
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/super.c26
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c39
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/bmap.c6
-rw-r--r--fs/nilfs2/bmap.h16
-rw-r--r--fs/nilfs2/bmap_union.h42
-rw-r--r--fs/nilfs2/btnode.c23
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c914
-rw-r--r--fs/nilfs2/btree.h14
-rw-r--r--fs/nilfs2/dir.c33
-rw-r--r--fs/nilfs2/direct.c96
-rw-r--r--fs/nilfs2/direct.h11
-rw-r--r--fs/nilfs2/gcinode.c17
-rw-r--r--fs/nilfs2/mdt.c1
-rw-r--r--fs/nilfs2/nilfs.h22
-rw-r--r--fs/nilfs2/page.c5
-rw-r--r--fs/nilfs2/page.h2
-rw-r--r--fs/nilfs2/recovery.c348
-rw-r--r--fs/nilfs2/segbuf.h26
-rw-r--r--fs/nilfs2/segment.c19
-rw-r--r--fs/nilfs2/segment.h12
-rw-r--r--fs/nilfs2/super.c341
-rw-r--r--fs/nilfs2/the_nilfs.c161
-rw-r--r--fs/nilfs2/the_nilfs.h23
-rw-r--r--fs/ocfs2/aops.c101
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c6
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c22
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c3
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c309
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/journal.c34
-rw-r--r--fs/ocfs2/localalloc.c7
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/xattr.c200
-rw-r--r--fs/open.c11
-rw-r--r--fs/partitions/check.c1
-rw-r--r--fs/partitions/ibm.c14
-rw-r--r--fs/pipe.c93
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/quota/dquot.c81
-rw-r--r--fs/quota/quota_tree.c85
-rw-r--r--fs/quota/quota_tree.h6
-rw-r--r--fs/quota/quota_v1.c3
-rw-r--r--fs/quota/quota_v2.c11
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/splice.c11
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/file.c3
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysfs/symlink.c26
-rw-r--r--fs/sysv/ialloc.c6
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/lpt.c14
-rw-r--r--fs/ubifs/lpt_commit.c2
-rw-r--r--fs/ubifs/recovery.c23
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c604
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h119
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c34
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c173
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c188
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h473
-rw-r--r--fs/xfs/quota/xfs_dquot.c114
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c44
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c10
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c144
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_alloc.c15
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c5
-rw-r--r--fs/xfs/xfs_attr.c91
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c327
-rw-r--r--fs/xfs/xfs_bmap.h37
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_buf_item.c228
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c20
-rw-r--r--fs/xfs/xfs_dfrag.c21
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c4
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fsops.c7
-rw-r--r--fs/xfs/xfs_ialloc.c146
-rw-r--r--fs/xfs/xfs_ialloc_btree.c4
-rw-r--r--fs/xfs/xfs_iget.c147
-rw-r--r--fs/xfs/xfs_inode.c207
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c273
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c293
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log.c16
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_recover.c55
-rw-r--r--fs/xfs/xfs_mount.c73
-rw-r--r--fs/xfs/xfs_mount.h71
-rw-r--r--fs/xfs/xfs_rename.c63
-rw-r--r--fs/xfs/xfs_rtalloc.c17
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_trans.c657
-rw-r--r--fs/xfs/xfs_trans.h528
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c75
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c76
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h18
-rw-r--r--fs/xfs/xfs_utils.c87
-rw-r--r--fs/xfs/xfs_utils.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c297
347 files changed, 10414 insertions, 9378 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 1a940ec7af6..91fba025fcb 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_9P_FS) := 9p.o
vfs_dir.o \
vfs_dentry.o \
v9fs.o \
- fid.o
+ fid.o \
+ xattr.o \
+ xattr_user.o
9p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 7317b39b281..35856368906 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -97,6 +97,34 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
return ret;
}
+/*
+ * We need to hold v9ses->rename_sem as long as we hold references
+ * to returned path array. Array element contain pointers to
+ * dentry names.
+ */
+static int build_path_from_dentry(struct v9fs_session_info *v9ses,
+ struct dentry *dentry, char ***names)
+{
+ int n = 0, i;
+ char **wnames;
+ struct dentry *ds;
+
+ for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
+ n++;
+
+ wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
+ if (!wnames)
+ goto err_out;
+
+ for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent)
+ wnames[i] = (char *)ds->d_name.name;
+
+ *names = wnames;
+ return n;
+err_out:
+ return -ENOMEM;
+}
+
/**
* v9fs_fid_lookup - lookup for a fid, try to walk if not found
* @dentry: dentry to look for fid in
@@ -112,7 +140,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
int i, n, l, clone, any, access;
u32 uid;
struct p9_fid *fid, *old_fid = NULL;
- struct dentry *d, *ds;
+ struct dentry *ds;
struct v9fs_session_info *v9ses;
char **wnames, *uname;
@@ -139,49 +167,62 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
-
+ /*
+ * we don't have a matching fid. To do a TWALK we need
+ * parent fid. We need to prevent rename when we want to
+ * look at the parent.
+ */
+ down_read(&v9ses->rename_sem);
ds = dentry->d_parent;
fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* walk from the root */
- n = 0;
- for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
- n++;
+ if (fid) {
+ /* Found the parent fid do a lookup with that */
+ fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
+ goto fid_out;
+ }
+ up_read(&v9ses->rename_sem);
- fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* the user is not attached to the fs yet */
- if (access == V9FS_ACCESS_SINGLE)
- return ERR_PTR(-EPERM);
+ /* start from the root and try to do a lookup */
+ fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any);
+ if (!fid) {
+ /* the user is not attached to the fs yet */
+ if (access == V9FS_ACCESS_SINGLE)
+ return ERR_PTR(-EPERM);
- if (v9fs_proto_dotu(v9ses))
+ if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses))
uname = NULL;
- else
- uname = v9ses->uname;
+ else
+ uname = v9ses->uname;
- fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
- v9ses->aname);
-
- if (IS_ERR(fid))
- return fid;
-
- v9fs_fid_add(ds, fid);
- }
- } else /* walk from the parent */
- n = 1;
+ fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
+ v9ses->aname);
+ if (IS_ERR(fid))
+ return fid;
- if (ds == dentry)
+ v9fs_fid_add(dentry->d_sb->s_root, fid);
+ }
+ /* If we are root ourself just return that */
+ if (dentry->d_sb->s_root == dentry)
return fid;
-
- wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
- if (!wnames)
- return ERR_PTR(-ENOMEM);
-
- for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent)
- wnames[i] = (char *) d->d_name.name;
-
+ /*
+ * Do a multipath walk with attached root.
+ * When walking parent we need to make sure we
+ * don't have a parallel rename happening
+ */
+ down_read(&v9ses->rename_sem);
+ n = build_path_from_dentry(v9ses, dentry, &wnames);
+ if (n < 0) {
+ fid = ERR_PTR(n);
+ goto err_out;
+ }
clone = 1;
i = 0;
while (i < n) {
l = min(n - i, P9_MAXWELEM);
+ /*
+ * We need to hold rename lock when doing a multipath
+ * walk to ensure none of the patch component change
+ */
fid = p9_client_walk(fid, l, &wnames[i], clone);
if (IS_ERR(fid)) {
if (old_fid) {
@@ -193,15 +234,17 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
p9_client_clunk(old_fid);
}
kfree(wnames);
- return fid;
+ goto err_out;
}
old_fid = fid;
i += l;
clone = 0;
}
-
kfree(wnames);
+fid_out:
v9fs_fid_add(dentry, fid);
+err_out:
+ up_read(&v9ses->rename_sem);
return fid;
}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8b86e92cd6..38dc0e06759 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -237,6 +237,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
__putname(v9ses->uname);
return ERR_PTR(-ENOMEM);
}
+ init_rwsem(&v9ses->rename_sem);
rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
if (rc) {
@@ -278,7 +279,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
/* for legacy mode, fall back to V9FS_ACCESS_ANY */
- if (!v9fs_proto_dotu(v9ses) &&
+ if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bec4d0bcb45..4c963c9fc41 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -104,6 +104,7 @@ struct v9fs_session_info {
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
+ struct rw_semaphore rename_sem;
};
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef4009d03..f47c6bbb01b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -55,6 +55,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode);
void v9fs_clear_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d61e3b28ce3..16c8a2a98c1 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf)
}
/**
- * v9fs_dir_readdir - read a directory
+ * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
* @filp: opened file structure
- * @dirent: directory structure ???
- * @filldir: function to populate directory structure ???
+ * @buflen: Length in bytes of buffer to allocate
*
*/
-static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
{
- int over;
- struct p9_wstat st;
- int err = 0;
- struct p9_fid *fid;
- int buflen;
- int reclen = 0;
struct p9_rdir *rdir;
+ struct p9_fid *fid;
+ int err = 0;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
fid = filp->private_data;
-
- buflen = fid->clnt->msize - P9_IOHDRSZ;
-
- /* allocate rdir on demand */
if (!fid->rdir) {
rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
@@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_unlock(&filp->f_dentry->d_lock);
kfree(rdir);
}
+exit:
+ return err;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ int over;
+ struct p9_wstat st;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ int reclen = 0;
+ struct p9_rdir *rdir;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_IOHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
rdir = (struct p9_rdir *) fid->rdir;
err = mutex_lock_interruptible(&rdir->mutex);
@@ -146,7 +166,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
while (rdir->head < rdir->tail) {
p9stat_init(&st);
err = p9stat_read(rdir->buf + rdir->head,
- buflen - rdir->head, &st,
+ rdir->tail - rdir->head, &st,
fid->clnt->proto_version);
if (err) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -176,6 +196,88 @@ exit:
return err;
}
+/**
+ * v9fs_dir_readdir_dotl - read a directory
+ * @filp: opened file structure
+ * @dirent: buffer to fill dirent structures
+ * @filldir: function to populate dirent structures
+ *
+ */
+static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int over;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ struct p9_rdir *rdir;
+ struct p9_dirent curdirent;
+ u64 oldoffset = 0;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_READDIRHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
+ rdir = (struct p9_rdir *) fid->rdir;
+
+ err = mutex_lock_interruptible(&rdir->mutex);
+ if (err)
+ return err;
+
+ while (err == 0) {
+ if (rdir->tail == rdir->head) {
+ err = p9_client_readdir(fid, rdir->buf, buflen,
+ filp->f_pos);
+ if (err <= 0)
+ goto unlock_and_exit;
+
+ rdir->head = 0;
+ rdir->tail = err;
+ }
+
+ while (rdir->head < rdir->tail) {
+
+ err = p9dirent_read(rdir->buf + rdir->head,
+ buflen - rdir->head, &curdirent,
+ fid->clnt->proto_version);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+ err = -EIO;
+ goto unlock_and_exit;
+ }
+
+ /* d_off in dirent structure tracks the offset into
+ * the next dirent in the dir. However, filldir()
+ * expects offset into the current dirent. Hence
+ * while calling filldir send the offset from the
+ * previous dirent structure.
+ */
+ over = filldir(dirent, curdirent.d_name,
+ strlen(curdirent.d_name),
+ oldoffset, v9fs_qid2ino(&curdirent.qid),
+ curdirent.d_type);
+ oldoffset = curdirent.d_off;
+
+ if (over) {
+ err = 0;
+ goto unlock_and_exit;
+ }
+
+ filp->f_pos = curdirent.d_off;
+ rdir->head += err;
+ }
+ }
+
+unlock_and_exit:
+ mutex_unlock(&rdir->mutex);
+exit:
+ return err;
+}
+
/**
* v9fs_dir_release - close a directory
@@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = {
const struct file_operations v9fs_dir_operations_dotl = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
- .readdir = v9fs_dir_readdir,
+ .readdir = v9fs_dir_readdir_dotl,
.open = v9fs_file_open,
.release = v9fs_dir_release,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2bedc6c94fc..e97c92bd6f1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
struct p9_fid *fid;
int omode;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
v9ses = v9fs_inode2v9ses(inode);
- omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
+ if (v9fs_proto_dotl(v9ses))
+ omode = file->f_flags;
+ else
+ omode = v9fs_uflags2omode(file->f_flags,
+ v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
fid = v9fs_fid_clone(file->f_path.dentry);
@@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
p9_client_clunk(fid);
return err;
}
- if (omode & P9_OTRUNC) {
+ if (file->f_flags & O_TRUNC) {
i_size_write(inode, 0);
inode->i_blocks = 0;
}
- if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
+ if ((file->f_flags & O_APPEND) &&
+ (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
generic_file_llseek(file, 0, SEEK_END);
}
@@ -139,7 +144,7 @@ ssize_t
v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
u64 offset)
{
- int n, total;
+ int n, total, size;
struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
@@ -147,6 +152,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
n = 0;
total = 0;
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
do {
n = p9_client_read(fid, data, udata, offset, count);
if (n <= 0)
@@ -160,7 +166,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
offset += n;
count -= n;
total += n;
- } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+ } while (count > 0 && n == size);
if (n < 0)
total = n;
@@ -183,11 +189,13 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
{
int ret;
struct p9_fid *fid;
+ size_t size;
P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
fid = filp->private_data;
- if (count > (fid->clnt->msize - P9_IOHDRSZ))
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
+ if (count > size)
ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
else
ret = p9_client_read(fid, NULL, udata, *offset, count);
@@ -224,9 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
fid = filp->private_data;
clnt = fid->clnt;
- rsize = fid->iounit;
- if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
- rsize = clnt->msize - P9_IOHDRSZ;
+ rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
do {
if (count < rsize)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b5ee1..6e94f3247ce 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -35,6 +35,7 @@
#include <linux/idr.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -42,6 +43,7 @@
#include "v9fs_vfs.h"
#include "fid.h"
#include "cache.h"
+#include "xattr.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -236,6 +238,41 @@ void v9fs_destroy_inode(struct inode *inode)
#endif
/**
+ * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
+ * new file system object. This checks the S_ISGID to determine the owning
+ * group of the new file system object.
+ */
+
+static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
+{
+ BUG_ON(dir_inode == NULL);
+
+ if (dir_inode->i_mode & S_ISGID) {
+ /* set_gid bit is set.*/
+ return dir_inode->i_gid;
+ }
+ return current_fsgid();
+}
+
+/**
+ * v9fs_dentry_from_dir_inode - helper function to get the dentry from
+ * dir inode.
+ *
+ */
+
+static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&dcache_lock);
+ /* Directory should have only one entry. */
+ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
* @mode: mode to setup inode with
@@ -267,7 +304,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFBLK:
case S_IFCHR:
case S_IFSOCK:
- if (!v9fs_proto_dotu(v9ses)) {
+ if (v9fs_proto_dotl(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
+ } else if (v9fs_proto_dotu(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations;
+ inode->i_fop = &v9fs_file_operations;
+ } else {
P9_DPRINTK(P9_DEBUG_ERROR,
"special files without extended mode\n");
err = -EINVAL;
@@ -396,23 +439,14 @@ void v9fs_clear_inode(struct inode *inode)
#endif
}
-/**
- * v9fs_inode_from_fid - populate an inode by issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-
static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
int err, umode;
- struct inode *ret;
+ struct inode *ret = NULL;
struct p9_wstat *st;
- ret = NULL;
st = p9_client_stat(fid);
if (IS_ERR(st))
return ERR_CAST(st);
@@ -433,15 +467,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
#endif
p9stat_free(st);
kfree(st);
-
return ret;
-
error:
p9stat_free(st);
kfree(st);
return ERR_PTR(err);
}
+static struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct inode *ret = NULL;
+ int err;
+ struct p9_stat_dotl *st;
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ ret = v9fs_get_inode(sb, st->st_mode);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ goto error;
+ }
+
+ v9fs_stat2inode_dotl(st, ret);
+ ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
+ kfree(st);
+ return ret;
+error:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_dotl(v9ses, fid, sb);
+ else
+ return v9fs_inode(v9ses, fid, sb);
+}
+
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
@@ -563,6 +644,118 @@ error:
}
/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: directory inode that is being created
+ * @dentry: dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ int err = 0;
+ char *name = NULL;
+ gid_t gid;
+ int flags;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL;
+ struct p9_fid *dfid, *ofid;
+ struct file *filp;
+ struct p9_qid qid;
+ struct inode *inode;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ if (nd && nd->flags & LOOKUP_OPEN)
+ flags = nd->intent.open.flags - 1;
+ else
+ flags = O_RDWR;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+ "mode:0x%x\n", name, flags, mode);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ /* clone a fid to use for creation */
+ ofid = p9_client_walk(dfid, 0, NULL, 1);
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_open_dotl failed in creat %d\n",
+ err);
+ goto error;
+ }
+
+ /* No need to populate the inode if we are not opening the file AND
+ * not in cached mode.
+ */
+ if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
+ /* Not in cached mode. No need to populate inode with stat */
+ dentry->d_op = &v9fs_dentry_operations;
+ p9_client_clunk(ofid);
+ d_instantiate(dentry, NULL);
+ return 0;
+ }
+
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
+ /* if we are opening a file, assign the open fid to the file */
+ if (nd && nd->flags & LOOKUP_OPEN) {
+ filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ if (IS_ERR(filp)) {
+ p9_client_clunk(ofid);
+ return PTR_ERR(filp);
+ }
+ filp->private_data = ofid;
+ } else
+ p9_client_clunk(ofid);
+
+ return 0;
+
+error:
+ if (ofid)
+ p9_client_clunk(ofid);
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
* v9fs_vfs_create - VFS hook to create files
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
@@ -652,6 +845,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ gid_t gid;
+ char *name;
+ struct inode *inode;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ err = 0;
+ v9ses = v9fs_inode2v9ses(dir);
+
+ mode |= S_IFDIR;
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+ err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ }
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
/**
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
@@ -678,6 +948,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
+ /* We can walk d_parent because we hold the dir->i_mutex */
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid))
return ERR_CAST(dfid);
@@ -785,27 +1056,33 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto clunk_olddir;
}
+ down_write(&v9ses->rename_sem);
if (v9fs_proto_dotl(v9ses)) {
retval = p9_client_rename(oldfid, newdirfid,
(char *) new_dentry->d_name.name);
if (retval != -ENOSYS)
goto clunk_newdir;
}
+ if (old_dentry->d_parent != new_dentry->d_parent) {
+ /*
+ * 9P .u can only handle file rename in the same directory
+ */
- /* 9P can only handle file rename in the same directory */
- if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
P9_DPRINTK(P9_DEBUG_ERROR,
"old dir and new dir are different\n");
retval = -EXDEV;
goto clunk_newdir;
}
-
v9fs_blank_wstat(&wstat);
wstat.muid = v9ses->uname;
wstat.name = (char *) new_dentry->d_name.name;
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
+ if (!retval)
+ /* successful rename */
+ d_move(old_dentry, new_dentry);
+ up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
clunk_olddir:
@@ -853,6 +1130,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ err = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return simple_getattr(mnt, dentry, stat);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ /* Ask for all the fields in stat structure. Server will return
+ * whatever it supports
+ */
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, dentry->d_inode);
+ generic_fillattr(dentry->d_inode, stat);
+ /* Change block size to what the server returned */
+ stat->blksize = st->st_blksize;
+
+ kfree(st);
+ return 0;
+}
+
/**
* v9fs_vfs_setattr - set file metadata
* @dentry: file whose metadata to set
@@ -903,6 +1216,49 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
}
/**
+ * v9fs_vfs_setattr_dotl - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+ int retval;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_iattr_dotl p9attr;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ retval = inode_change_ok(dentry->d_inode, iattr);
+ if (retval)
+ return retval;
+
+ p9attr.valid = iattr->ia_valid;
+ p9attr.mode = iattr->ia_mode;
+ p9attr.uid = iattr->ia_uid;
+ p9attr.gid = iattr->ia_gid;
+ p9attr.size = iattr->ia_size;
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+
+ retval = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval >= 0)
+ retval = inode_setattr(dentry->d_inode, iattr);
+
+ return retval;
+}
+
+/**
* v9fs_stat2inode - populate an inode structure with mistat info
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
@@ -980,6 +1336,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
}
/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+ if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode->i_uid = stat->st_uid;
+ inode->i_gid = stat->st_gid;
+ inode->i_nlink = stat->st_nlink;
+ inode->i_mode = stat->st_mode;
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+ i_size_write(inode, stat->st_size);
+ inode->i_blocks = stat->st_blocks;
+ } else {
+ if (stat->st_result_mask & P9_STATS_ATIME) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_MTIME) {
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_CTIME) {
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_UID)
+ inode->i_uid = stat->st_uid;
+ if (stat->st_result_mask & P9_STATS_GID)
+ inode->i_gid = stat->st_gid;
+ if (stat->st_result_mask & P9_STATS_NLINK)
+ inode->i_nlink = stat->st_nlink;
+ if (stat->st_result_mask & P9_STATS_MODE) {
+ inode->i_mode = stat->st_mode;
+ if ((S_ISBLK(inode->i_mode)) ||
+ (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ }
+ if (stat->st_result_mask & P9_STATS_RDEV)
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+ if (stat->st_result_mask & P9_STATS_SIZE)
+ i_size_write(inode, stat->st_size);
+ if (stat->st_result_mask & P9_STATS_BLOCKS)
+ inode->i_blocks = stat->st_blocks;
+ }
+ if (stat->st_result_mask & P9_STATS_GEN)
+ inode->i_generation = stat->st_gen;
+
+ /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+ * because the inode structure does not have fields for them.
+ */
+}
+
+/**
* v9fs_qid2ino - convert qid into inode number
* @qid: qid to hash
*
@@ -1022,7 +1449,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
if (IS_ERR(fid))
return PTR_ERR(fid);
- if (!v9fs_proto_dotu(v9ses))
+ if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
return -EBADF;
st = p9_client_stat(fid);
@@ -1128,6 +1555,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
}
/**
+ * v9fs_vfs_symlink_dotl - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See Also: 9P2000.L RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct inode *inode;
+ struct p9_qid qid;
+ char *name;
+ int err;
+ gid_t gid;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+ dir->i_ino, name, symname);
+ v9ses = v9fs_inode2v9ses(dir);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
+ goto error;
+ }
+
+ /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+ err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ goto error;
+ }
+
+ if (v9ses->cache) {
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /* Not in cached mode. No need to populate inode with stat */
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ return err;
+}
+
+/**
* v9fs_vfs_symlink - helper function to create symlinks
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
@@ -1186,6 +1706,76 @@ clunk_fid:
}
/**
+ * v9fs_vfs_link_dotl - create a hardlink for dotl
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+static int
+v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *dfid, *oldfid;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name,
+ dentry->d_name.name);
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid))
+ return PTR_ERR(dfid);
+
+ oldfid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ return err;
+ }
+
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, old_dentry->d_inode);
+
+ kfree(st);
+ } else {
+ /* Caching disabled. No need to get upto date stat info.
+ * This dentry will be released immediately. So, just i_count++
+ */
+ atomic_inc(&old_dentry->d_inode->i_count);
+ }
+
+ dentry->d_op = old_dentry->d_op;
+ d_instantiate(dentry, old_dentry->d_inode);
+
+ return err;
+}
+
+/**
* v9fs_vfs_mknod - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
@@ -1230,6 +1820,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+/**
+ * v9fs_vfs_mknod_dotl - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ dev_t rdev)
+{
+ int err;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ struct inode *inode;
+ gid_t gid;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS,
+ " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate inode with stat.
+ * socket syscall returns a fd, so we need instantiate
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1238,24 +1922,29 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.unlink = v9fs_vfs_unlink,
.mkdir = v9fs_vfs_mkdir,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
static const struct inode_operations v9fs_dir_inode_operations_dotl = {
- .create = v9fs_vfs_create,
+ .create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
- .symlink = v9fs_vfs_symlink,
- .link = v9fs_vfs_link,
+ .link = v9fs_vfs_link_dotl,
+ .symlink = v9fs_vfs_symlink_dotl,
.unlink = v9fs_vfs_unlink,
- .mkdir = v9fs_vfs_mkdir,
+ .mkdir = v9fs_vfs_mkdir_dotl,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+
};
static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1276,8 +1965,12 @@ static const struct inode_operations v9fs_file_inode_operations = {
};
static const struct inode_operations v9fs_file_inode_operations_dotl = {
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -1292,6 +1985,10 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
.put_link = v9fs_vfs_put_link,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d020436..4b9ede0b41b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,6 +45,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "xattr.h"
static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
@@ -77,9 +78,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
sb->s_blocksize = 1 << sb->s_blocksize_bits;
sb->s_magic = V9FS_MAGIC;
- if (v9fs_proto_dotl(v9ses))
+ if (v9fs_proto_dotl(v9ses)) {
sb->s_op = &v9fs_super_ops_dotl;
- else
+ sb->s_xattr = v9fs_xattr_handlers;
+ } else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
@@ -107,7 +109,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
struct inode *inode = NULL;
struct dentry *root = NULL;
struct v9fs_session_info *v9ses = NULL;
- struct p9_wstat *st = NULL;
int mode = S_IRWXUGO | S_ISVTX;
struct p9_fid *fid;
int retval = 0;
@@ -124,16 +125,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto close_session;
}
- st = p9_client_stat(fid);
- if (IS_ERR(st)) {
- retval = PTR_ERR(st);
- goto clunk_fid;
- }
-
sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
if (IS_ERR(sb)) {
retval = PTR_ERR(sb);
- goto free_stat;
+ goto clunk_fid;
}
v9fs_fill_super(sb, v9ses, flags, data);
@@ -151,22 +146,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
}
sb->s_root = root;
- root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, root->d_inode, sb);
+ if (v9fs_proto_dotl(v9ses)) {
+ struct p9_stat_dotl *st = NULL;
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ v9fs_stat2inode_dotl(st, root->d_inode);
+ kfree(st);
+ } else {
+ struct p9_wstat *st = NULL;
+ st = p9_client_stat(fid);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, root->d_inode, sb);
+
+ p9stat_free(st);
+ kfree(st);
+ }
v9fs_fid_add(root, fid);
- p9stat_free(st);
- kfree(st);
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
simple_set_mnt(mnt, sb);
return 0;
-free_stat:
- p9stat_free(st);
- kfree(st);
-
clunk_fid:
p9_client_clunk(fid);
@@ -176,8 +187,6 @@ close_session:
return retval;
release_sb:
- p9stat_free(st);
- kfree(st);
deactivate_locked_super(sb);
return retval;
}
@@ -278,4 +287,5 @@ struct file_system_type v9fs_fs_type = {
.get_sb = v9fs_get_sb,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
new file mode 100644
index 00000000000..f88e5c2dc87
--- /dev/null
+++ b/fs/9p/xattr.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+
+#include "fid.h"
+#include "xattr.h"
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ ssize_t retval;
+ int msize, read_count;
+ u64 offset = 0, attr_size;
+ struct p9_fid *fid, *attr_fid;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+ __func__, name, buffer_size);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
+ if (IS_ERR(attr_fid)) {
+ retval = PTR_ERR(attr_fid);
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_attrwalk failed %zd\n", retval);
+ attr_fid = NULL;
+ goto error;
+ }
+ if (!buffer_size) {
+ /* request to get the attr_size */
+ retval = attr_size;
+ goto error;
+ }
+ if (attr_size > buffer_size) {
+ retval = -ERANGE;
+ goto error;
+ }
+ msize = attr_fid->clnt->msize;
+ while (attr_size) {
+ if (attr_size > (msize - P9_IOHDRSZ))
+ read_count = msize - P9_IOHDRSZ;
+ else
+ read_count = attr_size;
+ read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
+ NULL, offset, read_count);
+ if (read_count < 0) {
+ /* error in xattr read */
+ retval = read_count;
+ goto error;
+ }
+ offset += read_count;
+ attr_size -= read_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (attr_fid)
+ p9_client_clunk(attr_fid);
+ return retval;
+
+}
+
+/*
+ * v9fs_xattr_set()
+ *
+ * Create, replace or remove an extended attribute for this inode. Buffer
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int v9fs_xattr_set(struct dentry *dentry, const char *name,
+ const void *value, size_t value_len, int flags)
+{
+ u64 offset = 0;
+ int retval, msize, write_count;
+ struct p9_fid *fid = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n",
+ __func__, name, value_len, flags);
+
+ fid = v9fs_fid_clone(dentry);
+ if (IS_ERR(fid)) {
+ retval = PTR_ERR(fid);
+ fid = NULL;
+ goto error;
+ }
+ /*
+ * On success fid points to xattr
+ */
+ retval = p9_client_xattrcreate(fid, name, value_len, flags);
+ if (retval < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_xattrcreate failed %d\n", retval);
+ goto error;
+ }
+ msize = fid->clnt->msize;;
+ while (value_len) {
+ if (value_len > (msize - P9_IOHDRSZ))
+ write_count = msize - P9_IOHDRSZ;
+ else
+ write_count = value_len;
+ write_count = p9_client_write(fid, ((char *)value)+offset,
+ NULL, offset, write_count);
+ if (write_count < 0) {
+ /* error in xattr write */
+ retval = write_count;
+ goto error;
+ }
+ offset += write_count;
+ value_len -= write_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (fid)
+ retval = p9_client_clunk(fid);
+ return retval;
+}
+
+ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
+}
+
+const struct xattr_handler *v9fs_xattr_handlers[] = {
+ &v9fs_xattr_user_handler,
+ NULL
+};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
new file mode 100644
index 00000000000..9ddf672ae5c
--- /dev/null
+++ b/fs/9p/xattr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_XATTR_H
+#define FS_9P_XATTR_H
+
+#include <linux/xattr.h>
+
+extern const struct xattr_handler *v9fs_xattr_handlers[];
+extern struct xattr_handler v9fs_xattr_user_handler;
+
+extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
+ void *, size_t);
+extern int v9fs_xattr_set(struct dentry *, const char *,
+ const void *, size_t, int);
+extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
+#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c
new file mode 100644
index 00000000000..d0b701b7208
--- /dev/null
+++ b/fs/9p/xattr_user.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "xattr.h"
+
+static int v9fs_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name+prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_get(dentry, full_name, buffer, size);
+ kfree(full_name);
+ return retval;
+}
+
+static int v9fs_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name + prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
+ kfree(full_name);
+ return retval;
+}
+
+struct xattr_handler v9fs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = v9fs_xattr_user_get,
+ .set = v9fs_xattr_user_set,
+};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f85b594761..3d185308ec8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -64,7 +64,7 @@ source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
config CUSE
- tristate "Character device in Userpace support"
+ tristate "Character device in Userspace support"
depends on FUSE_FS
help
This FUSE extension allows character devices to be
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 5c4e61d3c77..8f975f25b48 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -2,6 +2,7 @@ config AFS_FS
tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select AF_RXRPC
+ select DNS_RESOLVER
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index e19c13f059e..ffea35c6387 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/key.h>
#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
#include <linux/sched.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
@@ -36,6 +37,8 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
struct key *key;
size_t namelen;
char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
+ char *dvllist = NULL, *_vllist = NULL;
+ char delimiter = ':';
int ret;
_enter("%s,%s", name, vllist);
@@ -43,8 +46,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
namelen = strlen(name);
- if (namelen > AFS_MAXCELLNAME)
+ if (namelen > AFS_MAXCELLNAME) {
+ _leave(" = -ENAMETOOLONG");
return ERR_PTR(-ENAMETOOLONG);
+ }
/* allocate and initialise a cell record */
cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
@@ -64,15 +69,31 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
INIT_LIST_HEAD(&cell->vl_list);
spin_lock_init(&cell->vl_lock);
+ /* if the ip address is invalid, try dns query */
+ if (!vllist || strlen(vllist) < 7) {
+ ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
+ if (ret < 0) {
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+ }
+ _vllist = dvllist;
+
+ /* change the delimiter for user-space reply */
+ delimiter = ',';
+
+ } else {
+ _vllist = vllist;
+ }
+
/* fill in the VL server list from the rest of the string */
do {
unsigned a, b, c, d;
- next = strchr(vllist, ':');
+ next = strchr(_vllist, delimiter);
if (next)
*next++ = 0;
- if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
+ if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
goto bad_address;
if (a > 255 || b > 255 || c > 255 || d > 255)
@@ -81,7 +102,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
cell->vl_addrs[cell->vl_naddrs++].s_addr =
htonl((a << 24) | (b << 16) | (c << 8) | d);
- } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next));
+ } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
/* create a key to represent an anonymous user */
memcpy(keyname, "afs@", 4);
@@ -110,6 +131,7 @@ bad_address:
ret = -EINVAL;
error:
key_put(cell->anonymous_key);
+ kfree(dvllist);
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -201,14 +223,12 @@ int afs_cell_init(char *rootcell)
}
cp = strchr(rootcell, ':');
- if (!cp) {
- printk(KERN_ERR "kAFS: no VL server IP addresses specified\n");
- _leave(" = -EINVAL");
- return -EINVAL;
- }
+ if (!cp)
+ _debug("kAFS: no VL server IP addresses specified");
+ else
+ *cp++ = 0;
/* allocate a cell record for the root cell */
- *cp++ = 0;
new_root = afs_cell_create(rootcell, cp);
if (IS_ERR(new_root)) {
_leave(" = %ld", PTR_ERR(new_root));
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 66d54d348c5..cfd1cbe25b2 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -111,6 +111,8 @@ static int __init afs_init(void)
/* initialise the callback update process */
ret = afs_callback_update_init();
+ if (ret < 0)
+ goto error_callback_update_init;
/* create the RxRPC transport */
ret = afs_open_socket();
@@ -127,15 +129,16 @@ static int __init afs_init(void)
error_fs:
afs_close_socket();
error_open_socket:
+ afs_callback_update_kill();
+error_callback_update_init:
+ afs_vlocation_purge();
error_vl_update_init:
+ afs_cell_purge();
error_cell_init:
#ifdef CONFIG_AFS_FSCACHE
fscache_unregister_netfs(&afs_cache_netfs);
error_cache:
#endif
- afs_callback_update_kill();
- afs_vlocation_purge();
- afs_cell_purge();
afs_proc_cleanup();
rcu_barrier();
printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f4909951667..9fdc7fe3a7b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
memcpy(&server->addr, addr, sizeof(struct in_addr));
server->addr.s_addr = addr->s_addr;
+ _leave(" = %p{%d}", server, atomic_read(&server->usage));
+ } else {
+ _leave(" = NULL [nomem]");
}
-
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d..722743b152d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/aio.c b/fs/aio.c
index 1ccf25cef1f..3006b5bc33d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1277,7 +1277,7 @@ out:
/* sys_io_destroy:
* Destroy the aio_context specified. May cancel any outstanding
* AIOs and block on completion. Will fail with -ENOSYS if not
- * implemented. May fail with -EFAULT if the context pointed to
+ * implemented. May fail with -EINVAL if the context pointed to
* is invalid.
*/
SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
@@ -1795,15 +1795,16 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
/* io_getevents:
* Attempts to read at least min_nr events and up to nr events from
- * the completion queue for the aio_context specified by ctx_id. May
- * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range,
- * if nr is out of range, if when is out of range. May fail with
- * -EFAULT if any of the memory specified to is invalid. May return
- * 0 or < min_nr if no events are available and the timeout specified
- * by when has elapsed, where when == NULL specifies an infinite
- * timeout. Note that the timeout pointed to by when is relative and
- * will be updated if not NULL and the operation blocks. Will fail
- * with -ENOSYS if not implemented.
+ * the completion queue for the aio_context specified by ctx_id. If
+ * it succeeds, the number of read events is returned. May fail with
+ * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
+ * out of range, if timeout is out of range. May fail with -EFAULT
+ * if any of the memory specified is invalid. May return 0 or
+ * < min_nr if the timeout specified by timeout has elapsed
+ * before sufficient events are available, where timeout == NULL
+ * specifies an infinite timeout. Note that the timeout pointed to by
+ * timeout is relative and will be updated if not NULL and the
+ * operation blocks. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 34ddda888e6..dc39d282488 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -436,7 +436,7 @@ befs_init_inodecache(void)
init_once);
if (befs_inode_cachep == NULL) {
printk(KERN_ERR "befs_init_inodecache: "
- "Couldn't initalize inode slabcache\n");
+ "Couldn't initialize inode slabcache\n");
return -ENOMEM;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d7..63039ed9576 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
/* clear any space allocated but not loaded */
if (phdr->p_filesz < phdr->p_memsz) {
- ret = clear_user((void *) (seg->addr + phdr->p_filesz),
- phdr->p_memsz - phdr->p_filesz);
- if (ret)
- return ret;
+ if (clear_user((void *) (seg->addr + phdr->p_filesz),
+ phdr->p_memsz - phdr->p_filesz))
+ return -EFAULT;
}
if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, delta_vaddr;
- int loop, dvset, ret;
+ int loop, dvset;
load_addr = params->load_addr;
delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
* PT_LOAD */
if (prot & PROT_WRITE && disp > 0) {
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
- ret = clear_user((void __user *) maddr, disp);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr, disp))
+ return -EFAULT;
maddr += disp;
}
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (prot & PROT_WRITE && excess1 > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess1);
- ret = clear_user((void __user *) maddr + phdr->p_filesz,
- excess1);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr + phdr->p_filesz,
+ excess1))
+ return -EFAULT;
}
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
- ret = clear_user((void *) maddr + phdr->p_filesz, excess);
- if (ret)
- return ret;
+ if (clear_user((void *) maddr + phdr->p_filesz, excess))
+ return -EFAULT;
}
#endif
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d..811384bec8d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
#endif
/*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_DATA_ALIGN (sizeof(void *))
-#endif
+#define FLAT_DATA_ALIGN (0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
+ */
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
sp = (unsigned long *)p;
sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
envp = argv + (argc + 1);
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, data_len + extra);
+ do_munmap(current->mm, realdatastart, len);
ret = result;
goto err;
}
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
+ stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a..b3171fb0dc9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -681,8 +681,8 @@ retry:
if (!bd_may_claim(bdev, whole, holder))
return -EBUSY;
- /* if someone else is claiming, wait for it to finish */
- if (whole->bd_claiming && whole->bd_claiming != holder) {
+ /* if claiming is already in progress, wait for it to finish */
+ if (whole->bd_claiming) {
wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
DEFINE_WAIT(wait);
@@ -706,8 +706,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -734,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -782,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -806,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1476,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1712,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d58..2222d161c7b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return acl;
set_cached_acl(inode, type, acl);
}
kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
+ if (!is_owner_or_cap(dentry->d_inode))
+ return -EPERM;
+
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
if (value) {
acl = posix_acl_from_xattr(value, size);
if (acl == NULL) {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22ca..34f7c375567 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
+ log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!log_tree_root) {
+ err = -ENOMEM;
+ goto fail_trans_kthread;
+ }
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (IS_ERR(fs_info->fs_root)) {
+ err = PTR_ERR(fs_info->fs_root);
+ goto fail_trans_kthread;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991..32d094002a5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- BUG_ON(block_rsv->space_info != cache->space_info);
+ if (block_rsv->space_info != cache->space_info)
+ goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec5..d74e6af9b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a1..e354c33df08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
- if (file && file->private_data)
+ if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
- vma->vm_ops = &btrfs_file_vm_ops;
+ struct address_space *mapping = filp->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
file_accessed(filp);
+ vma->vm_ops = &btrfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+
return 0;
}
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2..1bff92ad474 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
int ret;
- u64 refs;
+ u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26d..9254b3d58db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out;
+ goto out_up_write;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -1845,7 +1855,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
dir_id, "default", 7, 1);
- if (!di) {
+ if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e56923..b37d723b9d4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
- root = find_tree_root(rc, eb, ref0);
- if (!root->ref_cows)
- cur->cowonly = 1;
if (key.objectid == key.offset) {
+ root = find_tree_root(rc, eb, ref0);
if (root && !should_ignore_root(root))
cur->root = root;
else
list_add(&cur->list, &useless);
break;
}
+ if (is_cowonly_root(btrfs_ref_root_v0(eb,
+ ref0)))
+ cur->cowonly = 1;
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd97264..2d958be761c 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_path *path;
int ret;
- u32 refs;
struct btrfs_root_item *ri;
struct extent_buffer *leaf;
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
- refs = btrfs_disk_root_refs(leaf, ri);
- BUG_ON(refs != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc962..f2393b39031 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ if (IS_ERR(di))
+ return ERR_CAST(di);
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
location.offset = 0;
inode = btrfs_iget(sb, &location, new_root, &new);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/*
* If we're just mounting the root most subvol put the inode and return
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a7840bf42..42c7fafc8bf 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
printk(KERN_ERR "%sobject: OBJ%x\n",
prefix, object->fscache.debug_id);
- printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n",
+ printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
prefix, fscache_object_states[object->fscache.state],
- object->fscache.flags, object->fscache.work.flags,
+ object->fscache.flags, work_busy(&object->fscache.work),
object->fscache.events,
object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
@@ -212,7 +212,7 @@ wait_for_old_object:
/* if the object we're waiting for is queued for processing,
* then just put ourselves on the queue behind it */
- if (slow_work_is_queued(&xobject->fscache.work)) {
+ if (work_pending(&xobject->fscache.work)) {
_debug("queue OBJ%x behind OBJ%x immediately",
object->fscache.debug_id,
xobject->fscache.debug_id);
@@ -220,8 +220,7 @@ wait_for_old_object:
}
/* otherwise we sleep until either the object we're waiting for
- * is done, or the slow-work facility wants the thread back to
- * do other work */
+ * is done, or the fscache_object is congested */
wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
init_wait(&wait);
requeue = false;
@@ -229,8 +228,8 @@ wait_for_old_object:
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
break;
- requeue = slow_work_sleep_till_thread_needed(
- &object->fscache.work, &timeout);
+
+ requeue = fscache_object_sleep_till_congested(&timeout);
} while (timeout > 0 && !requeue);
finish_wait(wq, &wait);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0f0d41fbb03..0e3c0924cc3 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
- op->op.flags |= FSCACHE_OP_FAST;
+ op->op.flags |= FSCACHE_OP_ASYNC;
op->op.processor = cachefiles_read_copier;
pagevec_init(&pagevec, 0);
@@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
pagevec_init(&pagevec, 0);
op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
- op->op.flags |= FSCACHE_OP_FAST;
+ op->op.flags |= FSCACHE_OP_ASYNC;
op->op.processor = cachefiles_read_copier;
INIT_LIST_HEAD(&backpages);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 04b8280582a..bc87b9c1d27 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -2,7 +2,7 @@ config CEPH_FS
tristate "Ceph distributed file system (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select LIBCRC32C
- select CONFIG_CRYPTO_AES
+ select CRYPTO_AES
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ff..6d44053ecff 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
kfree(ac->private);
ac->private = NULL;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a30644..b81be9a5648 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -621,7 +627,7 @@ retry:
if (fmode >= 0)
__ceph_get_fmode(ci, fmode);
spin_unlock(&inode->i_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
@@ -981,6 +987,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1040,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -1167,7 +1181,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
}
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return delayed;
}
@@ -2139,7 +2153,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
else if (flushsnaps)
ceph_flush_snaps(ci);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (put)
iput(inode);
}
@@ -2215,7 +2229,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
iput(inode);
} else if (complete_capsnap) {
ceph_flush_snaps(ci);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
if (drop_capsnap)
iput(inode);
@@ -2391,7 +2405,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if (queue_invalidate)
ceph_queue_invalidate(inode);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
@@ -2446,7 +2460,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
struct ceph_inode_info,
i_flushing_item)->vfs_inode);
mdsc->num_cap_flushing--;
- wake_up(&mdsc->cap_flushing_wq);
+ wake_up_all(&mdsc->cap_flushing_wq);
dout(" inode %p now !flushing\n", inode);
if (ci->i_dirty_caps == 0) {
@@ -2458,7 +2472,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
}
}
spin_unlock(&mdsc->cap_dirty_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
out:
spin_unlock(&inode->i_lock);
@@ -2655,7 +2669,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2675,6 +2689,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2689,6 +2704,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2714,7 +2741,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
@@ -2865,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
@@ -2956,6 +2984,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
memcpy(*p, dentry->d_name.name, dentry->d_name.len);
*p += dentry->d_name.len;
rel->dname_seq = cpu_to_le32(di->lease_seq);
+ __ceph_mdsc_drop_dentry_lease(dentry);
}
spin_unlock(&dentry->d_lock);
return ret;
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb654..a4eec133258 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066c..f2f5332ddbb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f85719310db..f94ed3c7f6a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -266,6 +266,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_lock(&inode->i_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_opt(client, NOASYNCREADDIR) &&
+ ceph_snap(inode) != CEPH_SNAPDIR &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
err = __dcache_readdir(filp, dirent, filldir);
@@ -1013,18 +1014,22 @@ out_touch:
/*
* When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen.
+ * of the current dir gen or if this is in the snapshot namespace.
*/
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
struct inode *parent_inode = dentry->d_parent->d_inode;
+ u64 snapid = ceph_snap(parent_inode);
- if (parent_inode) {
+ dout("dentry_release %p parent %p\n", dentry, parent_inode);
+
+ if (parent_inode && snapid != CEPH_SNAPDIR) {
struct ceph_inode_info *ci = ceph_inode(parent_inode);
spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen) {
+ if (ci->i_shared_gen == di->lease_shared_gen ||
+ snapid <= CEPH_MAXSNAP) {
dout(" clearing %p complete (d_release)\n",
parent_inode);
ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
@@ -1241,7 +1246,9 @@ struct dentry_operations ceph_dentry_ops = {
struct dentry_operations ceph_snapdir_dentry_ops = {
.d_revalidate = ceph_snapdir_d_revalidate,
+ .d_release = ceph_dentry_release,
};
struct dentry_operations ceph_snap_dentry_ops = {
+ .d_release = ceph_dentry_release,
};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6251a1574b9..7c08698fad3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file)
kmem_cache_free(ceph_file_cachep, cf);
/* wake up anyone waiting for caps on this inode */
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d36..389f9dbd994 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1199,8 +1199,10 @@ retry_lookup:
goto out;
}
err = ceph_init_dentry(dn);
- if (err < 0)
+ if (err < 0) {
+ dput(dn);
goto out;
+ }
} else if (dn->d_inode &&
(ceph_ino(dn->d_inode) != vino.ino ||
ceph_snap(dn->d_inode) != vino.snap)) {
@@ -1234,18 +1236,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
@@ -1494,7 +1501,7 @@ retry:
if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cb..dd440bd438a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -868,7 +868,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
{
struct ceph_inode_info *ci = ceph_inode(inode);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (arg) {
spin_lock(&inode->i_lock);
ci->i_wanted_max_size = 0;
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra)
{
struct ceph_msg *msg;
struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
+ /* make note of release offset, in case we need to replay */
+ req->r_request_release_offset = p - msg->front.iov_base;
+
/* cap releases */
releases = 0;
if (req->r_inode_drop)
@@ -1561,7 +1564,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
if (req->r_callback)
req->r_callback(mdsc, req);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
}
/*
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+ if (req->r_got_unsafe) {
+ /*
+ * Replay. Do not regenerate message (and rebuild
+ * paths, etc.); just use the original message.
+ * Rebuilding paths will break for renames because
+ * d_move mangles the src name.
+ */
+ msg = req->r_request;
+ rhead = msg->front.iov_base;
+
+ flags = le32_to_cpu(rhead->flags);
+ flags |= CEPH_MDS_FLAG_REPLAY;
+ rhead->flags = cpu_to_le32(flags);
+
+ if (req->r_target_inode)
+ rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+ rhead->num_retry = req->r_attempts - 1;
+
+ /* remove cap/dentry releases from message */
+ rhead->num_releases = 0;
+ msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+ msg->front.iov_len = req->r_request_release_offset;
+ return 0;
+ }
+
if (req->r_request) {
ceph_msg_put(req->r_request);
req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
+ rhead->ino = 0;
dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
- if (req->r_target_inode && req->r_got_unsafe)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- else
- rhead->ino = 0;
return 0;
}
@@ -1907,7 +1932,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete(&req->r_safe_completion);
+ complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -1922,7 +1947,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
- complete(&mdsc->safe_umount_waiters);
+ complete_all(&mdsc->safe_umount_waiters);
mutex_unlock(&mdsc->mutex);
goto out;
}
@@ -1980,7 +2005,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session, -1);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2101,7 +2126,7 @@ static void handle_session(struct ceph_mds_session *session,
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
wake = 1; /* for good measure */
- complete(&mdsc->session_close_waiters);
+ complete_all(&mdsc->session_close_waiters);
kick_requests(mdsc, mds);
break;
@@ -2433,6 +2458,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2446,6 +2472,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2483,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2482,7 +2510,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2496,7 +2525,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2686,10 +2715,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s, -1);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -2779,6 +2808,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f121..952410c60d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
int r_err;
@@ -322,6 +323,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863..15167b2daa5 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
* nicely render a sockaddr as a string.
*/
#define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
int i;
char *s;
struct sockaddr_in *in4 = (void *)ss;
- unsigned char *quad = (void *)&in4->sin_addr.s_addr;
struct sockaddr_in6 *in6 = (void *)ss;
spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
switch (ss->ss_family) {
case AF_INET:
- sprintf(s, "%u.%u.%u.%u:%u",
- (unsigned int)quad[0],
- (unsigned int)quad[1],
- (unsigned int)quad[2],
- (unsigned int)quad[3],
- (unsigned int)ntohs(in4->sin_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+ (unsigned int)ntohs(in4->sin_port));
break;
case AF_INET6:
- sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
- in6->sin6_addr.s6_addr16[0],
- in6->sin6_addr.s6_addr16[1],
- in6->sin6_addr.s6_addr16[2],
- in6->sin6_addr.s6_addr16[3],
- in6->sin6_addr.s6_addr16[4],
- in6->sin6_addr.s6_addr16[5],
- in6->sin6_addr.s6_addr16[6],
- in6->sin6_addr.s6_addr16[7],
- (unsigned int)ntohs(in6->sin6_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+ (unsigned int)ntohs(in6->sin6_port));
break;
default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
*/
static struct socket *ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
if (ret)
return ERR_PTR(ret);
con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
- ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
@@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
- ',', &ipend)) {
+ delim, &ipend))
ss->ss_family = AF_INET;
- } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
- ',', &ipend)) {
+ else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+ delim, &ipend))
ss->ss_family = AF_INET6;
- } else {
+ else
goto bad;
- }
p = ipend;
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
/* port? */
if (p < end && *p == ':') {
port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
return 0;
bad:
- pr_err("parse_ips bad ip '%s'\n", c);
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
@@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -2013,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p\n", con, msg);
+ dout("con_revoke %p msg %p - was on queue\n", con, msg);
list_del_init(&msg->list_head);
ceph_msg_put(msg);
msg->hdr.seq = 0;
- if (con->out_msg == msg) {
- ceph_msg_put(con->out_msg);
- con->out_msg = NULL;
- }
+ }
+ if (con->out_msg == msg) {
+ dout("con_revoke %p msg %p - was sending\n", con, msg);
+ con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- } else {
- dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
}
mutex_unlock(&con->mutex);
}
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1..54fe01c5070 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -345,7 +345,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
out:
mutex_unlock(&monc->mutex);
- wake_up(&client->auth_wq);
+ wake_up_all(&client->auth_wq);
}
/*
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -460,7 +462,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
}
mutex_unlock(&monc->mutex);
if (req) {
- complete(&req->completion);
+ complete_all(&req->completion);
put_generic_request(req);
}
return;
@@ -716,14 +718,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
monc->m_auth->front_max);
if (ret < 0) {
monc->client->auth_err = ret;
- wake_up(&monc->client->auth_wq);
+ wake_up_all(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b..e3852234789 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
if (req->r_callback)
req->r_callback(req, msg);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
if (flags & CEPH_OSD_FLAG_ONDISK) {
if (req->r_safe_callback)
req->r_safe_callback(req, msg);
- complete(&req->r_safe_completion); /* fsync waiter */
+ complete_all(&req->r_safe_completion); /* fsync waiter */
}
done:
@@ -1083,7 +1083,7 @@ done:
if (newmap)
kick_requests(osdc, NULL);
up_read(&osdc->map_sem);
- wake_up(&osdc->client->auth_wq);
+ wake_up_all(&osdc->client->auth_wq);
return;
bad:
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c0..416d46adbf8 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION);
+ kfree(pi);
goto bad;
}
__decode_pool(p, pi);
@@ -707,6 +708,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
@@ -829,12 +831,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
/* remove any? */
while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
node)->pgid, pgid) <= 0) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
if (pglen) {
@@ -850,19 +853,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
for (j = 0; j < pglen; j++)
pg->osds[j] = ceph_decode_32(p);
err = __insert_pg_mapping(pg, &map->pg_temp);
- if (err)
+ if (err) {
+ kfree(pg);
goto bad;
+ }
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen);
}
}
while (rbp) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
/* ignore the rest */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9..fa87f51e38e 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d6db933df2b..f80a4f25123 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -20,6 +20,7 @@
#include <linux/cdev.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/tty.h>
#include "internal.h"
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 80f35259680..917b7d449bb 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,7 +2,6 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
- select SLOW_WORK
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
@@ -71,14 +70,14 @@ config CIFS_WEAK_PW_HASH
If unsure, say N.
config CIFS_UPCALL
- bool "Kerberos/SPNEGO advanced session setup"
- depends on CIFS && KEYS
- help
- Enables an upcall mechanism for CIFS which accesses
- userspace helper utilities to provide SPNEGO packaged (RFC 4178)
- Kerberos tickets which are needed to mount to certain secure servers
- (for which more secure Kerberos authentication is required). If
- unsure, say N.
+ bool "Kerberos/SPNEGO advanced session setup"
+ depends on CIFS && KEYS
+ select DNS_RESOLVER
+ help
+ Enables an upcall mechanism for CIFS which accesses userspace helper
+ utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
+ which are needed to mount to certain secure servers (for which more
+ secure Kerberos authentication is required). If unsure, say N.
config CIFS_XATTR
bool "CIFS extended attributes"
@@ -122,6 +121,7 @@ config CIFS_DEBUG2
config CIFS_DFS_UPCALL
bool "DFS feature support"
depends on CIFS && KEYS
+ select DNS_RESOLVER
help
Distributed File System (DFS) support is used to access shares
transparently in an enterprise name space, even if the share
@@ -131,6 +131,15 @@ config CIFS_DFS_UPCALL
IP addresses) which is needed for implicit mounts of DFS junction
points. If unsure, say N.
+config CIFS_FSCACHE
+ bool "Provide CIFS client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
+ help
+ Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
+ to be cached locally on disk through the general filesystem cache
+ manager. If unsure, say N.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 9948c0030e8..adefa60a9bd 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -11,3 +11,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
+
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
diff --git a/fs/cifs/README b/fs/cifs/README
index a727b7cb075..a7081eeeb85 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -568,8 +568,9 @@ module can be displayed via modinfo.
Misc /proc/fs/cifs Flags and Debug Info
=======================================
Informational pseudo-files:
-DebugData Displays information about active CIFS sessions
- and shares, as well as the cifs.ko version.
+DebugData Displays information about active CIFS sessions and
+ shares, features enabled as well as the cifs.ko
+ version.
Stats Lists summary resource usage information as well as per
share statistics, if CONFIG_CIFS_STATS in enabled
in the kernel configuration.
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
new file mode 100644
index 00000000000..224d7bbd1fc
--- /dev/null
+++ b/fs/cifs/cache.c
@@ -0,0 +1,331 @@
+/*
+ * fs/cifs/cache.c - CIFS filesystem cache index structure definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifs_debug.h"
+
+/*
+ * CIFS filesystem definition for FS-Cache
+ */
+struct fscache_netfs cifs_fscache_netfs = {
+ .name = "cifs",
+ .version = 0,
+};
+
+/*
+ * Register CIFS for caching with FS-Cache
+ */
+int cifs_fscache_register(void)
+{
+ return fscache_register_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Unregister CIFS for caching
+ */
+void cifs_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Key layout of CIFS server cache index object
+ */
+struct cifs_server_key {
+ uint16_t family; /* address family */
+ uint16_t port; /* IP port */
+ union {
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ } addr[0];
+};
+
+/*
+ * Server object keyed by {IPaddress,port,family} tuple
+ */
+static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct TCP_Server_Info *server = cookie_netfs_data;
+ const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
+ struct cifs_server_key *key = buffer;
+ uint16_t key_len = sizeof(struct cifs_server_key);
+
+ memset(key, 0, key_len);
+
+ /*
+ * Should not be a problem as sin_family/sin6_family overlays
+ * sa_family field
+ */
+ switch (sa->sa_family) {
+ case AF_INET:
+ key->family = server->addr.sockAddr.sin_family;
+ key->port = server->addr.sockAddr.sin_port;
+ key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
+ key_len += sizeof(key->addr[0].ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key->family = server->addr.sockAddr6.sin6_family;
+ key->port = server->addr.sockAddr6.sin6_port;
+ key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
+ key_len += sizeof(key->addr[0].ipv6_addr);
+ break;
+
+ default:
+ cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family);
+ key_len = 0;
+ break;
+ }
+
+ return key_len;
+}
+
+/*
+ * Server object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_server_index_def = {
+ .name = "CIFS.server",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_server_get_key,
+};
+
+/*
+ * Auxiliary data attached to CIFS superblock within the cache
+ */
+struct cifs_fscache_super_auxdata {
+ u64 resource_id; /* unique server resource id */
+};
+
+static char *extract_sharename(const char *treename)
+{
+ const char *src;
+ char *delim, *dst;
+ int len;
+
+ /* skip double chars at the beginning */
+ src = treename + 2;
+
+ /* share name is always preceded by '\\' now */
+ delim = strchr(src, '\\');
+ if (!delim)
+ return ERR_PTR(-EINVAL);
+ delim++;
+ len = strlen(delim);
+
+ /* caller has to free the memory */
+ dst = kstrndup(delim, len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ return dst;
+}
+
+/*
+ * Superblock object currently keyed by share name
+ */
+static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+ char *sharename;
+ uint16_t len;
+
+ sharename = extract_sharename(tcon->treeName);
+ if (IS_ERR(sharename)) {
+ cFYI(1, "CIFS: couldn't extract sharename\n");
+ sharename = NULL;
+ return 0;
+ }
+
+ len = strlen(sharename);
+ if (len > maxbuf)
+ return 0;
+
+ memcpy(buffer, sharename, len);
+
+ kfree(sharename);
+
+ return len;
+}
+
+static uint16_t
+cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * Superblock object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_super_index_def = {
+ .name = "CIFS.super",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_super_get_key,
+ .get_aux = cifs_fscache_super_get_aux,
+ .check_aux = cifs_fscache_super_check_aux,
+};
+
+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+ struct timespec last_write_time;
+ struct timespec last_change_time;
+ u64 eof;
+};
+
+static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ uint16_t keylen;
+
+ /* use the UniqueId as the key */
+ keylen = sizeof(cifsi->uniqueid);
+ if (keylen > maxbuf)
+ keylen = 0;
+ else
+ memcpy(buffer, &cifsi->uniqueid, keylen);
+
+ return keylen;
+}
+
+static void
+cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ *size = cifsi->vfs_inode.i_size;
+}
+
+static uint16_t
+cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ cFYI(1, "cifs inode 0x%p now uncached", cifsi);
+
+ for (;;) {
+ nr_pages = pagevec_lookup(&pvec,
+ cifsi->vfs_inode.i_mapping, first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+const struct fscache_cookie_def cifs_fscache_inode_object_def = {
+ .name = "CIFS.uniqueid",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = cifs_fscache_inode_get_key,
+ .get_attr = cifs_fscache_inode_get_attr,
+ .get_aux = cifs_fscache_inode_get_aux,
+ .check_aux = cifs_fscache_inode_check_aux,
+ .now_uncached = cifs_fscache_inode_now_uncached,
+};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4fce6e61b34..eb1ba493489 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,6 +119,31 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"Display Internal CIFS Data Structures for Debugging\n"
"---------------------------------------------------\n");
seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
+ seq_printf(m, "Features: ");
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ seq_printf(m, "dfs");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_FSCACHE
+ seq_printf(m, "fscache");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+ seq_printf(m, "lanman");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_POSIX
+ seq_printf(m, "posix");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_UPCALL
+ seq_printf(m, "spnego");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_XATTR
+ seq_printf(m, "xattr");
+#endif
+ seq_putc(m, '\n');
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
seq_printf(m, "Servers:");
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ac19a6f3dae..d6ced7aa23c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,7 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
}
rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
- if (rc != 0) {
+ if (rc < 0) {
cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
__func__, *devname, rc);
goto compose_mount_options_err;
@@ -150,8 +150,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
* assuming that we have 'unc=' and 'ip=' in
* the original sb_mountdata
*/
- md_len = strlen(sb_mountdata) + strlen(srvIP) +
- strlen(ref->node_name) + 12;
+ md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
mountdata = kzalloc(md_len+1, GFP_KERNEL);
if (mountdata == NULL) {
rc = -ENOMEM;
@@ -230,28 +229,22 @@ compose_mount_options_err:
goto compose_mount_options_out;
}
-
-static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
- struct dentry *dentry, const struct dfs_info3_param *ref)
+/**
+ * cifs_dfs_do_refmount - mounts specified path using provided refferal
+ * @cifs_sb: parent/root superblock
+ * @fullpath: full path in UNC format
+ * @ref: server's referral
+ */
+static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+ const char *fullpath, const struct dfs_info3_param *ref)
{
- struct cifs_sb_info *cifs_sb;
struct vfsmount *mnt;
char *mountdata;
char *devname = NULL;
- char *fullpath;
-
- cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
- /*
- * this function gives us a path with a double backslash prefix. We
- * require a single backslash for DFS.
- */
- fullpath = build_path_from_dentry(dentry);
- if (!fullpath)
- return ERR_PTR(-ENOMEM);
+ /* strip first '\' from fullpath */
mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
fullpath + 1, ref, &devname);
- kfree(fullpath);
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
@@ -357,8 +350,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
rc = -EINVAL;
goto out_err;
}
- mnt = cifs_dfs_do_refmount(nd->path.mnt,
- nd->path.dentry, referrals + i);
+ mnt = cifs_dfs_do_refmount(cifs_sb,
+ full_path, referrals + i);
cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
referrals[i].node_name, mnt);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 246a167cb91..9e771450c3b 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -35,6 +35,7 @@
#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
+#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */
struct cifs_sb_info {
struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 379bd7d9c05..87044906cd1 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -84,6 +84,9 @@ struct key_type cifs_spnego_key_type = {
/* strlen of ";uid=0x" */
#define UID_KEY_LEN 7
+/* strlen of ";creduid=0x" */
+#define CREDUID_KEY_LEN 11
+
/* strlen of ";user=" */
#define USER_KEY_LEN 6
@@ -107,6 +110,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
IP_KEY_LEN + INET6_ADDRSTRLEN +
MAX_MECH_STR_LEN +
UID_KEY_LEN + (sizeof(uid_t) * 2) +
+ CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
USER_KEY_LEN + strlen(sesInfo->userName) +
PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
@@ -144,6 +148,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
sprintf(dp, ";uid=0x%x", sesInfo->linux_uid);
dp = description + strlen(description);
+ sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
+
+ dp = description + strlen(description);
sprintf(dp, ";user=%s", sesInfo->userName);
dp = description + strlen(description);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1..a5ed10c9afe 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -45,8 +45,8 @@
#include "cifs_fs_sb.h"
#include <linux/mm.h>
#include <linux/key-type.h>
-#include "dns_resolve.h"
#include "cifs_spnego.h"
+#include "fscache.h"
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
int cifsFYI = 0;
@@ -329,6 +329,12 @@ cifs_destroy_inode(struct inode *inode)
}
static void
+cifs_clear_inode(struct inode *inode)
+{
+ cifs_fscache_release_inode_cookie(inode);
+}
+
+static void
cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
{
seq_printf(s, ",addr=");
@@ -473,14 +479,25 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+void cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ return generic_drop_inode(inode);
+
+ return generic_delete_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+ .clear_inode = cifs_clear_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
@@ -892,6 +909,10 @@ init_cifs(void)
cFYI(1, "cifs_max_pending set to max of 256");
}
+ rc = cifs_fscache_register();
+ if (rc)
+ goto out;
+
rc = cifs_init_inodecache();
if (rc)
goto out_clean_proc;
@@ -912,27 +933,13 @@ init_cifs(void)
if (rc)
goto out_unregister_filesystem;
#endif
-#ifdef CONFIG_CIFS_DFS_UPCALL
- rc = register_key_type(&key_type_dns_resolver);
- if (rc)
- goto out_unregister_key_type;
-#endif
- rc = slow_work_register_user(THIS_MODULE);
- if (rc)
- goto out_unregister_resolver_key;
return 0;
- out_unregister_resolver_key:
-#ifdef CONFIG_CIFS_DFS_UPCALL
- unregister_key_type(&key_type_dns_resolver);
- out_unregister_key_type:
-#endif
#ifdef CONFIG_CIFS_UPCALL
- unregister_key_type(&cifs_spnego_key_type);
out_unregister_filesystem:
-#endif
unregister_filesystem(&cifs_fs_type);
+#endif
out_destroy_request_bufs:
cifs_destroy_request_bufs();
out_destroy_mids:
@@ -941,6 +948,8 @@ init_cifs(void)
cifs_destroy_inodecache();
out_clean_proc:
cifs_proc_clean();
+ cifs_fscache_unregister();
+ out:
return rc;
}
@@ -949,9 +958,9 @@ exit_cifs(void)
{
cFYI(DBG2, "exit_cifs");
cifs_proc_clean();
+ cifs_fscache_unregister();
#ifdef CONFIG_CIFS_DFS_UPCALL
cifs_dfs_release_automount_timer();
- unregister_key_type(&key_type_dns_resolver);
#endif
#ifdef CONFIG_CIFS_UPCALL
unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a7eb65c84b1..d82f5fb4761 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.64"
+#define CIFS_VERSION "1.65"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a88479ceaad..0cdfb8c32ac 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -16,10 +16,13 @@
* the GNU Lesser General Public License for more details.
*
*/
+#ifndef _CIFS_GLOB_H
+#define _CIFS_GLOB_H
+
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/slab.h>
-#include <linux/slow-work.h>
+#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
/*
@@ -34,7 +37,7 @@
#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
termination then *2 for unicode versions */
-#define MAX_PASSWORD_SIZE 16
+#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
#define CIFS_MIN_RCV_POOL 4
@@ -80,8 +83,7 @@ enum statusEnum {
};
enum securityEnum {
- PLAINTXT = 0, /* Legacy with Plaintext passwords */
- LANMAN, /* Legacy LANMAN auth */
+ LANMAN = 0, /* Legacy LANMAN auth */
NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
@@ -142,7 +144,6 @@ struct TCP_Server_Info {
struct list_head pending_mid_q;
void *Server_NlsInfo; /* BB - placeholder for future NLS info */
unsigned short server_codepage; /* codepage for the server */
- unsigned long ip_address; /* IP addr for the server if known */
enum protocolEnum protocolType;
char versionMajor;
char versionMinor;
@@ -190,19 +191,9 @@ struct TCP_Server_Info {
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool sec_kerberosu2u; /* supports U2U Kerberos */
bool sec_ntlmssp; /* supports NTLMSSP */
-};
-
-/*
- * The following is our shortcut to user information. We surface the uid,
- * and name. We always get the password on the fly in case it
- * has changed. We also hang a list of sessions owned by this user off here.
- */
-struct cifsUidInfo {
- struct list_head userList;
- struct list_head sessionList; /* SMB sessions for this user */
- uid_t linux_uid;
- char user[MAX_USERNAME_SIZE + 1]; /* ascii name of user */
- /* BB may need ptr or callback for PAM or WinBind info */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache; /* client index cache cookie */
+#endif
};
/*
@@ -212,9 +203,6 @@ struct cifsSesInfo {
struct list_head smb_ses_list;
struct list_head tcon_list;
struct mutex session_mutex;
-#if 0
- struct cifsUidInfo *uidInfo; /* pointer to user info */
-#endif
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
enum statusEnum status;
@@ -226,7 +214,8 @@ struct cifsSesInfo {
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
int Suid; /* remote smb uid */
- uid_t linux_uid; /* local Linux uid */
+ uid_t linux_uid; /* overriding owner of files on the mount */
+ uid_t cred_uid; /* owner of credentials */
int capabilities;
char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
TCP names - will ipv6 and sctp addresses fit? */
@@ -311,6 +300,10 @@ struct cifsTconInfo {
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool need_reconnect:1; /* connection reset, tid now invalid */
+#ifdef CONFIG_CIFS_FSCACHE
+ u64 resource_id; /* server resource id */
+ struct fscache_cookie *fscache; /* cookie for share */
+#endif
/* BB add field for back pointer to sb struct(s)? */
};
@@ -363,7 +356,7 @@ struct cifsFileInfo {
atomic_t count; /* reference count */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
- struct slow_work oplock_break; /* slow_work job for oplock breaks */
+ struct work_struct oplock_break; /* work for oplock breaks */
};
/* Take a reference on the file private data */
@@ -398,6 +391,9 @@ struct cifsInodeInfo {
bool invalid_mapping:1; /* pagecache is invalid */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
struct inode vfs_inode;
};
@@ -732,4 +728,10 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
+void cifs_oplock_break(struct work_struct *work);
+void cifs_oplock_break_get(struct cifsFileInfo *cfile);
+void cifs_oplock_break_put(struct cifsFileInfo *cfile);
+
extern const struct slow_work_ops cifs_oplock_break_ops;
+
+#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb..1f545081408 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -86,7 +86,9 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
-extern int cifs_convert_address(char *src, void *dst);
+extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
+extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+ unsigned short int port);
extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
extern void header_assemble(struct smb_hdr *, char /* command */ ,
const struct cifsTconInfo *, int /* length of
@@ -106,7 +108,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2208f06e4c4..95c2ea67edf 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
#include "nterr.h"
#include "rfc1002pdu.h"
#include "cn_cifs.h"
+#include "fscache.h"
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -66,6 +67,7 @@ struct smb_vol {
char *iocharset; /* local code page for mapping to and from Unicode */
char source_rfc1001_name[16]; /* netbios name of client */
char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
+ uid_t cred_uid;
uid_t linux_uid;
gid_t linux_gid;
mode_t file_mode;
@@ -97,6 +99,7 @@ struct smb_vol {
bool noblocksnd:1;
bool noautotune:1;
bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
+ bool fsc:1; /* enable fscache */
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -830,7 +833,8 @@ cifs_parse_mount_options(char *options, const char *devname,
/* null target name indicates to use *SMBSERVR default called name
if we end up sending RFC1001 session initialize */
vol->target_rfc1001_name[0] = 0;
- vol->linux_uid = current_uid(); /* use current_euid() instead? */
+ vol->cred_uid = current_uid();
+ vol->linux_uid = current_uid();
vol->linux_gid = current_gid();
/* default to only allowing write access to owner of the mount */
@@ -1257,6 +1261,12 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if ((strnicmp(data, "nocase", 6) == 0) ||
(strnicmp(data, "ignorecase", 10) == 0)) {
vol->nocase = 1;
+ } else if (strnicmp(data, "mand", 4) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "nomand", 6) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "_netdev", 7) == 0) {
+ /* ignore */
} else if (strnicmp(data, "brl", 3) == 0) {
vol->nobrl = 0;
} else if ((strnicmp(data, "nobrl", 5) == 0) ||
@@ -1331,6 +1341,8 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_WARNING "CIFS: Mount option noac not "
"supported. Instead set "
"/proc/fs/cifs/LookupCacheEnabled to 0\n");
+ } else if (strnicmp(data, "fsc", 3) == 0) {
+ vol->fsc = true;
} else
printk(KERN_WARNING "CIFS: Unknown mount option %s\n",
data);
@@ -1380,18 +1392,92 @@ cifs_parse_mount_options(char *options, const char *devname,
return 0;
}
+static bool
+match_address(struct TCP_Server_Info *server, struct sockaddr *addr)
+{
+ struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (addr4->sin_addr.s_addr !=
+ server->addr.sockAddr.sin_addr.s_addr)
+ return false;
+ if (addr4->sin_port &&
+ addr4->sin_port != server->addr.sockAddr.sin_port)
+ return false;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal(&addr6->sin6_addr,
+ &server->addr.sockAddr6.sin6_addr))
+ return false;
+ if (addr6->sin6_scope_id !=
+ server->addr.sockAddr6.sin6_scope_id)
+ return false;
+ if (addr6->sin6_port &&
+ addr6->sin6_port != server->addr.sockAddr6.sin6_port)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool
+match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
+{
+ unsigned int secFlags;
+
+ if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
+ secFlags = vol->secFlg;
+ else
+ secFlags = global_secflags | vol->secFlg;
+
+ switch (server->secType) {
+ case LANMAN:
+ if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT)))
+ return false;
+ break;
+ case NTLMv2:
+ if (!(secFlags & CIFSSEC_MAY_NTLMV2))
+ return false;
+ break;
+ case NTLM:
+ if (!(secFlags & CIFSSEC_MAY_NTLM))
+ return false;
+ break;
+ case Kerberos:
+ if (!(secFlags & CIFSSEC_MAY_KRB5))
+ return false;
+ break;
+ case RawNTLMSSP:
+ if (!(secFlags & CIFSSEC_MAY_NTLMSSP))
+ return false;
+ break;
+ default:
+ /* shouldn't happen */
+ return false;
+ }
+
+ /* now check if signing mode is acceptible */
+ if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
+ (server->secMode & SECMODE_SIGN_REQUIRED))
+ return false;
+ else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
+ (server->secMode &
+ (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
+ return false;
+
+ return true;
+}
+
static struct TCP_Server_Info *
-cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
+cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
{
- struct list_head *tmp;
struct TCP_Server_Info *server;
- struct sockaddr_in *addr4 = (struct sockaddr_in *) addr;
- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &cifs_tcp_ses_list) {
- server = list_entry(tmp, struct TCP_Server_Info,
- tcp_ses_list);
+ list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
/*
* the demux thread can exit on its own while still in CifsNew
* so don't accept any sockets in that state. Since the
@@ -1401,37 +1487,11 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
if (server->tcpStatus == CifsNew)
continue;
- switch (addr->ss_family) {
- case AF_INET:
- if (addr4->sin_addr.s_addr ==
- server->addr.sockAddr.sin_addr.s_addr) {
- addr4->sin_port = htons(port);
- /* user overrode default port? */
- if (addr4->sin_port) {
- if (addr4->sin_port !=
- server->addr.sockAddr.sin_port)
- continue;
- }
- break;
- } else
- continue;
+ if (!match_address(server, addr))
+ continue;
- case AF_INET6:
- if (ipv6_addr_equal(&addr6->sin6_addr,
- &server->addr.sockAddr6.sin6_addr) &&
- (addr6->sin6_scope_id ==
- server->addr.sockAddr6.sin6_scope_id)) {
- addr6->sin6_port = htons(port);
- /* user overrode default port? */
- if (addr6->sin6_port) {
- if (addr6->sin6_port !=
- server->addr.sockAddr6.sin6_port)
- continue;
- }
- break;
- } else
- continue;
- }
+ if (!match_security(server, vol))
+ continue;
++server->srv_count;
write_unlock(&cifs_tcp_ses_lock);
@@ -1460,6 +1520,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
+ cifs_fscache_release_client_cookie(server);
+
task = xchg(&server->tsk, NULL);
if (task)
force_sig(SIGKILL, task);
@@ -1479,7 +1541,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
if (volume_info->UNCip && volume_info->UNC) {
- rc = cifs_convert_address(volume_info->UNCip, &addr);
+ rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
+ volume_info->UNCip,
+ strlen(volume_info->UNCip),
+ volume_info->port);
if (!rc) {
/* we failed translating address */
rc = -EINVAL;
@@ -1499,7 +1564,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
}
/* see if we already have a matching tcp_ses */
- tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
+ tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info);
if (tcp_ses)
return tcp_ses;
@@ -1543,12 +1608,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "attempting ipv6 connect");
/* BB should we allow ipv6 on port 139? */
/* other OS never observed in Wild doing 139 with v6 */
- sin_server6->sin6_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
sizeof(struct sockaddr_in6));
rc = ipv6_connect(tcp_ses);
} else {
- sin_server->sin_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr, sin_server,
sizeof(struct sockaddr_in));
rc = ipv4_connect(tcp_ses);
@@ -1577,6 +1640,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_client_cookie(tcp_ses);
+
return tcp_ses;
out_err:
@@ -1591,17 +1656,27 @@ out_err:
}
static struct cifsSesInfo *
-cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
+cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
{
- struct list_head *tmp;
struct cifsSesInfo *ses;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &server->smb_ses_list) {
- ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
- if (strncmp(ses->userName, username, MAX_USERNAME_SIZE))
- continue;
-
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ switch (server->secType) {
+ case Kerberos:
+ if (vol->cred_uid != ses->cred_uid)
+ continue;
+ break;
+ default:
+ /* anything else takes username/password */
+ if (strncmp(ses->userName, vol->username,
+ MAX_USERNAME_SIZE))
+ continue;
+ if (strlen(vol->username) != 0 &&
+ strncmp(ses->password, vol->password,
+ MAX_PASSWORD_SIZE))
+ continue;
+ }
++ses->ses_count;
write_unlock(&cifs_tcp_ses_lock);
return ses;
@@ -1643,7 +1718,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
xid = GetXid();
- ses = cifs_find_smb_ses(server, volume_info->username);
+ ses = cifs_find_smb_ses(server, volume_info);
if (ses) {
cFYI(1, "Existing smb sess found (status=%d)", ses->status);
@@ -1706,6 +1781,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses->domainName)
strcpy(ses->domainName, volume_info->domainname);
}
+ ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
ses->overrideSecFlg = volume_info->secFlg;
@@ -1773,6 +1849,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
CIFSSMBTDis(xid, tcon);
_FreeXid(xid);
+ cifs_fscache_release_super_cookie(tcon);
tconInfoFree(tcon);
cifs_put_smb_ses(ses);
}
@@ -1843,6 +1920,8 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
list_add(&tcon->tcon_list, &ses->tcon_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_super_cookie(tcon);
+
return tcon;
out_fail:
@@ -2397,6 +2476,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
if (pvolume_info->dynperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+ if (pvolume_info->fsc)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
if (pvolume_info->direct_io) {
cFYI(1, "mounting share using direct i/o");
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461c..578d88c5b46 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -129,12 +130,6 @@ cifs_bp_rename_retry:
return full_path;
}
-/*
- * When called with struct file pointer set to NULL, there is no way we could
- * update file->private_data, but getting it stuck on openFileList provides a
- * way to access it from cifs_fill_filedata and thereby set file->private_data
- * from cifs_open.
- */
struct cifsFileInfo *
cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -162,7 +157,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
atomic_set(&pCifsFile->count, 1);
- slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops);
+ INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
write_lock(&GlobalSMBSeslock);
list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
@@ -184,12 +179,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +254,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +281,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +321,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +328,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +458,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +623,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +691,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +721,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +760,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 4db2c5e7283..0eb87026cad 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -4,6 +4,8 @@
* Copyright (c) 2007 Igor Mammedov
* Author(s): Igor Mammedov (niallain@gmail.com)
* Steve French (sfrench@us.ibm.com)
+ * Wang Lei (wang840925@gmail.com)
+ * David Howells (dhowells@redhat.com)
*
* Contains the CIFS DFS upcall routines used for hostname to
* IP address translation.
@@ -24,145 +26,73 @@
*/
#include <linux/slab.h>
-#include <keys/user-type.h>
+#include <linux/dns_resolver.h>
#include "dns_resolve.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
-/* Checks if supplied name is IP address
- * returns:
- * 1 - name is IP
- * 0 - name is not IP
- */
-static int
-is_ip(char *name)
-{
- struct sockaddr_storage ss;
-
- return cifs_convert_address(name, &ss);
-}
-
-static int
-dns_resolver_instantiate(struct key *key, const void *data,
- size_t datalen)
-{
- int rc = 0;
- char *ip;
-
- ip = kmalloc(datalen + 1, GFP_KERNEL);
- if (!ip)
- return -ENOMEM;
-
- memcpy(ip, data, datalen);
- ip[datalen] = '\0';
-
- /* make sure this looks like an address */
- if (!is_ip(ip)) {
- kfree(ip);
- return -EINVAL;
- }
-
- key->type_data.x[0] = datalen;
- key->payload.data = ip;
-
- return rc;
-}
-
-static void
-dns_resolver_destroy(struct key *key)
-{
- kfree(key->payload.data);
-}
-
-struct key_type key_type_dns_resolver = {
- .name = "dns_resolver",
- .def_datalen = sizeof(struct in_addr),
- .describe = user_describe,
- .instantiate = dns_resolver_instantiate,
- .destroy = dns_resolver_destroy,
- .match = user_match,
-};
-
-/* Resolves server name to ip address.
- * input:
- * unc - server UNC
- * output:
- * *ip_addr - pointer to server ip, caller responcible for freeing it.
- * return 0 on success
+/**
+ * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
+ * @unc: UNC path specifying the server
+ * @ip_addr: Where to return the IP address.
+ *
+ * The IP address will be returned in string form, and the caller is
+ * responsible for freeing it.
+ *
+ * Returns length of result on success, -ve on error.
*/
int
dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
{
- int rc = -EAGAIN;
- struct key *rkey = ERR_PTR(-EAGAIN);
+ struct sockaddr_storage ss;
+ const char *hostname, *sep;
char *name;
- char *data = NULL;
- int len;
+ int len, rc;
if (!ip_addr || !unc)
return -EINVAL;
- /* search for server name delimiter */
len = strlen(unc);
if (len < 3) {
cFYI(1, "%s: unc is too short: %s", __func__, unc);
return -EINVAL;
}
+
+ /* Discount leading slashes for cifs */
len -= 2;
- name = memchr(unc+2, '\\', len);
- if (!name) {
+ hostname = unc + 2;
+
+ /* Search for server name delimiter */
+ sep = memchr(hostname, '\\', len);
+ if (sep)
+ len = sep - unc;
+ else
cFYI(1, "%s: probably server name is whole unc: %s",
- __func__, unc);
- } else {
- len = (name - unc) - 2/* leading // */;
- }
+ __func__, unc);
+
+ /* Try to interpret hostname as an IPv4 or IPv6 address */
+ rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len);
+ if (rc > 0)
+ goto name_is_IP_address;
+
+ /* Perform the upcall */
+ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
+ if (rc < 0)
+ cERROR(1, "%s: unable to resolve: %*.*s",
+ __func__, len, len, hostname);
+ else
+ cFYI(1, "%s: resolved: %*.*s to %s",
+ __func__, len, len, hostname, *ip_addr);
+ return rc;
- name = kmalloc(len+1, GFP_KERNEL);
- if (!name) {
- rc = -ENOMEM;
- return rc;
- }
- memcpy(name, unc+2, len);
+name_is_IP_address:
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+ memcpy(name, hostname, len);
name[len] = 0;
-
- if (is_ip(name)) {
- cFYI(1, "%s: it is IP, skipping dns upcall: %s",
- __func__, name);
- data = name;
- goto skip_upcall;
- }
-
- rkey = request_key(&key_type_dns_resolver, name, "");
- if (!IS_ERR(rkey)) {
- len = rkey->type_data.x[0];
- data = rkey->payload.data;
- } else {
- cERROR(1, "%s: unable to resolve: %s", __func__, name);
- goto out;
- }
-
-skip_upcall:
- if (data) {
- *ip_addr = kmalloc(len + 1, GFP_KERNEL);
- if (*ip_addr) {
- memcpy(*ip_addr, data, len + 1);
- if (!IS_ERR(rkey))
- cFYI(1, "%s: resolved: %s to %s", __func__,
- name,
- *ip_addr
- );
- rc = 0;
- } else {
- rc = -ENOMEM;
- }
- if (!IS_ERR(rkey))
- key_put(rkey);
- }
-
-out:
- kfree(name);
- return rc;
+ cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name);
+ *ip_addr = name;
+ return 0;
}
-
-
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 966e9288930..d3f5d27f4d0 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,6 @@
#define _DNS_RESOLVE_H
#ifdef __KERNEL__
-#include <linux/key-type.h>
-extern struct key_type key_type_dns_resolver;
extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
#endif /* KERNEL */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f1ff785b229..db11fdef0e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -40,6 +40,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -162,44 +163,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +182,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +224,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +238,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +261,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +269,23 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
+
+ cifs_fscache_set_inode_cookie(inode, file);
+
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,16 +366,18 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
+ cifs_fscache_set_inode_cookie(inode, file);
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
@@ -456,7 +433,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
__u16 netfid;
if (file->private_data)
- pCifsFile = (struct cifsFileInfo *)file->private_data;
+ pCifsFile = file->private_data;
else
return -EBADF;
@@ -513,8 +490,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -595,8 +571,7 @@ int cifs_close(struct inode *inode, struct file *file)
int xid, timeout;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pSMBFile = file->private_data;
xid = GetXid();
@@ -671,8 +646,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
{
int rc = 0;
int xid;
- struct cifsFileInfo *pCFileStruct =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pCFileStruct = file->private_data;
char *ptmp;
cFYI(1, "Closedir inode = 0x%p", inode);
@@ -893,8 +867,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
length, pfLock,
posix_lock_type, wait_flag);
} else {
- struct cifsFileInfo *fid =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *fid = file->private_data;
if (numLock) {
rc = CIFSSMBLock(xid, tcon, netfid, length,
@@ -995,7 +968,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *) file->private_data;
+ open_file = file->private_data;
rc = generic_write_checks(file, poffset, &write_size, 0);
if (rc)
@@ -1097,7 +1070,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
xid = GetXid();
@@ -1681,8 +1654,7 @@ int cifs_fsync(struct file *file, int datasync)
int xid;
int rc = 0;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *smbfile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *smbfile = file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
xid = GetXid();
@@ -1786,7 +1758,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1867,7 +1839,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1952,6 +1924,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
bytes_read -= PAGE_CACHE_SIZE;
continue;
}
+ page_cache_release(page);
target = kmap_atomic(page, KM_USER0);
@@ -1971,6 +1944,9 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
data += PAGE_CACHE_SIZE;
+
+ /* add page to FS-Cache */
+ cifs_readpage_to_fscache(mapping->host, page);
}
return;
}
@@ -1997,10 +1973,19 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
pTcon = cifs_sb->tcon;
+ /*
+ * Reads as many pages as possible from fscache. Returns -ENOBUFS
+ * immediately if the cookie is negative
+ */
+ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
+ &num_pages);
+ if (rc == 0)
+ goto read_complete;
+
cFYI(DBG2, "rpages: num pages %d", num_pages);
for (i = 0; i < num_pages; ) {
unsigned contig_pages;
@@ -2111,6 +2096,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
smb_read_data = NULL;
}
+read_complete:
FreeXid(xid);
return rc;
}
@@ -2121,6 +2107,11 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
char *read_data;
int rc;
+ /* Is the page cached? */
+ rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
+ if (rc == 0)
+ goto read_complete;
+
page_cache_get(page);
read_data = kmap(page);
/* for reads over a certain size could initiate async read ahead */
@@ -2140,11 +2131,17 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
flush_dcache_page(page);
SetPageUptodate(page);
+
+ /* send this page to the cache */
+ cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
+
rc = 0;
io_error:
kunmap(page);
page_cache_release(page);
+
+read_complete:
return rc;
}
@@ -2294,8 +2291,23 @@ out:
return rc;
}
-static void
-cifs_oplock_break(struct slow_work *work)
+static int cifs_release_page(struct page *page, gfp_t gfp)
+{
+ if (PagePrivate(page))
+ return 0;
+
+ return cifs_fscache_release_page(page, gfp);
+}
+
+static void cifs_invalidate_page(struct page *page, unsigned long offset)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
+
+ if (offset == 0)
+ cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+}
+
+void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
@@ -2332,33 +2344,30 @@ cifs_oplock_break(struct slow_work *work)
LOCKING_ANDX_OPLOCK_RELEASE, false);
cFYI(1, "Oplock release rc = %d", rc);
}
+
+ /*
+ * We might have kicked in before is_valid_oplock_break()
+ * finished grabbing reference for us. Make sure it's done by
+ * waiting for GlobalSMSSeslock.
+ */
+ write_lock(&GlobalSMBSeslock);
+ write_unlock(&GlobalSMBSeslock);
+
+ cifs_oplock_break_put(cfile);
}
-static int
-cifs_oplock_break_get(struct slow_work *work)
+void cifs_oplock_break_get(struct cifsFileInfo *cfile)
{
- struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
- oplock_break);
mntget(cfile->mnt);
cifsFileInfo_get(cfile);
- return 0;
}
-static void
-cifs_oplock_break_put(struct slow_work *work)
+void cifs_oplock_break_put(struct cifsFileInfo *cfile)
{
- struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
- oplock_break);
mntput(cfile->mnt);
cifsFileInfo_put(cfile);
}
-const struct slow_work_ops cifs_oplock_break_ops = {
- .get_ref = cifs_oplock_break_get,
- .put_ref = cifs_oplock_break_put,
- .execute = cifs_oplock_break,
-};
-
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -2367,6 +2376,8 @@ const struct address_space_operations cifs_addr_ops = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
@@ -2383,6 +2394,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
new file mode 100644
index 00000000000..9f3f5c4be16
--- /dev/null
+++ b/fs/cifs/fscache.c
@@ -0,0 +1,236 @@
+/*
+ * fs/cifs/fscache.c - CIFS filesystem cache interface
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+
+void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
+{
+ server->fscache =
+ fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
+ &cifs_fscache_server_index_def, server);
+ cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server,
+ server->fscache);
+}
+
+void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+{
+ cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server,
+ server->fscache);
+ fscache_relinquish_cookie(server->fscache, 0);
+ server->fscache = NULL;
+}
+
+void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ tcon->fscache =
+ fscache_acquire_cookie(server->fscache,
+ &cifs_fscache_super_index_def, tcon);
+ cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)",
+ server->fscache, tcon->fscache);
+}
+
+void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon)
+{
+ cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
+ fscache_relinquish_cookie(tcon->fscache, 0);
+ tcon->fscache = NULL;
+}
+
+static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifsi->fscache)
+ return;
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)",
+ cifs_sb->tcon->fscache, cifsi->fscache);
+}
+
+void cifs_fscache_release_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS releasing inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 0);
+ cifsi->fscache = NULL;
+ }
+}
+
+static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS disabling inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+ cifsi->fscache = NULL;
+ }
+}
+
+void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ cifs_fscache_disable_inode_cookie(inode);
+ else {
+ cifs_fscache_enable_inode_cookie(inode);
+ cFYI(1, "CIFS: fscache inode cookie set");
+ }
+}
+
+void cifs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct fscache_cookie *old = cifsi->fscache;
+
+ if (cifsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p",
+ cifsi->fscache, old);
+ }
+}
+
+int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ if (PageFsCache(page)) {
+ struct inode *inode = page->mapping->host;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ cFYI(1, "CIFS: fscache release page (0x%p/0x%p)",
+ page, cifsi->fscache);
+ if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
+ return 0;
+ }
+
+ return 1;
+}
+
+static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
+ int error)
+{
+ cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)",
+ page, error);
+ if (!error)
+ SetPageUptodate(page);
+ unlock_page(page);
+}
+
+/*
+ * Retrieve a page from FS-Cache
+ */
+int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ GFP_KERNEL);
+ switch (ret) {
+
+ case 0: /* page found in fscache, read submitted */
+ cFYI(1, "CIFS: readpage_from_fscache: submitted");
+ return ret;
+ case -ENOBUFS: /* page won't be cached */
+ case -ENODATA: /* page not in cache */
+ cFYI(1, "CIFS: readpage_from_fscache %d", ret);
+ return 1;
+
+ default:
+ cERROR(1, "unknown error ret = %d", ret);
+ }
+ return ret;
+}
+
+/*
+ * Retrieve a set of pages from FS-Cache
+ */
+int __cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ int ret;
+
+ cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)",
+ CIFS_I(inode)->fscache, *nr_pages, inode);
+ ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
+ pages, nr_pages,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ mapping_gfp_mask(mapping));
+ switch (ret) {
+ case 0: /* read submitted to the cache for all pages */
+ cFYI(1, "CIFS: readpages_from_fscache: submitted");
+ return ret;
+
+ case -ENOBUFS: /* some pages are not cached and can't be */
+ case -ENODATA: /* some pages are not cached */
+ cFYI(1, "CIFS: readpages_from_fscache: no page");
+ return 1;
+
+ default:
+ cFYI(1, "unknown error ret = %d", ret);
+ }
+
+ return ret;
+}
+
+void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
+ if (ret != 0)
+ fscache_uncache_page(CIFS_I(inode)->fscache, page);
+}
+
+void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie);
+ fscache_wait_on_page_write(cookie, page);
+ fscache_uncache_page(cookie, page);
+}
+
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
new file mode 100644
index 00000000000..31b88ec2341
--- /dev/null
+++ b/fs/cifs/fscache.h
@@ -0,0 +1,136 @@
+/*
+ * fs/cifs/fscache.h - CIFS filesystem cache interface definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _CIFS_FSCACHE_H
+#define _CIFS_FSCACHE_H
+
+#include <linux/fscache.h>
+
+#include "cifsglob.h"
+
+#ifdef CONFIG_CIFS_FSCACHE
+
+extern struct fscache_netfs cifs_fscache_netfs;
+extern const struct fscache_cookie_def cifs_fscache_server_index_def;
+extern const struct fscache_cookie_def cifs_fscache_super_index_def;
+extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
+
+extern int cifs_fscache_register(void);
+extern void cifs_fscache_unregister(void);
+
+/*
+ * fscache.c
+ */
+extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
+extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);
+
+extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void cifs_fscache_reset_inode_cookie(struct inode *);
+
+extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
+extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
+extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
+extern int __cifs_readpages_from_fscache(struct inode *,
+ struct address_space *,
+ struct list_head *,
+ unsigned *);
+
+extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_invalidate_page(page, inode);
+}
+
+static inline int cifs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpage_from_fscache(inode, page);
+
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpages_from_fscache(inode, mapping, pages,
+ nr_pages);
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_readpage_to_fscache(inode, page);
+}
+
+#else /* CONFIG_CIFS_FSCACHE */
+static inline int cifs_fscache_register(void) { return 0; }
+static inline void cifs_fscache_unregister(void) {}
+
+static inline void
+cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
+static inline void
+cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
+static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {}
+static inline void
+cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}
+
+static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ return 1; /* May release page */
+}
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode) {}
+static inline int
+cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page) {}
+
+#endif /* CONFIG_CIFS_FSCACHE */
+
+#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a5..dc4c47ab958 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,7 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
@@ -288,7 +289,7 @@ int cifs_get_file_info_unix(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -515,7 +516,7 @@ int cifs_get_file_info(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -723,18 +724,17 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+ /* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
- /*
- * uh oh -- it's a directory. We can't use it since hardlinked dirs are
- * verboten. Disable serverino and return it as if it were found, the
- * caller can discard it, generate a uniqueid and retry the find
- */
- if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
+ return 0;
+
+ /* if it's not a directory or has no dentries, then flag it */
+ if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry))
fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
- cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
- }
return 1;
}
@@ -748,6 +748,27 @@ cifs_init_inode(struct inode *inode, void *opaque)
return 0;
}
+/*
+ * walk dentry list for an inode and report whether it has aliases that
+ * are hashed. We use this to determine if a directory inode can actually
+ * be used.
+ */
+static bool
+inode_has_hashed_dentries(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&dcache_lock);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
+ spin_unlock(&dcache_lock);
+ return true;
+ }
+ }
+ spin_unlock(&dcache_lock);
+ return false;
+}
+
/* Given fattrs, get a corresponding inode */
struct inode *
cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
@@ -763,12 +784,16 @@ retry_iget5_locked:
inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
if (inode) {
- /* was there a problematic inode number collision? */
+ /* was there a potentially problematic inode collision? */
if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
- iput(inode);
- fattr->cf_uniqueid = iunique(sb, ROOT_I);
fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
- goto retry_iget5_locked;
+
+ if (inode_has_hashed_dentries(inode)) {
+ cifs_autodisable_serverino(CIFS_SB(sb));
+ iput(inode);
+ fattr->cf_uniqueid = iunique(sb, ROOT_I);
+ goto retry_iget5_locked;
+ }
}
cifs_fattr_to_inode(inode, fattr);
@@ -776,6 +801,10 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
+#ifdef CONFIG_CIFS_FSCACHE
+ /* initialize per-inode cache cookie pointer */
+ CIFS_I(inode)->fscache = NULL;
+#endif
unlock_new_inode(inode);
}
}
@@ -807,6 +836,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
if (!inode)
return ERR_PTR(-ENOMEM);
+#ifdef CONFIG_CIFS_FSCACHE
+ /* populate tcon->resource_id */
+ cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid;
+#endif
+
if (rc && cifs_sb->tcon->ipc) {
cFYI(1, "ipc connection - fake read inode");
inode->i_mode |= S_IFDIR;
@@ -1401,6 +1435,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
@@ -1564,6 +1602,7 @@ cifs_invalidate_mapping(struct inode *inode)
cifs_i->write_behind_rc = rc;
}
invalidate_remote_inode(inode);
+ cifs_fscache_reset_inode_cookie(inode);
}
int cifs_revalidate_file(struct file *filp)
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 505926f1ee6..9d38a71c8e1 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -41,8 +41,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
__u64 ExtAttrMask = 0;
__u64 caps;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)filep->private_data;
+ struct cifsFileInfo *pSMBFile = filep->private_data;
#endif /* CONFIG_CIFS_POSIX */
xid = GetXid();
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1394aa37f26..3ccadc1326d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -498,7 +498,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
struct cifsTconInfo *tcon;
struct cifsInodeInfo *pCifsInode;
struct cifsFileInfo *netfile;
- int rc;
cFYI(1, "Checking for oplock break or dnotify response");
if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
@@ -583,13 +582,18 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
pCifsInode->clientCanCacheAll = false;
if (pSMB->OplockLevel == 0)
pCifsInode->clientCanCacheRead = false;
- rc = slow_work_enqueue(&netfile->oplock_break);
- if (rc) {
- cERROR(1, "failed to enqueue oplock "
- "break: %d\n", rc);
- } else {
- netfile->oplock_break_cancelled = false;
- }
+
+ /*
+ * cifs_oplock_break_put() can't be called
+ * from here. Get reference after queueing
+ * succeeded. cifs_oplock_break() will
+ * synchronize using GlobalSMSSeslock.
+ */
+ if (queue_work(system_nrt_wq,
+ &netfile->oplock_break))
+ cifs_oplock_break_get(netfile);
+ netfile->oplock_break_cancelled = false;
+
read_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
return true;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d35d52889cb..f97851119e6 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -61,6 +61,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRremcd, -EACCES},
{ERRdiffdevice, -EXDEV},
{ERRnofiles, -ENOENT},
+ {ERRwriteprot, -EROFS},
{ERRbadshare, -ETXTBSY},
{ERRlock, -EACCES},
{ERRunsup, -EINVAL},
@@ -139,17 +140,18 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
* Returns 0 on failure.
*/
static int
-cifs_inet_pton(const int address_family, const char *cp, void *dst)
+cifs_inet_pton(const int address_family, const char *cp, int len, void *dst)
{
int ret = 0;
/* calculate length by finding first slash or NULL */
if (address_family == AF_INET)
- ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL);
+ ret = in4_pton(cp, len, dst, '\\', NULL);
else if (address_family == AF_INET6)
- ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
+ ret = in6_pton(cp, len, dst , '\\', NULL);
- cFYI(DBG2, "address conversion returned %d for %s", ret, cp);
+ cFYI(DBG2, "address conversion returned %d for %*.*s",
+ ret, len, len, cp);
if (ret > 0)
ret = 1;
return ret;
@@ -164,43 +166,66 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
* Returns 0 on failure.
*/
int
-cifs_convert_address(char *src, void *dst)
+cifs_convert_address(struct sockaddr *dst, const char *src, int len)
{
- int rc;
- char *pct, *endp;
+ int rc, alen, slen;
+ const char *pct;
+ char *endp, scope_id[13];
struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
/* IPv4 address */
- if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
+ if (cifs_inet_pton(AF_INET, src, len, &s4->sin_addr.s_addr)) {
s4->sin_family = AF_INET;
return 1;
}
- /* temporarily terminate string */
- pct = strchr(src, '%');
- if (pct)
- *pct = '\0';
-
- rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
-
- /* repair temp termination (if any) and make pct point to scopeid */
- if (pct)
- *pct++ = '%';
+ /* attempt to exclude the scope ID from the address part */
+ pct = memchr(src, '%', len);
+ alen = pct ? pct - src : len;
+ rc = cifs_inet_pton(AF_INET6, src, alen, &s6->sin6_addr.s6_addr);
if (!rc)
return rc;
s6->sin6_family = AF_INET6;
if (pct) {
+ /* grab the scope ID */
+ slen = len - (alen + 1);
+ if (slen <= 0 || slen > 12)
+ return 0;
+ memcpy(scope_id, pct + 1, slen);
+ scope_id[slen] = '\0';
+
s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
- if (!*pct || *endp)
+ if (endp != scope_id + slen)
return 0;
}
return rc;
}
+int
+cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+ const unsigned short int port)
+{
+ if (!cifs_convert_address(dst, src, len))
+ return 0;
+
+ switch (dst->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)dst)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
/*****************************************************************************
convert a NT status code to a dos class/code
*****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index daf1753af67..d5e591fab47 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -847,6 +847,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
+ if (tmp_buf == NULL) {
+ rc = -ENOMEM;
+ break;
+ }
+
for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
if (current_entry == NULL) {
/* evaluate whether this case is an error */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2..0a57cb7db5d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index c5084d27db7..7f16cb825fe 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -76,6 +76,7 @@
#define ERRnofiles 18 /* A File Search command can find no
more files matching the specified
criteria. */
+#define ERRwriteprot 19 /* media is write protected */
#define ERRgeneral 31
#define ERRbadshare 32 /* The sharing mode specified for an
Open conflicts with existing FIDs on
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c5055..c6fda9aeb86 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -8,7 +8,7 @@
* Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
- * Copyright (C) 2003 Pavel Machek (pavel@suse.cz)
+ * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
tot_len += len;
if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
goto out;
- if (!access_ok(vrfy_dir(type), buf, len)) {
+ if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae..63ae8583146 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -4,7 +4,7 @@
* Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
- * Copyright (C) 2003 Pavel Machek (pavel@suse.cz)
+ * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
*
* These routines maintain argument size conversion between 32bit and 64bit
* ioctls.
@@ -601,8 +601,11 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd,
}
/* Bluetooth ioctls */
-#define HCIUARTSETPROTO _IOW('U', 200, int)
-#define HCIUARTGETPROTO _IOR('U', 201, int)
+#define HCIUARTSETPROTO _IOW('U', 200, int)
+#define HCIUARTGETPROTO _IOR('U', 201, int)
+#define HCIUARTGETDEVICE _IOR('U', 202, int)
+#define HCIUARTSETFLAGS _IOW('U', 203, int)
+#define HCIUARTGETFLAGS _IOR('U', 204, int)
#define BNEPCONNADD _IOW('B', 200, int)
#define BNEPCONNDEL _IOW('B', 201, int)
@@ -1328,6 +1331,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
COMPATIBLE_IOCTL(HCISETLINKMODE)
COMPATIBLE_IOCTL(HCISETACLMTU)
COMPATIBLE_IOCTL(HCISETSCOMTU)
+COMPATIBLE_IOCTL(HCIBLOCKADDR)
+COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
COMPATIBLE_IOCTL(HCIINQUIRY)
COMPATIBLE_IOCTL(HCIUARTSETPROTO)
COMPATIBLE_IOCTL(HCIUARTGETPROTO)
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 41645142b88..cf78d44a8d6 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -72,10 +72,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (!sd)
return -EINVAL;
- error = simple_setattr(dentry, iattr);
- if (error)
- return error;
-
sd_iattr = sd->s_iattr;
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
@@ -89,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
-
/* attributes were changed atleast once in past */
+ error = simple_setattr(dentry, iattr);
+ if (error)
+ return error;
+
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a63..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
count -= pruned;
__put_super(sb);
/* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..a10cb91cade 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index c0d35c62052..37a34c2c622 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -248,7 +248,7 @@ static struct connection *assoc2con(int assoc_id)
for (i = 0 ; i < CONN_HASH_SIZE; i++) {
hlist_for_each_entry(con, h, &connection_hash[i], list) {
- if (con && con->sctp_assoc == assoc_id) {
+ if (con->sctp_assoc == assoc_id) {
mutex_unlock(&connections_lock);
return con;
}
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 2c6ad518100..ef17e0169da 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -81,24 +81,11 @@ static struct genl_ops dlm_nl_ops = {
int __init dlm_netlink_init(void)
{
- int rv;
-
- rv = genl_register_family(&family);
- if (rv)
- return rv;
-
- rv = genl_register_ops(&family, &dlm_nl_ops);
- if (rv < 0)
- goto err;
- return 0;
- err:
- genl_unregister_family(&family);
- return rv;
+ return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
}
void dlm_netlink_exit(void)
{
- genl_unregister_ops(&family, &dlm_nl_ops);
genl_unregister_family(&family);
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 1cc087635a5..a2e3b562e65 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -762,7 +762,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
/**
* ecryptfs_init_crypt_ctx
- * @crypt_stat: Uninitilized crypt stats structure
+ * @crypt_stat: Uninitialized crypt stats structure
*
* Initialize the crypto context.
*
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 2d8dbce9d48..46c4dd8dfcc 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
static struct hlist_head *ecryptfs_daemon_hash;
struct mutex ecryptfs_daemon_hash_mux;
-static int ecryptfs_hash_buckets;
+static int ecryptfs_hash_bits;
#define ecryptfs_uid_hash(uid) \
- hash_long((unsigned long)uid, ecryptfs_hash_buckets)
+ hash_long((unsigned long)uid, ecryptfs_hash_bits)
static u32 ecryptfs_msg_counter;
static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void)
}
mutex_init(&ecryptfs_daemon_hash_mux);
mutex_lock(&ecryptfs_daemon_hash_mux);
- ecryptfs_hash_buckets = 1;
- while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
- ecryptfs_hash_buckets++;
+ ecryptfs_hash_bits = 1;
+ while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
+ ecryptfs_hash_bits++;
ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
- * ecryptfs_hash_buckets), GFP_KERNEL);
+ * (1 << ecryptfs_hash_bits)),
+ GFP_KERNEL);
if (!ecryptfs_daemon_hash) {
rc = -ENOMEM;
printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out;
}
- for (i = 0; i < ecryptfs_hash_buckets; i++)
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++)
INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
mutex_unlock(&ecryptfs_daemon_hash_mux);
ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
@@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void)
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
- for (i = 0; i < ecryptfs_hash_buckets; i++) {
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
hlist_for_each_entry(daemon, elem,
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a8033..97d91a03fb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
else
stack_base = vma->vm_start - stack_expand;
#endif
+ current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
if (ret)
ret = -EFAULT;
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98..2bcc0431bad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 19214435b75..3675088cb88 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1552,7 +1552,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
return error;
}
- if (iattr->ia_valid & ATTR_SIZE) {
+ if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
error = ext2_setsize(inode, iattr->ia_size);
if (error)
return error;
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index 522b15498f4..e8c6ba0e4a3 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -31,6 +31,7 @@ config EXT3_FS
config EXT3_DEFAULTS_TO_ORDERED
bool "Default to 'data=ordered' in ext3"
depends on EXT3_FS
+ default y
help
The journal mode options for ext3 have different tradeoffs
between when data is guaranteed to be on disk and
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3..8a11fe21218 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 735f0190ec2..001eb0e2d48 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1149,9 +1149,25 @@ static int walk_page_buffers( handle_t *handle,
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
{
+ int dirty = buffer_dirty(bh);
+ int ret;
+
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
- return ext3_journal_get_write_access(handle, bh);
+ /*
+ * __block_prepare_write() could have dirtied some buffers. Clean
+ * the dirty bit as jbd2_journal_get_write_access() could complain
+ * otherwise about fs integrity issues. Setting of the dirty bit
+ * by __block_prepare_write() isn't a real problem here as we clear
+ * the bit before releasing a page lock and thus writeback cannot
+ * ever write the buffer.
+ */
+ if (dirty)
+ clear_buffer_dirty(bh);
+ ret = ext3_journal_get_write_access(handle, bh);
+ if (!ret && dirty)
+ ret = ext3_journal_dirty_metadata(handle, bh);
+ return ret;
}
/*
@@ -1625,10 +1641,7 @@ static int ext3_writeback_writepage(struct page *page,
goto out_fail;
}
- if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
- ret = nobh_writepage(page, ext3_get_block, wbc);
- else
- ret = block_write_full_page(page, ext3_get_block, wbc);
+ ret = block_write_full_page(page, ext3_get_block, wbc);
err = ext3_journal_stop(handle);
if (!ret)
@@ -1922,17 +1935,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
- /*
- * For "nobh" option, we can only work if we don't need to
- * read-in the page - otherwise we create buffers to do the IO.
- */
- if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
- ext3_should_writeback_data(inode) && PageUptodate(page)) {
- zero_user(page, offset, length);
- set_page_dirty(page);
- goto unlock;
- }
-
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -2284,27 +2286,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
depth);
/*
- * We've probably journalled the indirect block several
- * times during the truncate. But it's no longer
- * needed and we now drop it from the transaction via
- * journal_revoke().
- *
- * That's easy if it's exclusively part of this
- * transaction. But if it's part of the committing
- * transaction then journal_forget() will simply
- * brelse() it. That means that if the underlying
- * block is reallocated in ext3_get_block(),
- * unmap_underlying_metadata() will find this block
- * and will try to get rid of it. damn, damn.
- *
- * If this block has already been committed to the
- * journal, a revoke record will be written. And
- * revoke records must be emitted *before* clearing
- * this block's bit in the bitmaps.
- */
- ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
-
- /*
* Everything below this this pointer has been
* released. Now let this top-of-subtree go.
*
@@ -2327,6 +2308,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
truncate_restart_transaction(handle, inode);
}
+ /*
+ * We've probably journalled the indirect block several
+ * times during the truncate. But it's no longer
+ * needed and we now drop it from the transaction via
+ * journal_revoke().
+ *
+ * That's easy if it's exclusively part of this
+ * transaction. But if it's part of the committing
+ * transaction then journal_forget() will simply
+ * brelse() it. That means that if the underlying
+ * block is reallocated in ext3_get_block(),
+ * unmap_underlying_metadata() will find this block
+ * and will try to get rid of it. damn, damn. Thus
+ * we don't allow a block to be reallocated until
+ * a transaction freeing it has fully committed.
+ *
+ * We also have to make sure journal replay after a
+ * crash does not overwrite non-journaled data blocks
+ * with old metadata when the block got reallocated for
+ * data. Thus we have to store a revoke record for a
+ * block in the same transaction in which we free the
+ * block.
+ */
+ ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+
ext3_free_blocks(handle, inode, nr, 1);
if (parent_bh) {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index ee184084ca4..2b35ddb70d6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1447,7 +1447,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
- unsigned long offset;
struct buffer_head * bh;
struct ext3_dir_entry_2 *de;
struct super_block * sb;
@@ -1469,7 +1468,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
ext3_mark_inode_dirty(handle, dir);
}
blocks = dir->i_size >> sb->s_blocksize_bits;
- for (block = 0, offset = 0; block < blocks; block++) {
+ for (block = 0; block < blocks; block++) {
bh = ext3_bread(handle, dir, block, 0, &retval);
if(!bh)
return retval;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 54351ac7cef..0ccd7b12b73 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -964,7 +964,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
ext3_fsblk_t n_blocks_count)
{
ext3_fsblk_t o_blocks_count;
- unsigned long o_groups_count;
ext3_grpblk_t last;
ext3_grpblk_t add;
struct buffer_head * bh;
@@ -976,7 +975,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
* yet: we're going to revalidate es->s_blocks_count after
* taking the s_resize_lock below. */
o_blocks_count = le32_to_cpu(es->s_blocks_count);
- o_groups_count = EXT3_SB(sb)->s_groups_count;
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6c953bb255e..9650a956fd0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -661,9 +661,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
*/
seq_puts(seq, ",barrier=");
seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
- if (test_opt(sb, NOBH))
- seq_puts(seq, ",nobh");
-
seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
if (test_opt(sb, DATA_ERR_ABORT))
seq_puts(seq, ",data_err=abort");
@@ -1255,10 +1252,12 @@ set_qf_format:
*n_blocks_count = option;
break;
case Opt_nobh:
- set_opt(sbi->s_mount_opt, NOBH);
+ ext3_msg(sb, KERN_WARNING,
+ "warning: ignoring deprecated nobh option");
break;
case Opt_bh:
- clear_opt(sbi->s_mount_opt, NOBH);
+ ext3_msg(sb, KERN_WARNING,
+ "warning: ignoring deprecated bh option");
break;
default:
ext3_msg(sb, KERN_ERR,
@@ -2001,14 +2000,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
break;
}
- if (test_opt(sb, NOBH)) {
- if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
- ext3_msg(sb, KERN_WARNING,
- "warning: ignoring nobh option - "
- "it is supported only with writeback mode");
- clear_opt(sbi->s_mount_opt, NOBH);
- }
- }
/*
* The journal_load will have done any necessary log recovery,
* so we can safely mount the rest of the filesystem now.
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index feaf498feaa..5e2ed4504ea 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 95b7594c76f..bd30799a43e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -377,14 +377,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_grpblk_t bit;
unsigned int i;
struct ext4_group_desc *desc;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed;
struct ext4_group_info *grp;
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -477,7 +474,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err)
err = ret;
- sb->s_dirt = 1;
error_return:
brelse(bitmap_bh);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 5b6973fbf1b..3db5084db9b 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
- (start_blk + count > ext4_blocks_count(sbi->s_es)))
+ (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+ sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
+ }
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
- else
+ else {
+ sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
+ }
}
return 1;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ea5e6cb7e2a..374510f72ba 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
}
-int ext4_check_dir_entry(const char *function, struct inode *dir,
- struct ext4_dir_entry_2 *de,
- struct buffer_head *bh,
- unsigned int offset)
+int __ext4_check_dir_entry(const char *function, unsigned int line,
+ struct inode *dir,
+ struct ext4_dir_entry_2 *de,
+ struct buffer_head *bh,
+ unsigned int offset)
{
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
error_msg = "inode out of bounds";
if (error_msg != NULL)
- ext4_error_inode(function, dir,
- "bad entry in directory: %s - block=%llu"
+ ext4_error_inode(dir, function, line, bh->b_blocknr,
+ "bad entry in directory: %s - "
"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned long long) bh->b_blocknr,
- (unsigned) (offset%bh->b_size), offset,
+ error_msg, (unsigned) (offset%bh->b_size), offset,
le32_to_cpu(de->inode),
rlen, de->name_len);
return error_msg == NULL ? 1 : 0;
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp,
* We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk.
*/
- ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
+ ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+ EXT4_INODE_INDEX);
}
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -193,7 +194,7 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (!ext4_check_dir_entry("ext4_readdir", inode, de,
+ if (!ext4_check_dir_entry(inode, de,
bh, offset)) {
/*
* On error, skip the f_pos to the next block
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
struct dir_private_info *info;
int len;
- info = (struct dir_private_info *) dir_file->private_data;
+ info = dir_file->private_data;
p = &info->root.rb_node;
/* Create and allocate the fname structure */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de57128..e03841d9f30 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -57,10 +57,13 @@
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
- ext4_error_inode(__func__, (inode), (fmt), ## a)
+ ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
+
+#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
+ ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, fmt, a...) \
- ext4_error_file(__func__, (file), (fmt), ## a)
+ ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -167,13 +170,15 @@ struct mpage_da_data {
};
#define EXT4_IO_UNWRITTEN 0x1
typedef struct ext4_io_end {
- struct list_head list; /* per-file finished AIO list */
+ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
+ struct kiocb *iocb; /* iocb struct for AIO */
+ int result; /* error value for AIO */
} ext4_io_end_t;
/*
@@ -460,7 +465,7 @@ struct ext4_new_group_data {
};
/*
- * Flags used by ext4_get_blocks()
+ * Flags used by ext4_map_blocks()
*/
/* Allocate any needed blocks and/or convert an unitialized
extent to be an initialized ext4 */
@@ -873,7 +878,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
@@ -982,7 +986,7 @@ struct ext4_super_block {
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
- __u8 s_reserved_char_pad;
+ __u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */
/*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
@@ -1000,12 +1004,34 @@ struct ext4_super_block {
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
- __u8 s_reserved_char_pad2;
+ __u8 s_reserved_char_pad;
__le16 s_reserved_pad;
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
- __u32 s_reserved[160]; /* Padding to the end of the block */
+ __le32 s_snapshot_inum; /* Inode number of active snapshot */
+ __le32 s_snapshot_id; /* sequential ID of active snapshot */
+ __le64 s_snapshot_r_blocks_count; /* reserved blocks for active
+ snapshot's future use */
+ __le32 s_snapshot_list; /* inode number of the head of the
+ on-disk snapshot list */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+ __le32 s_error_count; /* number of fs errors */
+ __le32 s_first_error_time; /* first time an error happened */
+ __le32 s_first_error_ino; /* inode involved in first error */
+ __le64 s_first_error_block; /* block involved of first error */
+ __u8 s_first_error_func[32]; /* function where the error happened */
+ __le32 s_first_error_line; /* line number where error happened */
+ __le32 s_last_error_time; /* most recent time of an error */
+ __le32 s_last_error_ino; /* inode involved in last error */
+ __le32 s_last_error_line; /* line number where error happened */
+ __le64 s_last_error_block; /* block involved of last error */
+ __u8 s_last_error_func[32]; /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
+ __u8 s_mount_opts[64];
+ __le32 s_reserved[112]; /* Padding to the end of the block */
};
+#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
+
#ifdef __KERNEL__
/*
@@ -1143,6 +1169,9 @@ struct ext4_sb_info {
/* workqueue for dio unwritten */
struct workqueue_struct *dio_unwritten_wq;
+
+ /* timer for periodic error stats printing */
+ struct timer_list s_err_report;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1313,6 +1342,10 @@ EXT4_INODE_BIT_FNS(state, state_flags)
#define EXT4_DEFM_JMODE_DATA 0x0020
#define EXT4_DEFM_JMODE_ORDERED 0x0040
#define EXT4_DEFM_JMODE_WBACK 0x0060
+#define EXT4_DEFM_NOBARRIER 0x0100
+#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
+#define EXT4_DEFM_DISCARD 0x0400
+#define EXT4_DEFM_NODELALLOC 0x0800
/*
* Default journal batch times
@@ -1379,6 +1412,43 @@ struct ext4_dir_entry_2 {
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
+ * If we ever get support for fs block sizes > page_size, we'll need
+ * to remove the #if statements in the next two functions...
+ */
+static inline unsigned int
+ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len == EXT4_MAX_REC_LEN || len == 0)
+ return blocksize;
+ return (len & 65532) | ((len & 3) << 16);
+#else
+ return len;
+#endif
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+ if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
+ BUG();
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len < 65536)
+ return cpu_to_le16(len);
+ if (len == blocksize) {
+ if (blocksize == 65536)
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else
+ return cpu_to_le16(0);
+ }
+ return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
+#else
+ return cpu_to_le16(len);
+#endif
+}
+
+/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
*/
@@ -1510,9 +1580,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
ext4_init_block_bitmap(sb, NULL, group, desc)
/* dir.c */
-extern int ext4_check_dir_entry(const char *, struct inode *,
- struct ext4_dir_entry_2 *,
- struct buffer_head *, unsigned int);
+extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+ struct ext4_dir_entry_2 *,
+ struct buffer_head *, unsigned int);
+#define ext4_check_dir_entry(dir, de, bh, offset) \
+ __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
struct ext4_dir_entry_2 *dirent);
@@ -1601,8 +1673,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *);
/* namei.c */
-extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
-extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -1616,25 +1686,38 @@ extern int ext4_group_extend(struct super_block *sb,
ext4_fsblk_t n_blocks_count);
/* super.c */
-extern void __ext4_error(struct super_block *, const char *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message)
-extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void ext4_error_file(const char *, struct file *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_std_error(struct super_block *, const char *, int);
-extern void ext4_abort(struct super_block *, const char *, const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void __ext4_warning(struct super_block *, const char *,
+extern void __ext4_error(struct super_block *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
+ __LINE__, ## message)
+extern void ext4_error_inode(struct inode *, const char *, unsigned int,
+ ext4_fsblk_t, const char *, ...)
+ __attribute__ ((format (printf, 5, 6)));
+extern void ext4_error_file(struct file *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_std_error(struct super_block *, const char *,
+ unsigned int, int);
+extern void __ext4_abort(struct super_block *, const char *, unsigned int,
+ const char *, ...)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
+ __LINE__, ## message)
+extern void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...)
- __attribute__ ((format (printf, 3, 4)));
-#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message)
+ __attribute__ ((format (printf, 4, 5)));
+#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
+ __LINE__, ## message)
extern void ext4_msg(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
-extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
- const char *, const char *, ...)
- __attribute__ ((format (printf, 4, 5)));
+extern void __ext4_grp_locked_error(const char *, unsigned int, \
+ struct super_block *, ext4_group_t, \
+ unsigned long, ext4_fsblk_t, \
+ const char *, ...)
+ __attribute__ ((format (printf, 7, 8)));
+#define ext4_grp_locked_error(sb, grp, message...) \
+ __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
@@ -1768,7 +1851,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
#define ext4_std_error(sb, errno) \
do { \
if ((errno)) \
- __ext4_std_error((sb), __func__, (errno)); \
+ __ext4_std_error((sb), __func__, __LINE__, (errno)); \
} while (0)
#ifdef CONFIG_SMP
@@ -1860,6 +1943,12 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group));
}
+static inline void ext4_mark_super_dirty(struct super_block *sb)
+{
+ if (EXT4_SB(sb)->s_journal == NULL)
+ sb->s_dirt =1;
+}
+
/*
* Inodes and files operations
*/
@@ -1905,9 +1994,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
- sector_t block, unsigned int max_blocks,
- struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
/* move_extent.c */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 53d2764d71c..6e272ef6ba9 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,29 +6,29 @@
#include <trace/events/ext4.h>
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
- struct buffer_head *bh)
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_undo_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
+ ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
}
return err;
}
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
- struct buffer_head *bh)
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
+ ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
}
return err;
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
* If the handle isn't valid we're not journaling, but we still need to
* call into ext4_journal_revoke() to put the buffer head.
*/
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
- struct inode *inode, struct buffer_head *bh,
- ext4_fsblk_t blocknr)
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+ int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_forget");
err = jbd2_journal_forget(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
return err;
}
return 0;
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_revoke");
err = jbd2_journal_revoke(handle, blocknr, bh);
if (err) {
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
- ext4_abort(inode->i_sb, __func__,
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
+ __ext4_abort(inode->i_sb, where, line,
"error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
}
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
int err = 0;
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where,
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_create_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
}
return err;
}
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
- struct inode *inode, struct buffer_head *bh)
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
} else {
if (inode)
mark_buffer_dirty_inode(bh, inode);
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) {
- ext4_error(inode->i_sb,
- "IO error syncing inode, "
- "inode=%lu, block=%llu",
- inode->i_ino,
- (unsigned long long) bh->b_blocknr);
+ struct ext4_super_block *es;
+
+ es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_block =
+ cpu_to_le64(bh->b_blocknr);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "IO error syncing itable block");
err = -EIO;
}
}
}
return err;
}
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+ handle_t *handle, struct super_block *sb)
+{
+ struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_dirty_metadata(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, line, __func__,
+ bh, handle, err);
+ } else
+ sb->s_dirt = 1;
+ return err;
+}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index dade0c02479..b0bd792c58c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
/*
* Wrapper functions with which ext4 calls into JBD.
*/
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+ const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);
-int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
- struct buffer_head *bh);
+int __ext4_journal_get_undo_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh);
-int __ext4_journal_get_write_access(const char *where, handle_t *handle,
- struct buffer_head *bh);
+int __ext4_journal_get_write_access(const char *where, unsigned int line,
+ handle_t *handle, struct buffer_head *bh);
-int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
- struct inode *inode, struct buffer_head *bh,
- ext4_fsblk_t blocknr);
+int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+ int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t blocknr);
-int __ext4_journal_get_create_access(const char *where,
+int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh);
-int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
- struct inode *inode, struct buffer_head *bh);
+int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
+
+int __ext4_handle_dirty_super(const char *where, unsigned int line,
+ handle_t *handle, struct super_block *sb);
#define ext4_journal_get_undo_access(handle, bh) \
- __ext4_journal_get_undo_access(__func__, (handle), (bh))
+ __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
#define ext4_journal_get_write_access(handle, bh) \
- __ext4_journal_get_write_access(__func__, (handle), (bh))
+ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
- __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
- (block_nr))
+ __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
+ (bh), (block_nr))
#define ext4_journal_get_create_access(handle, bh) \
- __ext4_journal_get_create_access(__func__, (handle), (bh))
+ __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
- __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
+ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
+ (bh))
+#define ext4_handle_dirty_super(handle, sb) \
+ __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
-int __ext4_journal_stop(const char *where, handle_t *handle);
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
}
#define ext4_journal_stop(handle) \
- __ext4_journal_stop(__func__, (handle))
+ __ext4_journal_stop(__func__, __LINE__, (handle))
static inline handle_t *ext4_journal_current_handle(void)
{
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode)
* This function controls whether or not we should try to go down the
* dioread_nolock code paths, which makes it safe to avoid taking
* i_mutex for direct I/O reads. This only works for extent-based
- * files, and it doesn't work for nobh or if data journaling is
- * enabled, since the dioread_nolock code uses b_private to pass
- * information back to the I/O completion handler, and this conflicts
- * with the jbd's use of b_private.
+ * files, and it doesn't work if data journaling is enabled, since the
+ * dioread_nolock code uses b_private to pass information back to the
+ * I/O completion handler, and this conflicts with the jbd's use of
+ * b_private.
*/
static inline int ext4_should_dioread_nolock(struct inode *inode)
{
if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
return 0;
- if (test_opt(inode->i_sb, NOBH))
- return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 377309c1af6..06328d3e571 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -401,9 +401,9 @@ static int ext4_valid_extent_entries(struct inode *inode,
return 1;
}
-static int __ext4_ext_check(const char *function, struct inode *inode,
- struct ext4_extent_header *eh,
- int depth)
+static int __ext4_ext_check(const char *function, unsigned int line,
+ struct inode *inode, struct ext4_extent_header *eh,
+ int depth)
{
const char *error_msg;
int max = 0;
@@ -436,7 +436,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0;
corrupted:
- ext4_error_inode(function, inode,
+ ext4_error_inode(inode, function, line, 0,
"bad header/extent: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
error_msg, le16_to_cpu(eh->eh_magic),
@@ -447,7 +447,7 @@ corrupted:
}
#define ext4_ext_check(inode, eh, depth) \
- __ext4_ext_check(__func__, inode, eh, depth)
+ __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
int ext4_ext_check_inode(struct inode *inode)
{
@@ -1083,7 +1083,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *curp = path;
struct ext4_extent_header *neh;
- struct ext4_extent_idx *fidx;
struct buffer_head *bh;
ext4_fsblk_t newblock;
int err = 0;
@@ -1144,10 +1143,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext4_idx_store_pblock(curp->p_idx, newblock);
neh = ext_inode_hdr(inode);
- fidx = EXT_FIRST_INDEX(neh);
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
- le32_to_cpu(fidx->ei_block), idx_pblock(fidx));
+ le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+ idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp);
@@ -2937,7 +2936,7 @@ fix_extent_len:
* One of more index blocks maybe needed if the extent tree grow after
* the unintialized extent split. To prevent ENOSPC occur at the IO
* complete, we need to split the uninitialized extent before DIO submit
- * the IO. The uninitilized extent called at this time will be split
+ * the IO. The uninitialized extent called at this time will be split
* into three uninitialized extent(at most). After IO complete, the part
* being filled will be convert to initialized by the end_io callback function
* via ext4_convert_unwritten_extents().
@@ -2954,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
- struct ext4_extent_header *eh;
ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
@@ -2971,7 +2969,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
eof_block = map->m_lblk + map->m_len;
depth = ext_depth(inode);
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
@@ -3058,7 +3055,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
err = PTR_ERR(path);
goto out;
}
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex2 != &newex)
ex2 = ex;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5313ae4cda2..ee92b66d455 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
size_t length = iov_length(iov, nr_segs);
- if (pos > sbi->s_bitmap_maxbytes)
+ if ((pos > sbi->s_bitmap_maxbytes ||
+ (pos == sbi->s_bitmap_maxbytes && length > 0)))
return -EFBIG;
if (pos + length > sbi->s_bitmap_maxbytes) {
@@ -123,7 +124,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
}
}
return dquot_file_open(inode, filp);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25c4b3173fd..ac377505ed5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -279,7 +279,7 @@ out:
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
} else
ext4_error(sb, "bit already cleared for inode %lu", ino);
@@ -965,7 +965,7 @@ got:
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c321f..a0ab3754d0d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -221,6 +221,7 @@ void ext4_delete_inode(struct inode *inode)
"couldn't extend journal (err %d)", err);
stop_handle:
ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
goto no_delete;
}
}
@@ -337,9 +338,11 @@ static int ext4_block_to_path(struct inode *inode,
return n;
}
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+ struct inode *inode,
__le32 *p, unsigned int max)
{
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
__le32 *bref = p;
unsigned int blk;
@@ -348,8 +351,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
- ext4_error_inode(function, inode,
- "invalid block reference %u", blk);
+ es->s_last_error_block = cpu_to_le64(blk);
+ ext4_error_inode(inode, function, line, blk,
+ "invalid block");
return -EIO;
}
}
@@ -358,11 +362,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
#define ext4_check_indirect_blockref(inode, bh) \
- __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
+ __ext4_check_blockref(__func__, __LINE__, inode, \
+ (__le32 *)(bh)->b_data, \
EXT4_ADDR_PER_BLOCK((inode)->i_sb))
#define ext4_check_inode_blockref(inode) \
- __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
+ __ext4_check_blockref(__func__, __LINE__, inode, \
+ EXT4_I(inode)->i_data, \
EXT4_NDIR_BLOCKS)
/**
@@ -1128,20 +1134,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
ext4_discard_preallocations(inode);
}
-static int check_block_validity(struct inode *inode, const char *func,
+static int __check_block_validity(struct inode *inode, const char *func,
+ unsigned int line,
struct ext4_map_blocks *map)
{
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
- ext4_error_inode(func, inode,
- "lblock %lu mapped to illegal pblock %llu "
- "(length %d)", (unsigned long) map->m_lblk,
- map->m_pblk, map->m_len);
+ ext4_error_inode(inode, func, line, map->m_pblk,
+ "lblock %lu mapped to illegal pblock "
+ "(length %d)", (unsigned long) map->m_lblk,
+ map->m_len);
return -EIO;
}
return 0;
}
+#define check_block_validity(inode, map) \
+ __check_block_validity((inode), __func__, __LINE__, (map))
+
/*
* Return the number of contiguous dirty pages in a given inode
* starting at page frame idx.
@@ -1244,7 +1254,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode, __func__, map);
+ int ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
}
@@ -1324,9 +1334,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
- int ret = check_block_validity(inode,
- "ext4_map_blocks_after_alloc",
- map);
+ int ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
}
@@ -1519,9 +1527,25 @@ static int walk_page_buffers(handle_t *handle,
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
{
+ int dirty = buffer_dirty(bh);
+ int ret;
+
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
- return ext4_journal_get_write_access(handle, bh);
+ /*
+ * __block_prepare_write() could have dirtied some buffers. Clean
+ * the dirty bit as jbd2_journal_get_write_access() could complain
+ * otherwise about fs integrity issues. Setting of the dirty bit
+ * by __block_prepare_write() isn't a real problem here as we clear
+ * the bit before releasing a page lock and thus writeback cannot
+ * ever write the buffer.
+ */
+ if (dirty)
+ clear_buffer_dirty(bh);
+ ret = ext4_journal_get_write_access(handle, bh);
+ if (!ret && dirty)
+ ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+ return ret;
}
/*
@@ -2194,7 +2218,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
BUG_ON(!handle);
/*
- * Call ext4_get_blocks() to allocate any delayed allocation
+ * Call ext4_map_blocks() to allocate any delayed allocation
* blocks, or to convert an uninitialized extent to be
* initialized (in the case where we have written into
* one or more preallocated blocks).
@@ -2203,7 +2227,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* indicate that we are on the delayed allocation path. This
* affects functions in many different parts of the allocation
* call path. This flag exists primarily because we don't
- * want to change *many* call functions, so ext4_get_blocks()
+ * want to change *many* call functions, so ext4_map_blocks()
* will set the magic i_delalloc_reserved_flag once the
* inode's allocation semaphore is taken.
*
@@ -2221,6 +2245,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
if (blks < 0) {
+ struct super_block *sb = mpd->inode->i_sb;
+
err = blks;
/*
* If get block returns with error we simply
@@ -2231,7 +2257,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
return 0;
if (err == -ENOSPC &&
- ext4_count_free_blocks(mpd->inode->i_sb)) {
+ ext4_count_free_blocks(sb)) {
mpd->retval = err;
return 0;
}
@@ -2243,16 +2269,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* writepage and writepages will again try to write
* the same.
*/
- ext4_msg(mpd->inode->i_sb, KERN_CRIT,
- "delayed block allocation failed for inode %lu at "
- "logical offset %llu with max blocks %zd with "
- "error %d", mpd->inode->i_ino,
- (unsigned long long) next,
- mpd->b_size >> mpd->inode->i_blkbits, err);
- printk(KERN_CRIT "This should not happen!! "
- "Data will be lost\n");
- if (err == -ENOSPC) {
- ext4_print_free_blocks(mpd->inode);
+ if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ext4_msg(sb, KERN_CRIT,
+ "delayed block allocation failed for inode %lu "
+ "at logical offset %llu with max blocks %zd "
+ "with error %d", mpd->inode->i_ino,
+ (unsigned long long) next,
+ mpd->b_size >> mpd->inode->i_blkbits, err);
+ ext4_msg(sb, KERN_CRIT,
+ "This should not happen!! Data will be lost\n");
+ if (err == -ENOSPC)
+ ext4_print_free_blocks(mpd->inode);
}
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
@@ -2320,7 +2347,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
* XXX Don't go larger than mballoc is willing to allocate
* This is a stopgap solution. We eventually need to fold
* mpage_da_submit_io() into this function and then call
- * ext4_get_blocks() multiple times in a loop
+ * ext4_map_blocks() multiple times in a loop
*/
if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
goto flush_it;
@@ -2553,18 +2580,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
/*
* This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page(). These functions should only try to map a
- * single block at a time.
+ * callback function for block_prepare_write() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
*
* Since this function doesn't do block allocations even if the caller
* requests it by passing in create=1, it is critically important that
* any caller checks to make sure that any buffer heads are returned
* by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page(). Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling block_write_full_page(). Otherwise,
+ * b_blocknr could be left unitialized, and the page write functions will
+ * be taken by surprise.
*/
static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -2749,9 +2774,7 @@ static int ext4_writepage(struct page *page,
return __ext4_journalled_writepage(page, len);
}
- if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
- ret = nobh_writepage(page, noalloc_get_block_write, wbc);
- else if (page_bufs && buffer_uninit(page_bufs)) {
+ if (page_bufs && buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
@@ -3146,13 +3169,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
int ret, retries = 0;
struct page *page;
pgoff_t index;
- unsigned from, to;
struct inode *inode = mapping->host;
handle_t *handle;
index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
if (ext4_nonda_switch(inode->i_sb)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3668,6 +3688,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
return ret;
}
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
io->flag = 0;
return ret;
@@ -3767,6 +3789,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
io->offset = 0;
io->size = 0;
io->page = NULL;
+ io->iocb = NULL;
+ io->result = 0;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
@@ -3775,7 +3799,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3809,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,12 +3820,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
return;
}
io_end->offset = offset;
io_end->size = size;
- io_end->flag = EXT4_IO_UNWRITTEN;
+ if (is_async) {
+ io_end->iocb = iocb;
+ io_end->result = ret;
+ }
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
@@ -3937,7 +3968,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
return -ENOMEM;
/*
* we save the io structure for current async
- * direct IO, so that later ext4_get_blocks()
+ * direct IO, so that later ext4_map_blocks()
* could flag the io structure whether there
* is a unwritten extents needs to be converted
* when IO is completed.
@@ -4128,17 +4159,6 @@ int ext4_block_truncate_page(handle_t *handle,
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
- /*
- * For "nobh" option, we can only work if we don't need to
- * read-in the page - otherwise we create buffers to do the IO.
- */
- if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
- ext4_should_writeback_data(inode) && PageUptodate(page)) {
- zero_user(page, offset, length);
- set_page_dirty(page);
- goto unlock;
- }
-
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -4488,9 +4508,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* (should be rare).
*/
if (!bh) {
- EXT4_ERROR_INODE(inode,
- "Read failure block=%llu",
- (unsigned long long) nr);
+ EXT4_ERROR_INODE_BLOCK(inode, nr,
+ "Read failure");
continue;
}
@@ -4502,27 +4521,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
depth);
/*
- * We've probably journalled the indirect block several
- * times during the truncate. But it's no longer
- * needed and we now drop it from the transaction via
- * jbd2_journal_revoke().
- *
- * That's easy if it's exclusively part of this
- * transaction. But if it's part of the committing
- * transaction then jbd2_journal_forget() will simply
- * brelse() it. That means that if the underlying
- * block is reallocated in ext4_get_block(),
- * unmap_underlying_metadata() will find this block
- * and will try to get rid of it. damn, damn.
- *
- * If this block has already been committed to the
- * journal, a revoke record will be written. And
- * revoke records must be emitted *before* clearing
- * this block's bit in the bitmaps.
- */
- ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
- /*
* Everything below this this pointer has been
* released. Now let this top-of-subtree go.
*
@@ -4546,8 +4544,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
blocks_for_truncate(inode));
}
+ /*
+ * The forget flag here is critical because if
+ * we are journaling (and not doing data
+ * journaling), we have to make sure a revoke
+ * record is written to prevent the journal
+ * replay from overwriting the (former)
+ * indirect block if it gets reallocated as a
+ * data block. This must happen in the same
+ * transaction where the data blocks are
+ * actually freed.
+ */
ext4_free_blocks(handle, inode, 0, nr, 1,
- EXT4_FREE_BLOCKS_METADATA);
+ EXT4_FREE_BLOCKS_METADATA|
+ EXT4_FREE_BLOCKS_FORGET);
if (parent_bh) {
/*
@@ -4805,8 +4815,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block);
if (!bh) {
- EXT4_ERROR_INODE(inode, "unable to read inode block - "
- "block %llu", block);
+ EXT4_ERROR_INODE_BLOCK(inode, block,
+ "unable to read itable block");
return -EIO;
}
if (!buffer_uptodate(bh)) {
@@ -4904,8 +4914,8 @@ make_io:
submit_bh(READ_META, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
- EXT4_ERROR_INODE(inode, "unable to read inode "
- "block %llu", block);
+ EXT4_ERROR_INODE_BLOCK(inode, block,
+ "unable to read itable block");
brelse(bh);
return -EIO;
}
@@ -4942,20 +4952,26 @@ void ext4_set_inode_flags(struct inode *inode)
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
- unsigned int flags = ei->vfs_inode.i_flags;
-
- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
- if (flags & S_SYNC)
- ei->i_flags |= EXT4_SYNC_FL;
- if (flags & S_APPEND)
- ei->i_flags |= EXT4_APPEND_FL;
- if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT4_IMMUTABLE_FL;
- if (flags & S_NOATIME)
- ei->i_flags |= EXT4_NOATIME_FL;
- if (flags & S_DIRSYNC)
- ei->i_flags |= EXT4_DIRSYNC_FL;
+ unsigned int vfs_fl;
+ unsigned long old_fl, new_fl;
+
+ do {
+ vfs_fl = ei->vfs_inode.i_flags;
+ old_fl = ei->i_flags;
+ new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+ EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+ EXT4_DIRSYNC_FL);
+ if (vfs_fl & S_SYNC)
+ new_fl |= EXT4_SYNC_FL;
+ if (vfs_fl & S_APPEND)
+ new_fl |= EXT4_APPEND_FL;
+ if (vfs_fl & S_IMMUTABLE)
+ new_fl |= EXT4_IMMUTABLE_FL;
+ if (vfs_fl & S_NOATIME)
+ new_fl |= EXT4_NOATIME_FL;
+ if (vfs_fl & S_DIRSYNC)
+ new_fl |= EXT4_DIRSYNC_FL;
+ } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -4970,7 +4986,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
/* we are using combined 48 bit field */
i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
le32_to_cpu(raw_inode->i_blocks_lo);
- if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+ if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
/* i_blocks represent file system block size */
return i_blocks << (inode->i_blkbits - 9);
} else {
@@ -5066,7 +5082,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction;
tid_t tid;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
@@ -5075,7 +5091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid;
ei->i_datasync_tid = tid;
}
@@ -5120,7 +5136,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_file_acl);
ret = -EIO;
goto bad_inode;
- } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+ } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode)))
@@ -5191,7 +5207,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = 0;
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
return 0;
}
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5220,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
} else {
- ei->i_flags |= EXT4_HUGE_FILE_FL;
+ ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
/* i_block is stored in file system block size */
i_blocks = i_blocks >> (inode->i_blkbits - 9);
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
@@ -5400,9 +5416,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL)
sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
- EXT4_ERROR_INODE(inode,
- "IO error syncing inode (block=%llu)",
- (unsigned long long) iloc.bh->b_blocknr);
+ EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+ "IO error syncing inode");
err = -EIO;
}
brelse(iloc.bh);
@@ -5477,10 +5492,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (attr->ia_size > sbi->s_bitmap_maxbytes) {
- error = -EFBIG;
- goto err_out;
- }
+ if (attr->ia_size > sbi->s_bitmap_maxbytes)
+ return -EFBIG;
}
}
@@ -5682,7 +5695,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
* Calculate the journal credits for a chunk of data modification.
*
* This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
*
* journal buffers for data blocks are not included here, as DIO
* and fallocate do no need to journal data buffers.
@@ -5748,7 +5761,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_entry *entry;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0;
@@ -5756,7 +5768,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
- entry = IFIRST(header);
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3bc026a6..4b4ad4b7ce5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -446,10 +446,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += first + i;
ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "double-free of inode"
- " %lu's block %llu(bit %u in group %u)",
- inode ? inode->i_ino : 0, blocknr,
- first + i, e4b->bd_group);
+ inode ? inode->i_ino : 0,
+ blocknr,
+ "freeing block already freed "
+ "(bit %u)",
+ first + i);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -712,9 +713,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;
if (free != grp->bb_free) {
- ext4_grp_locked_error(sb, group, __func__,
- "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
- group, free, grp->bb_free);
+ ext4_grp_locked_error(sb, group, 0, 0,
+ "%u blocks in bitmap, %u in gd",
+ free, grp->bb_free);
/*
* If we intent to continue, we consider group descritor
* corrupt and update bb_free using bitmap value
@@ -1296,10 +1297,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += block;
ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "double-free of inode"
- " %lu's block %llu(bit %u in group %u)",
- inode ? inode->i_ino : 0, blocknr, block,
- e4b->bd_group);
+ inode ? inode->i_ino : 0,
+ blocknr,
+ "freeing already freed block "
+ "(bit %u)", block);
}
mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
e4b->bd_info->bb_counters[order]++;
@@ -1788,8 +1789,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* we have free blocks
*/
- ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "%d free blocks as per "
+ ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+ "%d free blocks as per "
"group info. But bitmap says 0",
free);
break;
@@ -1798,8 +1799,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- __func__, "%d free blocks as per "
+ ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
+ "%d free blocks as per "
"group info. But got %d blocks",
free, ex.fe_len);
/*
@@ -1821,8 +1822,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
/*
* This is a special case for storages like raid5
- * we try to find stripe-aligned chunks for stripe-size requests
- * XXX should do so at least for multiples of stripe size as well
+ * we try to find stripe-aligned chunks for stripe-size-multiple requests
*/
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
@@ -1999,7 +1999,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ext4_group_t ngroups, group, i;
int cr;
int err = 0;
- int bsbits;
struct ext4_sb_info *sbi;
struct super_block *sb;
struct ext4_buddy e4b;
@@ -2041,8 +2040,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_2order = i - 1;
}
- bsbits = ac->ac_sb->s_blocksize_bits;
-
/* if stream allocation is enabled, use global goal */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
/* TBD: may be hot point */
@@ -2094,8 +2091,8 @@ repeat:
ac->ac_groups_scanned++;
if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b);
- else if (cr == 1 &&
- ac->ac_g_ex.fe_len == sbi->s_stripe)
+ else if (cr == 1 && sbi->s_stripe &&
+ !(ac->ac_g_ex.fe_len % sbi->s_stripe))
ext4_mb_scan_aligned(ac, &e4b);
else
ext4_mb_complex_scan_group(ac, &e4b);
@@ -2221,7 +2218,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
rc = seq_open(file, &ext4_mb_seq_groups_ops);
if (rc == 0) {
- struct seq_file *m = (struct seq_file *)file->private_data;
+ struct seq_file *m = file->private_data;
m->private = sb;
}
return rc;
@@ -2560,6 +2557,22 @@ int ext4_mb_release(struct super_block *sb)
return 0;
}
+static inline void ext4_issue_discard(struct super_block *sb,
+ ext4_group_t block_group, ext4_grpblk_t block, int count)
+{
+ int ret;
+ ext4_fsblk_t discard_block;
+
+ discard_block = block + ext4_group_first_block_no(sb, block_group);
+ trace_ext4_discard_blocks(sb,
+ (unsigned long long) discard_block, count);
+ ret = sb_issue_discard(sb, discard_block, count);
+ if (ret == EOPNOTSUPP) {
+ ext4_warning(sb, "discard not supported, disabling");
+ clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+ }
+}
+
/*
* This function is called by the jbd2 layer once the commit has finished,
* so we know we can free the blocks that were released with that commit.
@@ -2579,22 +2592,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry);
- if (test_opt(sb, DISCARD)) {
- int ret;
- ext4_fsblk_t discard_block;
-
- discard_block = entry->start_blk +
- ext4_group_first_block_no(sb, entry->group);
- trace_ext4_discard_blocks(sb,
- (unsigned long long)discard_block,
- entry->count);
- ret = sb_issue_discard(sb, discard_block, entry->count);
- if (ret == EOPNOTSUPP) {
- ext4_warning(sb,
- "discard not supported, disabling");
- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
- }
- }
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, entry->group,
+ entry->start_blk, entry->count);
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
@@ -2704,7 +2704,7 @@ void exit_ext4_mballoc(void)
/*
- * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
+ * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
* Returns 0 if success or error code
*/
static noinline_for_stack int
@@ -2712,7 +2712,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle, unsigned int reserv_blks)
{
struct buffer_head *bitmap_bh = NULL;
- struct ext4_super_block *es;
struct ext4_group_desc *gdp;
struct buffer_head *gdp_bh;
struct ext4_sb_info *sbi;
@@ -2725,8 +2724,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
sb = ac->ac_sb;
sbi = EXT4_SB(sb);
- es = sbi->s_es;
-
err = -EIO;
bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2812,7 +2809,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
out_err:
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
brelse(bitmap_bh);
return err;
}
@@ -2850,7 +2847,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
int bsbits, max;
ext4_lblk_t end;
loff_t size, orig_size, start_off;
- ext4_lblk_t start, orig_start;
+ ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_prealloc_space *pa;
@@ -2881,6 +2878,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
+ orig_size = size;
/* max size of free chunks */
max = 2 << bsbits;
@@ -2922,8 +2920,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
size = ac->ac_o_ex.fe_len << bsbits;
}
- orig_size = size = size >> bsbits;
- orig_start = start = start_off >> bsbits;
+ size = size >> bsbits;
+ start = start_off >> bsbits;
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
@@ -3547,7 +3545,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_group_t group;
ext4_grpblk_t bit;
unsigned long long grp_blk_start;
- sector_t start;
int err = 0;
int free = 0;
@@ -3567,10 +3564,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
if (bit >= end)
break;
next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
- start = ext4_group_first_block_no(sb, group) + bit;
mb_debug(1, " free preallocated %u/%u in group %u\n",
- (unsigned) start, (unsigned) next - bit,
- (unsigned) group);
+ (unsigned) ext4_group_first_block_no(sb, group) + bit,
+ (unsigned) next - bit, (unsigned) group);
free += next - bit;
if (ac) {
@@ -3581,7 +3577,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
trace_ext4_mballoc_discard(ac);
}
- trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
+ trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
@@ -3591,8 +3587,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
(unsigned long) pa->pa_len);
- ext4_grp_locked_error(sb, group,
- __func__, "free %u, pa_free %u",
+ ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
free, pa->pa_free);
/*
* pa is already deleted so we use the value obtained
@@ -3613,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(ac, pa);
+ trace_ext4_mb_release_group_pa(sb, ac, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
struct super_block *sb = ac->ac_sb;
ext4_group_t ngroups, i;
+ if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ return;
+
printk(KERN_ERR "EXT4-fs: Can't allocate:"
" Allocation context details:\n");
printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
@@ -4255,7 +4253,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
* to usual allocation
*/
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
- struct ext4_allocation_request *ar, int *errp)
+ struct ext4_allocation_request *ar, int *errp)
{
int freed;
struct ext4_allocation_context *ac = NULL;
@@ -4299,7 +4297,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
inquota = ar->len;
if (ar->len == 0) {
*errp = -EDQUOT;
- goto out3;
+ goto out;
}
}
@@ -4307,13 +4305,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
if (!ac) {
ar->len = 0;
*errp = -ENOMEM;
- goto out1;
+ goto out;
}
*errp = ext4_mb_initialize_context(ac, ar);
if (*errp) {
ar->len = 0;
- goto out2;
+ goto out;
}
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
@@ -4322,7 +4320,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_mb_normalize_request(ac, ar);
repeat:
/* allocate space in core */
- ext4_mb_regular_allocator(ac);
+ *errp = ext4_mb_regular_allocator(ac);
+ if (*errp)
+ goto errout;
/* as we've just preallocated more space than
* user requested orinally, we store allocated
@@ -4333,7 +4333,7 @@ repeat:
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
- if (*errp == -EAGAIN) {
+ if (*errp == -EAGAIN) {
/*
* drop the reference that we took
* in ext4_mb_use_best_found
@@ -4344,12 +4344,10 @@ repeat:
ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
goto repeat;
- } else if (*errp) {
+ } else if (*errp)
+ errout:
ext4_discard_allocated_blocks(ac);
- ac->ac_b_ex.fe_len = 0;
- ar->len = 0;
- ext4_mb_show_ac(ac);
- } else {
+ else {
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
ar->len = ac->ac_b_ex.fe_len;
}
@@ -4358,19 +4356,19 @@ repeat:
if (freed)
goto repeat;
*errp = -ENOSPC;
+ }
+
+ if (*errp) {
ac->ac_b_ex.fe_len = 0;
ar->len = 0;
ext4_mb_show_ac(ac);
}
-
ext4_mb_release_context(ac);
-
-out2:
- kmem_cache_free(ext4_ac_cachep, ac);
-out1:
+out:
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len);
-out3:
if (!ar->len) {
if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
/* release all the reserved blocks if non delalloc */
@@ -4402,6 +4400,7 @@ static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_free_data *new_entry)
{
+ ext4_group_t group = e4b->bd_group;
ext4_grpblk_t block;
struct ext4_free_data *entry;
struct ext4_group_info *db = e4b->bd_info;
@@ -4434,9 +4433,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
- ext4_grp_locked_error(sb, e4b->bd_group, __func__,
- "Double free of blocks %d (%d %d)",
- block, entry->start_blk, entry->count);
+ ext4_grp_locked_error(sb, group, 0,
+ ext4_group_first_block_no(sb, group) + block,
+ "Block already on to-be-freed list");
return 0;
}
}
@@ -4494,7 +4493,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct super_block *sb = inode->i_sb;
struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
- struct ext4_super_block *es;
unsigned long freed = 0;
unsigned int overflow;
ext4_grpblk_t bit;
@@ -4513,7 +4511,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
sbi = EXT4_SB(sb);
- es = EXT4_SB(sb)->s_es;
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
!ext4_data_block_valid(sbi, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
@@ -4647,6 +4644,8 @@ do_more:
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, block_group, bit, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4680,7 +4679,7 @@ do_more:
put_bh(bitmap_bh);
goto do_more;
}
- sb->s_dirt = 1;
+ ext4_mark_super_dirty(sb);
error_return:
if (freed)
dquot_free_block(inode, freed);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 6f3a27ec30b..1765c2c50a9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* We have the extent map build with the tmp inode.
* Now copy the i_data across
*/
- ei->i_flags |= EXT4_EXTENTS_FL;
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS);
memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
/*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92ac131..5f1ed9fc913 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
*/
static int
mext_check_null_inode(struct inode *inode1, struct inode *inode2,
- const char *function)
+ const char *function, unsigned int line)
{
int ret = 0;
if (inode1 == NULL) {
- __ext4_error(inode2->i_sb, function,
+ __ext4_error(inode2->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 NULL inode2 %lu", inode2->i_ino);
ret = -EIO;
} else if (inode2 == NULL) {
- __ext4_error(inode1->i_sb, function,
+ __ext4_error(inode1->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 %lu inode2 NULL", inode1->i_ino);
ret = -EIO;
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
+ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
+ return -EPERM;
+
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
@@ -1081,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL);
- ret = mext_check_null_inode(inode1, inode2, __func__);
+ ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
@@ -1118,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL);
- ret = mext_check_null_inode(inode1, inode2, __func__);
+ ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a43e6617b35..314c0d3b3fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
-unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
- unsigned len = le16_to_cpu(dlen);
-
- if (len == EXT4_MAX_REC_LEN || len == 0)
- return blocksize;
- return (len & 65532) | ((len & 3) << 16);
-}
-
-__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
- if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
- BUG();
- if (len < 65536)
- return cpu_to_le16(len);
- if (len == blocksize) {
- if (blocksize == 65536)
- return cpu_to_le16(EXT4_MAX_REC_LEN);
- else
- return cpu_to_le16(0);
- }
- return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-}
-
/*
* p is at least 6 bytes before the end of page
*/
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
- if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+ if (!ext4_check_dir_entry(dir, de, bh,
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
if ((char *) de + namelen <= dlimit &&
ext4_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext4_check_dir_entry("ext4_find_entry",
- dir, de, bh, offset))
+ if (!ext4_check_dir_entry(dir, de, bh, offset))
return -1;
*res_dir = de;
return 1;
@@ -1019,7 +994,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+ ((char *) de - bh->b_data);
- if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
+ if (!ext4_check_dir_entry(dir, de, bh, off)) {
brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
@@ -1088,7 +1063,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
- struct inode *inode;
static const struct qstr dotdot = {
.name = "..",
.len = 2,
@@ -1097,7 +1071,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de);
- inode = NULL;
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
@@ -1305,8 +1278,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *)bh->b_data;
top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) {
- if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
- bh, offset))
+ if (!ext4_check_dir_entry(dir, de, bh, offset))
return -EIO;
if (ext4_match(namelen, name, de))
return -EEXIST;
@@ -1673,7 +1645,7 @@ static int ext4_delete_entry(handle_t *handle,
pde = NULL;
de = (struct ext4_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i))
+ if (!ext4_check_dir_entry(dir, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
@@ -1956,7 +1928,7 @@ static int empty_dir(struct inode *inode)
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
}
- if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+ if (!ext4_check_dir_entry(inode, de, bh, offset)) {
de = (struct ext4_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 6df797eb9ae..ca5c8aa00a2 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -921,8 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
&sbi->s_flex_groups[flex_group].free_inodes);
}
- ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
- sb->s_dirt = 1;
+ ext4_handle_dirty_super(handle, sb);
exit_journal:
mutex_unlock(&sbi->s_resize_lock);
@@ -953,7 +952,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count)
{
ext4_fsblk_t o_blocks_count;
- ext4_group_t o_groups_count;
ext4_grpblk_t last;
ext4_grpblk_t add;
struct buffer_head *bh;
@@ -965,7 +963,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
* yet: we're going to revalidate es->s_blocks_count after
* taking the s_resize_lock below. */
o_blocks_count = ext4_blocks_count(es);
- o_groups_count = EXT4_SB(sb)->s_groups_count;
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
@@ -1045,13 +1042,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put;
}
ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
- sb->s_dirt = 1;
mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
ext4_add_groupblocks(handle, sb, o_blocks_count, add);
+ ext4_handle_dirty_super(handle, sb);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext4_journal_stop(handle)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e8983a9811..8d65575f8c8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -241,14 +241,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ vfs_check_frozen(sb, SB_FREEZE_TRANS);
/* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
if (journal) {
if (is_journal_aborted(journal)) {
- ext4_abort(sb, __func__, "Detected aborted journal");
+ ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
}
return jbd2_journal_start(journal, nblocks);
@@ -262,7 +262,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
-int __ext4_journal_stop(const char *where, handle_t *handle)
+int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;
int err;
@@ -279,12 +279,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
if (!err)
err = rc;
if (err)
- __ext4_std_error(sb, where, err);
+ __ext4_std_error(sb, where, line, err);
return err;
}
-void ext4_journal_abort_handle(const char *caller, const char *err_fn,
- struct buffer_head *bh, handle_t *handle, int err)
+void ext4_journal_abort_handle(const char *caller, unsigned int line,
+ const char *err_fn, struct buffer_head *bh,
+ handle_t *handle, int err)
{
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);
@@ -300,12 +301,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
if (is_handle_aborted(handle))
return;
- printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
- caller, errstr, err_fn);
+ printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
+ caller, line, errstr, err_fn);
jbd2_journal_abort_handle(handle);
}
+static void __save_error_info(struct super_block *sb, const char *func,
+ unsigned int line)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ es->s_last_error_time = cpu_to_le32(get_seconds());
+ strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(line);
+ if (!es->s_first_error_time) {
+ es->s_first_error_time = es->s_last_error_time;
+ strncpy(es->s_first_error_func, func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line = cpu_to_le32(line);
+ es->s_first_error_ino = es->s_last_error_ino;
+ es->s_first_error_block = es->s_last_error_block;
+ }
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
+ es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+}
+
+static void save_error_info(struct super_block *sb, const char *func,
+ unsigned int line)
+{
+ __save_error_info(sb, func, line);
+ ext4_commit_super(sb, 1);
+}
+
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -323,11 +359,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
static void ext4_handle_error(struct super_block *sb)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-
if (sb->s_flags & MS_RDONLY)
return;
@@ -342,19 +373,19 @@ static void ext4_handle_error(struct super_block *sb)
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
}
- ext4_commit_super(sb, 1);
if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
void __ext4_error(struct super_block *sb, const char *function,
- const char *fmt, ...)
+ unsigned int line, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
+ sb->s_id, function, line, current->comm);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -362,14 +393,22 @@ void __ext4_error(struct super_block *sb, const char *function,
ext4_handle_error(sb);
}
-void ext4_error_inode(const char *function, struct inode *inode,
+void ext4_error_inode(struct inode *inode, const char *function,
+ unsigned int line, ext4_fsblk_t block,
const char *fmt, ...)
{
va_list args;
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
- inode->i_sb->s_id, function, inode->i_ino, current->comm);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk("block %llu: ", block);
+ printk("comm %s: ", current->comm);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -377,20 +416,26 @@ void ext4_error_inode(const char *function, struct inode *inode,
ext4_handle_error(inode->i_sb);
}
-void ext4_error_file(const char *function, struct file *file,
- const char *fmt, ...)
+void ext4_error_file(struct file *file, const char *function,
+ unsigned int line, const char *fmt, ...)
{
va_list args;
+ struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
+ es = EXT4_SB(inode->i_sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
if (!path)
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
- inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
+ "EXT4-fs error (device %s): %s:%d: inode #%lu "
+ "(comm %s path %s): ",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -435,7 +480,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
/* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */
-void __ext4_std_error(struct super_block *sb, const char *function, int errno)
+void __ext4_std_error(struct super_block *sb, const char *function,
+ unsigned int line, int errno)
{
char nbuf[16];
const char *errstr;
@@ -448,8 +494,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
return;
errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
- sb->s_id, function, errstr);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ save_error_info(sb, function, line);
ext4_handle_error(sb);
}
@@ -464,29 +511,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
* case we take the easy way out and panic immediately.
*/
-void ext4_abort(struct super_block *sb, const char *function,
- const char *fmt, ...)
+void __ext4_abort(struct super_block *sb, const char *function,
+ unsigned int line, const char *fmt, ...)
{
va_list args;
+ save_error_info(sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
+ function, line);
vprintk(fmt, args);
printk("\n");
va_end(args);
+ if ((sb->s_flags & MS_RDONLY) == 0) {
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ sb->s_flags |= MS_RDONLY;
+ EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+ save_error_info(sb, function, line);
+ }
if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs panic from previous error\n");
-
- if (sb->s_flags & MS_RDONLY)
- return;
-
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- sb->s_flags |= MS_RDONLY;
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
void ext4_msg (struct super_block * sb, const char *prefix,
@@ -502,38 +549,47 @@ void ext4_msg (struct super_block * sb, const char *prefix,
}
void __ext4_warning(struct super_block *sb, const char *function,
- const char *fmt, ...)
+ unsigned int line, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
- sb->s_id, function);
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
+ sb->s_id, function, line);
vprintk(fmt, args);
printk("\n");
va_end(args);
}
-void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
- const char *function, const char *fmt, ...)
+void __ext4_grp_locked_error(const char *function, unsigned int line,
+ struct super_block *sb, ext4_group_t grp,
+ unsigned long ino, ext4_fsblk_t block,
+ const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ es->s_last_error_ino = cpu_to_le32(ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ __save_error_info(sb, function, line);
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk("inode %lu: ", ino);
+ if (block)
+ printk("block %llu:", (unsigned long long) block);
vprintk(fmt, args);
printk("\n");
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, 0);
return;
}
+
ext4_unlock_group(sb, grp);
ext4_handle_error(sb);
/*
@@ -660,8 +716,7 @@ static void ext4_put_super(struct super_block *sb)
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
if (err < 0)
- ext4_abort(sb, __func__,
- "Couldn't clean up the journal");
+ ext4_abort(sb, "Couldn't clean up the journal");
}
ext4_release_system_zone(sb);
@@ -946,14 +1001,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",journal_async_commit");
else if (test_opt(sb, JOURNAL_CHECKSUM))
seq_puts(seq, ",journal_checksum");
- if (test_opt(sb, NOBH))
- seq_puts(seq, ",nobh");
if (test_opt(sb, I_VERSION))
seq_puts(seq, ",i_version");
- if (!test_opt(sb, DELALLOC))
+ if (!test_opt(sb, DELALLOC) &&
+ !(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
-
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
@@ -977,7 +1030,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, NO_AUTO_DA_ALLOC))
seq_puts(seq, ",noauto_da_alloc");
- if (test_opt(sb, DISCARD))
+ if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
seq_puts(seq, ",discard");
if (test_opt(sb, NOLOAD))
@@ -986,6 +1039,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, DIOREAD_NOLOCK))
seq_puts(seq, ",dioread_nolock");
+ if (test_opt(sb, BLOCK_VALIDITY) &&
+ !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
+ seq_puts(seq, ",block_validity");
+
ext4_show_quota_options(seq, sb);
return 0;
@@ -1065,6 +1122,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
char *path);
+static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1086,7 +1144,7 @@ static const struct dquot_operations ext4_quota_operations = {
static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
- .quota_off = dquot_quota_off,
+ .quota_off = ext4_quota_off,
.quota_sync = dquot_quota_sync,
.get_info = dquot_get_dqinfo,
.set_info = dquot_set_dqinfo,
@@ -1624,10 +1682,12 @@ set_qf_format:
*n_blocks_count = option;
break;
case Opt_nobh:
- set_opt(sbi->s_mount_opt, NOBH);
+ ext4_msg(sb, KERN_WARNING,
+ "Ignoring deprecated nobh option");
break;
case Opt_bh:
- clear_opt(sbi->s_mount_opt, NOBH);
+ ext4_msg(sb, KERN_WARNING,
+ "Ignoring deprecated bh option");
break;
case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION);
@@ -2249,6 +2309,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%lu\n",
(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
sbi->s_sectors_written_start) >> 1);
@@ -2259,6 +2321,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@@ -2431,6 +2495,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 1;
}
+/*
+ * This function is called once a day if we have errors logged
+ * on the file system
+ */
+static void print_daily_error_info(unsigned long arg)
+{
+ struct super_block *sb = (struct super_block *) arg;
+ struct ext4_sb_info *sbi;
+ struct ext4_super_block *es;
+
+ sbi = EXT4_SB(sb);
+ es = sbi->s_es;
+
+ if (es->s_error_count)
+ ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ le32_to_cpu(es->s_error_count));
+ if (es->s_first_error_time) {
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_first_error_time),
+ (int) sizeof(es->s_first_error_func),
+ es->s_first_error_func,
+ le32_to_cpu(es->s_first_error_line));
+ if (es->s_first_error_ino)
+ printk(": inode %u",
+ le32_to_cpu(es->s_first_error_ino));
+ if (es->s_first_error_block)
+ printk(": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_first_error_block));
+ printk("\n");
+ }
+ if (es->s_last_error_time) {
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ sb->s_id, le32_to_cpu(es->s_last_error_time),
+ (int) sizeof(es->s_last_error_func),
+ es->s_last_error_func,
+ le32_to_cpu(es->s_last_error_line));
+ if (es->s_last_error_ino)
+ printk(": inode %u",
+ le32_to_cpu(es->s_last_error_ino));
+ if (es->s_last_error_block)
+ printk(": block %llu", (unsigned long long)
+ le64_to_cpu(es->s_last_error_block));
+ printk("\n");
+ }
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
@@ -2448,7 +2559,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root;
char *cp;
const char *descr;
- int ret = -EINVAL;
+ int ret = -ENOMEM;
int blocksize;
unsigned int db_count;
unsigned int i;
@@ -2459,13 +2570,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- return -ENOMEM;
+ goto out_free_orig;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) {
kfree(sbi);
- return -ENOMEM;
+ goto out_free_orig;
}
sb->s_fs_info = sbi;
sbi->s_mount_opt = 0;
@@ -2473,8 +2584,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = EXT4_DEF_RESGID;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
- sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
- sectors[1]);
+ if (sb->s_bdev->bd_part)
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev->bd_part, sectors[1]);
unlock_kernel();
@@ -2482,6 +2594,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
*cp = '!';
+ ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
ext4_msg(sb, KERN_ERR, "unable to set blocksize");
@@ -2546,6 +2659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, ERRORS_CONT);
else
set_opt(sbi->s_mount_opt, ERRORS_RO);
+ if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
+ set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ if (def_mount_opts & EXT4_DEFM_DISCARD)
+ set_opt(sbi->s_mount_opt, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -2553,15 +2670,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
- set_opt(sbi->s_mount_opt, BARRIER);
+ if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
+ set_opt(sbi->s_mount_opt, BARRIER);
/*
* enable delayed allocation by default
* Use -o nodelalloc to turn it off
*/
- if (!IS_EXT3_SB(sb))
+ if (!IS_EXT3_SB(sb) &&
+ ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sbi->s_mount_opt, DELALLOC);
+ if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
+ &journal_devnum, &journal_ioprio, NULL, 0)) {
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ sbi->s_es->s_mount_opts);
+ }
if (!parse_options((char *) data, sb, &journal_devnum,
&journal_ioprio, NULL, 0))
goto failed_mount;
@@ -2912,18 +3037,7 @@ no_journal:
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount_wq;
}
- if (test_opt(sb, NOBH)) {
- if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
- ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
- "its supported only with writeback mode");
- clear_opt(sbi->s_mount_opt, NOBH);
- }
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
- "not supported with nobh mode");
- goto failed_mount_wq;
- }
- }
+
EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3010,7 +3124,7 @@ no_journal:
ext4_ext_init(sb);
err = ext4_mb_init(sb, needs_recovery);
if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
+ ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
goto failed_mount4;
}
@@ -3043,7 +3157,14 @@ no_journal:
descr = "out journal";
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s", descr, orig_data);
+ "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+ *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+
+ init_timer(&sbi->s_err_report);
+ sbi->s_err_report.function = print_daily_error_info;
+ sbi->s_err_report.data = (unsigned long) sb;
+ if (es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
lock_kernel();
kfree(orig_data);
@@ -3093,6 +3214,7 @@ out_fail:
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
lock_kernel();
+out_free_orig:
kfree(orig_data);
return ret;
}
@@ -3110,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
else
@@ -3119,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
static journal_t *ext4_get_journal(struct super_block *sb,
@@ -3327,8 +3449,17 @@ static int ext4_load_journal(struct super_block *sb,
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
err = jbd2_journal_wipe(journal, !really_read_only);
- if (!err)
+ if (!err) {
+ char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+ if (save)
+ memcpy(save, ((char *) es) +
+ EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal);
+ if (save)
+ memcpy(((char *) es) + EXT4_S_ERR_START,
+ save, EXT4_S_ERR_LEN);
+ kfree(save);
+ }
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
@@ -3384,10 +3515,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
*/
if (!(sb->s_flags & MS_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds());
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+ if (sb->s_bdev->bd_part)
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
EXT4_SB(sb)->s_sectors_written_start) >> 1));
+ else
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter));
es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
@@ -3491,7 +3626,7 @@ int ext4_force_commit(struct super_block *sb)
journal = EXT4_SB(sb)->s_journal;
if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ vfs_check_frozen(sb, SB_FREEZE_TRANS);
ret = ext4_journal_force_commit(journal);
}
@@ -3616,7 +3751,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, __func__, "Abort forced by user");
+ ext4_abort(sb, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3981,6 +4116,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
+static int ext4_quota_off(struct super_block *sb, int type)
+{
+ /* Force all delayed allocation blocks to be allocated */
+ if (test_opt(sb, DELALLOC)) {
+ down_read(&sb->s_umount);
+ sync_filesystem(sb);
+ up_read(&sb->s_umount);
+ }
+
+ return dquot_quota_off(sb, type);
+}
+
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and noone else should touch the files)
@@ -4030,7 +4177,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err = 0;
int offset = off & (sb->s_blocksize - 1);
- int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
@@ -4055,24 +4201,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
bh = ext4_bread(handle, inode, blk, 1, &err);
if (!bh)
goto out;
- if (journal_quota) {
- err = ext4_journal_get_write_access(handle, bh);
- if (err) {
- brelse(bh);
- goto out;
- }
+ err = ext4_journal_get_write_access(handle, bh);
+ if (err) {
+ brelse(bh);
+ goto out;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
flush_dcache_page(bh->b_page);
unlock_buffer(bh);
- if (journal_quota)
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- else {
- /* Always do at least ordered writes for quotas */
- err = ext4_jbd2_file_inode(handle, inode);
- mark_buffer_dirty(bh);
- }
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
out:
if (err) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 04338009793..a6f31424957 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
- sb->s_dirt = 1;
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_super(handle, sb);
}
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270ba15..9d175d623aa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
ret = copy_from_user(&owner, owner_p, sizeof(owner));
if (ret)
- return ret;
+ return -EFAULT;
switch (owner.type) {
case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
}
read_unlock(&filp->f_owner.lock);
- if (!ret)
+ if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
+ if (ret)
+ ret = -EFAULT;
+ }
return ret;
}
@@ -730,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
+
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock(&fa->fa_lock);
+ spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -744,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock(&fa->fa_lock);
+ spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/file.c b/fs/file.c
index 34bb7f71d99..cccaead962c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -178,7 +178,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
fdt->open_fds = (fd_set *)data;
data += nr / BITS_PER_BYTE;
fdt->close_on_exec = (fd_set *)data;
- INIT_RCU_HEAD(&fdt->rcu);
fdt->next = NULL;
return fdt;
@@ -312,7 +311,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
new_fdt->fd = &newf->fd_array[0];
- INIT_RCU_HEAD(&new_fdt->rcu);
new_fdt->next = NULL;
spin_lock(&oldf->file_lock);
@@ -430,7 +428,6 @@ struct files_struct init_files = {
.fd = &init_files.fd_array[0],
.close_on_exec = (fd_set *)&init_files.close_on_exec_init,
.open_fds = (fd_set *)&init_files.open_fds_init,
- .rcu = RCU_HEAD_INIT,
},
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
};
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1e8af939b3e..5132c99b1ca 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -135,7 +135,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data)
}
/**
- * vxfs_read_super - read superblock into memory and initalize filesystem
+ * vxfs_read_super - read superblock into memory and initialize filesystem
* @sbp: VFS superblock (to fill)
* @dp: fs private mount data
* @silent: do not complain loudly when sth is wrong
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b9069..d5be1693ac9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,52 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
- unsigned int sb_pinned:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- work->state = WS_USED;
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -96,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
-
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- wb_work_complete(work);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -182,107 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args,
- int wait)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- if (wait)
- bdi_wait_on_work_clear(work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
+
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
+
+ bdi_queue_work(bdi, work);
}
/**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns, we make no guarentees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- /*
- * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
- * lets make it explicitly clear.
- */
- .sb_pinned = 1,
- };
- struct bdi_work work;
-
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
-
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
- * @sb_locked: caller already holds sb umount sem.
*
* Description:
- * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * This does WB_SYNC_NONE background writeback. The IO is only
* started when this function returns, we make no guarentees on
- * completion. Caller specifies whether sb umount sem is held already or not.
- *
+ * completion. Caller need not hold sb s_umount semaphore.
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages, int sb_locked)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- .sb_pinned = sb_locked,
- };
-
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
-
- bdi_alloc_queue_work(bdi, &args, sb_locked);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -572,75 +425,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -678,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -692,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
-
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -717,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -759,17 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
- .sb_pinned = args->sb_pinned,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -789,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -839,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
+ struct wb_writeback_work *work = NULL;
- rcu_read_lock();
-
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -888,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -907,36 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
- int post_clear;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- post_clear = WB_SYNC_ALL || args.sb_pinned;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (!post_clear)
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (post_clear)
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -993,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
- .sync_mode = WB_SYNC_NONE,
- };
struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
- bdi_alloc_queue_work(bdi, &args, 0);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1220,18 +1029,6 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
-static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
-{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
-
- nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
-}
-
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1243,21 +1040,24 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
*/
void writeback_inodes_sb(struct super_block *sb)
{
- __writeback_inodes_sb(sb, 0);
-}
-EXPORT_SYMBOL(writeback_inodes_sb);
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .done = &done,
+ };
-/**
- * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
- * @sb: the superblock
- *
- * Like writeback_inodes_sb(), except the caller already holds the
- * sb umount sem.
- */
-void writeback_inodes_sb_locked(struct super_block *sb)
-{
- __writeback_inodes_sb(sb, 1);
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ work.nr_pages = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb);
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
@@ -1269,7 +1069,9 @@ void writeback_inodes_sb_locked(struct super_block *sb)
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1285,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index cc94bb9563f..3f6dfa98988 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -1,7 +1,6 @@
config FSCACHE
tristate "General filesystem local caching manager"
- select SLOW_WORK
help
This option enables a generic filesystem caching manager that can be
used by various network and other filesystems to cache data locally.
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index edd7434ab6e..6a026441c5a 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -82,6 +82,14 @@ extern unsigned fscache_defer_lookup;
extern unsigned fscache_defer_create;
extern unsigned fscache_debug;
extern struct kobject *fscache_root;
+extern struct workqueue_struct *fscache_object_wq;
+extern struct workqueue_struct *fscache_op_wq;
+DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+
+static inline bool fscache_object_congested(void)
+{
+ return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
+}
extern int fscache_wait_bit(void *);
extern int fscache_wait_bit_interruptible(void *);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index add6bdb53f0..f9d856773f7 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/slab.h>
+#include <linux/seq_file.h>
#include "internal.h"
MODULE_DESCRIPTION("FS Cache Manager");
@@ -40,22 +41,105 @@ MODULE_PARM_DESC(fscache_debug,
"FS-Cache debugging mask");
struct kobject *fscache_root;
+struct workqueue_struct *fscache_object_wq;
+struct workqueue_struct *fscache_op_wq;
+
+DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+
+/* these values serve as lower bounds, will be adjusted in fscache_init() */
+static unsigned fscache_object_max_active = 4;
+static unsigned fscache_op_max_active = 2;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fscache_sysctl_header;
+
+static int fscache_max_active_sysctl(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct workqueue_struct **wqp = table->extra1;
+ unsigned int *datap = table->data;
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret == 0)
+ workqueue_set_max_active(*wqp, *datap);
+ return ret;
+}
+
+ctl_table fscache_sysctls[] = {
+ {
+ .procname = "object_max_active",
+ .data = &fscache_object_max_active,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = fscache_max_active_sysctl,
+ .extra1 = &fscache_object_wq,
+ },
+ {
+ .procname = "operation_max_active",
+ .data = &fscache_op_max_active,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = fscache_max_active_sysctl,
+ .extra1 = &fscache_op_wq,
+ },
+ {}
+};
+
+ctl_table fscache_sysctls_root[] = {
+ {
+ .procname = "fscache",
+ .mode = 0555,
+ .child = fscache_sysctls,
+ },
+ {}
+};
+#endif
/*
* initialise the fs caching module
*/
static int __init fscache_init(void)
{
+ unsigned int nr_cpus = num_possible_cpus();
+ unsigned int cpu;
int ret;
- ret = slow_work_register_user(THIS_MODULE);
- if (ret < 0)
- goto error_slow_work;
+ fscache_object_max_active =
+ clamp_val(nr_cpus,
+ fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE);
+
+ ret = -ENOMEM;
+ fscache_object_wq = alloc_workqueue("fscache_object", WQ_UNBOUND,
+ fscache_object_max_active);
+ if (!fscache_object_wq)
+ goto error_object_wq;
+
+ fscache_op_max_active =
+ clamp_val(fscache_object_max_active / 2,
+ fscache_op_max_active, WQ_UNBOUND_MAX_ACTIVE);
+
+ ret = -ENOMEM;
+ fscache_op_wq = alloc_workqueue("fscache_operation", WQ_UNBOUND,
+ fscache_op_max_active);
+ if (!fscache_op_wq)
+ goto error_op_wq;
+
+ for_each_possible_cpu(cpu)
+ init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
ret = fscache_proc_init();
if (ret < 0)
goto error_proc;
+#ifdef CONFIG_SYSCTL
+ ret = -ENOMEM;
+ fscache_sysctl_header = register_sysctl_table(fscache_sysctls_root);
+ if (!fscache_sysctl_header)
+ goto error_sysctl;
+#endif
+
fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
sizeof(struct fscache_cookie),
0,
@@ -78,10 +162,16 @@ static int __init fscache_init(void)
error_kobj:
kmem_cache_destroy(fscache_cookie_jar);
error_cookie_jar:
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(fscache_sysctl_header);
+error_sysctl:
+#endif
fscache_proc_cleanup();
error_proc:
- slow_work_unregister_user(THIS_MODULE);
-error_slow_work:
+ destroy_workqueue(fscache_op_wq);
+error_op_wq:
+ destroy_workqueue(fscache_object_wq);
+error_object_wq:
return ret;
}
@@ -96,8 +186,12 @@ static void __exit fscache_exit(void)
kobject_put(fscache_root);
kmem_cache_destroy(fscache_cookie_jar);
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(fscache_sysctl_header);
+#endif
fscache_proc_cleanup();
- slow_work_unregister_user(THIS_MODULE);
+ destroy_workqueue(fscache_op_wq);
+ destroy_workqueue(fscache_object_wq);
printk(KERN_NOTICE "FS-Cache: Unloaded\n");
}
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 4a8eb31c533..ebe29c58138 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -34,8 +34,8 @@ struct fscache_objlist_data {
#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */
#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */
#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */
-#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */
-#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */
+#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */
+#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */
u8 buf[512]; /* key and aux data buffer */
};
@@ -231,12 +231,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
READS, NOREADS);
FILTER(obj->events & obj->event_mask,
EVENTS, NOEVENTS);
- FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW),
- WORK, NOWORK);
+ FILTER(work_busy(&obj->work), WORK, NOWORK);
}
seq_printf(m,
- "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ",
+ "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ",
obj->debug_id,
obj->parent ? obj->parent->debug_id : -1,
fscache_object_states_short[obj->state],
@@ -249,7 +248,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
obj->events,
obj->flags,
- obj->work.flags);
+ work_busy(&obj->work));
no_cookie = true;
keylen = auxlen = 0;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 0b589a9b4ff..b6b897c550a 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,7 +14,6 @@
#define FSCACHE_DEBUG_LEVEL COOKIE
#include <linux/module.h>
-#include <linux/seq_file.h>
#include "internal.h"
const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -50,12 +49,8 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
[FSCACHE_OBJECT_DEAD] = "DEAD",
};
-static void fscache_object_slow_work_put_ref(struct slow_work *);
-static int fscache_object_slow_work_get_ref(struct slow_work *);
-static void fscache_object_slow_work_execute(struct slow_work *);
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
-#endif
+static int fscache_get_object(struct fscache_object *);
+static void fscache_put_object(struct fscache_object *);
static void fscache_initialise_object(struct fscache_object *);
static void fscache_lookup_object(struct fscache_object *);
static void fscache_object_available(struct fscache_object *);
@@ -64,17 +59,6 @@ static void fscache_withdraw_object(struct fscache_object *);
static void fscache_enqueue_dependents(struct fscache_object *);
static void fscache_dequeue_object(struct fscache_object *);
-const struct slow_work_ops fscache_object_slow_work_ops = {
- .owner = THIS_MODULE,
- .get_ref = fscache_object_slow_work_get_ref,
- .put_ref = fscache_object_slow_work_put_ref,
- .execute = fscache_object_slow_work_execute,
-#ifdef CONFIG_SLOW_WORK_DEBUG
- .desc = fscache_object_slow_work_desc,
-#endif
-};
-EXPORT_SYMBOL(fscache_object_slow_work_ops);
-
/*
* we need to notify the parent when an op completes that we had outstanding
* upon it
@@ -345,7 +329,7 @@ unsupported_event:
/*
* execute an object
*/
-static void fscache_object_slow_work_execute(struct slow_work *work)
+void fscache_object_work_func(struct work_struct *work)
{
struct fscache_object *object =
container_of(work, struct fscache_object, work);
@@ -359,23 +343,9 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
if (object->events & object->event_mask)
fscache_enqueue_object(object);
clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+ fscache_put_object(object);
}
-
-/*
- * describe an object for slow-work debugging
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_object_slow_work_desc(struct slow_work *work,
- struct seq_file *m)
-{
- struct fscache_object *object =
- container_of(work, struct fscache_object, work);
-
- seq_printf(m, "FSC: OBJ%x: %s",
- object->debug_id,
- fscache_object_states_short[object->state]);
-}
-#endif
+EXPORT_SYMBOL(fscache_object_work_func);
/*
* initialise an object
@@ -393,7 +363,6 @@ static void fscache_initialise_object(struct fscache_object *object)
_enter("");
ASSERT(object->cookie != NULL);
ASSERT(object->cookie->parent != NULL);
- ASSERT(list_empty(&object->work.link));
if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
(1 << FSCACHE_OBJECT_EV_RELEASE) |
@@ -671,10 +640,8 @@ static void fscache_drop_object(struct fscache_object *object)
object->parent = NULL;
}
- /* this just shifts the object release to the slow work processor */
- fscache_stat(&fscache_n_cop_put_object);
- object->cache->ops->put_object(object);
- fscache_stat_d(&fscache_n_cop_put_object);
+ /* this just shifts the object release to the work processor */
+ fscache_put_object(object);
_leave("");
}
@@ -758,12 +725,10 @@ void fscache_withdrawing_object(struct fscache_cache *cache,
}
/*
- * allow the slow work item processor to get a ref on an object
+ * get a ref on an object
*/
-static int fscache_object_slow_work_get_ref(struct slow_work *work)
+static int fscache_get_object(struct fscache_object *object)
{
- struct fscache_object *object =
- container_of(work, struct fscache_object, work);
int ret;
fscache_stat(&fscache_n_cop_grab_object);
@@ -773,13 +738,10 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work)
}
/*
- * allow the slow work item processor to discard a ref on a work item
+ * discard a ref on a work item
*/
-static void fscache_object_slow_work_put_ref(struct slow_work *work)
+static void fscache_put_object(struct fscache_object *object)
{
- struct fscache_object *object =
- container_of(work, struct fscache_object, work);
-
fscache_stat(&fscache_n_cop_put_object);
object->cache->ops->put_object(object);
fscache_stat_d(&fscache_n_cop_put_object);
@@ -792,8 +754,48 @@ void fscache_enqueue_object(struct fscache_object *object)
{
_enter("{OBJ%x}", object->debug_id);
- slow_work_enqueue(&object->work);
+ if (fscache_get_object(object) >= 0) {
+ wait_queue_head_t *cong_wq =
+ &get_cpu_var(fscache_object_cong_wait);
+
+ if (queue_work(fscache_object_wq, &object->work)) {
+ if (fscache_object_congested())
+ wake_up(cong_wq);
+ } else
+ fscache_put_object(object);
+
+ put_cpu_var(fscache_object_cong_wait);
+ }
+}
+
+/**
+ * fscache_object_sleep_till_congested - Sleep until object wq is congested
+ * @timoutp: Scheduler sleep timeout
+ *
+ * Allow an object handler to sleep until the object workqueue is congested.
+ *
+ * The caller must set up a wake up event before calling this and must have set
+ * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
+ * condition before calling this function as no test is made here.
+ *
+ * %true is returned if the object wq is congested, %false otherwise.
+ */
+bool fscache_object_sleep_till_congested(signed long *timeoutp)
+{
+ wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait);
+ DEFINE_WAIT(wait);
+
+ if (fscache_object_congested())
+ return true;
+
+ add_wait_queue_exclusive(cong_wq, &wait);
+ if (!fscache_object_congested())
+ *timeoutp = schedule_timeout(*timeoutp);
+ finish_wait(cong_wq, &wait);
+
+ return fscache_object_congested();
}
+EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested);
/*
* enqueue the dependents of an object for metadata-type processing
@@ -819,9 +821,7 @@ static void fscache_enqueue_dependents(struct fscache_object *object)
/* sort onto appropriate lists */
fscache_enqueue_object(dep);
- fscache_stat(&fscache_n_cop_put_object);
- dep->cache->ops->put_object(dep);
- fscache_stat_d(&fscache_n_cop_put_object);
+ fscache_put_object(dep);
if (!list_empty(&object->dependents))
cond_resched_lock(&object->lock);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index f17cecafae4..b9f34eaede0 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -42,16 +42,12 @@ void fscache_enqueue_operation(struct fscache_operation *op)
fscache_stat(&fscache_n_op_enqueue);
switch (op->flags & FSCACHE_OP_TYPE) {
- case FSCACHE_OP_FAST:
- _debug("queue fast");
+ case FSCACHE_OP_ASYNC:
+ _debug("queue async");
atomic_inc(&op->usage);
- if (!schedule_work(&op->fast_work))
+ if (!queue_work(fscache_op_wq, &op->work))
fscache_put_operation(op);
break;
- case FSCACHE_OP_SLOW:
- _debug("queue slow");
- slow_work_enqueue(&op->slow_work);
- break;
case FSCACHE_OP_MYTHREAD:
_debug("queue for caller's attention");
break;
@@ -455,36 +451,13 @@ void fscache_operation_gc(struct work_struct *work)
}
/*
- * allow the slow work item processor to get a ref on an operation
- */
-static int fscache_op_get_ref(struct slow_work *work)
-{
- struct fscache_operation *op =
- container_of(work, struct fscache_operation, slow_work);
-
- atomic_inc(&op->usage);
- return 0;
-}
-
-/*
- * allow the slow work item processor to discard a ref on an operation
- */
-static void fscache_op_put_ref(struct slow_work *work)
-{
- struct fscache_operation *op =
- container_of(work, struct fscache_operation, slow_work);
-
- fscache_put_operation(op);
-}
-
-/*
- * execute an operation using the slow thread pool to provide processing context
- * - the caller holds a ref to this object, so we don't need to hold one
+ * execute an operation using fs_op_wq to provide processing context -
+ * the caller holds a ref to this object, so we don't need to hold one
*/
-static void fscache_op_execute(struct slow_work *work)
+void fscache_op_work_func(struct work_struct *work)
{
struct fscache_operation *op =
- container_of(work, struct fscache_operation, slow_work);
+ container_of(work, struct fscache_operation, work);
unsigned long start;
_enter("{OBJ%x OP%x,%d}",
@@ -494,31 +467,7 @@ static void fscache_op_execute(struct slow_work *work)
start = jiffies;
op->processor(op);
fscache_hist(fscache_ops_histogram, start);
+ fscache_put_operation(op);
_leave("");
}
-
-/*
- * describe an operation for slow-work debugging
- */
-#ifdef CONFIG_SLOW_WORK_DEBUG
-static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
-{
- struct fscache_operation *op =
- container_of(work, struct fscache_operation, slow_work);
-
- seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx",
- op->object->debug_id, op->debug_id,
- op->name, op->state, op->flags);
-}
-#endif
-
-const struct slow_work_ops fscache_op_slow_work_ops = {
- .owner = THIS_MODULE,
- .get_ref = fscache_op_get_ref,
- .put_ref = fscache_op_put_ref,
- .execute = fscache_op_execute,
-#ifdef CONFIG_SLOW_WORK_DEBUG
- .desc = fscache_op_desc,
-#endif
-};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd376e5..41c441c2058 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -105,7 +105,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
page_busy:
/* we might want to wait here, but that could deadlock the allocator as
- * the slow-work threads writing to the cache may all end up sleeping
+ * the work threads writing to the cache may all end up sleeping
* on memory allocation */
fscache_stat(&fscache_n_store_vmscan_busy);
return false;
@@ -188,9 +188,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
return -ENOMEM;
}
- fscache_operation_init(op, NULL);
- fscache_operation_init_slow(op, fscache_attr_changed_op);
- op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
+ fscache_operation_init(op, fscache_attr_changed_op, NULL);
+ op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
fscache_set_op_name(op, "Attr");
spin_lock(&cookie->lock);
@@ -218,24 +217,6 @@ nobufs:
EXPORT_SYMBOL(__fscache_attr_changed);
/*
- * handle secondary execution given to a retrieval op on behalf of the
- * cache
- */
-static void fscache_retrieval_work(struct work_struct *work)
-{
- struct fscache_retrieval *op =
- container_of(work, struct fscache_retrieval, op.fast_work);
- unsigned long start;
-
- _enter("{OP%x}", op->op.debug_id);
-
- start = jiffies;
- op->op.processor(&op->op);
- fscache_hist(fscache_ops_histogram, start);
- fscache_put_operation(&op->op);
-}
-
-/*
* release a retrieval op reference
*/
static void fscache_release_retrieval_op(struct fscache_operation *_op)
@@ -269,13 +250,12 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
return NULL;
}
- fscache_operation_init(&op->op, fscache_release_retrieval_op);
+ fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op);
op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING);
op->mapping = mapping;
op->end_io_func = end_io_func;
op->context = context;
op->start_time = jiffies;
- INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
INIT_LIST_HEAD(&op->to_do);
fscache_set_op_name(&op->op, "Retr");
return op;
@@ -710,30 +690,26 @@ static void fscache_write_op(struct fscache_operation *_op)
goto superseded;
}
- if (page) {
- radix_tree_tag_set(&cookie->stores, page->index,
- FSCACHE_COOKIE_STORING_TAG);
- radix_tree_tag_clear(&cookie->stores, page->index,
- FSCACHE_COOKIE_PENDING_TAG);
- }
+ radix_tree_tag_set(&cookie->stores, page->index,
+ FSCACHE_COOKIE_STORING_TAG);
+ radix_tree_tag_clear(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
- if (page) {
- fscache_set_op_state(&op->op, "Store");
- fscache_stat(&fscache_n_store_pages);
- fscache_stat(&fscache_n_cop_write_page);
- ret = object->cache->ops->write_page(op, page);
- fscache_stat_d(&fscache_n_cop_write_page);
- fscache_set_op_state(&op->op, "EndWrite");
- fscache_end_page_write(object, page);
- if (ret < 0) {
- fscache_set_op_state(&op->op, "Abort");
- fscache_abort_object(object);
- } else {
- fscache_enqueue_operation(&op->op);
- }
+ fscache_set_op_state(&op->op, "Store");
+ fscache_stat(&fscache_n_store_pages);
+ fscache_stat(&fscache_n_cop_write_page);
+ ret = object->cache->ops->write_page(op, page);
+ fscache_stat_d(&fscache_n_cop_write_page);
+ fscache_set_op_state(&op->op, "EndWrite");
+ fscache_end_page_write(object, page);
+ if (ret < 0) {
+ fscache_set_op_state(&op->op, "Abort");
+ fscache_abort_object(object);
+ } else {
+ fscache_enqueue_operation(&op->op);
}
_leave("");
@@ -799,9 +775,9 @@ int __fscache_write_page(struct fscache_cookie *cookie,
if (!op)
goto nomem;
- fscache_operation_init(&op->op, fscache_release_write_op);
- fscache_operation_init_slow(&op->op, fscache_write_op);
- op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
+ fscache_operation_init(&op->op, fscache_write_op,
+ fscache_release_write_op);
+ op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
fscache_set_op_name(&op->op, "Write1");
ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
@@ -856,7 +832,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
fscache_stat(&fscache_n_store_ops);
fscache_stat(&fscache_n_stores_ok);
- /* the slow work queue now carries its own ref on the object */
+ /* the work queue now carries its own ref on the object */
fscache_put_operation(&op->op);
_leave(" = 0");
return 0;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9424796d663..69ad053ffd7 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -239,7 +239,6 @@ static u64 fuse_get_unique(struct fuse_conn *fc)
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
- req->in.h.unique = fuse_get_unique(fc);
req->in.h.len = sizeof(struct fuse_in_header) +
len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
list_add_tail(&req->list, &fc->pending);
@@ -261,6 +260,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
req = list_entry(fc->bg_queue.next, struct fuse_req, list);
list_del(&req->list);
fc->active_background++;
+ req->in.h.unique = fuse_get_unique(fc);
queue_request(fc, req);
}
}
@@ -398,6 +398,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
else if (fc->conn_error)
req->out.h.error = -ECONNREFUSED;
else {
+ req->in.h.unique = fuse_get_unique(fc);
queue_request(fc, req);
/* acquire extra reference, since request is still needed
after request_end() */
@@ -450,6 +451,23 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);
+static int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique)
+{
+ int err = -ENODEV;
+
+ req->isreply = 0;
+ req->in.h.unique = unique;
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ queue_request(fc, req);
+ err = 0;
+ }
+ spin_unlock(&fc->lock);
+
+ return err;
+}
+
/*
* Called under fc->lock
*
@@ -535,13 +553,13 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
if (!cs->write) {
buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
} else {
- kunmap_atomic(cs->mapaddr, KM_USER0);
+ kunmap(buf->page);
buf->len = PAGE_SIZE - cs->len;
}
cs->currbuf = NULL;
cs->mapaddr = NULL;
} else if (cs->mapaddr) {
- kunmap_atomic(cs->mapaddr, KM_USER0);
+ kunmap(cs->pg);
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
@@ -572,7 +590,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
cs->len = buf->len;
cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
@@ -592,7 +610,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap_atomic(page, KM_USER0);
+ cs->mapaddr = kmap(page);
cs->buf = cs->mapaddr;
cs->len = PAGE_SIZE;
cs->pipebufs++;
@@ -611,7 +629,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
return err;
BUG_ON(err != 1);
offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+ cs->mapaddr = kmap(cs->pg);
cs->buf = cs->mapaddr + offset;
cs->len = min(PAGE_SIZE - offset, cs->seglen);
cs->seglen -= cs->len;
@@ -1231,6 +1249,199 @@ err:
return err;
}
+static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_store_out outarg;
+ struct inode *inode;
+ struct address_space *mapping;
+ u64 nodeid;
+ int err;
+ pgoff_t index;
+ unsigned int offset;
+ unsigned int num;
+ loff_t file_size;
+ loff_t end;
+
+ err = -EINVAL;
+ if (size < sizeof(outarg))
+ goto out_finish;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto out_finish;
+
+ err = -EINVAL;
+ if (size - sizeof(outarg) != outarg.size)
+ goto out_finish;
+
+ nodeid = outarg.nodeid;
+
+ down_read(&fc->killsb);
+
+ err = -ENOENT;
+ if (!fc->sb)
+ goto out_up_killsb;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (!inode)
+ goto out_up_killsb;
+
+ mapping = inode->i_mapping;
+ index = outarg.offset >> PAGE_CACHE_SHIFT;
+ offset = outarg.offset & ~PAGE_CACHE_MASK;
+ file_size = i_size_read(inode);
+ end = outarg.offset + outarg.size;
+ if (end > file_size) {
+ file_size = end;
+ fuse_write_update_size(inode, file_size);
+ }
+
+ num = outarg.size;
+ while (num) {
+ struct page *page;
+ unsigned int this_num;
+
+ err = -ENOMEM;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+ if (!page)
+ goto out_iput;
+
+ this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ err = fuse_copy_page(cs, &page, offset, this_num, 0);
+ if (!err && offset == 0 && (num != 0 || file_size == end))
+ SetPageUptodate(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (err)
+ goto out_iput;
+
+ num -= this_num;
+ offset = 0;
+ index++;
+ }
+
+ err = 0;
+
+out_iput:
+ iput(inode);
+out_up_killsb:
+ up_read(&fc->killsb);
+out_finish:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int i;
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ page_cache_release(page);
+ }
+}
+
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+ struct fuse_notify_retrieve_out *outarg)
+{
+ int err;
+ struct address_space *mapping = inode->i_mapping;
+ struct fuse_req *req;
+ pgoff_t index;
+ loff_t file_size;
+ unsigned int num;
+ unsigned int offset;
+ size_t total_len;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ offset = outarg->offset & ~PAGE_CACHE_MASK;
+
+ req->in.h.opcode = FUSE_NOTIFY_REPLY;
+ req->in.h.nodeid = outarg->nodeid;
+ req->in.numargs = 2;
+ req->in.argpages = 1;
+ req->page_offset = offset;
+ req->end = fuse_retrieve_end;
+
+ index = outarg->offset >> PAGE_CACHE_SHIFT;
+ file_size = i_size_read(inode);
+ num = outarg->size;
+ if (outarg->offset > file_size)
+ num = 0;
+ else if (outarg->offset + num > file_size)
+ num = file_size - outarg->offset;
+
+ while (num) {
+ struct page *page;
+ unsigned int this_num;
+
+ page = find_get_page(mapping, index);
+ if (!page)
+ break;
+
+ this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+
+ num -= this_num;
+ total_len += this_num;
+ }
+ req->misc.retrieve_in.offset = outarg->offset;
+ req->misc.retrieve_in.size = total_len;
+ req->in.args[0].size = sizeof(req->misc.retrieve_in);
+ req->in.args[0].value = &req->misc.retrieve_in;
+ req->in.args[1].size = total_len;
+
+ err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+ if (err)
+ fuse_retrieve_end(fc, req);
+
+ return err;
+}
+
+static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_retrieve_out outarg;
+ struct inode *inode;
+ int err;
+
+ err = -EINVAL;
+ if (size != sizeof(outarg))
+ goto copy_finish;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto copy_finish;
+
+ fuse_copy_finish(cs);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb) {
+ u64 nodeid = outarg.nodeid;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (inode) {
+ err = fuse_retrieve(fc, inode, &outarg);
+ iput(inode);
+ }
+ }
+ up_read(&fc->killsb);
+
+ return err;
+
+copy_finish:
+ fuse_copy_finish(cs);
+ return err;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
@@ -1244,6 +1455,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_INVAL_ENTRY:
return fuse_notify_inval_entry(fc, size, cs);
+ case FUSE_NOTIFY_STORE:
+ return fuse_notify_store(fc, size, cs);
+
+ case FUSE_NOTIFY_RETRIEVE:
+ return fuse_notify_retrieve(fc, size, cs);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3cdc5f78a40..431be0795b6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1016,7 +1016,7 @@ static int fuse_permission(struct inode *inode, int mask)
exist. So if permissions are revoked this won't be
noticed immediately, only after the attribute
timeout has expired */
- } else if (mask & MAY_ACCESS) {
+ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
err = fuse_access(inode, mask);
} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ada0adeb3bb..147c1f71bdb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -706,7 +706,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
-static void fuse_write_update_size(struct inode *inode, loff_t pos)
+void fuse_write_update_size(struct inode *inode, loff_t pos)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8f309f04064..57d4a3a0f10 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,7 @@ struct fuse_req {
struct fuse_write_in in;
struct fuse_write_out out;
} write;
+ struct fuse_notify_retrieve_in retrieve_in;
struct fuse_lk_in lk_in;
} misc;
@@ -748,4 +749,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file);
+void fuse_write_update_size(struct inode *inode, loff_t pos);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index a47b4310711..cc966552214 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -7,7 +7,6 @@ config GFS2_FS
select IP_SCTP if DLM_SCTP
select FS_POSIX_ACL
select CRC32
- select SLOW_WORK
select QUOTACTL
help
A cluster filesystem.
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9f8b52500d6..5e96cbd8a45 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -136,10 +136,7 @@ static int gfs2_writeback_writepage(struct page *page,
if (ret <= 0)
return ret;
- ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc);
- if (ret == -EAGAIN)
- ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
- return ret;
+ return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}
/**
@@ -637,9 +634,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
}
}
- error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
- if (error)
- goto out_unlock;
+ alloc_required = gfs2_write_alloc_required(ip, pos, len);
if (alloc_required || gfs2_is_jdata(ip))
gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b40..6f482809d1a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1040,7 +1040,8 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
goto out;
if (gfs2_is_stuffed(ip)) {
- u64 dsize = size + sizeof(struct gfs2_inode);
+ u64 dsize = size + sizeof(struct gfs2_dinode);
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1243,13 +1244,12 @@ int gfs2_file_dealloc(struct gfs2_inode *ip)
* @ip: the file being written to
* @offset: the offset to write to
* @len: the number of bytes being written
- * @alloc_required: set to 1 if an alloc is required, 0 otherwise
*
- * Returns: errno
+ * Returns: 1 if an alloc is required, 0 otherwise
*/
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
- unsigned int len, int *alloc_required)
+ unsigned int len)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head bh;
@@ -1257,26 +1257,23 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
u64 lblock, lblock_stop, size;
u64 end_of_file;
- *alloc_required = 0;
-
if (!len)
return 0;
if (gfs2_is_stuffed(ip)) {
if (offset + len >
sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
- *alloc_required = 1;
+ return 1;
return 0;
}
- *alloc_required = 1;
shift = sdp->sd_sb.sb_bsize_shift;
BUG_ON(gfs2_is_dir(ip));
end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
lblock = offset >> shift;
lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
if (lblock_stop > end_of_file)
- return 0;
+ return 1;
size = (lblock_stop - lblock) << shift;
do {
@@ -1284,12 +1281,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
bh.b_size = size;
gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
if (!buffer_mapped(&bh))
- return 0;
+ return 1;
size -= bh.b_size;
lblock += (bh.b_size >> ip->i_inode.i_blkbits);
} while(size > 0);
- *alloc_required = 0;
return 0;
}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c983177e05a..a20a5213135 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -52,6 +52,6 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
int gfs2_truncatei_resume(struct gfs2_inode *ip);
int gfs2_file_dealloc(struct gfs2_inode *ip);
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
- unsigned int len, int *alloc_required);
+ unsigned int len);
#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a..b9dd88a78dd 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
unsigned totlen = be16_to_cpu(dent->de_rec_len);
if (gfs2_dirent_sentinel(dent))
- actual = GFS2_DIRENT_SIZE(0);
+ actual = 0;
if (totlen - actual >= required)
return 1;
return 0;
@@ -955,7 +955,12 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
/* Change the pointers.
Don't bother distinguishing stuffed from non-stuffed.
This code is complicated enough already. */
- lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL);
+ lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
+ if (!lp) {
+ error = -ENOMEM;
+ goto fail_brelse;
+ }
+
/* Change the pointers */
for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
@@ -1063,7 +1068,9 @@ static int dir_double_exhash(struct gfs2_inode *dip)
/* Allocate both the "from" and "to" buffers in one big chunk */
- buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL);
+ buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
error = gfs2_dir_read_data(dip, (char *)buf,
@@ -1231,6 +1238,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
return 0;
}
+static void *gfs2_alloc_sort_buffer(unsigned size)
+{
+ void *ptr = NULL;
+
+ if (size < KMALLOC_MAX_SIZE)
+ ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
+ if (!ptr)
+ ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
+ return ptr;
+}
+
+static void gfs2_free_sort_buffer(void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir, int *copied, unsigned *depth,
u64 leaf_no)
@@ -1271,7 +1297,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
* 99 is the maximum number of entries that can fit in a single
* leaf block.
*/
- larr = vmalloc((leaves + entries + 99) * sizeof(void *));
+ larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
if (!larr)
goto out;
darr = (const struct gfs2_dirent **)(larr + leaves);
@@ -1282,7 +1308,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
do {
error = get_leaf(ip, lfn, &bh);
if (error)
- goto out_kfree;
+ goto out_free;
lf = (struct gfs2_leaf *)bh->b_data;
lfn = be64_to_cpu(lf->lf_next);
if (lf->lf_entries) {
@@ -1291,7 +1317,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
gfs2_dirent_gather, NULL, &g);
error = PTR_ERR(dent);
if (IS_ERR(dent))
- goto out_kfree;
+ goto out_free;
if (entries2 != g.offset) {
fs_warn(sdp, "Number of entries corrupt in dir "
"leaf %llu, entries2 (%u) != "
@@ -1300,7 +1326,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
entries2, g.offset);
error = -EIO;
- goto out_kfree;
+ goto out_free;
}
error = 0;
larr[leaf++] = bh;
@@ -1312,10 +1338,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
BUG_ON(entries2 != entries);
error = do_filldir_main(ip, offset, opaque, filldir, darr,
entries, copied);
-out_kfree:
+out_free:
for(i = 0; i < leaf; i++)
brelse(larr[i]);
- vfree(larr);
+ gfs2_free_sort_buffer(larr);
out:
return error;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ed9a94f0ef1..4edd662c823 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -351,7 +351,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long last_index;
u64 pos = page->index << PAGE_CACHE_SHIFT;
unsigned int data_blocks, ind_blocks, rblocks;
- int alloc_required = 0;
struct gfs2_holder gh;
struct gfs2_alloc *al;
int ret;
@@ -364,8 +363,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
- ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
- if (ret || !alloc_required)
+ if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
goto out_unlock;
ret = -ENOMEM;
al = gfs2_alloc_get(ip);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf49353..9adf8f924e0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -328,6 +328,30 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
}
/**
+ * do_error - Something unexpected has happened during a lock request
+ *
+ */
+
+static inline void do_error(struct gfs2_glock *gl, const int ret)
+{
+ struct gfs2_holder *gh, *tmp;
+
+ list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+ continue;
+ if (ret & LM_OUT_ERROR)
+ gh->gh_error = -EIO;
+ else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
+ gh->gh_error = GLR_TRYFAILED;
+ else
+ continue;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ }
+}
+
+/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
*
@@ -375,36 +399,13 @@ restart:
}
if (gh->gh_list.prev == &gl->gl_holders)
return 1;
+ do_error(gl, 0);
break;
}
return 0;
}
/**
- * do_error - Something unexpected has happened during a lock request
- *
- */
-
-static inline void do_error(struct gfs2_glock *gl, const int ret)
-{
- struct gfs2_holder *gh, *tmp;
-
- list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
- if (test_bit(HIF_HOLDER, &gh->gh_iflags))
- continue;
- if (ret & LM_OUT_ERROR)
- gh->gh_error = -EIO;
- else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
- gh->gh_error = GLR_TRYFAILED;
- else
- continue;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- }
-}
-
-/**
* find_first_waiter - find the first gh that's waiting for the glock
* @gl: the glock
*/
@@ -1062,6 +1063,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
spin_lock(&gl->gl_spin);
add_to_queue(gh);
+ if ((LM_FLAG_NOEXP & gh->gh_flags) &&
+ test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
run_queue(gl, 1);
spin_unlock(&gl->gl_spin);
@@ -1319,6 +1323,36 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
}
/**
+ * gfs2_should_freeze - Figure out if glock should be frozen
+ * @gl: The glock in question
+ *
+ * Glocks are not frozen if (a) the result of the dlm operation is
+ * an error, (b) the locking operation was an unlock operation or
+ * (c) if there is a "noexp" flagged request anywhere in the queue
+ *
+ * Returns: 1 if freezing should occur, 0 otherwise
+ */
+
+static int gfs2_should_freeze(const struct gfs2_glock *gl)
+{
+ const struct gfs2_holder *gh;
+
+ if (gl->gl_reply & ~LM_OUT_ST_MASK)
+ return 0;
+ if (gl->gl_target == LM_ST_UNLOCKED)
+ return 0;
+
+ list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+ continue;
+ if (LM_FLAG_NOEXP & gh->gh_flags)
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
* gfs2_glock_complete - Callback used by locking
* @gl: Pointer to the glock
* @ret: The return value from the dlm
@@ -1328,18 +1362,17 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+
gl->gl_reply = ret;
+
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
- struct gfs2_holder *gh;
spin_lock(&gl->gl_spin);
- gh = find_first_waiter(gl);
- if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
- (gl->gl_target != LM_ST_UNLOCKED)) ||
- ((ret & ~LM_OUT_ST_MASK) != 0))
+ if (gfs2_should_freeze(gl)) {
set_bit(GLF_FROZEN, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
- if (test_bit(GLF_FROZEN, &gl->gl_flags))
+ spin_unlock(&gl->gl_spin);
return;
+ }
+ spin_unlock(&gl->gl_spin);
}
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
gfs2_glock_hold(gl);
@@ -1348,7 +1381,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
int may_demote;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b5d7363b22d..fdbf4b366fa 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,7 +12,6 @@
#include <linux/fs.h>
#include <linux/workqueue.h>
-#include <linux/slow-work.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
@@ -383,7 +382,7 @@ struct gfs2_journal_extent {
struct gfs2_jdesc {
struct list_head jd_list;
struct list_head extent_list;
- struct slow_work jd_work;
+ struct work_struct jd_work;
struct inode *jd_inode;
unsigned long jd_flags;
#define JDF_RECOVERY 1
@@ -460,6 +459,7 @@ enum {
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
+ SDF_NOJOURNALID = 6,
};
#define GFS2_FSNAME_LEN 256
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a..f03afd9c44b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
{
struct inode *inode;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
if (inode->i_state & I_NEW)
ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
{
struct gfs2_sbd *sdp;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
struct gfs2_holder gh;
struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
inode->i_mode = DT2IF(DT_UNKNOWN);
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
ip->i_gl->gl_object = NULL;
gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index fb2a5f93b7c..b1e9630eb46 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -15,7 +15,6 @@
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
#include <asm/atomic.h>
-#include <linux/slow-work.h>
#include "gfs2.h"
#include "incore.h"
@@ -24,6 +23,7 @@
#include "util.h"
#include "glock.h"
#include "quota.h"
+#include "recovery.h"
static struct shrinker qd_shrinker = {
.shrink = gfs2_shrink_qd_memory,
@@ -138,9 +138,11 @@ static int __init init_gfs2_fs(void)
if (error)
goto fail_unregister;
- error = slow_work_register_user(THIS_MODULE);
- if (error)
- goto fail_slow;
+ error = -ENOMEM;
+ gfs_recovery_wq = alloc_workqueue("gfs_recovery",
+ WQ_NON_REENTRANT | WQ_RESCUER, 0);
+ if (!gfs_recovery_wq)
+ goto fail_wq;
gfs2_register_debugfs();
@@ -148,7 +150,7 @@ static int __init init_gfs2_fs(void)
return 0;
-fail_slow:
+fail_wq:
unregister_filesystem(&gfs2meta_fs_type);
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
@@ -190,7 +192,7 @@ static void __exit exit_gfs2_fs(void)
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
- slow_work_unregister_user(THIS_MODULE);
+ destroy_workqueue(gfs_recovery_wq);
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3593b3a7290..4f44bdeb2f0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/slow-work.h>
#include <linux/quotaops.h>
#include "gfs2.h"
@@ -76,7 +75,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
sb->s_fs_info = sdp;
sdp->sd_vfs = sb;
-
+ set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
gfs2_tune_init(&sdp->sd_tune);
init_waitqueue_head(&sdp->sd_glock_wait);
@@ -673,7 +672,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
break;
INIT_LIST_HEAD(&jd->extent_list);
- slow_work_init(&jd->jd_work, &gfs2_recover_ops);
+ INIT_WORK(&jd->jd_work, gfs2_recover_func);
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
if (!jd->jd_inode)
@@ -782,7 +781,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
for (x = 0; x < sdp->sd_journals; x++) {
- error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x));
+ error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
+ true);
if (error) {
fs_err(sdp, "error recovering journal %u: %d\n",
x, error);
@@ -792,7 +792,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
gfs2_others_may_mount(sdp);
} else if (!sdp->sd_args.ar_spectator) {
- error = gfs2_recover_journal(sdp->sd_jdesc);
+ error = gfs2_recover_journal(sdp->sd_jdesc, true);
if (error) {
fs_err(sdp, "error recovering my journal: %d\n", error);
goto fail_jinode_gh;
@@ -1050,7 +1050,8 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
ret = match_int(&tmp[0], &option);
if (ret || option < 0)
goto hostdata_error;
- ls->ls_jid = option;
+ if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags))
+ ls->ls_jid = option;
break;
case Opt_id:
/* Obsolete, but left for backward compat purposes */
@@ -1102,6 +1103,24 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
lm->lm_unmount(sdp);
}
+static int gfs2_journalid_wait(void *word)
+{
+ if (signal_pending(current))
+ return -EINTR;
+ schedule();
+ return 0;
+}
+
+static int wait_on_journal(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_args.ar_spectator)
+ return 0;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ return 0;
+
+ return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+}
+
void gfs2_online_uevent(struct gfs2_sbd *sdp)
{
struct super_block *sb = sdp->sd_vfs;
@@ -1194,6 +1213,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error)
goto fail_locking;
+ error = wait_on_journal(sdp);
+ if (error)
+ goto fail_sb;
+
error = init_inodes(sdp, DO);
if (error)
goto fail_sb;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769..1bc6b5695e6 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
if (!buffer_mapped(bh))
goto unlock_out;
/* If it's a newly allocated disk block for quota, zero it */
- if (buffer_new(bh)) {
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- }
+ if (buffer_new(bh))
+ zero_user(page, pos - blocksize, bh->b_size);
}
if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
/* If quota straddles page boundary, we need to update the rest of the
* quota at the beginning of the next page */
- if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
+ if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
ptr = ptr + nbytes;
nbytes = sizeof(struct gfs2_quota) - nbytes;
offset = 0;
@@ -789,15 +787,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
goto out;
for (x = 0; x < num_qd; x++) {
- int alloc_required;
-
offset = qd2offset(qda[x]);
- error = gfs2_write_alloc_required(ip, offset,
- sizeof(struct gfs2_quota),
- &alloc_required);
- if (error)
- goto out_gunlock;
- if (alloc_required)
+ if (gfs2_write_alloc_required(ip, offset,
+ sizeof(struct gfs2_quota)))
nalloc++;
}
@@ -1457,10 +1449,10 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
switch (sdp->sd_args.ar_quota) {
case GFS2_QUOTA_ON:
- fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
+ fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
/*FALLTHRU*/
case GFS2_QUOTA_ACCOUNT:
- fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
+ fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
break;
case GFS2_QUOTA_OFF:
break;
@@ -1506,7 +1498,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
fdq->d_version = FS_DQUOT_VERSION;
- fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+ fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
fdq->d_id = id;
fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
@@ -1541,12 +1533,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
switch(type) {
case USRQUOTA:
type = QUOTA_USER;
- if (fdq->d_flags != XFS_USER_QUOTA)
+ if (fdq->d_flags != FS_USER_QUOTA)
return -EINVAL;
break;
case GRPQUOTA:
type = QUOTA_GROUP;
- if (fdq->d_flags != XFS_GROUP_QUOTA)
+ if (fdq->d_flags != FS_GROUP_QUOTA)
return -EINVAL;
break;
default:
@@ -1586,10 +1578,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
goto out_i;
offset = qd2offset(qd);
- error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota),
- &alloc_required);
- if (error)
- goto out_i;
+ alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
if (alloc_required) {
al = gfs2_alloc_get(ip);
if (al == NULL)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4b9bece3d43..f7f89a94a5a 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,7 +14,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/slow-work.h>
#include "gfs2.h"
#include "incore.h"
@@ -28,6 +27,8 @@
#include "util.h"
#include "dir.h"
+struct workqueue_struct *gfs_recovery_wq;
+
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh)
{
@@ -443,23 +444,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
-static int gfs2_recover_get_ref(struct slow_work *work)
-{
- struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
- if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
- return -EBUSY;
- return 0;
-}
-
-static void gfs2_recover_put_ref(struct slow_work *work)
-{
- struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
- clear_bit(JDF_RECOVERY, &jd->jd_flags);
- smp_mb__after_clear_bit();
- wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
-}
-
-static void gfs2_recover_work(struct slow_work *work)
+void gfs2_recover_func(struct work_struct *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
@@ -578,7 +563,7 @@ static void gfs2_recover_work(struct slow_work *work)
gfs2_glock_dq_uninit(&j_gh);
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
- return;
+ goto done;
fail_gunlock_tr:
gfs2_glock_dq_uninit(&t_gh);
@@ -590,32 +575,35 @@ fail_gunlock_j:
}
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
-
fail:
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
+done:
+ clear_bit(JDF_RECOVERY, &jd->jd_flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
-struct slow_work_ops gfs2_recover_ops = {
- .owner = THIS_MODULE,
- .get_ref = gfs2_recover_get_ref,
- .put_ref = gfs2_recover_put_ref,
- .execute = gfs2_recover_work,
-};
-
-
static int gfs2_recovery_wait(void *word)
{
schedule();
return 0;
}
-int gfs2_recover_journal(struct gfs2_jdesc *jd)
+int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
{
int rv;
- rv = slow_work_enqueue(&jd->jd_work);
- if (rv)
- return rv;
- wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
+
+ if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
+ return -EBUSY;
+
+ /* we have JDF_RECOVERY, queue should always succeed */
+ rv = queue_work(gfs_recovery_wq, &jd->jd_work);
+ BUG_ON(!rv);
+
+ if (wait)
+ wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
+ TASK_UNINTERRUPTIBLE);
+
return 0;
}
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 1616ac22569..2226136c764 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -12,6 +12,8 @@
#include "incore.h"
+extern struct workqueue_struct *gfs_recovery_wq;
+
static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
{
if (++*blk == sdp->sd_jdesc->jd_blocks)
@@ -27,8 +29,8 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
-extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
-extern struct slow_work_ops gfs2_recover_ops;
+extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
+extern void gfs2_recover_func(struct work_struct *work);
#endif /* __RECOVERY_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d1aad38f1b..4140811a921 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -342,8 +342,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- int ar;
- int error;
if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
(ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
@@ -352,13 +350,12 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
}
jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
- error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar);
- if (!error && ar) {
+ if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) {
gfs2_consist_inode(ip);
- error = -EIO;
+ return -EIO;
}
- return error;
+ return 0;
}
/**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 37f5393e68e..ccacffd2faa 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -25,6 +25,7 @@
#include "quota.h"
#include "util.h"
#include "glops.h"
+#include "recovery.h"
struct gfs2_attr {
struct attribute attr;
@@ -325,6 +326,30 @@ static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
return sprintf(buf, "%d\n", ls->ls_first);
}
+static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ unsigned first;
+ int rv;
+
+ rv = sscanf(buf, "%u", &first);
+ if (rv != 1 || first > 1)
+ return -EINVAL;
+ spin_lock(&sdp->sd_jindex_spin);
+ rv = -EBUSY;
+ if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
+ goto out;
+ rv = -EINVAL;
+ if (sdp->sd_args.ar_spectator)
+ goto out;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ goto out;
+ sdp->sd_lockstruct.ls_first = first;
+ rv = 0;
+out:
+ spin_unlock(&sdp->sd_jindex_spin);
+ return rv ? rv : len;
+}
+
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -352,7 +377,7 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
if (jd->jd_jid != jid)
continue;
- rv = slow_work_enqueue(&jd->jd_work);
+ rv = gfs2_recover_journal(jd, false);
break;
}
out:
@@ -377,14 +402,41 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
}
+static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ unsigned jid;
+ int rv;
+
+ rv = sscanf(buf, "%u", &jid);
+ if (rv != 1)
+ return -EINVAL;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ rv = -EINVAL;
+ if (sdp->sd_args.ar_spectator)
+ goto out;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ goto out;
+ rv = -EBUSY;
+ if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
+ goto out;
+ sdp->sd_lockstruct.ls_jid = jid;
+ smp_mb__after_clear_bit();
+ wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+ rv = 0;
+out:
+ spin_unlock(&sdp->sd_jindex_spin);
+ return rv ? rv : len;
+}
+
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
-GDLM_ATTR(jid, 0444, jid_show, NULL);
-GDLM_ATTR(first, 0444, lkfirst_show, NULL);
+GDLM_ATTR(jid, 0644, jid_show, jid_store);
+GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0600, NULL, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
@@ -564,7 +616,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
- if (!sdp->sd_args.ar_spectator)
+ if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
if (gfs2_uuid_valid(uuid))
add_uevent_var(env, "UUID=%pUB", uuid);
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
/*
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 93d1e47647b..f19ce94693d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1281,13 +1281,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
int journal_check_available_features (journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
- journal_superblock_t *sb;
-
if (!compat && !ro && !incompat)
return 1;
- sb = journal->j_superblock;
-
/* We can support any known requested features iff the
* superblock is in version 2. Otherwise we fail to support any
* extended sb features. */
@@ -1481,7 +1477,6 @@ int journal_flush(journal_t *journal)
int journal_wipe(journal_t *journal, int write)
{
- journal_superblock_t *sb;
int err = 0;
J_ASSERT (!(journal->j_flags & JFS_LOADED));
@@ -1490,8 +1485,6 @@ int journal_wipe(journal_t *journal, int write)
if (err)
return err;
- sb = journal->j_superblock;
-
if (!journal->j_tail)
goto no_recovery;
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 54c9bc9e1b1..81051dafebf 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -283,12 +283,9 @@ int journal_recover(journal_t *journal)
int journal_skip_recovery(journal_t *journal)
{
int err;
- journal_superblock_t * sb;
-
struct recovery_info info;
memset (&info, 0, sizeof(info));
- sb = journal->j_superblock;
err = do_one_pass(journal, &info, PASS_SCAN);
@@ -297,7 +294,8 @@ int journal_skip_recovery(journal_t *journal)
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD_DEBUG
- int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
+ int dropped = info.end_transaction -
+ be32_to_cpu(journal->j_superblock->s_sequence);
#endif
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
@@ -321,11 +319,6 @@ static int do_one_pass(journal_t *journal,
unsigned int sequence;
int blocktype;
- /* Precompute the maximum metadata descriptors in a descriptor block */
- int MAX_BLOCKS_PER_DESC;
- MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
- / sizeof(journal_block_tag_t));
-
/*
* First thing is to establish what we expect to find in the log
* (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 076d1cc44f9..1c23a0f4e8a 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
void __jbd2_log_wait_for_space(journal_t *journal)
{
int nblocks, space_left;
- assert_spin_locked(&journal->j_state_lock);
+ /* assert_spin_locked(&journal->j_state_lock); */
nblocks = jbd_space_needed(journal);
while (__jbd2_log_space_left(journal) < nblocks) {
if (journal->j_flags & JBD2_ABORT)
return;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex);
/*
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
* filesystem, so abort the journal and leave a stack
* trace for forensic evidence.
*/
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
nblocks = jbd_space_needed(journal);
space_left = __jbd2_log_space_left(journal);
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
if (journal->j_committing_transaction)
tid = journal->j_committing_transaction->t_tid;
spin_unlock(&journal->j_list_lock);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) {
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
WARN_ON(1);
jbd2_journal_abort(journal, 0);
}
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
} else {
spin_unlock(&journal->j_list_lock);
}
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* next transaction ID we will write, and where it will
* start. */
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions;
if (transaction) {
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* If the oldest pinned transaction is at the tail of the log
already then there's not much we can do right now. */
if (journal->j_tail_sequence == first_tid) {
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return 1;
}
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_free += freed;
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
/*
* If there is an external journal, we need to make sure that
@@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_log_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
J_ASSERT(transaction->t_checkpoint_io_list == NULL);
- J_ASSERT(transaction->t_updates == 0);
+ J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 75716d3d2be..f52e5e8049f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal,
*/
if (ret == -EOPNOTSUPP && barrier_done) {
printk(KERN_WARNING
- "JBD: barrier-based sync failed on %s - "
- "disabling barriers\n", journal->j_devname);
- spin_lock(&journal->j_state_lock);
+ "JBD2: Disabling barriers on %s, "
+ "not supported by device\n", journal->j_devname);
+ write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
/* And try again, without the barrier */
lock_buffer(bh);
@@ -180,11 +180,11 @@ retry:
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
printk(KERN_WARNING
- "JBD2: wait_on_commit_record: sync failed on %s - "
- "disabling barriers\n", journal->j_devname);
- spin_lock(&journal->j_state_lock);
+ "JBD2: %s: disabling barries on %s - not supported "
+ "by device\n", __func__, journal->j_devname);
+ write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
lock_buffer(bh);
clear_buffer_dirty(bh);
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
/*
@@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_locked);
spin_lock(&commit_transaction->t_handle_lock);
- while (commit_transaction->t_updates) {
+ while (atomic_read(&commit_transaction->t_updates)) {
DEFINE_WAIT(wait);
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
- if (commit_transaction->t_updates) {
+ if (atomic_read(&commit_transaction->t_updates)) {
spin_unlock(&commit_transaction->t_handle_lock);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
schedule();
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
spin_lock(&commit_transaction->t_handle_lock);
}
finish_wait(&journal->j_wait_updates, &wait);
}
spin_unlock(&commit_transaction->t_handle_lock);
- J_ASSERT (commit_transaction->t_outstanding_credits <=
+ J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
/*
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head;
wake_up(&journal->j_wait_transaction_locked);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
jbd_debug (3, "JBD: commit phase 2\n");
@@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* transaction! Now comes the tricky part: we need to write out
* metadata. Loop over the transaction's entire buffer list:
*/
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_COMMIT;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction);
stats.run.rs_logging = jiffies;
stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
stats.run.rs_logging);
- stats.run.rs_blocks = commit_transaction->t_outstanding_credits;
+ stats.run.rs_blocks =
+ atomic_read(&commit_transaction->t_outstanding_credits);
stats.run.rs_blocks_logged = 0;
J_ASSERT(commit_transaction->t_nr_buffers <=
- commit_transaction->t_outstanding_credits);
+ atomic_read(&commit_transaction->t_outstanding_credits));
err = 0;
descriptor = NULL;
@@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* the free space in the log, but this counter is changed
* by jbd2_journal_next_log_block() also.
*/
- commit_transaction->t_outstanding_credits--;
+ atomic_dec(&commit_transaction->t_outstanding_credits);
/* Bump b_count to prevent truncate from stumbling over
the shadowed buffer! @@@ This can go if we ever get
@@ -977,7 +978,7 @@ restart_loop:
* __jbd2_journal_drop_transaction(). Otherwise we could race with
* other checkpointing code processing the transaction...
*/
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
/*
* Now recheck if some buffers did not get attached to the transaction
@@ -985,7 +986,7 @@ restart_loop:
*/
if (commit_transaction->t_forget) {
spin_unlock(&journal->j_list_lock);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
goto restart_loop;
}
@@ -1003,7 +1004,8 @@ restart_loop:
* File the transaction statistics
*/
stats.ts_tid = commit_transaction->t_tid;
- stats.run.rs_handle_count = commit_transaction->t_handle_count;
+ stats.run.rs_handle_count =
+ atomic_read(&commit_transaction->t_handle_count);
trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
commit_transaction->t_tid, &stats.run);
@@ -1037,7 +1039,7 @@ restart_loop:
journal->j_average_commit_time*3) / 4;
else
journal->j_average_commit_time = commit_time;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
if (commit_transaction->t_checkpoint_list == NULL &&
commit_transaction->t_checkpoint_io_list == NULL) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff593276..ad5866aaf0f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -41,6 +41,7 @@
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/vmalloc.h>
+#include <linux/backing-dev.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
@@ -48,8 +49,6 @@
#include <asm/uaccess.h>
#include <asm/page.h>
-EXPORT_SYMBOL(jbd2_journal_start);
-EXPORT_SYMBOL(jbd2_journal_restart);
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -143,7 +142,7 @@ static int kjournald2(void *arg)
/*
* And now, wait forever for commit wakeup events.
*/
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
loop:
if (journal->j_flags & JBD2_UNMOUNT)
@@ -154,10 +153,10 @@ loop:
if (journal->j_commit_sequence != journal->j_commit_request) {
jbd_debug(1, "OK, requests differ\n");
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer);
jbd2_journal_commit_transaction(journal);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
goto loop;
}
@@ -169,9 +168,9 @@ loop:
* be already stopped.
*/
jbd_debug(1, "Now suspending kjournald2\n");
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
refrigerator();
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
} else {
/*
* We assume on resume that commits are already there,
@@ -191,9 +190,9 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT)
should_sleep = 0;
if (should_sleep) {
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
schedule();
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
}
finish_wait(&journal->j_wait_commit, &wait);
}
@@ -211,7 +210,7 @@ loop:
goto loop;
end_loop:
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer);
journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit);
@@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
static void journal_kill_thread(journal_t *journal)
{
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_UNMOUNT;
while (journal->j_task) {
wake_up(&journal->j_wait_commit);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
}
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
/*
@@ -297,7 +296,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
- struct jbd2_buffer_trigger_type *triggers;
journal_t *journal = transaction->t_journal;
/*
@@ -311,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
*/
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
- new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+retry_alloc:
+ new_bh = alloc_buffer_head(GFP_NOFS);
+ if (!new_bh) {
+ /*
+ * Failure is not an option, but __GFP_NOFAIL is going
+ * away; so we retry ourselves here.
+ */
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry_alloc;
+ }
+
/* keep subsequent assertions sane */
new_bh->b_state = 0;
init_buffer(new_bh, NULL, NULL);
@@ -328,21 +336,21 @@ repeat:
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
- triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
- triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
- * Fire any commit trigger. Do this before checking for escaping,
- * as the trigger may modify the magic offset. If a copy-out
- * happens afterwards, it will have the correct data in the buffer.
+ * Fire data frozen trigger if data already wasn't frozen. Do this
+ * before checking for escaping, as the trigger may modify the magic
+ * offset. If a copy-out happens afterwards, it will have the correct
+ * data in the buffer.
*/
- jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
- triggers);
+ if (!done_copy_out)
+ jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+ jh_in->b_triggers);
/*
* Check for escaping
@@ -443,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal)
{
int left = journal->j_free;
- assert_spin_locked(&journal->j_state_lock);
+ /* assert_spin_locked(&journal->j_state_lock); */
/*
* Be pessimistic here about the number of those free blocks which
@@ -488,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{
int ret;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
ret = __jbd2_log_start_commit(journal, tid);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return ret;
}
@@ -509,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction_t *transaction = NULL;
tid_t tid;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
__jbd2_log_start_commit(journal, transaction->t_tid);
@@ -517,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction = journal->j_committing_transaction;
if (!transaction) {
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
return 0; /* Nothing to retry */
}
tid = transaction->t_tid;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid);
return 1;
}
@@ -536,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
int ret = 0;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;
@@ -555,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
*ptid = journal->j_committing_transaction->t_tid;
ret = 1;
}
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return ret;
}
@@ -567,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{
int err = 0;
+ read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG
- spin_lock(&journal->j_state_lock);
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG
"%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid);
}
- spin_unlock(&journal->j_state_lock);
#endif
- spin_lock(&journal->j_state_lock);
while (tid_gt(tid, journal->j_commit_sequence)) {
jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
tid, journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit,
!tid_gt(tid, journal->j_commit_sequence));
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
}
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
if (unlikely(is_journal_aborted(journal))) {
printk(KERN_EMERG "journal commit I/O error\n");
@@ -603,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{
unsigned long blocknr;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
J_ASSERT(journal->j_free > 1);
blocknr = journal->j_head;
@@ -611,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
journal->j_free--;
if (journal->j_head == journal->j_last)
journal->j_head = journal->j_first;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return jbd2_journal_bmap(journal, blocknr, retp);
}
@@ -831,7 +837,7 @@ static journal_t * journal_init_common (void)
mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock);
spin_lock_init(&journal->j_list_lock);
- spin_lock_init(&journal->j_state_lock);
+ rwlock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0;
@@ -1097,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
set_buffer_uptodate(bh);
}
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(journal->j_tail);
sb->s_errno = cpu_to_be32(journal->j_errno);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
BUFFER_TRACE(bh, "marking dirty");
mark_buffer_dirty(bh);
@@ -1125,12 +1131,12 @@ out:
* any future commit will have to be careful to update the
* superblock again to re-record the true start of the log. */
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (sb->s_start)
journal->j_flags &= ~JBD2_FLUSHED;
else
journal->j_flags |= JBD2_FLUSHED;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
/*
@@ -1392,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
- journal_superblock_t *sb;
-
if (!compat && !ro && !incompat)
return 1;
- sb = journal->j_superblock;
-
/* We can support any known requested features iff the
* superblock is in version 2. Otherwise we fail to support any
* extended sb features. */
@@ -1546,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal)
transaction_t *transaction = NULL;
unsigned long old_tail;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
/* Force everything buffered to the log... */
if (journal->j_running_transaction) {
@@ -1559,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal)
if (transaction) {
tid_t tid = transaction->t_tid;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid);
} else {
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
/* ...and flush everything in the log out to disk. */
@@ -1586,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
old_tail = journal->j_tail;
journal->j_tail = 0;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
jbd2_journal_update_superblock(journal, 1);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
journal->j_tail = old_tail;
J_ASSERT(!journal->j_running_transaction);
@@ -1599,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(!journal->j_checkpoint_transactions);
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return 0;
}
@@ -1618,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal)
int jbd2_journal_wipe(journal_t *journal, int write)
{
- journal_superblock_t *sb;
int err = 0;
J_ASSERT (!(journal->j_flags & JBD2_LOADED));
@@ -1627,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (err)
return err;
- sb = journal->j_superblock;
-
if (!journal->j_tail)
goto no_recovery;
@@ -1666,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
printk(KERN_ERR "Aborting journal on device %s.\n",
journal->j_devname);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_ABORT;
transaction = journal->j_running_transaction;
if (transaction)
__jbd2_log_start_commit(journal, transaction->t_tid);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
/* Soft abort: record the abort error status in the journal superblock,
@@ -1756,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal)
{
int err;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT)
err = -EROFS;
else
err = journal->j_errno;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
return err;
}
@@ -1776,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal)
{
int err = 0;
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT)
err = -EROFS;
else
journal->j_errno = 0;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return err;
}
@@ -1794,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal)
*/
void jbd2_journal_ack_err(journal_t *journal)
{
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
if (journal->j_errno)
journal->j_flags |= JBD2_ACK_ERR;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
}
int jbd2_journal_blocks_per_page(struct inode *inode)
@@ -2202,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
void jbd2_journal_release_jbd_inode(journal_t *journal,
struct jbd2_inode *jinode)
{
- int writeout = 0;
-
if (!journal)
return;
restart:
@@ -2220,9 +2217,6 @@ restart:
goto restart;
}
- /* Do we need to wait for data writeback? */
- if (journal->j_committing_transaction == jinode->i_transaction)
- writeout = 1;
if (jinode->i_transaction) {
list_del(&jinode->i_list);
jinode->i_transaction = NULL;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 049281b7cb8..2bc4d5f116f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal)
int jbd2_journal_skip_recovery(journal_t *journal)
{
int err;
- journal_superblock_t * sb;
struct recovery_info info;
memset (&info, 0, sizeof(info));
- sb = journal->j_superblock;
err = do_one_pass(journal, &info, PASS_SCAN);
@@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal)
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD2_DEBUG
- int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
+ int dropped = info.end_transaction -
+ be32_to_cpu(journal->j_superblock->s_sequence);
#endif
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
@@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
- /* Precompute the maximum metadata descriptors in a descriptor block */
- int MAX_BLOCKS_PER_DESC;
- MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
- / tag_bytes);
-
/*
* First thing is to establish what we expect to find in the log
* (in terms of transaction IDs), and where (in terms of log
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620a..d95cc9d0401 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -26,6 +26,8 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
spin_lock_init(&transaction->t_handle_lock);
+ atomic_set(&transaction->t_updates, 0);
+ atomic_set(&transaction->t_outstanding_credits, 0);
+ atomic_set(&transaction->t_handle_count, 0);
INIT_LIST_HEAD(&transaction->t_inode_list);
INIT_LIST_HEAD(&transaction->t_private_list);
@@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
* transaction's buffer credits.
*/
-static int start_this_handle(journal_t *journal, handle_t *handle)
+static int start_this_handle(journal_t *journal, handle_t *handle,
+ int gfp_mask)
{
transaction_t *transaction;
int needed;
int nblocks = handle->h_buffer_credits;
transaction_t *new_transaction = NULL;
- int ret = 0;
unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
current->comm, nblocks,
journal->j_max_transaction_buffers);
- ret = -ENOSPC;
- goto out;
+ return -ENOSPC;
}
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction),
- GFP_NOFS|__GFP_NOFAIL);
+ new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
if (!new_transaction) {
- ret = -ENOMEM;
- goto out;
+ /*
+ * If __GFP_FS is not present, then we may be
+ * being called from inside the fs writeback
+ * layer, so we MUST NOT fail. Since
+ * __GFP_NOFAIL is going away, we will arrange
+ * to retry the allocation ourselves.
+ */
+ if ((gfp_mask & __GFP_FS) == 0) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto alloc_transaction;
+ }
+ return -ENOMEM;
}
}
jbd_debug(3, "New handle %p going live.\n", handle);
-repeat:
-
/*
* We need to hold j_state_lock until t_updates has been incremented,
* for proper journal barrier handling
*/
- spin_lock(&journal->j_state_lock);
-repeat_locked:
+repeat:
+ read_lock(&journal->j_state_lock);
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
- spin_unlock(&journal->j_state_lock);
- ret = -EROFS;
- goto out;
+ read_unlock(&journal->j_state_lock);
+ kfree(new_transaction);
+ return -EROFS;
}
/* Wait on the journal's transaction barrier if necessary */
if (journal->j_barrier_count) {
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_transaction_locked,
journal->j_barrier_count == 0);
goto repeat;
}
if (!journal->j_running_transaction) {
- if (!new_transaction) {
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
+ if (!new_transaction)
goto alloc_transaction;
+ write_lock(&journal->j_state_lock);
+ if (!journal->j_running_transaction) {
+ jbd2_get_transaction(journal, new_transaction);
+ new_transaction = NULL;
}
- jbd2_get_transaction(journal, new_transaction);
- new_transaction = NULL;
+ write_unlock(&journal->j_state_lock);
+ goto repeat;
}
transaction = journal->j_running_transaction;
@@ -155,7 +170,7 @@ repeat_locked:
prepare_to_wait(&journal->j_wait_transaction_locked,
&wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
@@ -166,8 +181,8 @@ repeat_locked:
* buffers requested by this operation, we need to stall pending a log
* checkpoint to free some more log space.
*/
- spin_lock(&transaction->t_handle_lock);
- needed = transaction->t_outstanding_credits + nblocks;
+ needed = atomic_add_return(nblocks,
+ &transaction->t_outstanding_credits);
if (needed > journal->j_max_transaction_buffers) {
/*
@@ -178,11 +193,11 @@ repeat_locked:
DEFINE_WAIT(wait);
jbd_debug(2, "Handle %p starting new commit...\n", handle);
- spin_unlock(&transaction->t_handle_lock);
+ atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
__jbd2_log_start_commit(journal, transaction->t_tid);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
@@ -215,35 +230,48 @@ repeat_locked:
*/
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
- spin_unlock(&transaction->t_handle_lock);
- __jbd2_log_wait_for_space(journal);
- goto repeat_locked;
+ atomic_sub(nblocks, &transaction->t_outstanding_credits);
+ read_unlock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
+ if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
+ __jbd2_log_wait_for_space(journal);
+ write_unlock(&journal->j_state_lock);
+ goto repeat;
}
/* OK, account for the buffers that this operation expects to
- * use and add the handle to the running transaction. */
-
- if (time_after(transaction->t_start, ts)) {
+ * use and add the handle to the running transaction.
+ *
+ * In order for t_max_wait to be reliable, it must be
+ * protected by a lock. But doing so will mean that
+ * start_this_handle() can not be run in parallel on SMP
+ * systems, which limits our scalability. So we only enable
+ * it when debugging is enabled. We may want to use a
+ * separate flag, eventually, so we can enable this
+ * independently of debugging.
+ */
+#ifdef CONFIG_JBD2_DEBUG
+ if (jbd2_journal_enable_debug &&
+ time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start);
+ spin_lock(&transaction->t_handle_lock);
if (ts > transaction->t_max_wait)
transaction->t_max_wait = ts;
+ spin_unlock(&transaction->t_handle_lock);
}
-
+#endif
handle->h_transaction = transaction;
- transaction->t_outstanding_credits += nblocks;
- transaction->t_updates++;
- transaction->t_handle_count++;
+ atomic_inc(&transaction->t_updates);
+ atomic_inc(&transaction->t_handle_count);
jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
- handle, nblocks, transaction->t_outstanding_credits,
+ handle, nblocks,
+ atomic_read(&transaction->t_outstanding_credits),
__jbd2_log_space_left(journal));
- spin_unlock(&transaction->t_handle_lock);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
-out:
- if (unlikely(new_transaction)) /* It's usually NULL */
- kfree(new_transaction);
- return ret;
+ kfree(new_transaction);
+ return 0;
}
static struct lock_class_key jbd2_handle_key;
@@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks)
*
* Return a pointer to a newly allocated handle, or NULL on failure
*/
-handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
{
handle_t *handle = journal_current_handle();
int err;
@@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
current->journal_info = handle;
- err = start_this_handle(journal, handle);
+ err = start_this_handle(journal, handle, gfp_mask);
if (err < 0) {
jbd2_free_handle(handle);
current->journal_info = NULL;
@@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
out:
return handle;
}
+EXPORT_SYMBOL(jbd2__journal_start);
+
+
+handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+{
+ return jbd2__journal_start(journal, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_start);
+
/**
* int jbd2_journal_extend() - extend buffer credits.
@@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
result = 1;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
/* Don't extend a locked-down transaction! */
if (handle->h_transaction->t_state != T_RUNNING) {
@@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
}
spin_lock(&transaction->t_handle_lock);
- wanted = transaction->t_outstanding_credits + nblocks;
+ wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
if (wanted > journal->j_max_transaction_buffers) {
jbd_debug(3, "denied handle %p %d blocks: "
@@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
}
handle->h_buffer_credits += nblocks;
- transaction->t_outstanding_credits += nblocks;
+ atomic_add(nblocks, &transaction->t_outstanding_credits);
result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock:
spin_unlock(&transaction->t_handle_lock);
error_out:
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
out:
return result;
}
@@ -394,8 +431,7 @@ out:
* transaction capabable of guaranteeing the requested number of
* credits.
*/
-
-int jbd2_journal_restart(handle_t *handle, int nblocks)
+int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
@@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
* First unlink the handle from its current transaction, and start the
* commit on that.
*/
- J_ASSERT(transaction->t_updates > 0);
+ J_ASSERT(atomic_read(&transaction->t_updates) > 0);
J_ASSERT(journal_current_handle() == handle);
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock);
- transaction->t_outstanding_credits -= handle->h_buffer_credits;
- transaction->t_updates--;
-
- if (!transaction->t_updates)
+ atomic_sub(handle->h_buffer_credits,
+ &transaction->t_outstanding_credits);
+ if (atomic_dec_and_test(&transaction->t_updates))
wake_up(&journal->j_wait_updates);
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle);
__jbd2_log_start_commit(journal, transaction->t_tid);
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
- ret = start_this_handle(journal, handle);
+ ret = start_this_handle(journal, handle, gfp_mask);
return ret;
}
+EXPORT_SYMBOL(jbd2__journal_restart);
+int jbd2_journal_restart(handle_t *handle, int nblocks)
+{
+ return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_restart);
+
/**
* void jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
@@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
{
DEFINE_WAIT(wait);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
++journal->j_barrier_count;
/* Wait until there are no running updates */
@@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal)
break;
spin_lock(&transaction->t_handle_lock);
- if (!transaction->t_updates) {
+ if (!atomic_read(&transaction->t_updates)) {
spin_unlock(&transaction->t_handle_lock);
break;
}
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_updates, &wait);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
}
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
/*
* We have now established a barrier against other normal updates, but
@@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
J_ASSERT(journal->j_barrier_count != 0);
mutex_unlock(&journal->j_barrier);
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
--journal->j_barrier_count;
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_transaction_locked);
}
@@ -725,6 +767,9 @@ done:
page = jh2bh(jh)->b_page;
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
source = kmap_atomic(page, KM_USER0);
+ /* Fire data frozen trigger just before we copy the data */
+ jbd2_buffer_frozen_trigger(jh, source + offset,
+ jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
@@ -963,15 +1008,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
jh->b_triggers = type;
}
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
- if (!triggers || !triggers->t_commit)
+ if (!triggers || !triggers->t_frozen)
return;
- triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+ triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
@@ -1235,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
- int err;
+ int err, wait_for_commit = 0;
+ tid_t tid;
pid_t pid;
J_ASSERT(journal_current_handle() == handle);
@@ -1243,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle)
if (is_handle_aborted(handle))
err = -EIO;
else {
- J_ASSERT(transaction->t_updates > 0);
+ J_ASSERT(atomic_read(&transaction->t_updates) > 0);
err = 0;
}
@@ -1288,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle)
journal->j_last_sync_writer = pid;
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
commit_time = journal->j_average_commit_time;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
trans_time = ktime_to_ns(ktime_sub(ktime_get(),
transaction->t_start_time));
@@ -1311,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle)
if (handle->h_sync)
transaction->t_synchronous_commit = 1;
current->journal_info = NULL;
- spin_lock(&transaction->t_handle_lock);
- transaction->t_outstanding_credits -= handle->h_buffer_credits;
- transaction->t_updates--;
- if (!transaction->t_updates) {
- wake_up(&journal->j_wait_updates);
- if (journal->j_barrier_count)
- wake_up(&journal->j_wait_transaction_locked);
- }
+ atomic_sub(handle->h_buffer_credits,
+ &transaction->t_outstanding_credits);
/*
* If the handle is marked SYNC, we need to set another commit
@@ -1327,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle)
* transaction is too old now.
*/
if (handle->h_sync ||
- transaction->t_outstanding_credits >
- journal->j_max_transaction_buffers ||
- time_after_eq(jiffies, transaction->t_expires)) {
+ (atomic_read(&transaction->t_outstanding_credits) >
+ journal->j_max_transaction_buffers) ||
+ time_after_eq(jiffies, transaction->t_expires)) {
/* Do this even for aborted journals: an abort still
* completes the commit thread, it just doesn't write
* anything to disk. */
- tid_t tid = transaction->t_tid;
- spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle);
/* This is non-blocking */
@@ -1346,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle)
* to wait for the commit to complete.
*/
if (handle->h_sync && !(current->flags & PF_MEMALLOC))
- err = jbd2_log_wait_commit(journal, tid);
- } else {
- spin_unlock(&transaction->t_handle_lock);
+ wait_for_commit = 1;
}
+ /*
+ * Once we drop t_updates, if it goes to zero the transaction
+ * could start commiting on us and eventually disappear. So
+ * once we do this, we must not dereference transaction
+ * pointer again.
+ */
+ tid = transaction->t_tid;
+ if (atomic_dec_and_test(&transaction->t_updates)) {
+ wake_up(&journal->j_wait_updates);
+ if (journal->j_barrier_count)
+ wake_up(&journal->j_wait_transaction_locked);
+ }
+
+ if (wait_for_commit)
+ err = jbd2_log_wait_commit(journal, tid);
+
lock_map_release(&handle->h_lockdep_map);
jbd2_free_handle(handle);
@@ -1716,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */
- spin_lock(&journal->j_state_lock);
+ write_lock(&journal->j_state_lock);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
@@ -1769,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return ret;
} else {
/* There is no currently-running transaction. So the
@@ -1783,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return ret;
} else {
/* The orphan record's transaction has
@@ -1807,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
return 0;
} else {
/* Good, the buffer belongs to the running transaction.
@@ -1826,7 +1878,7 @@ zap_buffer:
zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
+ write_unlock(&journal->j_state_lock);
zap_buffer_unlocked:
clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2133,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
/* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */
- spin_lock(&journal->j_state_lock);
+ read_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
- spin_unlock(&journal->j_state_lock);
+ read_unlock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e6..54a92fd02bb 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (inode->i_mode != mode) {
struct iattr attr;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid = ATTR_MODE | ATTR_CTIME;
attr.ia_mode = mode;
+ attr.ia_ctime = CURRENT_TIME_SEC;
rc = jffs2_do_setattr(inode, &attr);
if (rc < 0)
return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085..166062a6823 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
jffs2_free_raw_inode(ri);
- d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
fail:
make_bad_inode(inode);
+ unlock_new_inode(inode);
iput(inode);
jffs2_free_raw_inode(ri);
return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
-
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc2c80ab15..459d39d1ea0 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_blocks = 0;
inode->i_size = 0;
- insert_inode_hash(inode);
+ if (insert_inode_locked(inode) < 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
return inode;
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index a2d58c96f1b..d258e261bdc 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
{
- /* success of check_xattr_ref_inode() means taht inode (ic) dose not have
+ /* success of check_xattr_ref_inode() means that inode (ic) dose not have
* duplicate name/value pairs. If duplicate name/value pair would be found,
* one will be removed.
*/
diff --git a/fs/libfs.c b/fs/libfs.c
index 09e1016eb77..dcaf972cbf1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -489,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
* unique inode values later for this filesystem, then you must take care
* to pass it an appropriate max_reserved value to avoid collisions.
*/
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
+int simple_fill_super(struct super_block *s, unsigned long magic,
+ struct tree_descr *files)
{
struct inode *inode;
struct dentry *root;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
* This function is called by the kernel memory management when memory
* gets low.
*
+ * @shrink: (ignored)
* @nr_to_scan: Number of objects to scan
* @gfp_mask: (ignored)
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free_list);
struct list_head *l, *ltmp;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 91969589131..1dbf921ca44 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -75,10 +75,6 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
if (!IS_ERR(page))
kmap(page);
return page;
-
-fail:
- dir_put_page(page);
- return ERR_PTR(-EIO);
}
static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/namei.c b/fs/namei.c
index 868d0cb9d47..42d2d28fb82 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -282,8 +282,7 @@ int inode_permission(struct inode *inode, int mask)
if (retval)
return retval;
- return security_inode_permission(inode,
- mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
+ return security_inode_permission(inode, mask);
}
/**
@@ -1484,8 +1483,7 @@ static int handle_truncate(struct path *path)
*/
error = locks_verify_locked(inode);
if (!error)
- error = security_path_truncate(path, 0,
- ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+ error = security_path_truncate(path);
if (!error) {
error = do_truncate(path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fa338515402..1e634deff94 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -728,8 +728,8 @@ out_fput:
out_bdi:
/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
*
- * The previously used put_filp(ncp_filp); was bogous, since
- * it doesn't proper unlocking.
+ * The previously used put_filp(ncp_filp); was bogus, since
+ * it doesn't perform proper unlocking.
*/
fput(ncp_filp);
out:
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index a43d07e7b92..cc1bb33b59b 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -61,8 +61,8 @@ config NFS_V3_ACL
If unsure, say N.
config NFS_V4
- bool "NFS client support for NFS version 4 (EXPERIMENTAL)"
- depends on NFS_FS && EXPERIMENTAL
+ bool "NFS client support for NFS version 4"
+ depends on NFS_FS
select RPCSEC_GSS_KRB5
help
This option enables support for version 4 of the NFS protocol
@@ -72,16 +72,16 @@ config NFS_V4
space programs which can be found in the Linux nfs-utils package,
available from http://linux-nfs.org/.
- If unsure, say N.
+ If unsure, say Y.
config NFS_V4_1
- bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
+ bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_V4 && EXPERIMENTAL
help
This option enables support for minor version 1 of the NFSv4 protocol
(draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
- Unless you're an NFS developer, say N.
+ If unsure, say N.
config ROOT_NFS
bool "Root file system on NFS"
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a08770a7e85..930d10fecda 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,8 +37,8 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
if (inode == NULL)
goto out_putclient;
nfsi = NFS_I(inode);
- down_read(&nfsi->rwsem);
- delegation = nfsi->delegation;
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0)
goto out_iput;
res->size = i_size_read(inode);
@@ -53,7 +53,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
args->bitmap[1];
res->status = 0;
out_iput:
- up_read(&nfsi->rwsem);
+ rcu_read_unlock();
iput(inode);
out_putclient:
nfs_put_client(clp);
@@ -62,16 +62,6 @@ out:
return res->status;
}
-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
-{
-#if defined(CONFIG_NFS_V4_1)
- if (clp->cl_minorversion > 0)
- return nfs41_validate_delegation_stateid;
-#endif
- return nfs4_validate_delegation_stateid;
-}
-
-
__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
{
struct nfs_client *clp;
@@ -92,8 +82,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
inode = nfs_delegation_find_inode(clp, &args->fh);
if (inode != NULL) {
/* Set up a helper thread to actually return the delegation */
- switch (nfs_async_inode_return_delegation(inode, &args->stateid,
- nfs_validate_delegation_stateid(clp))) {
+ switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0:
res = 0;
break;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f..4e7df2adb21 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -150,6 +150,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_boot_time = CURRENT_TIME;
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
+ clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
#endif
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
@@ -178,7 +179,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
clp->cl_session = NULL;
}
- clp->cl_call_sync = _nfs4_call_sync;
+ clp->cl_mvops = nfs_v4_minor_ops[0];
#endif /* CONFIG_NFS_V4_1 */
}
@@ -188,7 +189,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
static void nfs4_destroy_callback(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
- nfs_callback_down(clp->cl_minorversion);
+ nfs_callback_down(clp->cl_mvops->minor_version);
}
static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -1126,7 +1127,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
return error;
}
- error = nfs_callback_up(clp->cl_minorversion,
+ error = nfs_callback_up(clp->cl_mvops->minor_version,
clp->cl_rpcclient->cl_xprt);
if (error < 0) {
dprintk("%s: failed to start callback. Error = %d\n",
@@ -1143,10 +1144,8 @@ static int nfs4_init_callback(struct nfs_client *clp)
*/
static int nfs4_init_client_minor_version(struct nfs_client *clp)
{
- clp->cl_call_sync = _nfs4_call_sync;
-
#if defined(CONFIG_NFS_V4_1)
- if (clp->cl_minorversion) {
+ if (clp->cl_mvops->minor_version) {
struct nfs4_session *session = NULL;
/*
* Create the session and mark it expired.
@@ -1158,7 +1157,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
return -ENOMEM;
clp->cl_session = session;
- clp->cl_call_sync = _nfs4_call_sync_session;
+ /*
+ * The create session reply races with the server back
+ * channel probe. Mark the client NFS_CS_SESSION_INITING
+ * so that the client back channel can find the
+ * nfs_client struct
+ */
+ clp->cl_cons_state = NFS_CS_SESSION_INITING;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -1286,6 +1291,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
/*
* Create a version 4 volume record
*/
@@ -1346,7 +1400,6 @@ error:
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
- struct nfs_fattr *fattr;
struct nfs_server *server;
int error;
@@ -1356,55 +1409,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- error = nfs4_init_session(server);
- if (error < 0)
- goto error;
-
- /* Probe the root fh to retrieve its FSID */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
dprintk("<-- nfs4_create_server() = %p\n", server);
- nfs_free_fattr(fattr);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
@@ -1418,7 +1435,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- struct nfs_fattr *fattr;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1443,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
@@ -1448,7 +1459,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
data->authflavor,
parent_server->client->cl_xprt->prot,
parent_server->client->cl_timeout,
- parent_client->cl_minorversion);
+ parent_client->cl_mvops->minor_version);
if (error < 0)
goto error;
@@ -1456,40 +1467,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- /* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- dprintk("Referral FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
-
- nfs_free_fattr(fattr);
dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 30163454397..b9c3c43cea1 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -268,14 +268,6 @@ out:
return status;
}
-/* Sync all data to disk upon delegation return */
-static void nfs_msync_inode(struct inode *inode)
-{
- filemap_fdatawrite(inode->i_mapping);
- nfs_wb_all(inode);
- filemap_fdatawait(inode->i_mapping);
-}
-
/*
* Basic procedure for returning a delegation to the server
*/
@@ -367,7 +359,7 @@ int nfs_inode_return_delegation(struct inode *inode)
delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
spin_unlock(&clp->cl_lock);
if (delegation != NULL) {
- nfs_msync_inode(inode);
+ nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
}
}
@@ -471,9 +463,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
/*
* Asynchronous delegation recall!
*/
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
- int (*validate_stateid)(struct nfs_delegation *delegation,
- const nfs4_stateid *stateid))
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_delegation *delegation;
@@ -481,7 +471,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (!validate_stateid(delegation, stateid)) {
+ if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
rcu_read_unlock();
return -ENOENT;
}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 69e7b814012..2026304bda1 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -34,9 +34,7 @@ enum {
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
int nfs_inode_return_delegation(struct inode *inode);
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
- int (*validate_stateid)(struct nfs_delegation *delegation,
- const nfs4_stateid *stateid));
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_return_delegation_noreclaim(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..29539ceeb74 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1652,16 +1652,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
- /*
- * ... prune child dentries and writebacks if needed.
- */
- if (atomic_read(&old_dentry->d_count) > 1) {
- if (S_ISREG(old_inode->i_mode))
- nfs_wb_all(old_inode);
- shrink_dcache_parent(old_dentry);
- }
nfs_inode_return_delegation(old_inode);
-
if (new_inode != NULL)
nfs_inode_return_delegation(new_inode);
@@ -1710,7 +1701,7 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
struct nfs_inode *nfsi;
@@ -1953,7 +1944,7 @@ int nfs_permission(struct inode *inode, int mask)
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
goto out;
/* Is this sys_access() ? */
- if (mask & MAY_ACCESS)
+ if (mask & (MAY_ACCESS | MAY_CHDIR))
goto force_lookup;
switch (inode->i_mode & S_IFMT) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad4cd31d605..064a8096167 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,6 +69,7 @@ struct nfs_direct_req {
/* I/O parameters */
struct nfs_open_context *ctx; /* file open context info */
+ struct nfs_lock_context *l_ctx; /* Lock context info */
struct kiocb * iocb; /* controlling i/o request */
struct inode * inode; /* target file of i/o */
@@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
INIT_LIST_HEAD(&dreq->rewrite_list);
dreq->iocb = NULL;
dreq->ctx = NULL;
+ dreq->l_ctx = NULL;
spin_lock_init(&dreq->lock);
atomic_set(&dreq->io_count, 0);
dreq->count = 0;
@@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+ if (dreq->l_ctx != NULL)
+ nfs_put_lock_context(dreq->l_ctx);
if (dreq->ctx != NULL)
put_nfs_open_context(dreq->ctx);
kmem_cache_free(nfs_direct_cachep, dreq);
@@ -336,6 +340,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
+ data->args.lock_context = dreq->l_ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
@@ -416,24 +421,28 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- ssize_t result = 0;
+ ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
dreq = nfs_direct_req_alloc();
- if (!dreq)
- return -ENOMEM;
+ if (dreq == NULL)
+ goto out;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (dreq->l_ctx == NULL)
+ goto out_release;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
+out_release:
nfs_direct_req_release(dreq);
-
+out:
return result;
}
@@ -574,6 +583,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
data->args.offset = 0;
data->args.count = 0;
data->args.context = dreq->ctx;
+ data->args.lock_context = dreq->l_ctx;
data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
@@ -761,6 +771,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
+ data->args.lock_context = dreq->l_ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
@@ -845,7 +856,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
size_t count)
{
- ssize_t result = 0;
+ ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
size_t wsize = NFS_SERVER(inode)->wsize;
@@ -853,7 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
dreq = nfs_direct_req_alloc();
if (!dreq)
- return -ENOMEM;
+ goto out;
nfs_alloc_commit_data(dreq);
if (dreq->commit_data == NULL || count < wsize)
@@ -861,14 +872,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (dreq->l_ctx != NULL)
+ goto out_release;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
if (!result)
result = nfs_direct_wait(dreq);
+out_release:
nfs_direct_req_release(dreq);
-
+out:
return result;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 36a5e74f51b..2d141a74ae8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,6 +27,7 @@
#include <linux/pagemap.h>
#include <linux/aio.h>
#include <linux/gfp.h>
+#include <linux/swap.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -202,37 +203,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
}
/*
- * Helper for nfs_file_flush() and nfs_file_fsync()
- *
- * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
- * disk, but it retrieves and clears ctx->error after synching, despite
- * the two being set at the same time in nfs_context_set_write_error().
- * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occured, and hence cause it to
- * fall back to doing a synchronous write.
- */
-static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
-{
- int have_error, status;
- int ret = 0;
-
- have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- status = nfs_wb_all(inode);
- have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- if (have_error)
- ret = xchg(&ctx->error, 0);
- if (!ret)
- ret = status;
- return ret;
-}
-
-/*
* Flush all dirty pages, and check for write errors.
*/
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -245,7 +220,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
return 0;
/* Flush writes to the server and return any errors */
- return nfs_do_fsync(ctx, inode);
+ return vfs_fsync(file, 0);
}
static ssize_t
@@ -320,6 +295,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
+ *
+ * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
+ * disk, but it retrieves and clears ctx->error after synching, despite
+ * the two being set at the same time in nfs_context_set_write_error().
+ * This is because the former is used to notify the _next_ call to
+ * nfs_file_write() that a write error occured, and hence cause it to
+ * fall back to doing a synchronous write.
*/
static int
nfs_file_fsync(struct file *file, int datasync)
@@ -327,13 +309,23 @@ nfs_file_fsync(struct file *file, int datasync)
struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
+ int have_error, status;
+ int ret = 0;
+
dprintk("NFS: fsync file(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- return nfs_do_fsync(ctx, inode);
+ have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ status = nfs_commit_inode(inode, FLUSH_SYNC);
+ have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ if (have_error)
+ ret = xchg(&ctx->error, 0);
+ if (!ret)
+ ret = status;
+ return ret;
}
/*
@@ -493,11 +485,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
*/
static int nfs_release_page(struct page *page, gfp_t gfp)
{
+ struct address_space *mapping = page->mapping;
+
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* Only do I/O if gfp is a superset of GFP_KERNEL */
- if ((gfp & GFP_KERNEL) == GFP_KERNEL)
- nfs_wb_page(page->mapping->host, page);
+ if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
+ int how = FLUSH_SYNC;
+
+ /* Don't let kswapd deadlock waiting for OOM RPC calls */
+ if (current_is_kswapd())
+ how = 0;
+ nfs_commit_inode(mapping->host, how);
+ }
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
@@ -639,7 +639,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
/* Return error values for O_DSYNC and IS_SYNC() */
if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
- int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
+ int err = vfs_fsync(iocb->ki_filp, 0);
if (err < 0)
result = err;
}
@@ -675,7 +675,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
written = ret;
if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
- int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
+ int err = vfs_fsync(filp, 0);
if (err < 0)
ret = err;
}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273..a70e446e160 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 099b3518fee..581d8f081e6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -413,10 +413,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
/* Write all dirty data */
- if (S_ISREG(inode->i_mode)) {
- filemap_write_and_wait(inode->i_mapping);
+ if (S_ISREG(inode->i_mode))
nfs_wb_all(inode);
- }
fattr = nfs_alloc_fattr();
if (fattr == NULL)
@@ -530,6 +528,68 @@ out:
return err;
}
+static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
+{
+ atomic_set(&l_ctx->count, 1);
+ l_ctx->lockowner = current->files;
+ l_ctx->pid = current->tgid;
+ INIT_LIST_HEAD(&l_ctx->list);
+}
+
+static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
+{
+ struct nfs_lock_context *pos;
+
+ list_for_each_entry(pos, &ctx->lock_context.list, list) {
+ if (pos->lockowner != current->files)
+ continue;
+ if (pos->pid != current->tgid)
+ continue;
+ atomic_inc(&pos->count);
+ return pos;
+ }
+ return NULL;
+}
+
+struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
+{
+ struct nfs_lock_context *res, *new = NULL;
+ struct inode *inode = ctx->path.dentry->d_inode;
+
+ spin_lock(&inode->i_lock);
+ res = __nfs_find_lock_context(ctx);
+ if (res == NULL) {
+ spin_unlock(&inode->i_lock);
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (new == NULL)