aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@redhat.com>2010-08-26 13:22:27 -0400
committerJ. Bruce Fields <bfields@redhat.com>2010-08-26 13:22:27 -0400
commitf632265d0ffb5acf331252d98c64939849d96bb2 (patch)
tree31187d9a726bf1ca6ca12e26ad8e7c609eaf4d8b /fs
parent7d94784293096c0a46897acdb83be5abd9278ece (diff)
parentda5cabf80e2433131bf0ed8993abc0f7ea618c73 (diff)
downloadmrst-s0i3-test-f632265d0ffb5acf331252d98c64939849d96bb2.tar.gz
mrst-s0i3-test-f632265d0ffb5acf331252d98c64939849d96bb2.tar.xz
mrst-s0i3-test-f632265d0ffb5acf331252d98c64939849d96bb2.zip
Merge commit 'v2.6.36-rc1' into HEAD
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Makefile4
-rw-r--r--fs/9p/fid.c111
-rw-r--r--fs/9p/v9fs.c3
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h3
-rw-r--r--fs/9p/vfs_dir.c136
-rw-r--r--fs/9p/vfs_file.c26
-rw-r--r--fs/9p/vfs_inode.c785
-rw-r--r--fs/9p/vfs_super.c54
-rw-r--r--fs/9p/xattr.c160
-rw-r--r--fs/9p/xattr.h27
-rw-r--r--fs/9p/xattr_user.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/adfs/inode.c16
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/file.c11
-rw-r--r--fs/affs/inode.c38
-rw-r--r--fs/affs/super.c32
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/cell.c96
-rw-r--r--fs/afs/dir.c47
-rw-r--r--fs/afs/inode.c91
-rw-r--r--fs/afs/internal.h25
-rw-r--r--fs/afs/main.c9
-rw-r--r--fs/afs/mntpt.c78
-rw-r--r--fs/afs/proc.c2
-rw-r--r--fs/afs/rxrpc.c1
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/super.c22
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c21
-rw-r--r--fs/attr.c88
-rw-r--r--fs/autofs/root.c67
-rw-r--r--fs/autofs4/root.c52
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/file.c17
-rw-r--r--fs/bfs/inode.c116
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/binfmt_misc.c5
-rw-r--r--fs/bio.c5
-rw-r--r--fs/block_dev.c113
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c19
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/inode.c44
-rw-r--r--fs/btrfs/ioctl.c24
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/super.c8
-rw-r--r--fs/btrfs/volumes.c18
-rw-r--r--fs/buffer.c180
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/cachefiles/daemon.c38
-rw-r--r--fs/cachefiles/internal.h13
-rw-r--r--fs/cachefiles/namei.c13
-rw-r--r--fs/cachefiles/rdwr.c4
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/ceph/Makefile2
-rw-r--r--fs/ceph/addr.c16
-rw-r--r--fs/ceph/armor.c6
-rw-r--r--fs/ceph/auth.c6
-rw-r--r--fs/ceph/auth_x.c11
-rw-r--r--fs/ceph/buffer.c16
-rw-r--r--fs/ceph/caps.c426
-rw-r--r--fs/ceph/ceph_frag.h4
-rw-r--r--fs/ceph/ceph_fs.c50
-rw-r--r--fs/ceph/ceph_fs.h87
-rw-r--r--fs/ceph/ceph_hash.h4
-rw-r--r--fs/ceph/ceph_strings.c3
-rw-r--r--fs/ceph/crush/crush.h4
-rw-r--r--fs/ceph/crush/hash.h4
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/crush/mapper.h4
-rw-r--r--fs/ceph/crypto.c27
-rw-r--r--fs/ceph/crypto.h4
-rw-r--r--fs/ceph/debugfs.c23
-rw-r--r--fs/ceph/decode.h6
-rw-r--r--fs/ceph/dir.c23
-rw-r--r--fs/ceph/file.c34
-rw-r--r--fs/ceph/inode.c32
-rw-r--r--fs/ceph/ioctl.c24
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/locks.c256
-rw-r--r--fs/ceph/mds_client.c306
-rw-r--r--fs/ceph/mds_client.h33
-rw-r--r--fs/ceph/mdsmap.c6
-rw-r--r--fs/ceph/mdsmap.h8
-rw-r--r--fs/ceph/messenger.c96
-rw-r--r--fs/ceph/mon_client.c181
-rw-r--r--fs/ceph/mon_client.h5
-rw-r--r--fs/ceph/msgr.h4
-rw-r--r--fs/ceph/osd_client.c18
-rw-r--r--fs/ceph/osdmap.c63
-rw-r--r--fs/ceph/rados.h13
-rw-r--r--fs/ceph/super.c92
-rw-r--r--fs/ceph/super.h40
-rw-r--r--fs/ceph/xattr.c2
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/Kconfig27
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README15
-rw-r--r--fs/cifs/cache.c331
-rw-r--r--fs/cifs/cifs_debug.c25
-rw-r--r--fs/cifs/cifs_dfs_ref.c33
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_spnego.c7
-rw-r--r--fs/cifs/cifsfs.c50
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h48
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c181
-rw-r--r--fs/cifs/dir.c84
-rw-r--r--fs/cifs/dns_resolve.c166
-rw-r--r--fs/cifs/dns_resolve.h2
-rw-r--r--fs/cifs/file.c203
-rw-r--r--fs/cifs/fscache.c236
-rw-r--r--fs/cifs/fscache.h136
-rw-r--r--fs/cifs/inode.c153
-rw-r--r--fs/cifs/ioctl.c3
-rw-r--r--fs/cifs/misc.c20
-rw-r--r--fs/cifs/netmisc.c63
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/cifs/smberr.h1
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/coda/psdev.c12
-rw-r--r--fs/coda/upcall.c12
-rw-r--r--fs/compat.c52
-rw-r--r--fs/compat_ioctl.c51
-rw-r--r--fs/configfs/inode.c9
-rw-r--r--fs/cramfs/inode.c88
-rw-r--r--fs/dcache.c227
-rw-r--r--fs/direct-io.c100
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/dlm/netlink.c15
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/file.c60
-rw-r--r--fs/ecryptfs/inode.c112
-rw-r--r--fs/ecryptfs/messaging.c19
-rw-r--r--fs/ecryptfs/super.c14
-rw-r--r--fs/exec.c12
-rw-r--r--fs/exofs/exofs.h3
-rw-r--r--fs/exofs/file.c30
-rw-r--r--fs/exofs/inode.c136
-rw-r--r--fs/exofs/ios.c46
-rw-r--r--fs/exofs/super.c3
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext2/balloc.c11
-rw-r--r--fs/ext2/dir.c23
-rw-r--r--fs/ext2/ext2.h5
-rw-r--r--fs/ext2/ialloc.c13
-rw-r--r--fs/ext2/inode.c89
-rw-r--r--fs/ext2/super.c14
-rw-r--r--fs/ext2/xattr.c25
-rw-r--r--fs/ext3/Kconfig1
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext3/ialloc.c12
-rw-r--r--fs/ext3/inode.c143
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c31
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/ext4/acl.c1
-rw-r--r--fs/ext4/balloc.c6
-rw-r--r--fs/ext4/block_validity.c8
-rw-r--r--fs/ext4/dir.c23
-rw-r--r--fs/ext4/ext4.h155
-rw-r--r--fs/ext4/ext4_jbd2.c71
-rw-r--r--fs/ext4/ext4_jbd2.h56
-rw-r--r--fs/ext4/extents.c20
-rw-r--r--fs/ext4/file.c5
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c298
-rw-r--r--fs/ext4/mballoc.c155
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c13
-rw-r--r--fs/ext4/namei.c40
-rw-r--r--fs/ext4/resize.c8
-rw-r--r--fs/ext4/super.c346
-rw-r--r--fs/ext4/xattr.c15
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/file.c49
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/fcntl.c28
-rw-r--r--fs/file.c60
-rw-r--r--fs/file_table.c19
-rw-r--r--fs/freevxfs/vxfs_extern.h2
-rw-r--r--fs/freevxfs/vxfs_inode.c8
-rw-r--r--fs/freevxfs/vxfs_super.c4
-rw-r--r--fs/fs-writeback.c744
-rw-r--r--fs/fs_struct.c7
-rw-r--r--fs/fscache/Kconfig1
-rw-r--r--fs/fscache/internal.h22
-rw-r--r--fs/fscache/main.c106
-rw-r--r--fs/fscache/object-list.c11
-rw-r--r--fs/fscache/object.c106
-rw-r--r--fs/fscache/operation.c67
-rw-r--r--fs/fscache/page.c72
-rw-r--r--fs/fuse/dev.c229
-rw-r--r--fs/fuse/dir.c19
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/aops.c19
-rw-r--r--fs/gfs2/bmap.c18
-rw-r--r--fs/gfs2/bmap.h2
-rw-r--r--fs/gfs2/dir.c44
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c97
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c39
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/main.c14
-rw-r--r--fs/gfs2/meta_io.c8
-rw-r--r--fs/gfs2/ops_fstype.c37
-rw-r--r--fs/gfs2/ops_inode.c18
-rw-r--r--fs/gfs2/quota.c35
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/recovery.c54
-rw-r--r--fs/gfs2/recovery.h6
-rw-r--r--fs/gfs2/super.c52
-rw-r--r--fs/gfs2/sys.c60
-rw-r--r--fs/gfs2/xattr.c24
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c70
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/inode.c77
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs.h22
-rw-r--r--fs/hostfs/hostfs_kern.c517
-rw-r--r--fs/hostfs/hostfs_user.c112
-rw-r--r--fs/hpfs/file.c11
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c24
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c41
-rw-r--r--fs/inode.c187
-rw-r--r--fs/ioctl.c18
-rw-r--r--fs/isofs/inode.c7
-rw-r--r--fs/jbd/journal.c7
-rw-r--r--fs/jbd/recovery.c11
-rw-r--r--fs/jbd2/checkpoint.c18
-rw-r--r--fs/jbd2/commit.c50
-rw-r--r--fs/jbd2/journal.c136
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/transaction.c260
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/background.c1
-rw-r--r--fs/jffs2/build.c1
-rw-r--r--fs/jffs2/compr.c5
-rw-r--r--fs/jffs2/compr.h1
-rw-r--r--fs/jffs2/compr_lzo.c1
-rw-r--r--fs/jffs2/compr_rtime.c1
-rw-r--r--fs/jffs2/compr_rubin.c1
-rw-r--r--fs/jffs2/compr_zlib.c1
-rw-r--r--fs/jffs2/debug.c1
-rw-r--r--fs/jffs2/debug.h1
-rw-r--r--fs/jffs2/dir.c124
-rw-r--r--fs/jffs2/erase.c1
-rw-r--r--fs/jffs2/file.c1
-rw-r--r--fs/jffs2/fs.c18
-rw-r--r--fs/jffs2/gc.c1
-rw-r--r--fs/jffs2/ioctl.c1
-rw-r--r--fs/jffs2/jffs2_fs_i.h1
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.h1
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/xattr.c4
-rw-r--r--fs/jfs/file.c14
-rw-r--r--fs/jfs/inode.c63
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/jfs/xattr.c87
-rw-r--r--fs/libfs.c73
-rw-r--r--fs/logfs/dir.c7
-rw-r--r--fs/logfs/file.c24
-rw-r--r--fs/logfs/inode.c51
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/logfs.h7
-rw-r--r--fs/logfs/readwrite.c62
-rw-r--r--fs/logfs/segment.c1
-rw-r--r--fs/logfs/super.c23
-rw-r--r--fs/mbcache.c173
-rw-r--r--fs/minix/bitmap.c6
-rw-r--r--fs/minix/dir.c25
-rw-r--r--fs/minix/file.c22
-rw-r--r--fs/minix/inode.c35
-rw-r--r--fs/minix/minix.h4
-rw-r--r--fs/namei.c23
-rw-r--r--fs/namespace.c13
-rw-r--r--fs/ncpfs/inode.c40
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/nfs/Kconfig27
-rw-r--r--fs/nfs/callback.c11
-rw-r--r--fs/nfs/callback_proc.c19
-rw-r--r--fs/nfs/client.c143
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/delegation.h4
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/nfs/direct.c29
-rw-r--r--fs/nfs/dns_resolve.c24
-rw-r--r--fs/nfs/dns_resolve.h12
-rw-r--r--fs/nfs/file.c64
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c87
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/nfs2xdr.c7
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h57
-rw-r--r--fs/nfs/nfs4proc.c474
-rw-r--r--fs/nfs/nfs4renewd.c4
-rw-r--r--fs/nfs/nfs4state.c82
-rw-r--r--fs/nfs/nfs4xdr.c111
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/super.c30
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c39
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfs4xdr.c6
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--fs/nilfs2/bmap.c6
-rw-r--r--fs/nilfs2/bmap.h16
-rw-r--r--fs/nilfs2/bmap_union.h42
-rw-r--r--fs/nilfs2/btnode.c23
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c914
-rw-r--r--fs/nilfs2/btree.h14
-rw-r--r--fs/nilfs2/dir.c58
-rw-r--r--fs/nilfs2/direct.c96
-rw-r--r--fs/nilfs2/direct.h11
-rw-r--r--fs/nilfs2/gcdat.c2
-rw-r--r--fs/nilfs2/gcinode.c17
-rw-r--r--fs/nilfs2/inode.c78
-rw-r--r--fs/nilfs2/mdt.c1
-rw-r--r--fs/nilfs2/nilfs.h24
-rw-r--r--fs/nilfs2/page.c5
-rw-r--r--fs/nilfs2/page.h2
-rw-r--r--fs/nilfs2/recovery.c359
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/nilfs2/segbuf.h26
-rw-r--r--fs/nilfs2/segment.c19
-rw-r--r--fs/nilfs2/segment.h12
-rw-r--r--fs/nilfs2/super.c361
-rw-r--r--fs/nilfs2/the_nilfs.c161
-rw-r--r--fs/nilfs2/the_nilfs.h23
-rw-r--r--fs/notify/Kconfig1
-rw-r--r--fs/notify/Makefile4
-rw-r--r--fs/notify/dnotify/dnotify.c213
-rw-r--r--fs/notify/fanotify/Kconfig26
-rw-r--r--fs/notify/fanotify/Makefile1
-rw-r--r--fs/notify/fanotify/fanotify.c212
-rw-r--r--fs/notify/fanotify/fanotify_user.c760
-rw-r--r--fs/notify/fsnotify.c201
-rw-r--r--fs/notify/fsnotify.h27
-rw-r--r--fs/notify/group.c182
-rw-r--r--fs/notify/inode_mark.c337
-rw-r--r--fs/notify/inotify/Kconfig15
-rw-r--r--fs/notify/inotify/Makefile1
-rw-r--r--fs/notify/inotify/inotify.c873
-rw-r--r--fs/notify/inotify/inotify.h7
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c151
-rw-r--r--fs/notify/inotify/inotify_user.c369
-rw-r--r--fs/notify/mark.c371
-rw-r--r--fs/notify/notification.c209
-rw-r--r--fs/notify/vfsmount_mark.c187
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/acl.c33
-rw-r--r--fs/ocfs2/aops.c110
-rw-r--r--fs/ocfs2/cluster/tcp.c17
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c6
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c31
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c24
-rw-r--r--fs/ocfs2/dlm/dlmthread.c114
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c18
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c331
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/inode.c29
-rw-r--r--fs/ocfs2/inode.h5
-rw-r--r--fs/ocfs2/journal.c34
-rw-r--r--fs/ocfs2/localalloc.c7
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c32
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/ocfs2/xattr.c200
-rw-r--r--fs/omfs/dir.c22
-rw-r--r--fs/omfs/file.c46
-rw-r--r--fs/omfs/inode.c53
-rw-r--r--fs/omfs/omfs.h1
-rw-r--r--fs/omfs/omfs_fs.h1
-rw-r--r--fs/open.c18
-rw-r--r--fs/partitions/acorn.c35
-rw-r--r--fs/partitions/amiga.c20
-rw-r--r--fs/partitions/atari.c12
-rw-r--r--fs/partitions/check.c23
-rw-r--r--fs/partitions/check.h6
-rw-r--r--fs/partitions/efi.c2
-rw-r--r--fs/partitions/ibm.c31
-rw-r--r--fs/partitions/karma.c2
-rw-r--r--fs/partitions/ldm.c4
-rw-r--r--fs/partitions/mac.c4
-rw-r--r--fs/partitions/msdos.c67
-rw-r--r--fs/partitions/osf.c2
-rw-r--r--fs/partitions/sgi.c2
-rw-r--r--fs/partitions/sun.c2
-rw-r--r--fs/partitions/sysv68.c9
-rw-r--r--fs/partitions/ultrix.c2
-rw-r--r--fs/pipe.c93
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c144
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/inode.c23
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/proc/task_mmu.c8
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/qnx4/inode.c11
-rw-r--r--fs/quota/dquot.c83
-rw-r--r--fs/quota/quota_tree.c85
-rw-r--r--fs/quota/quota_tree.h6
-rw-r--r--fs/quota/quota_v1.c3
-rw-r--r--fs/quota/quota_v2.c11
-rw-r--r--fs/ramfs/file-nommu.c7
-rw-r--r--fs/read_write.c8
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/reiserfs/file.c50
-rw-r--r--fs/reiserfs/inode.c136
-rw-r--r--fs/reiserfs/journal.c1
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/smbfs/inode.c12
-rw-r--r--fs/splice.c25
-rw-r--r--fs/squashfs/Kconfig25
-rw-r--r--fs/squashfs/Makefile4
-rw-r--r--fs/squashfs/decompressor.c6
-rw-r--r--fs/squashfs/lzo_wrapper.c136
-rw-r--r--fs/squashfs/squashfs.h3
-rw-r--r--fs/squashfs/squashfs_fs.h20
-rw-r--r--fs/squashfs/xattr.c4
-rw-r--r--fs/squashfs/xattr.h2
-rw-r--r--fs/stat.c29
-rw-r--r--fs/statfs.c95
-rw-r--r--fs/super.c45
-rw-r--r--fs/sync.c27
-rw-r--r--fs/sysfs/file.c3
-rw-r--r--fs/sysfs/inode.c14
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/symlink.c26
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/dir.c21
-rw-r--r--fs/sysv/file.c22
-rw-r--r--fs/sysv/ialloc.c7
-rw-r--r--fs/sysv/inode.c19
-rw-r--r--fs/sysv/itree.c19
-rw-r--r--fs/sysv/super.c74
-rw-r--r--fs/sysv/sysv.h4
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/file.c23
-rw-r--r--fs/ubifs/lpt.c14
-rw-r--r--fs/ubifs/lpt_commit.c2
-rw-r--r--fs/ubifs/recovery.c23
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/super.c16
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/file.c23
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c61
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/dir.c13
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c63
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/ufs/truncate.c16
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/ufs/util.h4
-rw-r--r--fs/utimes.c7
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c638
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h119
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c34
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c46
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c179
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c188
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h473
-rw-r--r--fs/xfs/quota/xfs_dquot.c114
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c44
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c10
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c144
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_alloc.c15
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c5
-rw-r--r--fs/xfs/xfs_attr.c91
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c327
-rw-r--r--fs/xfs/xfs_bmap.h37
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_buf_item.c228
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c20
-rw-r--r--fs/xfs/xfs_dfrag.c21
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c4
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fsops.c7
-rw-r--r--fs/xfs/xfs_ialloc.c146
-rw-r--r--fs/xfs/xfs_ialloc_btree.c4
-rw-r--r--fs/xfs/xfs_iget.c147
-rw-r--r--fs/xfs/xfs_inode.c207
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c273
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c293
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log.c16
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_recover.c55
-rw-r--r--fs/xfs/xfs_mount.c73
-rw-r--r--fs/xfs/xfs_mount.h71
-rw-r--r--fs/xfs/xfs_rename.c63
-rw-r--r--fs/xfs/xfs_rtalloc.c17
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_trans.c657
-rw-r--r--fs/xfs/xfs_trans.h528
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c75
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c76
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h18
-rw-r--r--fs/xfs/xfs_utils.c87
-rw-r--r--fs/xfs/xfs_utils.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c333
587 files changed, 18087 insertions, 14699 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 1a940ec7af6..91fba025fcb 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_9P_FS) := 9p.o
vfs_dir.o \
vfs_dentry.o \
v9fs.o \
- fid.o
+ fid.o \
+ xattr.o \
+ xattr_user.o
9p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 7317b39b281..35856368906 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -97,6 +97,34 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
return ret;
}
+/*
+ * We need to hold v9ses->rename_sem as long as we hold references
+ * to returned path array. Array element contain pointers to
+ * dentry names.
+ */
+static int build_path_from_dentry(struct v9fs_session_info *v9ses,
+ struct dentry *dentry, char ***names)
+{
+ int n = 0, i;
+ char **wnames;
+ struct dentry *ds;
+
+ for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
+ n++;
+
+ wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
+ if (!wnames)
+ goto err_out;
+
+ for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent)
+ wnames[i] = (char *)ds->d_name.name;
+
+ *names = wnames;
+ return n;
+err_out:
+ return -ENOMEM;
+}
+
/**
* v9fs_fid_lookup - lookup for a fid, try to walk if not found
* @dentry: dentry to look for fid in
@@ -112,7 +140,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
int i, n, l, clone, any, access;
u32 uid;
struct p9_fid *fid, *old_fid = NULL;
- struct dentry *d, *ds;
+ struct dentry *ds;
struct v9fs_session_info *v9ses;
char **wnames, *uname;
@@ -139,49 +167,62 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
-
+ /*
+ * we don't have a matching fid. To do a TWALK we need
+ * parent fid. We need to prevent rename when we want to
+ * look at the parent.
+ */
+ down_read(&v9ses->rename_sem);
ds = dentry->d_parent;
fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* walk from the root */
- n = 0;
- for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
- n++;
+ if (fid) {
+ /* Found the parent fid do a lookup with that */
+ fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
+ goto fid_out;
+ }
+ up_read(&v9ses->rename_sem);
- fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* the user is not attached to the fs yet */
- if (access == V9FS_ACCESS_SINGLE)
- return ERR_PTR(-EPERM);
+ /* start from the root and try to do a lookup */
+ fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any);
+ if (!fid) {
+ /* the user is not attached to the fs yet */
+ if (access == V9FS_ACCESS_SINGLE)
+ return ERR_PTR(-EPERM);
- if (v9fs_proto_dotu(v9ses))
+ if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses))
uname = NULL;
- else
- uname = v9ses->uname;
+ else
+ uname = v9ses->uname;
- fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
- v9ses->aname);
-
- if (IS_ERR(fid))
- return fid;
-
- v9fs_fid_add(ds, fid);
- }
- } else /* walk from the parent */
- n = 1;
+ fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
+ v9ses->aname);
+ if (IS_ERR(fid))
+ return fid;
- if (ds == dentry)
+ v9fs_fid_add(dentry->d_sb->s_root, fid);
+ }
+ /* If we are root ourself just return that */
+ if (dentry->d_sb->s_root == dentry)
return fid;
-
- wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
- if (!wnames)
- return ERR_PTR(-ENOMEM);
-
- for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent)
- wnames[i] = (char *) d->d_name.name;
-
+ /*
+ * Do a multipath walk with attached root.
+ * When walking parent we need to make sure we
+ * don't have a parallel rename happening
+ */
+ down_read(&v9ses->rename_sem);
+ n = build_path_from_dentry(v9ses, dentry, &wnames);
+ if (n < 0) {
+ fid = ERR_PTR(n);
+ goto err_out;
+ }
clone = 1;
i = 0;
while (i < n) {
l = min(n - i, P9_MAXWELEM);
+ /*
+ * We need to hold rename lock when doing a multipath
+ * walk to ensure none of the patch component change
+ */
fid = p9_client_walk(fid, l, &wnames[i], clone);
if (IS_ERR(fid)) {
if (old_fid) {
@@ -193,15 +234,17 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
p9_client_clunk(old_fid);
}
kfree(wnames);
- return fid;
+ goto err_out;
}
old_fid = fid;
i += l;
clone = 0;
}
-
kfree(wnames);
+fid_out:
v9fs_fid_add(dentry, fid);
+err_out:
+ up_read(&v9ses->rename_sem);
return fid;
}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8b86e92cd6..38dc0e06759 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -237,6 +237,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
__putname(v9ses->uname);
return ERR_PTR(-ENOMEM);
}
+ init_rwsem(&v9ses->rename_sem);
rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
if (rc) {
@@ -278,7 +279,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
/* for legacy mode, fall back to V9FS_ACCESS_ANY */
- if (!v9fs_proto_dotu(v9ses) &&
+ if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bec4d0bcb45..4c963c9fc41 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -104,6 +104,7 @@ struct v9fs_session_info {
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
+ struct rw_semaphore rename_sem;
};
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef4009d03..88418c419ea 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,9 +52,10 @@ void v9fs_destroy_inode(struct inode *inode);
#endif
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
-void v9fs_clear_inode(struct inode *inode);
+void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d61e3b28ce3..16c8a2a98c1 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf)
}
/**
- * v9fs_dir_readdir - read a directory
+ * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
* @filp: opened file structure
- * @dirent: directory structure ???
- * @filldir: function to populate directory structure ???
+ * @buflen: Length in bytes of buffer to allocate
*
*/
-static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
{
- int over;
- struct p9_wstat st;
- int err = 0;
- struct p9_fid *fid;
- int buflen;
- int reclen = 0;
struct p9_rdir *rdir;
+ struct p9_fid *fid;
+ int err = 0;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
fid = filp->private_data;
-
- buflen = fid->clnt->msize - P9_IOHDRSZ;
-
- /* allocate rdir on demand */
if (!fid->rdir) {
rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
@@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_unlock(&filp->f_dentry->d_lock);
kfree(rdir);
}
+exit:
+ return err;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ int over;
+ struct p9_wstat st;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ int reclen = 0;
+ struct p9_rdir *rdir;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_IOHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
rdir = (struct p9_rdir *) fid->rdir;
err = mutex_lock_interruptible(&rdir->mutex);
@@ -146,7 +166,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
while (rdir->head < rdir->tail) {
p9stat_init(&st);
err = p9stat_read(rdir->buf + rdir->head,
- buflen - rdir->head, &st,
+ rdir->tail - rdir->head, &st,
fid->clnt->proto_version);
if (err) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -176,6 +196,88 @@ exit:
return err;
}
+/**
+ * v9fs_dir_readdir_dotl - read a directory
+ * @filp: opened file structure
+ * @dirent: buffer to fill dirent structures
+ * @filldir: function to populate dirent structures
+ *
+ */
+static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int over;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ struct p9_rdir *rdir;
+ struct p9_dirent curdirent;
+ u64 oldoffset = 0;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_READDIRHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
+ rdir = (struct p9_rdir *) fid->rdir;
+
+ err = mutex_lock_interruptible(&rdir->mutex);
+ if (err)
+ return err;
+
+ while (err == 0) {
+ if (rdir->tail == rdir->head) {
+ err = p9_client_readdir(fid, rdir->buf, buflen,
+ filp->f_pos);
+ if (err <= 0)
+ goto unlock_and_exit;
+
+ rdir->head = 0;
+ rdir->tail = err;
+ }
+
+ while (rdir->head < rdir->tail) {
+
+ err = p9dirent_read(rdir->buf + rdir->head,
+ buflen - rdir->head, &curdirent,
+ fid->clnt->proto_version);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+ err = -EIO;
+ goto unlock_and_exit;
+ }
+
+ /* d_off in dirent structure tracks the offset into
+ * the next dirent in the dir. However, filldir()
+ * expects offset into the current dirent. Hence
+ * while calling filldir send the offset from the
+ * previous dirent structure.
+ */
+ over = filldir(dirent, curdirent.d_name,
+ strlen(curdirent.d_name),
+ oldoffset, v9fs_qid2ino(&curdirent.qid),
+ curdirent.d_type);
+ oldoffset = curdirent.d_off;
+
+ if (over) {
+ err = 0;
+ goto unlock_and_exit;
+ }
+
+ filp->f_pos = curdirent.d_off;
+ rdir->head += err;
+ }
+ }
+
+unlock_and_exit:
+ mutex_unlock(&rdir->mutex);
+exit:
+ return err;
+}
+
/**
* v9fs_dir_release - close a directory
@@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = {
const struct file_operations v9fs_dir_operations_dotl = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
- .readdir = v9fs_dir_readdir,
+ .readdir = v9fs_dir_readdir_dotl,
.open = v9fs_file_open,
.release = v9fs_dir_release,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2bedc6c94fc..e97c92bd6f1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
struct p9_fid *fid;
int omode;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
v9ses = v9fs_inode2v9ses(inode);
- omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
+ if (v9fs_proto_dotl(v9ses))
+ omode = file->f_flags;
+ else
+ omode = v9fs_uflags2omode(file->f_flags,
+ v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
fid = v9fs_fid_clone(file->f_path.dentry);
@@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
p9_client_clunk(fid);
return err;
}
- if (omode & P9_OTRUNC) {
+ if (file->f_flags & O_TRUNC) {
i_size_write(inode, 0);
inode->i_blocks = 0;
}
- if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
+ if ((file->f_flags & O_APPEND) &&
+ (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
generic_file_llseek(file, 0, SEEK_END);
}
@@ -139,7 +144,7 @@ ssize_t
v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
u64 offset)
{
- int n, total;
+ int n, total, size;
struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
@@ -147,6 +152,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
n = 0;
total = 0;
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
do {
n = p9_client_read(fid, data, udata, offset, count);
if (n <= 0)
@@ -160,7 +166,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
offset += n;
count -= n;
total += n;
- } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+ } while (count > 0 && n == size);
if (n < 0)
total = n;
@@ -183,11 +189,13 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
{
int ret;
struct p9_fid *fid;
+ size_t size;
P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
fid = filp->private_data;
- if (count > (fid->clnt->msize - P9_IOHDRSZ))
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
+ if (count > size)
ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
else
ret = p9_client_read(fid, NULL, udata, *offset, count);
@@ -224,9 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
fid = filp->private_data;
clnt = fid->clnt;
- rsize = fid->iounit;
- if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
- rsize = clnt->msize - P9_IOHDRSZ;
+ rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
do {
if (count < rsize)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b5ee1..c7c23eab944 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -35,6 +35,7 @@
#include <linux/idr.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -42,6 +43,7 @@
#include "v9fs_vfs.h"
#include "fid.h"
#include "cache.h"
+#include "xattr.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -236,6 +238,41 @@ void v9fs_destroy_inode(struct inode *inode)
#endif
/**
+ * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
+ * new file system object. This checks the S_ISGID to determine the owning
+ * group of the new file system object.
+ */
+
+static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
+{
+ BUG_ON(dir_inode == NULL);
+
+ if (dir_inode->i_mode & S_ISGID) {
+ /* set_gid bit is set.*/
+ return dir_inode->i_gid;
+ }
+ return current_fsgid();
+}
+
+/**
+ * v9fs_dentry_from_dir_inode - helper function to get the dentry from
+ * dir inode.
+ *
+ */
+
+static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&dcache_lock);
+ /* Directory should have only one entry. */
+ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
* @mode: mode to setup inode with
@@ -267,7 +304,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFBLK:
case S_IFCHR:
case S_IFSOCK:
- if (!v9fs_proto_dotu(v9ses)) {
+ if (v9fs_proto_dotl(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
+ } else if (v9fs_proto_dotu(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations;
+ inode->i_fop = &v9fs_file_operations;
+ } else {
P9_DPRINTK(P9_DEBUG_ERROR,
"special files without extended mode\n");
err = -EINVAL;
@@ -387,8 +430,10 @@ error:
* @inode: inode to release
*
*/
-void v9fs_clear_inode(struct inode *inode)
+void v9fs_evict_inode(struct inode *inode)
{
+ truncate_inode_pages(inode->i_mapping, 0);
+ end_writeback(inode);
filemap_fdatawrite(inode->i_mapping);
#ifdef CONFIG_9P_FSCACHE
@@ -396,23 +441,14 @@ void v9fs_clear_inode(struct inode *inode)
#endif
}
-/**
- * v9fs_inode_from_fid - populate an inode by issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-
static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
int err, umode;
- struct inode *ret;
+ struct inode *ret = NULL;
struct p9_wstat *st;
- ret = NULL;
st = p9_client_stat(fid);
if (IS_ERR(st))
return ERR_CAST(st);
@@ -433,15 +469,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
#endif
p9stat_free(st);
kfree(st);
-
return ret;
-
error:
p9stat_free(st);
kfree(st);
return ERR_PTR(err);
}
+static struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct inode *ret = NULL;
+ int err;
+ struct p9_stat_dotl *st;
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ ret = v9fs_get_inode(sb, st->st_mode);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ goto error;
+ }
+
+ v9fs_stat2inode_dotl(st, ret);
+ ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
+ kfree(st);
+ return ret;
+error:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_dotl(v9ses, fid, sb);
+ else
+ return v9fs_inode(v9ses, fid, sb);
+}
+
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
@@ -563,6 +646,118 @@ error:
}
/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: directory inode that is being created
+ * @dentry: dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ int err = 0;
+ char *name = NULL;
+ gid_t gid;
+ int flags;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL;
+ struct p9_fid *dfid, *ofid;
+ struct file *filp;
+ struct p9_qid qid;
+ struct inode *inode;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ if (nd && nd->flags & LOOKUP_OPEN)
+ flags = nd->intent.open.flags - 1;
+ else
+ flags = O_RDWR;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+ "mode:0x%x\n", name, flags, mode);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ /* clone a fid to use for creation */
+ ofid = p9_client_walk(dfid, 0, NULL, 1);
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_open_dotl failed in creat %d\n",
+ err);
+ goto error;
+ }
+
+ /* No need to populate the inode if we are not opening the file AND
+ * not in cached mode.
+ */
+ if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
+ /* Not in cached mode. No need to populate inode with stat */
+ dentry->d_op = &v9fs_dentry_operations;
+ p9_client_clunk(ofid);
+ d_instantiate(dentry, NULL);
+ return 0;
+ }
+
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
+ /* if we are opening a file, assign the open fid to the file */
+ if (nd && nd->flags & LOOKUP_OPEN) {
+ filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ if (IS_ERR(filp)) {
+ p9_client_clunk(ofid);
+ return PTR_ERR(filp);
+ }
+ filp->private_data = ofid;
+ } else
+ p9_client_clunk(ofid);
+
+ return 0;
+
+error:
+ if (ofid)
+ p9_client_clunk(ofid);
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
* v9fs_vfs_create - VFS hook to create files
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
@@ -652,6 +847,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ gid_t gid;
+ char *name;
+ struct inode *inode;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ err = 0;
+ v9ses = v9fs_inode2v9ses(dir);
+
+ mode |= S_IFDIR;
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+ err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ }
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
/**
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
@@ -678,6 +950,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
+ /* We can walk d_parent because we hold the dir->i_mutex */
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid))
return ERR_CAST(dfid);
@@ -785,27 +1058,33 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto clunk_olddir;
}
+ down_write(&v9ses->rename_sem);
if (v9fs_proto_dotl(v9ses)) {
retval = p9_client_rename(oldfid, newdirfid,
(char *) new_dentry->d_name.name);
if (retval != -ENOSYS)
goto clunk_newdir;
}
+ if (old_dentry->d_parent != new_dentry->d_parent) {
+ /*
+ * 9P .u can only handle file rename in the same directory
+ */
- /* 9P can only handle file rename in the same directory */
- if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
P9_DPRINTK(P9_DEBUG_ERROR,
"old dir and new dir are different\n");
retval = -EXDEV;
goto clunk_newdir;
}
-
v9fs_blank_wstat(&wstat);
wstat.muid = v9ses->uname;
wstat.name = (char *) new_dentry->d_name.name;
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
+ if (!retval)
+ /* successful rename */
+ d_move(old_dentry, new_dentry);
+ up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
clunk_olddir:
@@ -853,6 +1132,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ err = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return simple_getattr(mnt, dentry, stat);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ /* Ask for all the fields in stat structure. Server will return
+ * whatever it supports
+ */
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, dentry->d_inode);
+ generic_fillattr(dentry->d_inode, stat);
+ /* Change block size to what the server returned */
+ stat->blksize = st->st_blksize;
+
+ kfree(st);
+ return 0;
+}
+
/**
* v9fs_vfs_setattr - set file metadata
* @dentry: file whose metadata to set
@@ -896,10 +1211,71 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
}
retval = p9_client_wstat(fid, &wstat);
- if (retval >= 0)
- retval = inode_setattr(dentry->d_inode, iattr);
+ if (retval < 0)
+ return retval;
+
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(dentry->d_inode)) {
+ retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+ if (retval)
+ return retval;
+ }
- return retval;
+ setattr_copy(dentry->d_inode, iattr);
+ mark_inode_dirty(dentry->d_inode);
+ return 0;
+}
+
+/**
+ * v9fs_vfs_setattr_dotl - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+ int retval;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_iattr_dotl p9attr;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ retval = inode_change_ok(dentry->d_inode, iattr);
+ if (retval)
+ return retval;
+
+ p9attr.valid = iattr->ia_valid;
+ p9attr.mode = iattr->ia_mode;
+ p9attr.uid = iattr->ia_uid;
+ p9attr.gid = iattr->ia_gid;
+ p9attr.size = iattr->ia_size;
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+
+ retval = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(dentry->d_inode)) {
+ retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+ if (retval)
+ return retval;
+ }
+
+ setattr_copy(dentry->d_inode, iattr);
+ mark_inode_dirty(dentry->d_inode);
+ return 0;
}
/**
@@ -980,6 +1356,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
}
/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+ if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode->i_uid = stat->st_uid;
+ inode->i_gid = stat->st_gid;
+ inode->i_nlink = stat->st_nlink;
+ inode->i_mode = stat->st_mode;
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+ i_size_write(inode, stat->st_size);
+ inode->i_blocks = stat->st_blocks;
+ } else {
+ if (stat->st_result_mask & P9_STATS_ATIME) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_MTIME) {
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_CTIME) {
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_UID)
+ inode->i_uid = stat->st_uid;
+ if (stat->st_result_mask & P9_STATS_GID)
+ inode->i_gid = stat->st_gid;
+ if (stat->st_result_mask & P9_STATS_NLINK)
+ inode->i_nlink = stat->st_nlink;
+ if (stat->st_result_mask & P9_STATS_MODE) {
+ inode->i_mode = stat->st_mode;
+ if ((S_ISBLK(inode->i_mode)) ||
+ (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ }
+ if (stat->st_result_mask & P9_STATS_RDEV)
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+ if (stat->st_result_mask & P9_STATS_SIZE)
+ i_size_write(inode, stat->st_size);
+ if (stat->st_result_mask & P9_STATS_BLOCKS)
+ inode->i_blocks = stat->st_blocks;
+ }
+ if (stat->st_result_mask & P9_STATS_GEN)
+ inode->i_generation = stat->st_gen;
+
+ /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+ * because the inode structure does not have fields for them.
+ */
+}
+
+/**
* v9fs_qid2ino - convert qid into inode number
* @qid: qid to hash
*
@@ -1022,7 +1469,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
if (IS_ERR(fid))
return PTR_ERR(fid);
- if (!v9fs_proto_dotu(v9ses))
+ if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
return -EBADF;
st = p9_client_stat(fid);
@@ -1128,6 +1575,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
}
/**
+ * v9fs_vfs_symlink_dotl - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See Also: 9P2000.L RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct inode *inode;
+ struct p9_qid qid;
+ char *name;
+ int err;
+ gid_t gid;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+ dir->i_ino, name, symname);
+ v9ses = v9fs_inode2v9ses(dir);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
+ goto error;
+ }
+
+ /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+ err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ goto error;
+ }
+
+ if (v9ses->cache) {
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /* Not in cached mode. No need to populate inode with stat */
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ return err;
+}
+
+/**
* v9fs_vfs_symlink - helper function to create symlinks
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
@@ -1186,6 +1726,76 @@ clunk_fid:
}
/**
+ * v9fs_vfs_link_dotl - create a hardlink for dotl
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+static int
+v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *dfid, *oldfid;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name,
+ dentry->d_name.name);
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid))
+ return PTR_ERR(dfid);
+
+ oldfid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ return err;
+ }
+
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, old_dentry->d_inode);
+
+ kfree(st);
+ } else {
+ /* Caching disabled. No need to get upto date stat info.
+ * This dentry will be released immediately. So, just i_count++
+ */
+ atomic_inc(&old_dentry->d_inode->i_count);
+ }
+
+ dentry->d_op = old_dentry->d_op;
+ d_instantiate(dentry, old_dentry->d_inode);
+
+ return err;
+}
+
+/**
* v9fs_vfs_mknod - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
@@ -1230,6 +1840,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+/**
+ * v9fs_vfs_mknod_dotl - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ dev_t rdev)
+{
+ int err;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ struct inode *inode;
+ gid_t gid;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS,
+ " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate inode with stat.
+ * socket syscall returns a fd, so we need instantiate
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1238,24 +1942,29 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.unlink = v9fs_vfs_unlink,
.mkdir = v9fs_vfs_mkdir,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
static const struct inode_operations v9fs_dir_inode_operations_dotl = {
- .create = v9fs_vfs_create,
+ .create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
- .symlink = v9fs_vfs_symlink,
- .link = v9fs_vfs_link,
+ .link = v9fs_vfs_link_dotl,
+ .symlink = v9fs_vfs_symlink_dotl,
.unlink = v9fs_vfs_unlink,
- .mkdir = v9fs_vfs_mkdir,
+ .mkdir = v9fs_vfs_mkdir_dotl,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+
};
static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1276,8 +1985,12 @@ static const struct inode_operations v9fs_file_inode_operations = {
};
static const struct inode_operations v9fs_file_inode_operations_dotl = {
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -1292,6 +2005,10 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
.put_link = v9fs_vfs_put_link,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d020436..f9311077de6 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,6 +45,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "xattr.h"
static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
@@ -77,9 +78,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
sb->s_blocksize = 1 << sb->s_blocksize_bits;
sb->s_magic = V9FS_MAGIC;
- if (v9fs_proto_dotl(v9ses))
+ if (v9fs_proto_dotl(v9ses)) {
sb->s_op = &v9fs_super_ops_dotl;
- else
+ sb->s_xattr = v9fs_xattr_handlers;
+ } else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
@@ -107,7 +109,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
struct inode *inode = NULL;
struct dentry *root = NULL;
struct v9fs_session_info *v9ses = NULL;
- struct p9_wstat *st = NULL;
int mode = S_IRWXUGO | S_ISVTX;
struct p9_fid *fid;
int retval = 0;
@@ -124,16 +125,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto close_session;
}
- st = p9_client_stat(fid);
- if (IS_ERR(st)) {
- retval = PTR_ERR(st);
- goto clunk_fid;
- }
-
sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
if (IS_ERR(sb)) {
retval = PTR_ERR(sb);
- goto free_stat;
+ goto clunk_fid;
}
v9fs_fill_super(sb, v9ses, flags, data);
@@ -151,22 +146,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
}
sb->s_root = root;
- root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, root->d_inode, sb);
+ if (v9fs_proto_dotl(v9ses)) {
+ struct p9_stat_dotl *st = NULL;
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ v9fs_stat2inode_dotl(st, root->d_inode);
+ kfree(st);
+ } else {
+ struct p9_wstat *st = NULL;
+ st = p9_client_stat(fid);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, root->d_inode, sb);
+
+ p9stat_free(st);
+ kfree(st);
+ }
v9fs_fid_add(root, fid);
- p9stat_free(st);
- kfree(st);
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
simple_set_mnt(mnt, sb);
return 0;
-free_stat:
- p9stat_free(st);
- kfree(st);
-
clunk_fid:
p9_client_clunk(fid);
@@ -176,8 +187,6 @@ close_session:
return retval;
release_sb:
- p9stat_free(st);
- kfree(st);
deactivate_locked_super(sb);
return retval;
}
@@ -257,7 +266,7 @@ static const struct super_operations v9fs_super_ops = {
.destroy_inode = v9fs_destroy_inode,
#endif
.statfs = simple_statfs,
- .clear_inode = v9fs_clear_inode,
+ .evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
};
@@ -268,7 +277,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
.destroy_inode = v9fs_destroy_inode,
#endif
.statfs = v9fs_statfs,
- .clear_inode = v9fs_clear_inode,
+ .evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
};
@@ -278,4 +287,5 @@ struct file_system_type v9fs_fs_type = {
.get_sb = v9fs_get_sb,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
new file mode 100644
index 00000000000..f88e5c2dc87
--- /dev/null
+++ b/fs/9p/xattr.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+
+#include "fid.h"
+#include "xattr.h"
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ ssize_t retval;
+ int msize, read_count;
+ u64 offset = 0, attr_size;
+ struct p9_fid *fid, *attr_fid;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+ __func__, name, buffer_size);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
+ if (IS_ERR(attr_fid)) {
+ retval = PTR_ERR(attr_fid);
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_attrwalk failed %zd\n", retval);
+ attr_fid = NULL;
+ goto error;
+ }
+ if (!buffer_size) {
+ /* request to get the attr_size */
+ retval = attr_size;
+ goto error;
+ }
+ if (attr_size > buffer_size) {
+ retval = -ERANGE;
+ goto error;
+ }
+ msize = attr_fid->clnt->msize;
+ while (attr_size) {
+ if (attr_size > (msize - P9_IOHDRSZ))
+ read_count = msize - P9_IOHDRSZ;
+ else
+ read_count = attr_size;
+ read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
+ NULL, offset, read_count);
+ if (read_count < 0) {
+ /* error in xattr read */
+ retval = read_count;
+ goto error;
+ }
+ offset += read_count;
+ attr_size -= read_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (attr_fid)
+ p9_client_clunk(attr_fid);
+ return retval;
+
+}
+
+/*
+ * v9fs_xattr_set()
+ *
+ * Create, replace or remove an extended attribute for this inode. Buffer
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int v9fs_xattr_set(struct dentry *dentry, const char *name,
+ const void *value, size_t value_len, int flags)
+{
+ u64 offset = 0;
+ int retval, msize, write_count;
+ struct p9_fid *fid = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n",
+ __func__, name, value_len, flags);
+
+ fid = v9fs_fid_clone(dentry);
+ if (IS_ERR(fid)) {
+ retval = PTR_ERR(fid);
+ fid = NULL;
+ goto error;
+ }
+ /*
+ * On success fid points to xattr
+ */
+ retval = p9_client_xattrcreate(fid, name, value_len, flags);
+ if (retval < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_xattrcreate failed %d\n", retval);
+ goto error;
+ }
+ msize = fid->clnt->msize;;
+ while (value_len) {
+ if (value_len > (msize - P9_IOHDRSZ))
+ write_count = msize - P9_IOHDRSZ;
+ else
+ write_count = value_len;
+ write_count = p9_client_write(fid, ((char *)value)+offset,
+ NULL, offset, write_count);
+ if (write_count < 0) {
+ /* error in xattr write */
+ retval = write_count;
+ goto error;
+ }
+ offset += write_count;
+ value_len -= write_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (fid)
+ retval = p9_client_clunk(fid);
+ return retval;
+}
+
+ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
+}
+
+const struct xattr_handler *v9fs_xattr_handlers[] = {
+ &v9fs_xattr_user_handler,
+ NULL
+};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
new file mode 100644
index 00000000000..9ddf672ae5c
--- /dev/null
+++ b/fs/9p/xattr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_XATTR_H
+#define FS_9P_XATTR_H
+
+#include <linux/xattr.h>
+
+extern const struct xattr_handler *v9fs_xattr_handlers[];
+extern struct xattr_handler v9fs_xattr_user_handler;
+
+extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
+ void *, size_t);
+extern int v9fs_xattr_set(struct dentry *, const char *,
+ const void *, size_t, int);
+extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
+#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c
new file mode 100644
index 00000000000..d0b701b7208
--- /dev/null
+++ b/fs/9p/xattr_user.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "xattr.h"
+
+static int v9fs_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name+prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_get(dentry, full_name, buffer, size);
+ kfree(full_name);
+ return retval;
+}
+
+static int v9fs_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name + prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
+ kfree(full_name);
+ return retval;
+}
+
+struct xattr_handler v9fs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = v9fs_xattr_user_get,
+ .set = v9fs_xattr_user_set,
+};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f85b594761..3d185308ec8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -64,7 +64,7 @@ source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
config CUSE
- tristate "Character device in Userpace support"
+ tristate "Character device in Userspace support"
depends on FUSE_FS
help
This FUSE extension allows character devices to be
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6f850b06ab6..65794b8fe79 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
*pagep = NULL;
- return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
adfs_get_block,
&ADFS_I(mapping->host)->mmu_private);
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
}
static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -324,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
/* XXX: this is missing some actual on-disk truncation.. */
if (ia_valid & ATTR_SIZE)
- error = simple_setsize(inode, attr->ia_size);
-
- if (error)
- goto out;
+ truncate_setsize(inode, attr->ia_size);
if (ia_valid & ATTR_MTIME) {
inode->i_mtime = attr->ia_mtime;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index f05b6155ccc..a8cbdeb3402 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -171,8 +171,7 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
extern unsigned long affs_parent_ino(struct inode *dir);
extern struct inode *affs_new_inode(struct inode *dir);
extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
-extern void affs_delete_inode(struct inode *inode);
-extern void affs_clear_inode(struct inode *inode);
+extern void affs_evict_inode(struct inode *inode);
extern struct inode *affs_iget(struct super_block *sb,
unsigned long ino);
extern int affs_write_inode(struct inode *inode,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 322710c3eed..c4a9875bd1a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
*pagep = NULL;
- return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
affs_get_block,
&AFFS_I(mapping->host)->mmu_private);
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
}
static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index f4b2a4ee4f9..3a0fdec175b 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -235,31 +235,36 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
goto out;
}
- error = inode_setattr(inode, attr);
- if (!error && (attr->ia_valid & ATTR_MODE))
+ if ((attr->ia_valid & ATTR_SIZE) &&
+ attr->ia_size != i_size_read(inode)) {
+ error = vmtruncate(inode, attr->ia_size);
+ if (error)
+ return error;
+ }
+
+ setattr_copy(inode, attr);
+ mark_inode_dirty(inode);
+
+ if (attr->ia_valid & ATTR_MODE)
mode_to_prot(inode);
out:
return error;
}
void
-affs_delete_inode(struct inode *inode)
-{
- pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
- truncate_inode_pages(&inode->i_data, 0);
- inode->i_size = 0;
- affs_truncate(inode);
- clear_inode(inode);
- affs_free_block(inode->i_sb, inode->i_ino);
-}
-
-void
-affs_clear_inode(struct inode *inode)
+affs_evict_inode(struct inode *inode)
{
unsigned long cache_page;
+ pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ truncate_inode_pages(&inode->i_data, 0);
- pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ if (!inode->i_nlink) {
+ inode->i_size = 0;
+ affs_truncate(inode);
+ }
+ invalidate_inode_buffers(inode);
+ end_writeback(inode);
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
if (cache_page) {
@@ -271,6 +276,9 @@ affs_clear_inode(struct inode *inode)
affs_brelse(AFFS_I(inode)->i_ext_bh);
AFFS_I(inode)->i_ext_last = ~1;
AFFS_I(inode)->i_ext_bh = NULL;
+
+ if (!inode->i_nlink)
+ affs_free_block(inode->i_sb, inode->i_ino);
}
struct inode *
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 16a3e4765f6..33c4e7eef47 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int affs_remount (struct super_block *sb, int *flags, char *data);
static void
-affs_commit_super(struct super_block *sb, int clean)
+affs_commit_super(struct super_block *sb, int wait, int clean)
{
struct affs_sb_info *sbi = AFFS_SB(sb);
struct buffer_head *bh = sbi->s_root_bh;
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean)
secs_to_datestamp(get_seconds(), &tail->disk_change);
affs_fix_checksum(sb, bh);
mark_buffer_dirty(bh);
+ if (wait)
+ sync_dirty_buffer(bh);
}
static void
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb)
lock_kernel();
- if (!(sb->s_flags & MS_RDONLY))
- affs_commit_super(sb, 1);
+ if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
+ affs_commit_super(sb, 1, 1);
kfree(sbi->s_prefix);
affs_free_bitmap(sb);
@@ -61,27 +63,20 @@ affs_put_super(struct super_block *sb)
static void
affs_write_super(struct super_block *sb)
{
- int clean = 2;
-
lock_super(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
- // if (sbi->s_bitmap[i].bm_bh) {
- // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
- // clean = 0;
- affs_commit_super(sb, clean);
- sb->s_dirt = !clean; /* redo until bitmap synced */
- } else
- sb->s_dirt = 0;
+ if (!(sb->s_flags & MS_RDONLY))
+ affs_commit_super(sb, 1, 2);
+ sb->s_dirt = 0;
unlock_super(sb);
- pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
+ pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
}
static int
affs_sync_fs(struct super_block *sb, int wait)
{
lock_super(sb);
- affs_commit_super(sb, 2);
+ affs_commit_super(sb, wait, 2);
sb->s_dirt = 0;
unlock_super(sb);
return 0;
@@ -140,8 +135,7 @@ static const struct super_operations affs_sops = {
.alloc_inode = affs_alloc_inode,
.destroy_inode = affs_destroy_inode,
.write_inode = affs_write_inode,
- .delete_inode = affs_delete_inode,
- .clear_inode = affs_clear_inode,
+ .evict_inode = affs_evict_inode,
.put_super = affs_put_super,
.write_super = affs_write_super,
.sync_fs = affs_sync_fs,
@@ -554,9 +548,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
if (*flags & MS_RDONLY) {
- sb->s_dirt = 1;
- while (sb->s_dirt)
- affs_write_super(sb);
+ affs_write_super(sb);
affs_free_bitmap(sb);
} else
res = affs_init_bitmap(sb, flags);
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 5c4e61d3c77..8f975f25b48 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -2,6 +2,7 @@ config AFS_FS
tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select AF_RXRPC
+ select DNS_RESOLVER
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index e19c13f059e..0d5eeadf612 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/key.h>
#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
#include <linux/sched.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
@@ -30,21 +31,24 @@ static struct afs_cell *afs_cell_root;
* allocate a cell record and fill in its name, VL server address list and
* allocate an anonymous key
*/
-static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
+static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen,
+ char *vllist)
{
struct afs_cell *cell;
struct key *key;
- size_t namelen;
char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
+ char *dvllist = NULL, *_vllist = NULL;
+ char delimiter = ':';
int ret;
- _enter("%s,%s", name, vllist);
+ _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist);
BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
- namelen = strlen(name);
- if (namelen > AFS_MAXCELLNAME)
+ if (namelen > AFS_MAXCELLNAME) {
+ _leave(" = -ENAMETOOLONG");
return ERR_PTR(-ENAMETOOLONG);
+ }
/* allocate and initialise a cell record */
cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
@@ -64,15 +68,35 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
INIT_LIST_HEAD(&cell->vl_list);
spin_lock_init(&cell->vl_lock);
+ /* if the ip address is invalid, try dns query */
+ if (!vllist || strlen(vllist) < 7) {
+ ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
+ if (ret < 0) {
+ if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY)
+ /* translate these errors into something
+ * userspace might understand */
+ ret = -EDESTADDRREQ;
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+ }
+ _vllist = dvllist;
+
+ /* change the delimiter for user-space reply */
+ delimiter = ',';
+
+ } else {
+ _vllist = vllist;
+ }
+
/* fill in the VL server list from the rest of the string */
do {
unsigned a, b, c, d;
- next = strchr(vllist, ':');
+ next = strchr(_vllist, delimiter);
if (next)
*next++ = 0;
- if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
+ if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
goto bad_address;
if (a > 255 || b > 255 || c > 255 || d > 255)
@@ -81,7 +105,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
cell->vl_addrs[cell->vl_naddrs++].s_addr =
htonl((a << 24) | (b << 16) | (c << 8) | d);
- } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next));
+ } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
/* create a key to represent an anonymous user */
memcpy(keyname, "afs@", 4);
@@ -110,32 +134,36 @@ bad_address:
ret = -EINVAL;
error:
key_put(cell->anonymous_key);
+ kfree(dvllist);
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
}
/*
- * create a cell record
- * - "name" is the name of the cell
- * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
+ * afs_cell_crate() - create a cell record
+ * @name: is the name of the cell.
+ * @namsesz: is the strlen of the cell name.
+ * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format.
+ * @retref: is T to return the cell reference when the cell exists.
*/
-struct afs_cell *afs_cell_create(const char *name, char *vllist)
+struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
+ char *vllist, bool retref)
{
struct afs_cell *cell;
int ret;
- _enter("%s,%s", name, vllist);
+ _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist);
down_write(&afs_cells_sem);
read_lock(&afs_cells_lock);
list_for_each_entry(cell, &afs_cells, link) {
- if (strcasecmp(cell->name, name) == 0)
+ if (strncasecmp(cell->name, name, namesz) == 0)
goto duplicate_name;
}
read_unlock(&afs_cells_lock);
- cell = afs_cell_alloc(name, vllist);
+ cell = afs_cell_alloc(name, namesz, vllist);
if (IS_ERR(cell)) {
_leave(" = %ld", PTR_ERR(cell));
up_write(&afs_cells_sem);
@@ -175,8 +203,18 @@ error:
return ERR_PTR(ret);
duplicate_name:
+ if (retref && !IS_ERR(cell))
+ afs_get_cell(cell);
+
read_unlock(&afs_cells_lock);
up_write(&afs_cells_sem);
+
+ if (retref) {
+ _leave(" = %p", cell);
+ return cell;
+ }
+
+ _leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
}
@@ -201,15 +239,13 @@ int afs_cell_init(char *rootcell)
}
cp = strchr(rootcell, ':');
- if (!cp) {
- printk(KERN_ERR "kAFS: no VL server IP addresses specified\n");
- _leave(" = -EINVAL");
- return -EINVAL;
- }
+ if (!cp)
+ _debug("kAFS: no VL server IP addresses specified");
+ else
+ *cp++ = 0;
/* allocate a cell record for the root cell */
- *cp++ = 0;
- new_root = afs_cell_create(rootcell, cp);
+ new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false);
if (IS_ERR(new_root)) {
_leave(" = %ld", PTR_ERR(new_root));
return PTR_ERR(new_root);
@@ -229,11 +265,12 @@ int afs_cell_init(char *rootcell)
/*
* lookup a cell record
*/
-struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
+struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
+ bool dns_cell)
{
struct afs_cell *cell;
- _enter("\"%*.*s\",", namesz, namesz, name ? name : "");
+ _enter("\"%*.*s\",", namesz, namesz, name ?: "");
down_read(&afs_cells_sem);
read_lock(&afs_cells_lock);
@@ -247,6 +284,8 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
}
}
cell = ERR_PTR(-ENOENT);
+ if (dns_cell)
+ goto create_cell;
found:
;
} else {
@@ -269,6 +308,15 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
up_read(&afs_cells_sem);
_leave(" = %p", cell);
return cell;
+
+create_cell:
+ read_unlock(&afs_cells_lock);
+ up_read(&afs_cells_sem);
+
+ cell = afs_cell_create(name, namesz, NULL, true);
+
+ _leave(" = %p", cell);
+ return cell;
}
#if 0
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b42d5cc1d6d..0d38c09bd55 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -477,6 +477,40 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
}
/*
+ * Try to auto mount the mountpoint with pseudo directory, if the autocell
+ * operation is setted.
+ */
+static struct inode *afs_try_auto_mntpt(
+ int ret, struct dentry *dentry, struct inode *dir, struct key *key,
+ struct afs_fid *fid)
+{
+ const char *devname = dentry->d_name.name;
+ struct afs_vnode *vnode = AFS_FS_I(dir);
+ struct inode *inode;
+
+ _enter("%d, %p{%s}, {%x:%u}, %p",
+ ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key);
+
+ if (ret != -ENOENT ||
+ !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+ goto out;
+
+ inode = afs_iget_autocell(dir, devname, strlen(devname), key);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto out;
+ }
+
+ *fid = AFS_FS_I(inode)->fid;
+ _leave("= %p", inode);
+ return inode;
+
+out:
+ _leave("= %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
* look up an entry in a directory
*/
static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
@@ -520,6 +554,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
ret = afs_do_lookup(dir, dentry, &fid, key);
if (ret < 0) {
+ inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid);
+ if (!IS_ERR(inode)) {
+ key_put(key);
+ goto success;
+ }
+
+ ret = PTR_ERR(inode);
key_put(key);
if (ret == -ENOENT) {
d_add(dentry, NULL);
@@ -539,6 +580,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_CAST(inode);
}
+success:
dentry->d_op = &afs_fs_dentry_operations;
d_add(dentry, inode);
@@ -696,8 +738,9 @@ static int afs_d_delete(struct dentry *dentry)
goto zap;
if (dentry->d_inode &&
- test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags))
- goto zap;
+ (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags) ||
+ test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags)))
+ goto zap;
_leave(" = 0 [keep]");
return 0;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d00b312e311..0747339011c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -19,6 +19,8 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
#include "internal.h"
struct afs_iget_data {
@@ -102,6 +104,16 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
}
/*
+ * iget5() comparator for inode created by autocell operations
+ *
+ * These pseudo inodes don't match anything.
+ */
+static int afs_iget5_autocell_test(struct inode *inode, void *opaque)
+{
+ return 0;
+}
+
+/*
* iget5() inode initialiser
*/
static int afs_iget5_set(struct inode *inode, void *opaque)
@@ -118,6 +130,67 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
}
/*
+ * inode retrieval for autocell
+ */
+struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
+ int namesz, struct key *key)
+{
+ struct afs_iget_data data;
+ struct afs_super_info *as;
+ struct afs_vnode *vnode;
+ struct super_block *sb;
+ struct inode *inode;
+ static atomic_t afs_autocell_ino;
+
+ _enter("{%x:%u},%*.*s,",
+ AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode,
+ namesz, namesz, dev_name ?: "");
+
+ sb = dir->i_sb;
+ as = sb->s_fs_info;
+ data.volume = as->volume;
+ data.fid.vid = as->volume->vid;
+ data.fid.unique = 0;
+ data.fid.vnode = 0;
+
+ inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino),
+ afs_iget5_autocell_test, afs_iget5_set,
+ &data);
+ if (!inode) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
+ inode, inode->i_ino, data.fid.vid, data.fid.vnode,
+ data.fid.unique);
+
+ vnode = AFS_FS_I(inode);
+
+ /* there shouldn't be an existing inode */
+ BUG_ON(!(inode->i_state & I_NEW));
+
+ inode->i_size = 0;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ inode->i_op = &afs_autocell_inode_operations;
+ inode->i_nlink = 2;
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_ctime.tv_sec = get_seconds();
+ inode->i_ctime.tv_nsec = 0;
+ inode->i_atime = inode->i_mtime = inode->i_ctime;
+ inode->i_blocks = 0;
+ inode->i_version = 0;
+ inode->i_generation = 0;
+
+ set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+ inode->i_flags |= S_NOATIME;
+ unlock_new_inode(inode);
+ _leave(" = %p", inode);
+ return inode;
+}
+
+/*
* inode retrieval
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
@@ -314,9 +387,22 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
}
/*
+ * discard an AFS inode
+ */
+int afs_drop_inode(struct inode *inode)
+{
+ _enter("");
+
+ if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags))
+ return generic_delete_inode(inode);
+ else
+ return generic_drop_inode(inode);
+}
+
+/*
* clear an AFS inode
*/
-void afs_clear_inode(struct inode *inode)
+void afs_evict_inode(struct inode *inode)
{
struct afs_permits *permits;
struct afs_vnode *vnode;
@@ -335,6 +421,9 @@ void afs_clear_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
+
afs_give_up_callback(vnode);
if (vnode->server) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5f679b77ce2..cca8eef736f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -42,6 +42,7 @@ typedef enum {
struct afs_mount_params {
bool rwpath; /* T if the parent should be considered R/W */
bool force; /* T to force cell type */
+ bool autocell; /* T if set auto mount operation */
afs_voltype_t type; /* type of volume requested */
int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */
@@ -358,6 +359,8 @@ struct afs_vnode {
#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */
#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
+#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
+#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
long acl_order; /* ACL check count (callback break count) */
@@ -468,8 +471,8 @@ extern struct list_head afs_proc_cells;
#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
extern int afs_cell_init(char *);
-extern struct afs_cell *afs_cell_create(const char *, char *);
-extern struct afs_cell *afs_cell_lookup(const char *, unsigned);
+extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool);
+extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool);
extern struct afs_cell *afs_grab_cell(struct afs_cell *);
extern void afs_put_cell(struct afs_cell *);
extern void afs_cell_purge(void);
@@ -558,6 +561,8 @@ extern int afs_fs_release_lock(struct afs_server *, struct key *,
/*
* inode.c
*/
+extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
+ struct key *);
extern struct inode *afs_iget(struct super_block *, struct key *,
struct afs_fid *, struct afs_file_status *,
struct afs_callback *);
@@ -565,7 +570,8 @@ extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int afs_setattr(struct dentry *, struct iattr *);
-extern void afs_clear_inode(struct inode *);
+extern void afs_evict_inode(struct inode *);
+extern int afs_drop_inode(struct inode *);
/*
* main.c
@@ -581,6 +587,7 @@ extern int afs_abort_to_error(u32);
* mntpt.c
*/
extern const struct inode_operations afs_mntpt_inode_operations;
+extern const struct inode_operations afs_autocell_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
@@ -752,12 +759,6 @@ extern unsigned afs_debug;
#define dbgprintk(FMT,...) \
printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
-/* make sure we maintain the format strings, even when debugging is disabled */
-static inline __attribute__((format(printf,1,2)))
-void _dbprintk(const char *fmt, ...)
-{
-}
-
#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
@@ -792,9 +793,9 @@ do { \
} while (0)
#else
-#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
-#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
-#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
+#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
#endif
/*
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 66d54d348c5..cfd1cbe25b2 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -111,6 +111,8 @@ static int __init afs_init(void)
/* initialise the callback update process */
ret = afs_callback_update_init();
+ if (ret < 0)
+ goto error_callback_update_init;
/* create the RxRPC transport */
ret = afs_open_socket();
@@ -127,15 +129,16 @@ static int __init afs_init(void)
error_fs:
afs_close_socket();
error_open_socket:
+ afs_callback_update_kill();
+error_callback_update_init:
+ afs_vlocation_purge();
error_vl_update_init:
+ afs_cell_purge();
error_cell_init:
#ifdef CONFIG_AFS_FSCACHE
fscache_unregister_netfs(&afs_cache_netfs);
error_cache:
#endif
- afs_callback_update_kill();
- afs_vlocation_purge();
- afs_cell_purge();
afs_proc_cleanup();
rcu_barrier();
printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index a9e23039ea3..6d552686c49 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -38,6 +38,11 @@ const struct inode_operations afs_mntpt_inode_operations = {
.getattr = afs_getattr,
};
+const struct inode_operations afs_autocell_inode_operations = {
+ .follow_link = afs_mntpt_follow_link,
+ .getattr = afs_getattr,
+};
+
static LIST_HEAD(afs_vfsmounts);
static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
@@ -136,20 +141,16 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
{
struct afs_super_info *super;
struct vfsmount *mnt;
+ struct afs_vnode *vnode;
struct page *page;
- size_t size;
- char *buf, *devname, *options;
+ char *devname, *options;
+ bool rwpath = false;
int ret;
_enter("{%s}", mntpt->d_name.name);
BUG_ON(!mntpt->d_inode);
- ret = -EINVAL;
- size = mntpt->d_inode->i_size;
- if (size > PAGE_SIZE - 1)
- goto error_no_devname;
-
ret = -ENOMEM;
devname = (char *) get_zeroed_page(GFP_KERNEL);
if (!devname)
@@ -159,28 +160,59 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
if (!options)
goto error_no_options;
- /* read the contents of the AFS special symlink */
- page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto error_no_page;
+ vnode = AFS_FS_I(mntpt->d_inode);
+ if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
+ /* if the directory is a pseudo directory, use the d_name */
+ static const char afs_root_cell[] = ":root.cell.";
+ unsigned size = mntpt->d_name.len;
+
+ ret = -ENOENT;
+ if (size < 2 || size > AFS_MAXCELLNAME)
+ goto error_no_page;
+
+ if (mntpt->d_name.name[0] == '.') {
+ devname[0] = '#';
+ memcpy(devname + 1, mntpt->d_name.name, size - 1);
+ memcpy(devname + size, afs_root_cell,
+ sizeof(afs_root_cell));
+ rwpath = true;
+ } else {
+ devname[0] = '%';
+ memcpy(devname + 1, mntpt->d_name.name, size);
+ memcpy(devname + size + 1, afs_root_cell,
+ sizeof(afs_root_cell));
+ }
+ } else {
+ /* read the contents of the AFS special symlink */
+ loff_t size = i_size_read(mntpt->d_inode);
+ char *buf;
+
+ ret = -EINVAL;
+ if (size > PAGE_SIZE - 1)
+ goto error_no_page;
+
+ page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto error_no_page;
+ }
+
+ ret = -EIO;
+ if (PageError(page))
+ goto error;
+
+ buf = kmap_atomic(page, KM_USER0);
+ memcpy(devname, buf, size);
+ kunmap_atomic(buf, KM_USER0);
+ page_cache_release(page);
+ page = NULL;
}
- ret = -EIO;
- if (PageError(page))
- goto error;
-
- buf = kmap_atomic(page, KM_USER0);
- memcpy(devname, buf, size);
- kunmap_atomic(buf, KM_USER0);
- page_cache_release(page);
- page = NULL;
-
/* work out what options we want */
super = AFS_FS_S(mntpt->d_sb);
memcpy(options, "cell=", 5);
strcpy(options + 5, super->volume->cell->name);
- if (super->volume->type == AFSVL_RWVOL)
+ if (super->volume->type == AFSVL_RWVOL || rwpath)
strcat(options, ",rwpath");
/* try and do the mount */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 852739d262a..096b23f821a 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -294,7 +294,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
if (strcmp(kbuf, "add") == 0) {
struct afs_cell *cell;
- cell = afs_cell_create(name, args);
+ cell = afs_cell_create(name, strlen(name), args, false);
if (IS_ERR(cell)) {
ret = PTR_ERR(cell);
goto done;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 67cf810e0fd..654d8fdbf01 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -100,6 +100,7 @@ int afs_open_socket(void)
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret < 0) {
sock_release(socket);
+ destroy_workqueue(afs_async_calls);
_leave(" = %d [bind]", ret);
return ret;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f4909951667..9fdc7fe3a7b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
memcpy(&server->addr, addr, sizeof(struct in_addr));
server->addr.s_addr = addr->s_addr;
+ _leave(" = %p{%d}", server, atomic_read(&server->usage));
+ } else {
+ _leave(" = NULL [nomem]");
}
-
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e932e5a3a0c..77e1e5a6115 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -16,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mount.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
@@ -48,8 +49,9 @@ struct file_system_type afs_fs_type = {
static const struct super_operations afs_super_ops = {
.statfs = afs_statfs,
.alloc_inode = afs_alloc_inode,
+ .drop_inode = afs_drop_inode,
.destroy_inode = afs_destroy_inode,
- .clear_inode = afs_clear_inode,
+ .evict_inode = afs_evict_inode,
.put_super = afs_put_super,
.show_options = generic_show_options,
};
@@ -62,12 +64,14 @@ enum {
afs_opt_cell,
afs_opt_rwpath,
afs_opt_vol,
+ afs_opt_autocell,
};
static const match_table_t afs_options_list = {
{ afs_opt_cell, "cell=%s" },
{ afs_opt_rwpath, "rwpath" },
{ afs_opt_vol, "vol=%s" },
+ { afs_opt_autocell, "autocell" },
{ afs_no_opt, NULL },
};
@@ -151,7 +155,8 @@ static int afs_parse_options(struct afs_mount_params *params,
switch (token) {
case afs_opt_cell:
cell = afs_cell_lookup(args[0].from,
- args[0].to - args[0].from);
+ args[0].to - args[0].from,
+ false);
if (IS_ERR(cell))
return PTR_ERR(cell);
afs_put_cell(params->cell);
@@ -166,6 +171,10 @@ static int afs_parse_options(struct afs_mount_params *params,
*devname = args[0].from;
break;
+ case afs_opt_autocell:
+ params->autocell = 1;
+ break;
+
default:
printk(KERN_ERR "kAFS:"
" Unknown or invalid mount option: '%s'\n", p);
@@ -252,10 +261,10 @@ static int afs_parse_device_name(struct afs_mount_params *params,
/* lookup the cell record */
if (cellname || !params->cell) {
- cell = afs_cell_lookup(cellname, cellnamesz);
+ cell = afs_cell_lookup(cellname, cellnamesz, true);
if (IS_ERR(cell)) {
- printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n",
- cellname ?: "");
+ printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
+ cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
afs_put_cell(params->cell);
@@ -321,6 +330,9 @@ static int afs_fill_super(struct super_block *sb, void *data)
if (IS_ERR(inode))
goto error_inode;
+ if (params->autocell)
+ set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
+
ret = -ENOMEM;
root = d_alloc_root(inode);
if (!root)
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d..722743b152d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/aio.c b/fs/aio.c
index 1ccf25cef1f..3006b5bc33d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1277,7 +1277,7 @@ out:
/* sys_io_destroy:
* Destroy the aio_context specified. May cancel any outstanding
* AIOs and block on completion. Will fail with -ENOSYS if not
- * implemented. May fail with -EFAULT if the context pointed to
+ * implemented. May fail with -EINVAL if the context pointed to
* is invalid.
*/
SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
@@ -1795,15 +1795,16 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
/* io_getevents:
* Attempts to read at least min_nr events and up to nr events from
- * the completion queue for the aio_context specified by ctx_id. May
- * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range,
- * if nr is out of range, if when is out of range. May fail with
- * -EFAULT if any of the memory specified to is invalid. May return
- * 0 or < min_nr if no events are available and the timeout specified
- * by when has elapsed, where when == NULL specifies an infinite
- * timeout. Note that the timeout pointed to by when is relative and
- * will be updated if not NULL and the operation blocks. Will fail
- * with -ENOSYS if not implemented.
+ * the completion queue for the aio_context specified by ctx_id. If
+ * it succeeds, the number of read events is returned. May fail with
+ * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is
+ * out of range, if timeout is out of range. May fail with -EFAULT
+ * if any of the memory specified is invalid. May return 0 or
+ * < min_nr if the timeout specified by timeout has elapsed
+ * before sufficient events are available, where timeout == NULL
+ * specifies an infinite timeout. Note that the timeout pointed to by
+ * timeout is relative and will be updated if not NULL and the
+ * operation blocks. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
diff --git a/fs/attr.c b/fs/attr.c
index b4fa3b0aa59..7ca41811afa 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,35 +14,53 @@
#include <linux/fcntl.h>
#include <linux/security.h>
-/* Taken over from the old code... */
-
-/* POSIX UID/GID verification for setting inode attributes. */
+/**
+ * inode_change_ok - check if attribute changes to an inode are allowed
+ * @inode: inode to check
+ * @attr: attributes to change
+ *
+ * Check if we are allowed to change the attributes contained in @attr
+ * in the given inode. This includes the normal unix access permission
+ * checks, as well as checks for rlimits and others.
+ *
+ * Should be called as the first thing in ->setattr implementations,
+ * possibly after taking additional locks.
+ */
int inode_change_ok(const struct inode *inode, struct iattr *attr)
{
- int retval = -EPERM;
unsigned int ia_valid = attr->ia_valid;
+ /*
+ * First check size constraints. These can't be overriden using
+ * ATTR_FORCE.
+ */
+ if (ia_valid & ATTR_SIZE) {
+ int error = inode_newsize_ok(inode, attr->ia_size);
+ if (error)
+ return error;
+ }
+
/* If force is set do it anyway. */
if (ia_valid & ATTR_FORCE)
- goto fine;
+ return 0;
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
(current_fsuid() != inode->i_uid ||
attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
- goto error;
+ return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(current_fsuid() != inode->i_uid ||
(!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
!capable(CAP_CHOWN))
- goto error;
+ return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
if (!is_owner_or_cap(inode))
- goto error;
+ return -EPERM;
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
inode->i_gid) && !capable(CAP_FSETID))
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Check for setting the inode time. */
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
if (!is_owner_or_cap(inode))
- goto error;
+ return -EPERM;
}
-fine:
- retval = 0;
-error:
- return retval;
+
+ return 0;
}
EXPORT_SYMBOL(inode_change_ok);
@@ -105,21 +121,21 @@ out_big:
EXPORT_SYMBOL(inode_newsize_ok);
/**
- * generic_setattr - copy simple metadata updates into the generic inode
+ * setattr_copy - copy simple metadata updates into the generic inode
* @inode: the inode to be updated
* @attr: the new attributes
*
- * generic_setattr must be called with i_mutex held.
+ * setattr_copy must be called with i_mutex held.
*
- * generic_setattr updates the inode's metadata with that specified
+ * setattr_copy updates the inode's metadata with that specified
* in attr. Noticably missing is inode size update, which is more complex
- * as it requires pagecache updates. See simple_setsize.
+ * as it requires pagecache updates.
*
* The inode is not marked as dirty after this operation. The rationale is
* that for "simple" filesystems, the struct inode is the inode storage.
* The caller is free to mark the inode dirty afterwards if needed.
*/
-void generic_setattr(struct inode *inode, const struct iattr *attr)
+void setattr_copy(struct inode *inode, const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
@@ -144,32 +160,7 @@ void generic_setattr(struct inode *inode, const struct iattr *attr)
inode->i_mode = mode;
}
}
-EXPORT_SYMBOL(generic_setattr);
-
-/*
- * note this function is deprecated, the new truncate sequence should be
- * used instead -- see eg. simple_setsize, generic_setattr.
- */
-int inode_setattr(struct inode *inode, const struct iattr *attr)
-{
- unsigned int ia_valid = attr->ia_valid;
-
- if (ia_valid & ATTR_SIZE &&
- attr->ia_size != i_size_read(inode)) {
- int error;
-
- error = vmtruncate(inode, attr->ia_size);
- if (error)
- return error;
- }
-
- generic_setattr(inode, attr);
-
- mark_inode_dirty(inode);
-
- return 0;
-}
-EXPORT_SYMBOL(inode_setattr);
+EXPORT_SYMBOL(setattr_copy);
int notify_change(struct dentry * dentry, struct iattr * attr)
{
@@ -237,13 +228,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
if (ia_valid & ATTR_SIZE)
down_write(&dentry->d_inode->i_alloc_sem);
- if (inode->i_op && inode->i_op->setattr) {
+ if (inode->i_op->setattr)
error = inode->i_op->setattr(dentry, attr);
- } else {
- error = inode_change_ok(inode, attr);
- if (!error)
- error = inode_setattr(inode, attr);
- }
+ else
+ error = simple_setattr(dentry, attr);
if (ia_valid & ATTR_SIZE)
up_write(&dentry->d_inode->i_alloc_sem);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 9a0520b5066..11b1ea786d0 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/param.h>
#include <linux/time.h>
+#include <linux/compat.h>
#include <linux/smp_lock.h>
#include "autofs_i.h"
@@ -25,13 +26,17 @@ static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
static int autofs_root_unlink(struct inode *,struct dentry *);
static int autofs_root_rmdir(struct inode *,struct dentry *);
static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
+static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
const struct file_operations autofs_root_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = autofs_root_readdir,
- .ioctl = autofs_root_ioctl,
+ .unlocked_ioctl = autofs_root_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = autofs_root_compat_ioctl,
+#endif
};
const struct inode_operations autofs_root_inode_operations = {
@@ -492,6 +497,25 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
+ unsigned int __user *p)
+{
+ unsigned long ntimeout;
+
+ if (get_user(ntimeout, p) ||
+ put_user(sbi->exp_timeout / HZ, p))
+ return -EFAULT;
+
+ if (ntimeout > UINT_MAX/HZ)
+ sbi->exp_timeout = 0;
+ else
+ sbi->exp_timeout = ntimeout * HZ;
+
+ return 0;
+}
+#endif
+
static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
unsigned long __user *p)
{
@@ -546,7 +570,7 @@ static inline int autofs_expire_run(struct super_block *sb,
* ioctl()'s on the root directory is the chief method for the daemon to
* generate kernel reactions
*/
-static int autofs_root_ioctl(struct inode *inode, struct file *filp,
+static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
@@ -571,6 +595,10 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
return 0;
case AUTOFS_IOC_PROTOVER: /* Get protocol version */
return autofs_get_protover(argp);
+#ifdef CONFIG_COMPAT
+ case AUTOFS_IOC_SETTIMEOUT32:
+ return autofs_compat_get_set_timeout(sbi, argp);
+#endif
case AUTOFS_IOC_SETTIMEOUT:
return autofs_get_set_timeout(sbi, argp);
case AUTOFS_IOC_EXPIRE:
@@ -579,4 +607,37 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
default:
return -ENOSYS;
}
+
+}
+
+static long autofs_root_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ lock_kernel();
+ ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
+ filp, cmd, arg);
+ unlock_kernel();
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long autofs_root_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ int ret;
+
+ lock_kernel();
+ if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+ ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
+ else
+ ret = autofs_do_root_ioctl(inode, filp, cmd,
+ (unsigned long)compat_ptr(arg));
+ unlock_kernel();
+
+ return ret;
}
+#endif
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index db4117ed780..cb1bd38dc08 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,7 +18,9 @@
#include <linux/slab.h>
#include <linux/param.h>
#include <linux/time.h>
+#include <linux/compat.h>
#include <linux/smp_lock.h>
+
#include "autofs_i.h"
static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
@@ -26,6 +28,7 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *);
static int autofs4_dir_rmdir(struct inode *,struct dentry *);
static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
static int autofs4_dir_open(struct inode *inode, struct file *file);
static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -40,6 +43,9 @@ const struct file_operations autofs4_root_operations = {
.readdir = dcache_readdir,
.llseek = dcache_dir_lseek,
.unlocked_ioctl = autofs4_root_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = autofs4_root_compat_ioctl,
+#endif
};
const struct file_operations autofs4_dir_operations = {
@@ -198,8 +204,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
}
/* Initialize expiry counter after successful mount */
- if (ino)
- ino->last_used = jiffies;
+ ino->last_used = jiffies;
spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_PENDING;
@@ -840,6 +845,26 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
+ compat_ulong_t __user *p)
+{
+ int rv;
+ unsigned long ntimeout;
+
+ if ((rv = get_user(ntimeout, p)) ||
+ (rv = put_user(sbi->exp_timeout/HZ, p)))
+ return rv;
+
+ if (ntimeout > UINT_MAX/HZ)
+ sbi->exp_timeout = 0;
+ else
+ sbi->exp_timeout = ntimeout * HZ;
+
+ return 0;
+}
+#endif
+
static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
unsigned long __user *p)
{
@@ -933,6 +958,10 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
return autofs4_get_protosubver(sbi, p);
case AUTOFS_IOC_SETTIMEOUT:
return autofs4_get_set_timeout(sbi, p);
+#ifdef CONFIG_COMPAT
+ case AUTOFS_IOC_SETTIMEOUT32:
+ return autofs4_compat_get_set_timeout(sbi, p);
+#endif
case AUTOFS_IOC_ASKUMOUNT:
return autofs4_ask_umount(filp->f_path.mnt, p);
@@ -961,3 +990,22 @@ static long autofs4_root_ioctl(struct file *filp,
return ret;
}
+
+#ifdef CONFIG_COMPAT
+static long autofs4_root_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ int ret;
+
+ lock_kernel();
+ if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+ ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
+ else
+ ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
+ (unsigned long)compat_ptr(arg));
+ unlock_kernel();
+
+ return ret;
+}
+#endif
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 52e59bf4aa5..f024d8aadde 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -55,12 +55,6 @@ static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
return POLLERR;
}
-static int bad_file_ioctl (struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- return -EIO;
-}
-
static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
unsigned long arg)
{
@@ -159,7 +153,6 @@ static const struct file_operations bad_file_ops =
.aio_write = bad_file_aio_write,
.readdir = bad_file_readdir,
.poll = bad_file_poll,
- .ioctl = bad_file_ioctl,
.unlocked_ioctl = bad_file_unlocked_ioctl,
.compat_ioctl = bad_file_compat_ioctl,
.mmap = bad_file_mmap,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 34ddda888e6..dc39d282488 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -436,7 +436,7 @@ befs_init_inodecache(void)
init_once);
if (befs_inode_cachep == NULL) {
printk(KERN_ERR "befs_init_inodecache: "
- "Couldn't initalize inode slabcache\n");
+ "Couldn't initialize inode slabcache\n");
return -ENOMEM;
}
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 7109e451abf..f7f87e233dd 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -17,7 +17,6 @@ struct bfs_sb_info {
unsigned long si_lf_eblk;
unsigned long si_lasti;
unsigned long *si_imap;
- struct buffer_head *si_sbh; /* buffer header w/superblock */
struct mutex bfs_lock;
};
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 88b9a3ff44e..eb67edd0f8e 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
struct super_block *sb = inode->i_sb;
struct bfs_sb_info *info = BFS_SB(sb);
struct bfs_inode_info *bi = BFS_I(inode);
- struct buffer_head *sbh = info->si_sbh;
phys = bi->i_sblock + block;
if (!create) {
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
info->si_freeb -= phys - bi->i_eblock;
info->si_lf_eblk = bi->i_eblock = phys;
mark_inode_dirty(inode);
- mark_buffer_dirty(sbh);
err = 0;
goto out;
}
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
*/
info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks;
mark_inode_dirty(inode);
- mark_buffer_dirty(sbh);
map_bh(bh_result, sb, phys);
out:
mutex_unlock(&info->bfs_lock);
@@ -168,9 +165,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- *pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags,
- pagep, fsdata, bfs_get_block);
+ int ret;
+
+ ret = block_write_begin(mapping, pos, len, flags, pagep,
+ bfs_get_block);
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
}
static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f22a7d3dc36..c4daf0f5fc0 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL");
#define dprintf(x...)
#endif
-static void bfs_write_super(struct super_block *s);
void dump_imap(const char *prefix, struct super_block *s);
struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -99,6 +98,24 @@ error:
return ERR_PTR(-EIO);
}
+static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p)
+{
+ if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) {
+ printf("Bad inode number %s:%08x\n", sb->s_id, ino);
+ return ERR_PTR(-EIO);
+ }
+
+ ino -= BFS_ROOT_INO;
+
+ *p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK);
+ if (!*p) {
+ printf("Unable to read inode %s:%08x\n", sb->s_id, ino);
+ return ERR_PTR(-EIO);
+ }
+
+ return (struct bfs_inode *)(*p)->b_data + ino % BFS_INODES_PER_BLOCK;
+}
+
static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -106,28 +123,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
unsigned long i_sblock;
struct bfs_inode *di;
struct buffer_head *bh;
- int block, off;
int err = 0;
dprintf("ino=%08x\n", ino);
- if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) {
- printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
- return -EIO;
- }
+ di = find_inode(inode->i_sb, ino, &bh);
+ if (IS_ERR(di))
+ return PTR_ERR(di);
mutex_lock(&info->bfs_lock);
- block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
- bh = sb_bread(inode->i_sb, block);
- if (!bh) {
- printf("Unable to read inode %s:%08x\n",
- inode->i_sb->s_id, ino);
- mutex_unlock(&info->bfs_lock);
- return -EIO;
- }
-
- off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
- di = (struct bfs_inode *)bh->b_data + off;
if (ino == BFS_ROOT_INO)
di->i_vtype = cpu_to_le32(BFS_VDIR);
@@ -158,12 +162,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
return err;
}
-static void bfs_delete_inode(struct inode *inode)
+static void bfs_evict_inode(struct inode *inode)
{
unsigned long ino = inode->i_ino;
struct bfs_inode *di;
struct buffer_head *bh;
- int block, off;
struct super_block *s = inode->i_sb;
struct bfs_sb_info *info = BFS_SB(s);
struct bfs_inode_info *bi = BFS_I(inode);
@@ -171,28 +174,19 @@ static void bfs_delete_inode(struct inode *inode)
dprintf("ino=%08lx\n", ino);
truncate_inode_pages(&inode->i_data, 0);
+ invalidate_inode_buffers(inode);
+ end_writeback(inode);
- if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) {
- printf("invalid ino=%08lx\n", ino);
+ if (inode->i_nlink)
return;
- }
-
- inode->i_size = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- mutex_lock(&info->bfs_lock);
- mark_inode_dirty(inode);
- block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
- bh = sb_bread(s, block);
- if (!bh) {
- printf("Unable to read inode %s:%08lx\n",
- inode->i_sb->s_id, ino);
- mutex_unlock(&info->bfs_lock);
+ di = find_inode(s, inode->i_ino, &bh);
+ if (IS_ERR(di))
return;
- }
- off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
- di = (struct bfs_inode *)bh->b_data + off;
- memset((void *)di, 0, sizeof(struct bfs_inode));
+
+ mutex_lock(&info->bfs_lock);
+ /* clear on-disk inode */
+ memset(di, 0, sizeof(struct bfs_inode));
mark_buffer_dirty(bh);
brelse(bh);
@@ -209,32 +203,9 @@ static void bfs_delete_inode(struct inode *inode)
* "last block of the last file" even if there is no
* real file there, saves us 1 gap.
*/
- if (info->si_lf_eblk == bi->i_eblock) {
+ if (info->si_lf_eblk == bi->i_eblock)
info->si_lf_eblk = bi->i_sblock - 1;
- mark_buffer_dirty(info->si_sbh);
- }
mutex_unlock(&info->bfs_lock);
- clear_inode(inode);
-}
-
-static int bfs_sync_fs(struct super_block *sb, int wait)
-{
- struct bfs_sb_info *info = BFS_SB(sb);
-
- mutex_lock(&info->bfs_lock);
- mark_buffer_dirty(info->si_sbh);
- sb->s_dirt = 0;
- mutex_unlock(&info->bfs_lock);
-
- return 0;
-}
-
-static void bfs_write_super(struct super_block *sb)
-{
- if (!(sb->s_flags & MS_RDONLY))
- bfs_sync_fs(sb, 1);
- else
- sb->s_dirt = 0;
}
static void bfs_put_super(struct super_block *s)
@@ -246,10 +217,6 @@ static void bfs_put_super(struct super_block *s)
lock_kernel();
- if (s->s_dirt)
- bfs_write_super(s);
-
- brelse(info->si_sbh);
mutex_destroy(&info->bfs_lock);
kfree(info->si_imap);
kfree(info);
@@ -319,10 +286,8 @@ static const struct super_operations bfs_sops = {
.alloc_inode = bfs_alloc_inode,
.destroy_inode = bfs_destroy_inode,
.write_inode = bfs_write_inode,
- .delete_inode = bfs_delete_inode,
+ .evict_inode = bfs_evict_inode,
.put_super = bfs_put_super,
- .write_super = bfs_write_super,
- .sync_fs = bfs_sync_fs,
.statfs = bfs_statfs,
};
@@ -349,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s)
static int bfs_fill_super(struct super_block *s, void *data, int silent)
{
- struct buffer_head *bh;
+ struct buffer_head *bh, *sbh;
struct bfs_super_block *bfs_sb;
struct inode *inode;
unsigned i, imap_len;
@@ -365,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
sb_set_blocksize(s, BFS_BSIZE);
- info->si_sbh = sb_bread(s, 0);
- if (!info->si_sbh)
+ sbh = sb_bread(s, 0);
+ if (!sbh)
goto out;
- bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
+ bfs_sb = (struct bfs_super_block *)sbh->b_data;
if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
printf("No BFS filesystem on %s (magic=%08x)\n",
@@ -472,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
info->si_lf_eblk = eblock;
}
brelse(bh);
- if (!(s->s_flags & MS_RDONLY)) {
- mark_buffer_dirty(info->si_sbh);
- s->s_dirt = 1;
- }
+ brelse(sbh);
dump_imap("read_super", s);
return 0;
@@ -485,7 +447,7 @@ out3:
out2:
kfree(info->si_imap);
out1:
- brelse(info->si_sbh);
+ brelse(sbh);
out:
mutex_destroy(&info->bfs_lock);
kfree(info);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d7..63039ed9576 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
/* clear any space allocated but not loaded */
if (phdr->p_filesz < phdr->p_memsz) {
- ret = clear_user((void *) (seg->addr + phdr->p_filesz),
- phdr->p_memsz - phdr->p_filesz);
- if (ret)
- return ret;
+ if (clear_user((void *) (seg->addr + phdr->p_filesz),
+ phdr->p_memsz - phdr->p_filesz))
+ return -EFAULT;
}
if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, delta_vaddr;
- int loop, dvset, ret;
+ int loop, dvset;
load_addr = params->load_addr;
delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
* PT_LOAD */
if (prot & PROT_WRITE && disp > 0) {
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
- ret = clear_user((void __user *) maddr, disp);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr, disp))
+ return -EFAULT;
maddr += disp;
}
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (prot & PROT_WRITE && excess1 > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess1);
- ret = clear_user((void __user *) maddr + phdr->p_filesz,
- excess1);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr + phdr->p_filesz,
+ excess1))
+ return -EFAULT;
}
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
- ret = clear_user((void *) maddr + phdr->p_filesz, excess);
- if (ret)
- return ret;
+ if (clear_user((void *) maddr + phdr->p_filesz, excess))
+ return -EFAULT;
}
#endif
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d..811384bec8d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
#endif
/*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_DATA_ALIGN (sizeof(void *))
-#endif
+#define FLAT_DATA_ALIGN (0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
+ */
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
sp = (unsigned long *)p;
sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
envp = argv + (argc + 1);
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, data_len + extra);
+ do_munmap(current->mm, realdatastart, len);
ret = result;
goto err;
}
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
+ stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (IS_ERR_VALUE(res))
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e83537ead..9e60fd20171 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
return inode;
}
-static void bm_clear_inode(struct inode *inode)
+static void bm_evict_inode(struct inode *inode)
{
+ end_writeback(inode);
kfree(inode->i_private);
}
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = {
static const struct super_operations s_ops = {
.statfs = simple_statfs,
- .clear_inode = bm_clear_inode,
+ .evict_inode = bm_evict_inode,
};
static int bm_fill_super(struct super_block * sb, void * data, int silent)
diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca64dc..8abb2dfb2e7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
if (!bio)
goto out_bmd;
- bio->bi_rw |= (!write_to_vm << BIO_RW);
+ if (!write_to_vm)
+ bio->bi_rw |= REQ_WRITE;
ret = 0;
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
* set data direction, and check if mapped pages need bouncing
*/
if (!write_to_vm)
- bio->bi_rw |= (1 << BIO_RW);
+ bio->bi_rw |= REQ_WRITE;
bio->bi_bdev = bdev;
bio->bi_flags |= (1 << BIO_USER_MAPPED);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7346c96308a..50e8c8582fa 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
- I_BDEV(inode), iov, offset, nr_segs,
- blkdev_get_blocks, NULL);
+ return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
+ nr_segs, blkdev_get_blocks, NULL, NULL, 0);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- *pagep = NULL;
- return block_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, blkdev_get_block);
+ return block_write_begin(mapping, pos, len, flags, pagep,
+ blkdev_get_block);
}
static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -428,10 +426,13 @@ static inline void __bd_forget(struct inode *inode)
inode->i_mapping = &inode->i_data;
}
-static void bdev_clear_inode(struct inode *inode)
+static void bdev_evict_inode(struct inode *inode)
{
struct block_device *bdev = &BDEV_I(inode)->bdev;
struct list_head *p;
+ truncate_inode_pages(&inode->i_data, 0);
+ invalidate_inode_buffers(inode); /* is it needed here? */
+ end_writeback(inode);
spin_lock(&bdev_lock);
while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
__bd_forget(list_entry(p, struct inode, i_devices));
@@ -445,7 +446,7 @@ static const struct super_operations bdev_sops = {
.alloc_inode = bdev_alloc_inode,
.destroy_inode = bdev_destroy_inode,
.drop_inode = generic_delete_inode,
- .clear_inode = bdev_clear_inode,
+ .evict_inode = bdev_evict_inode,
};
static int bd_get_sb(struct file_system_type *fs_type,
@@ -681,8 +682,8 @@ retry:
if (!bd_may_claim(bdev, whole, holder))
return -EBUSY;
- /* if someone else is claiming, wait for it to finish */
- if (whole->bd_claiming && whole->bd_claiming != holder) {
+ /* if claiming is already in progress, wait for it to finish */
+ if (whole->bd_claiming) {
wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
DEFINE_WAIT(wait);
@@ -706,8 +707,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -734,6 +740,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -782,15 +789,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -806,23 +844,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1315,19 +1340,20 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
/*
* hooks: /n/, see "layering violations".
*/
- ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
- return ret;
+ if (!for_part) {
+ ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+ if (ret != 0) {
+ bdput(bdev);
+ return ret;
+ }
}
- lock_kernel();
restart:
ret = -ENXIO;
disk = get_gendisk(bdev->bd_dev, &partno);
if (!disk)
- goto out_unlock_kernel;
+ goto out;
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
@@ -1407,7 +1433,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (for_part)
bdev->bd_part_count++;
mutex_unlock(&bdev->bd_mutex);
- unlock_kernel();
return 0;
out_clear:
@@ -1420,9 +1445,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_contains = NULL;
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
- unlock_kernel();
-
+ out:
if (disk)
module_put(disk->fops->owner);
put_disk(disk);
@@ -1476,7 +1499,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1491,7 +1514,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct block_device *victim = NULL;
mutex_lock_nested(&bdev->bd_mutex, for_part);
- lock_kernel();
if (for_part)
bdev->bd_part_count--;
@@ -1516,7 +1538,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
}
- unlock_kernel();
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
if (victim)
@@ -1712,7 +1733,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d58..2222d161c7b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return acl;
set_cached_acl(inode, type, acl);
}
kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
+ if (!is_owner_or_cap(dentry->d_inode))
+ return -EPERM;
+
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
if (value) {
acl = posix_acl_from_xattr(value, size);
if (acl == NULL) {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29c20092847..eaf286abad1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2389,13 +2389,13 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
pgoff_t offset, pgoff_t last_index);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
-void btrfs_delete_inode(struct inode *inode);
+void btrfs_evict_inode(struct inode *inode);
void btrfs_put_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
void btrfs_dirty_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
-void btrfs_drop_inode(struct inode *inode);
+int btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22ca..64f10082f04 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.func = end_workqueue_fn;
end_io_wq->work.flags = 0;
- if (bio->bi_rw & (1 << BIO_RW)) {
+ if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
atomic_inc(&fs_info->nr_async_submits);
- if (rw & (1 << BIO_RW_SYNCIO))
+ if (rw & REQ_SYNC)
btrfs_set_work_high_prio(&async->work);
btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
bio, 1);
BUG_ON(ret);
- if (!(rw & (1 << BIO_RW))) {
+ if (!(rw & REQ_WRITE)) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
* ram and up to date before trying to verify things. For
* blocksize <= pagesize, it is basically a noop
*/
- if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+ if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
!bio_ready_for_csum(bio)) {
btrfs_queue_worker(&fs_info->endio_meta_workers,
&end_io_wq->work);
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
+ log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!log_tree_root) {
+ err = -ENOMEM;
+ goto fail_trans_kthread;
+ }
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (IS_ERR(fs_info->fs_root)) {
+ err = PTR_ERR(fs_info->fs_root);
+ goto fail_trans_kthread;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991..32d094002a5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- BUG_ON(block_rsv->space_info != cache->space_info);
+ if (block_rsv->space_info != cache->space_info)
+ goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec5..d74e6af9b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 787b50a16a1..e354c33df08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1140,7 +1140,7 @@ int btrfs_sync_file(struct file *file, int datasync)
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
- if (file && file->private_data)
+ if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 0);
@@ -1190,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
- vma->vm_ops = &btrfs_file_vm_ops;
+ struct address_space *mapping = filp->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
file_accessed(filp);
+ vma->vm_ops = &btrfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+
return 0;
}
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2..c03864406af 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- if (!(rw & (1 << BIO_RW))) {
+ if (!(rw & REQ_WRITE)) {
if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
bio->bi_size = 0;
bio_add_page(bio, page, failrec->len, start - page_offset(page));
- if (failed_bio->bi_rw & (1 << BIO_RW))
+ if (failed_bio->bi_rw & REQ_WRITE)
rw = WRITE;
else
rw = READ;
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
int ret;
- u64 refs;
+ u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- dir->i_sb->s_dirt = 1;
btrfs_free_path(path);
return 0;
@@ -3656,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;
}
- attr->ia_valid &= ~ATTR_SIZE;
- if (attr->ia_valid)
- err = inode_setattr(inode, attr);
+ if (attr->ia_valid) {
+ setattr_copy(inode, attr);
+ mark_inode_dirty(inode);
+
+ if (attr->ia_valid & ATTR_MODE)
+ err = btrfs_acl_chmod(inode);
+ }
- if (!err && ((attr->ia_valid & ATTR_MODE)))
- err = btrfs_acl_chmod(inode);
return err;
}
-void btrfs_delete_inode(struct inode *inode)
+void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3674,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
int ret;
truncate_inode_pages(&inode->i_data, 0);
+ if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+ goto no_delete;
+
if (is_bad_inode(inode)) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
}
+ /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (root->fs_info->log_root_recovering) {
@@ -3727,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
no_delete:
- clear_inode(inode);
+ end_writeback(inode);
return;
}
@@ -3858,7 +3863,7 @@ again:
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
- (I_WILL_FREE | I_FREEING | I_CLEAR)));
+ (I_WILL_FREE | I_FREEING)));
rb_erase(parent, &root->inode_tree);
RB_CLEAR_NODE(parent);
spin_unlock(&root->inode_lock);
@@ -3937,7 +3942,7 @@ again:
if (atomic_read(&inode->i_count) > 1)
d_prune_aliases(inode);
/*
- * btrfs_drop_inode will remove it from
+ * btrfs_drop_inode will have it removed from
* the inode cache when its usage count
* hits zero.
*/
@@ -5642,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
struct bio_vec *bvec = bio->bi_io_vec;
u64 start;
int skip_sum;
- int write = rw & (1 << BIO_RW);
+ int write = rw & REQ_WRITE;
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -6331,13 +6336,14 @@ free:
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
-void btrfs_drop_inode(struct inode *inode)
+int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
- generic_delete_inode(inode);
+
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return 1;
else
- generic_drop_inode(inode);
+ return generic_drop_inode(inode);
}
static void init_once(void *foo)
@@ -6884,7 +6890,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26d..9254b3d58db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out;
+ goto out_up_write;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -1845,7 +1855,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
dir_id, "default", 7, 1);
- if (!di) {
+ if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e56923..b37d723b9d4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
- root = find_tree_root(rc, eb, ref0);
- if (!root->ref_cows)
- cur->cowonly = 1;
if (key.objectid == key.offset) {
+ root = find_tree_root(rc, eb, ref0);
if (root && !should_ignore_root(root))
cur->root = root;
else
list_add(&cur->list, &useless);
break;
}
+ if (is_cowonly_root(btrfs_ref_root_v0(eb,
+ ref0)))
+ cur->cowonly = 1;
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd97264..2d958be761c 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_path *path;
int ret;
- u32 refs;
struct btrfs_root_item *ri;
struct extent_buffer *leaf;
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
- refs = btrfs_disk_root_refs(leaf, ri);
- BUG_ON(refs != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc962..1776dbd8dc9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ if (IS_ERR(di))
+ return ERR_CAST(di);
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
location.offset = 0;
inode = btrfs_iget(sb, &location, new_root, &new);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/*
* If we're just mounting the root most subvol put the inode and return
@@ -795,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb)
static const struct super_operations btrfs_super_ops = {
.drop_inode = btrfs_drop_inode,
- .delete_inode = btrfs_delete_inode,
+ .evict_inode = btrfs_evict_inode,
.put_super = btrfs_put_super,
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8be95..dd318ff280b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -258,7 +258,7 @@ loop_lock:
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+ if (cur->bi_rw & REQ_SYNC)
num_sync_run++;
submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
int max_errors = 0;
struct btrfs_multi_bio *multi = NULL;
- if (multi_ret && !(rw & (1 << BIO_RW)))
+ if (multi_ret && !(rw & REQ_WRITE))
stripes_allocated = 1;
again:
if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
mirror_num = 0;
/* if our multi bio struct is too small, back off and try again */
- if (rw & (1 << BIO_RW)) {
+ if (rw & REQ_WRITE) {
if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_DUP)) {
stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
max_errors = 1;
}
}
- if (multi_ret && (rw & (1 << BIO_RW)) &&
+ if (multi_ret && (rw & REQ_WRITE) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
num_stripes = 1;
stripe_index = 0;
if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (unplug_page || (rw & (1 << BIO_RW)))
+ if (unplug_page || (rw & REQ_WRITE))
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & (1 << BIO_RW))
+ if (rw & REQ_WRITE)
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
stripe_index = do_div(stripe_nr, factor);
stripe_index *= map->sub_stripes;
- if (unplug_page || (rw & (1 << BIO_RW)))
+ if (unplug_page || (rw & REQ_WRITE))
num_stripes = map->sub_stripes;
else if (mirror_num)
stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
struct btrfs_pending_bios *pending_bios;
/* don't bother with additional async steps for reads, right now */
- if (!(rw & (1 << BIO_RW))) {
+ if (!(rw & REQ_WRITE)) {
bio_get(bio);
submit_bio(rw, bio);
bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
- if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+ if (bio->bi_rw & REQ_SYNC)
pending_bios = &device->pending_sync_bios;
else
pending_bios = &device->pending_bios;
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e..50efa339e05 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
}
EXPORT_SYMBOL(page_zero_new_buffers);
-static int __block_prepare_write(struct inode *inode, struct page *page,
- unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+ get_block_t *get_block)
{
+ struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
sector_t block;
int err = 0;
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
if (!buffer_uptodate(*wait_bh))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
+ ClearPageUptodate(page);
+ }
return err;
}
+EXPORT_SYMBOL(block_prepare_write);
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
@@ -1948,90 +1952,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
return 0;
}
-/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
- * The filesystem needs to handle block truncation upon failure.
- */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block)
{
- struct inode *inode = mapping->host;
- int status = 0;
- struct page *page;
- pgoff_t index;
- unsigned start, end;
- int ownpage = 0;
-
- index = pos >> PAGE_CACHE_SHIFT;
- start = pos & (PAGE_CACHE_SIZE - 1);
- end = start + len;
-
- page = *pagep;
- if (page == NULL) {
- ownpage = 1;
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- status = -ENOMEM;
- goto out;
- }
- *pagep = page;
- } else
- BUG_ON(!PageLocked(page));
-
- status = __block_prepare_write(inode, page, start, end, get_block);
- if (unlikely(status)) {
- ClearPageUptodate(page);
+ unsigned start = pos & (PAGE_CACHE_SIZE - 1);
- if (ownpage) {
- unlock_page(page);
- page_cache_release(page);
- *pagep = NULL;
- }
- }
-
-out:
- return status;
+ return block_prepare_write(page, start, start + len, get_block);
}
-EXPORT_SYMBOL(block_write_begin_newtrunc);
+EXPORT_SYMBOL(__block_write_begin);
/*
* block_write_begin takes care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
*
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int block_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+ unsigned flags, struct page **pagep, get_block_t *get_block)
{
- int ret;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct page *page;
+ int status;
- ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block);
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ return -ENOMEM;
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- *
- * Filesystems which pass down their own page also cannot
- * call into vmtruncate here because it would lead to lock
- * inversion problems (*pagep is locked). This is a further
- * example of where the old truncate sequence is inadequate.
- */
- if (unlikely(ret) && *pagep == NULL) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
+ status = __block_write_begin(page, pos, len, get_block);
+ if (unlikely(status)) {
+ unlock_page(page);
+ page_cache_release(page);
+ page = NULL;
}
- return ret;
+ *pagep = page;
+ return status;
}
EXPORT_SYMBOL(block_write_begin);
@@ -2351,7 +2306,7 @@ out:
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2318,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
err = cont_expand_zero(file, mapping, pos, bytes);
if (err)
- goto out;
+ return err;
zerofrom = *bytes & ~PAGE_CACHE_MASK;
if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2326,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
(*bytes)++;
}
- *pagep = NULL;
- err = block_write_begin_newtrunc(file, mapping, pos, len,
- flags, pagep, fsdata, get_block);
-out:
- return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block, loff_t *bytes)
-{
- int ret;
-
- ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block, bytes);
- if (unlikely(ret)) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
- }
-
- return ret;
+ return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
- get_block_t *get_block)
-{
- struct inode *inode = page->mapping->host;
- int err = __block_prepare_write(inode, page, from, to, get_block);
- if (err)
- ClearPageUptodate(page);
- return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
@@ -2510,11 +2431,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
}
/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
* The filesystem needs to handle block truncation upon failure.
*/
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -2547,8 +2468,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
- return block_write_begin_newtrunc(file, mapping, pos, len,
- flags, pagep, fsdata, get_block);
+ return block_write_begin(mapping, pos, len, flags, pagep,
+ get_block);
}
if (PageMappedToDisk(page))
@@ -2654,35 +2575,6 @@ out_release:
return ret;
}
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
-{
- int ret;
-
- ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block);
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (unlikely(ret)) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
- }
-
- return ret;
-}
EXPORT_SYMBOL(nobh_write_begin);
int nobh_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 2906077ac79..a2603e7c0bb 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
/* get the cache size and blocksize */
- ret = vfs_statfs(root, &stats);
+ ret = vfs_statfs(&path, &stats);
if (ret < 0)
goto error_unsupported;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index c2413561ea7..727caedcdd9 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -552,8 +552,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
*/
static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
{
- struct fs_struct *fs;
- struct dentry *dir;
+ struct path path;
const struct cred *saved_cred;
int ret;
@@ -573,24 +572,21 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
}
/* extract the directory dentry from the cwd */
- fs = current->fs;
- read_lock(&fs->lock);
- dir = dget(fs->pwd.dentry);
- read_unlock(&fs->lock);
+ get_fs_pwd(current->fs, &path);
- if (!S_ISDIR(dir->d_inode->i_mode))
+ if (!S_ISDIR(path.dentry->d_inode->i_mode))
goto notdir;
cachefiles_begin_secure(cache, &saved_cred);
- ret = cachefiles_cull(cache, dir, args);
+ ret = cachefiles_cull(cache, path.dentry, args);
cachefiles_end_secure(cache, saved_cred);
- dput(dir);
+ path_put(&path);
_leave(" = %d", ret);
return ret;
notdir:
- dput(dir);
+ path_put(&path);
kerror("cull command requires dirfd to be a directory");
return -ENOTDIR;
@@ -628,8 +624,7 @@ inval:
*/
static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
{
- struct fs_struct *fs;
- struct dentry *dir;
+ struct path path;
const struct cred *saved_cred;
int ret;
@@ -649,24 +644,21 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
}
/* extract the directory dentry from the cwd */
- fs = current->fs;
- read_lock(&fs->lock);
- dir = dget(fs->pwd.dentry);
- read_unlock(&fs->lock);
+ get_fs_pwd(current->fs, &path);
- if (!S_ISDIR(dir->d_inode->i_mode))
+ if (!S_ISDIR(path.dentry->d_inode->i_mode))
goto notdir;
cachefiles_begin_secure(cache, &saved_cred);
- ret = cachefiles_check_in_use(cache, dir, args);
+ ret = cachefiles_check_in_use(cache, path.dentry, args);
cachefiles_end_secure(cache, saved_cred);
- dput(dir);
+ path_put(&path);
//_leave(" = %d", ret);
return ret;
notdir:
- dput(dir);
+ path_put(&path);
kerror("inuse command requires dirfd to be a directory");
return -ENOTDIR;
@@ -683,6 +675,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
unsigned fnr, unsigned bnr)
{
struct kstatfs stats;
+ struct path path = {
+ .mnt = cache->mnt,
+ .dentry = cache->mnt->mnt_root,
+ };
int ret;
//_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
@@ -697,7 +693,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
/* find out how many pages of blockdev are available */
memset(&stats, 0, sizeof(stats));
- ret = vfs_statfs(cache->mnt->mnt_root, &stats);
+ ret = vfs_statfs(&path, &stats);
if (ret < 0) {
if (ret == -EIO)
cachefiles_io_error(cache, "statfs failed");
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index a8cd821226d..bd6bc1bde2d 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -267,13 +267,6 @@ do { \
#define dbgprintk(FMT, ...) \
printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
-/* make sure we maintain the format strings, even when debugging is disabled */
-static inline void _dbprintk(const char *fmt, ...)
- __attribute__((format(printf, 1, 2)));
-static inline void _dbprintk(const char *fmt, ...)
-{
-}
-
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
@@ -304,9 +297,9 @@ do { \
} while (0)
#else
-#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
+#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
#endif
#if 1 /* defined(__KDEBUGALL) */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a7840bf42..42c7fafc8bf 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
printk(KERN_ERR "%sobject: OBJ%x\n",
prefix, object->fscache.debug_id);
- printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n",
+ printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
prefix, fscache_object_states[object->fscache.state],
- object->fscache.flags, object->fscache.work.flags,
+ object->fscache.flags, work_busy(&object->fscache.work),
object->fscache.events,
object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
@@ -212,7 +212,7 @@ wait_for_old_object:
/* if the object we're waiting for is queued for processing,
* then just put ourselves on the queue behind it */
- if (slow_work_is_queued(&xobject->fscache.work)) {
+ if (work_pending(&xobject->fscache.work)) {
_debug("queue OBJ%x behind OBJ%x immediately",
object->fscache.debug_id,
xobject->fscache.debug_id);
@@ -220,8 +220,7 @@ wait_for_old_object:
}
/* otherwise we sleep until either the object we're waiting for
- * is done, or the slow-work facility wants the thread back to
- * do other work */
+ * is done, or the fscache_object is congested */
wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
init_wait(&wait);
requeue = false;
@@ -229,8 +228,8 @@ wait_for_old_object:
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
break;
- requeue = slow_work_sleep_till_thread_needed(
- &object->fscache.work, &timeout);
+
+ requeue = fscache_object_sleep_till_congested(&timeout);
} while (timeout > 0 && !requeue);
finish_wait(wq, &wait);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0f0d41fbb03..0e3c0924cc3 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
- op->op.flags |= FSCACHE_OP_FAST;
+ op->op.flags |= FSCACHE_OP_ASYNC;
op->op.processor = cachefiles_read_copier;
pagevec_init(&pagevec, 0);
@@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
pagevec_init(&pagevec, 0);
op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
- op->op.flags |= FSCACHE_OP_FAST;
+ op->op.flags |= FSCACHE_OP_ASYNC;
op->op.processor = cachefiles_read_copier;
INIT_LIST_HEAD(&backpages);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 04b8280582a..bc87b9c1d27 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -2,7 +2,7 @@ config CEPH_FS
tristate "Ceph distributed file system (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select LIBCRC32C
- select CONFIG_CRYPTO_AES
+ select CRYPTO_AES
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 6a660e610be..278e1172600 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),)
obj-$(CONFIG_CEPH_FS) += ceph.o
-ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \
+ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
export.o caps.o snap.o xattr.o \
messenger.o msgpool.o buffer.o pagelist.o \
mds_client.o mdsmap.o \
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index d9c60b84949..5598a0d0229 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
zero_user_segment(page, s, PAGE_CACHE_SIZE);
}
- if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) {
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ GFP_NOFS)) {
page_cache_release(page);
dout("readpages %p add_to_page_cache failed %p\n",
inode, page);
@@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req,
* page truncation thread, possibly losing some data that
* raced its way in
*/
- if ((issued & CEPH_CAP_FILE_CACHE) == 0)
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
generic_error_remove_page(inode->i_mapping, page);
unlock_page(page);
@@ -797,9 +798,12 @@ get_more_pages:
dout("%p will write page %p idx %lu\n",
inode, page, page->index);
- writeback_stat = atomic_long_inc_return(&client->writeback_count);
- if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) {
- set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
+ writeback_stat =
+ atomic_long_inc_return(&client->writeback_count);
+ if (writeback_stat > CONGESTION_ON_THRESH(
+ client->mount_args->congestion_kb)) {
+ set_bdi_congested(&client->backing_dev_info,
+ BLK_RW_ASYNC);
}
set_page_writeback(page);
@@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
dout("write_begin file %p inode %p page %p %d~%d\n", file,
- inode, page, (int)pos, (int)len);
+ inode, page, (int)pos, (int)len);
r = ceph_update_writeable_page(file, pos, len, page);
} while (r == -EAGAIN);
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c
index 67b2c030924..eb2a666b0be 100644
--- a/fs/ceph/armor.c
+++ b/fs/ceph/armor.c
@@ -1,11 +1,15 @@
#include <linux/errno.h>
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
+
/*
* base64 encode/decode.
*/
-const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char *pem_key =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static int encode_bits(int c)
{
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 89490beaf53..6d2e3060062 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -20,7 +20,7 @@ static u32 supported_protocols[] = {
CEPH_AUTH_CEPHX
};
-int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
+static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
{
switch (protocol) {
case CEPH_AUTH_NONE:
@@ -133,8 +133,8 @@ bad:
return -ERANGE;
}
-int ceph_build_auth_request(struct ceph_auth_client *ac,
- void *msg_buf, size_t msg_len)
+static int ceph_build_auth_request(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len)
{
struct ceph_mon_request_header *monhdr = msg_buf;
void *p = monhdr + 1;
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ff..582e0b2caf8 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
/*
* get existing (or insert new) ticket handler
*/
-struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac,
- int service)
+static struct ceph_x_ticket_handler *
+get_ticket_handler(struct ceph_auth_client *ac, int service)
{
struct ceph_x_ticket_handler *th;
struct ceph_x_info *xi = ac->private;
@@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
auth->struct_v = 1;
auth->key = 0;
for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++)
- auth->key ^= *u;
+ auth->key ^= *(__le64 *)u;
dout(" server_challenge %llx client_challenge %llx key %llx\n",
xi->server_challenge, le64_to_cpu(auth->client_challenge),
le64_to_cpu(auth->key));
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
kfree(ac->private);
ac->private = NULL;
}
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c
index c67535d70aa..cd39f17021d 100644
--- a/fs/ceph/buffer.c
+++ b/fs/ceph/buffer.c
@@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref)
kfree(b);
}
-int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp)
-{
- b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN);
- if (b->vec.iov_base) {
- b->is_vmalloc = false;
- } else {
- b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
- b->is_vmalloc = true;
- }
- if (!b->vec.iov_base)
- return -ENOMEM;
- b->alloc_len = len;
- b->vec.iov_len = len;
- return 0;
-}
-
int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
{
size_t len;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ae3e3a30644..7bf182b0397 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps)
return cap_str[i];
}
-/*
- * Cap reservations
- *
- * Maintain a global pool of preallocated struct ceph_caps, referenced
- * by struct ceph_caps_reservations. This ensures that we preallocate
- * memory needed to successfully process an MDS response. (If an MDS
- * sends us cap information and we fail to process it, we will have
- * problems due to the client and MDS being out of sync.)
- *
- * Reservations are 'owned' by a ceph_cap_reservation context.
- */
-static spinlock_t caps_list_lock;
-static struct list_head caps_list; /* unused (reserved or unreserved) */
-static int caps_total_count; /* total caps allocated */
-static int caps_use_count; /* in use */
-static int caps_reserve_count; /* unused, reserved */
-static int caps_avail_count; /* unused, unreserved */
-static int caps_min_count; /* keep at least this many (unreserved) */
-
-void __init ceph_caps_init(void)
+void ceph_caps_init(struct ceph_mds_client *mdsc)
{
- INIT_LIST_HEAD(&caps_list);
- spin_lock_init(&caps_list_lock);
+ INIT_LIST_HEAD(&mdsc->caps_list);
+ spin_lock_init(&mdsc->caps_list_lock);
}
-void ceph_caps_finalize(void)
+void ceph_caps_finalize(struct ceph_mds_client *mdsc)
{
struct ceph_cap *cap;
- spin_lock(&caps_list_lock);
- while (!list_empty(&caps_list)) {
- cap = list_first_entry(&caps_list, struct ceph_cap, caps_item);
+ spin_lock(&mdsc->caps_list_lock);
+ while (!list_empty(&mdsc->caps_list)) {
+ cap = list_first_entry(&mdsc->caps_list,
+ struct ceph_cap, caps_item);
list_del(&cap->caps_item);
kmem_cache_free(ceph_cap_cachep, cap);
}
- caps_total_count = 0;
- caps_avail_count = 0;
- caps_use_count = 0;
- caps_reserve_count = 0;
- caps_min_count = 0;
- spin_unlock(&caps_list_lock);
+ mdsc->caps_total_count = 0;
+ mdsc->caps_avail_count = 0;
+ mdsc->caps_use_count = 0;
+ mdsc->caps_reserve_count = 0;
+ mdsc->caps_min_count = 0;
+ spin_unlock(&mdsc->caps_list_lock);
}
-void ceph_adjust_min_caps(int delta)
+void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
{
- spin_lock(&caps_list_lock);
- caps_min_count += delta;
- BUG_ON(caps_min_count < 0);
- spin_unlock(&caps_list_lock);
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_min_count += delta;
+ BUG_ON(mdsc->caps_min_count < 0);
+ spin_unlock(&mdsc->caps_list_lock);
}
-int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need)
+int ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need)
{
int i;
struct ceph_cap *cap;
@@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need)
dout("reserve caps ctx=%p need=%d\n", ctx, need);
/* first reserve any caps that are already allocated */
- spin_lock(&caps_list_lock);
- if (caps_avail_count >= need)
+ spin_lock(&mdsc->caps_list_lock);
+ if (mdsc->caps_avail_count >= need)
have = need;
else
- have = caps_avail_count;
- caps_avail_count -= have;
- caps_reserve_count += have;
- BUG_ON(caps_total_count != caps_use_count + caps_reserve_count +
- caps_avail_count);
- spin_unlock(&caps_list_lock);
+ have = mdsc->caps_avail_count;
+ mdsc->caps_avail_count -= have;
+ mdsc->caps_reserve_count += have;
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
for (i = have; i < need; i++) {
cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
@@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need)
}
BUG_ON(have + alloc != need);
- spin_lock(&caps_list_lock);
- caps_total_count += alloc;
- caps_reserve_count += alloc;
- list_splice(&newcaps, &caps_list);
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_total_count += alloc;
+ mdsc->caps_reserve_count += alloc;
+ list_splice(&newcaps, &mdsc->caps_list);
- BUG_ON(caps_total_count != caps_use_count + caps_reserve_count +
- caps_avail_count);
- spin_unlock(&caps_list_lock);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
ctx->count = need;
dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
- ctx, caps_total_count, caps_use_count, caps_reserve_count,
- caps_avail_count);
+ ctx, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
return 0;
out_alloc_count:
@@ -220,92 +205,104 @@ out_alloc_count:
return ret;
}
-int ceph_unreserve_caps(struct ceph_cap_reservation *ctx)
+int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
{
dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
if (ctx->count) {
- spin_lock(&caps_list_lock);
- BUG_ON(caps_reserve_count < ctx->count);
- caps_reserve_count -= ctx->count;
- caps_avail_count += ctx->count;
+ spin_lock(&mdsc->caps_list_lock);
+ BUG_ON(mdsc->caps_reserve_count < ctx->count);
+ mdsc->caps_reserve_count -= ctx->count;
+ mdsc->caps_avail_count += ctx->count;
ctx->count = 0;
dout("unreserve caps %d = %d used + %d resv + %d avail\n",
- caps_total_count, caps_use_count, caps_reserve_count,
- caps_avail_count);
- BUG_ON(caps_total_count != caps_use_count + caps_reserve_count +
- caps_avail_count);
- spin_unlock(&caps_list_lock);
+ mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
}
return 0;
}
-static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
+static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
{
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ mdsc->caps_use_count++;
+ mdsc->caps_total_count++;
+ }
+ return cap;
+ }
- spin_lock(&caps_list_lock);
+ spin_lock(&mdsc->caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
- ctx, ctx->count, caps_total_count, caps_use_count,
- caps_reserve_count, caps_avail_count);
+ ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
BUG_ON(!ctx->count);
- BUG_ON(ctx->count > caps_reserve_count);
- BUG_ON(list_empty(&caps_list));
+ BUG_ON(ctx->count > mdsc->caps_reserve_count);
+ BUG_ON(list_empty(&mdsc->caps_list));
ctx->count--;
- caps_reserve_count--;
- caps_use_count++;
+ mdsc->caps_reserve_count--;
+ mdsc->caps_use_count++;
- cap = list_first_entry(&caps_list, struct ceph_cap, caps_item);
+ cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
list_del(&cap->caps_item);
- BUG_ON(caps_total_count != caps_use_count + caps_reserve_count +
- caps_avail_count);
- spin_unlock(&caps_list_lock);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
return cap;
}
-void ceph_put_cap(struct ceph_cap *cap)
+void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
{
- spin_lock(&caps_list_lock);
+ spin_lock(&mdsc->caps_list_lock);
dout("put_cap %p %d = %d used + %d resv + %d avail\n",
- cap, caps_total_count, caps_use_count,
- caps_reserve_count, caps_avail_count);
- caps_use_count--;
+ cap, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ mdsc->caps_use_count--;
/*
* Keep some preallocated caps around (ceph_min_count), to
* avoid lots of free/alloc churn.
*/
- if (caps_avail_count >= caps_reserve_count + caps_min_count) {
- caps_total_count--;
+ if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
+ mdsc->caps_min_count) {
+ mdsc->caps_total_count--;
kmem_cache_free(ceph_cap_cachep, cap);
} else {
- caps_avail_count++;
- list_add(&cap->caps_item, &caps_list);
+ mdsc->caps_avail_count++;
+ list_add(&cap->caps_item, &mdsc->caps_list);
}
- BUG_ON(caps_total_count != caps_use_count + caps_reserve_count +
- caps_avail_count);
- spin_unlock(&caps_list_lock);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
}
void ceph_reservation_status(struct ceph_client *client,
int *total, int *avail, int *used, int *reserved,
int *min)
{
+ struct ceph_mds_client *mdsc = &client->mdsc;
+
if (total)
- *total = caps_total_count;
+ *total = mdsc->caps_total_count;
if (avail)
- *avail = caps_avail_count;
+ *avail = mdsc->caps_avail_count;
if (used)
- *used = caps_use_count;
+ *used = mdsc->caps_use_count;
if (reserved)
- *reserved = caps_reserve_count;
+ *reserved = mdsc->caps_reserve_count;
if (min)
- *min = caps_min_count;
+ *min = mdsc->caps_min_count;
}
/*
@@ -330,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
return NULL;
}
+struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+{
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->vfs_inode.i_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ spin_unlock(&ci->vfs_inode.i_lock);
+ return cap;
+}
+
/*
- * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else
- * -1.
+ * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
*/
-static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq)
+static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
{
struct ceph_cap *cap;
int mds = -1;
struct rb_node *p;
- /* prefer mds with WR|WRBUFFER|EXCL caps */
+ /* prefer mds with WR|BUFFER|EXCL caps */
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
mds = cap->mds;
- if (mseq)
- *mseq = cap->mseq;
if (cap->issued & (CEPH_CAP_FILE_WR |
CEPH_CAP_FILE_BUFFER |
CEPH_CAP_FILE_EXCL))
@@ -358,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode)
{
int mds;
spin_lock(&inode->i_lock);
- mds = __ceph_get_cap_mds(ceph_inode(inode), NULL);
+ mds = __ceph_get_cap_mds(ceph_inode(inode));
spin_unlock(&inode->i_lock);
return mds;
}
@@ -477,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
* Each time we receive FILE_CACHE anew, we increment
* i_rdcache_gen.
*/
- if ((issued & CEPH_CAP_FILE_CACHE) &&
- (had & CEPH_CAP_FILE_CACHE) == 0)
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
ci->i_rdcache_gen++;
/*
@@ -537,7 +541,7 @@ retry:
new_cap = NULL;
} else {
spin_unlock(&inode->i_lock);
- new_cap = get_cap(caps_reservation);
+ new_cap = get_cap(mdsc, caps_reservation);
if (new_cap == NULL)
return -ENOMEM;
goto retry;
@@ -582,6 +586,7 @@ retry:
} else {
pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
realmino);
+ WARN_ON(!realm);
}
}
@@ -621,7 +626,7 @@ retry:
if (fmode >= 0)
__ceph_get_fmode(ci, fmode);
spin_unlock(&inode->i_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
@@ -825,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
{
int want = 0;
int mode;
- for (mode = 0; mode < 4; mode++)
+ for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++)
if (ci->i_nr_by_mode[mode])
want |= ceph_caps_for_mode(mode);
return want;
@@ -895,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
ci->i_auth_cap = NULL;
if (removed)
- ceph_put_cap(cap);
+ ceph_put_cap(mdsc, cap);
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
struct ceph_snap_realm *realm = ci->i_snap_realm;
@@ -981,6 +986,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1039,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -1167,7 +1180,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
}
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return delayed;
}
@@ -1183,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
*/
void __ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession)
+ __releases(ci->vfs_inode->i_lock)
+ __acquires(ci->vfs_inode->i_lock)
{
struct inode *inode = &ci->vfs_inode;
int mds;
@@ -1218,7 +1233,13 @@ retry:
BUG_ON(capsnap->dirty == 0);
/* pick mds, take s_mutex */
- mds = __ceph_get_cap_mds(ci, &mseq);
+ if (ci->i_auth_cap == NULL) {
+ dout("no auth cap (migrating?), doing nothing\n");
+ goto out;
+ }
+ mds = ci->i_auth_cap->session->s_mds;
+ mseq = ci->i_auth_cap->mseq;
+
if (session && session->s_mds != mds) {
dout("oops, wrong session %p mutex\n", session);
mutex_unlock(&session->s_mutex);
@@ -1237,8 +1258,8 @@ retry:
}
/*
* if session == NULL, we raced against a cap
- * deletion. retry, and we'll get a better
- * @mds value next time.
+ * deletion or migration. retry, and we'll
+ * get a better @mds value next time.
*/
spin_lock(&inode->i_lock);
goto retry;
@@ -1276,6 +1297,7 @@ retry:
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
+out:
if (psession)
*psession = session;
else if (session) {
@@ -1421,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode)
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
- __releases(session->s_mutex)
{
struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1496,11 +1517,13 @@ retry_locked:
ci->i_wrbuffer_ref == 0 && /* no dirty pages... */
ci->i_rdcache_gen && /* may have cached pages */
(file_wanted == 0 || /* no open files */
- (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */
+ (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */
!tried_invalidate) {
dout("check_caps trying to invalidate on %p\n", inode);
if (try_nonblocking_invalidate(inode) < 0) {
- if (revoking & CEPH_CAP_FILE_CACHE) {
+ if (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO)) {
dout("check_caps queuing invalidate\n");
queue_invalidate = 1;
ci->i_rdcache_revoking = ci->i_rdcache_gen;
@@ -2139,7 +2162,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
else if (flushsnaps)
ceph_flush_snaps(ci);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (put)
iput(inode);
}
@@ -2215,7 +2238,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
iput(inode);
} else if (complete_capsnap) {
ceph_flush_snaps(ci);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
if (drop_capsnap)
iput(inode);
@@ -2236,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
struct ceph_mds_session *session,
struct ceph_cap *cap,
struct ceph_buffer *xattr_buf)
- __releases(inode->i_lock)
- __releases(session->s_mutex)
+ __releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2264,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
* will invalidate _after_ writeback.)
*/
if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
!ci->i_wrbuffer_ref) {
if (try_nonblocking_invalidate(inode) == 0) {
revoked_rdcache = 1;
@@ -2355,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
/* revocation, grant, or no-op? */
if (cap->issued & ~newcaps) {
- dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued),
- ceph_cap_string(newcaps));
- if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
- writeback = 1; /* will delay ack */
- else if (dirty & ~newcaps)
- check_caps = 1; /* initiate writeback in check_caps */
- else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
- revoked_rdcache)
- check_caps = 2; /* send revoke ack in check_caps */
+ int revoking = cap->issued & ~newcaps;
+
+ dout("revocation: %s -> %s (revoking %s)\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(newcaps),
+ ceph_cap_string(revoking));
+ if (revoking & used & CEPH_CAP_FILE_BUFFER)
+ writeback = 1; /* initiate writeback; will delay ack */
+ else if (revoking == CEPH_CAP_FILE_CACHE &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
+ queue_invalidate)
+ ; /* do nothing yet, invalidation will be queued */
+ else if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
cap->issued = newcaps;
cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
@@ -2391,7 +2421,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if (queue_invalidate)
ceph_queue_invalidate(inode);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
@@ -2446,7 +2476,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
struct ceph_inode_info,
i_flushing_item)->vfs_inode);
mdsc->num_cap_flushing--;
- wake_up(&mdsc->cap_flushing_wq);
+ wake_up_all(&mdsc->cap_flushing_wq);
dout(" inode %p now !flushing\n", inode);
if (ci->i_dirty_caps == 0) {
@@ -2458,7 +2488,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
}
}
spin_unlock(&mdsc->cap_dirty_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
out:
spin_unlock(&inode->i_lock);
@@ -2554,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode,
* caller holds s_mutex
*/
static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
- struct ceph_mds_session *session)
+ struct ceph_mds_session *session,
+ int *open_target_sessions)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2586,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
ci->i_cap_exporting_mds = mds;
ci->i_cap_exporting_mseq = mseq;
ci->i_cap_exporting_issued = cap->issued;
+
+ /*
+ * make sure we have open sessions with all possible
+ * export targets, so that we get the matching IMPORT
+ */
+ *open_target_sessions = 1;
}
__ceph_remove_cap(cap);
}
@@ -2655,12 +2692,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
u64 tid;
void *snaptrace;
+ size_t snaptrace_len;
+ void *flock;
+ u32 flock_len;
+ int open_target_sessions = 0;
dout("handle_caps from mds%d\n", mds);
@@ -2669,15 +2710,30 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (msg->front.iov_len < sizeof(*h))
goto bad;
h = msg->front.iov_base;
- snaptrace = h + 1;
op = le32_to_cpu(h->op);
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
+ snaptrace = h + 1;
+ snaptrace_len = le32_to_cpu(h->snap_trace_len);
+
+ if (le16_to_cpu(msg->hdr.version) >= 2) {
+ void *p, *end;
+
+ p = snaptrace + snaptrace_len;
+ end = msg->front.iov_base + msg->front.iov_len;
+ ceph_decode_32_safe(&p, end, flock_len, bad);
+ flock = p;
+ } else {
+ flock = NULL;
+ flock_len = 0;
+ }
+
mutex_lock(&session->s_mutex);
session->s_seq++;
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
@@ -2689,6 +2745,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2699,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
goto done;
case CEPH_CAP_OP_EXPORT:
- handle_cap_export(inode, h, session);
+ handle_cap_export(inode, h, session, &open_target_sessions);
goto done;
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
- snaptrace, le32_to_cpu(h->snap_trace_len));
+ snaptrace, snaptrace_len);
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
session);
goto done_unlocked;
@@ -2714,7 +2782,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
@@ -2746,6 +2814,8 @@ done:
done_unlocked:
if (inode)
iput(inode);
+ if (open_target_sessions)
+ ceph_mdsc_open_export_target_sessions(mdsc, session);
return;
bad:
@@ -2865,18 +2935,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
@@ -2956,6 +3027,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
memcpy(*p, dentry->d_name.name, dentry->d_name.len);
*p += dentry->d_name.len;
rel->dname_seq = cpu_to_le32(di->lease_seq);
+ __ceph_mdsc_drop_dentry_lease(dentry);
}
spin_unlock(&dentry->d_lock);
return ret;
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h
index 793f50cb7c2..5babb8e9535 100644
--- a/fs/ceph/ceph_frag.h
+++ b/fs/ceph/ceph_frag.h
@@ -1,5 +1,5 @@
-#ifndef _FS_CEPH_FRAG_H
-#define _FS_CEPH_FRAG_H
+#ifndef FS_CEPH_FRAG_H
+#define FS_CEPH_FRAG_H
/*
* "Frags" are a way to describe a subset of a 32-bit number space,
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c
index 79d76bc4303..3ac6cc7c115 100644
--- a/fs/ceph/ceph_fs.c
+++ b/fs/ceph/ceph_fs.c
@@ -29,46 +29,44 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
int ceph_flags_to_mode(int flags)
{
+ int mode;
+
#ifdef O_DIRECTORY /* fixme */
if ((flags & O_DIRECTORY) == O_DIRECTORY)
return CEPH_FILE_MODE_PIN;
#endif
+ if ((flags & O_APPEND) == O_APPEND)
+ flags |= O_WRONLY;
+
+ if ((flags & O_ACCMODE) == O_RDWR)
+ mode = CEPH_FILE_MODE_RDWR;
+ else if ((flags & O_ACCMODE) == O_WRONLY)
+ mode = CEPH_FILE_MODE_WR;
+ else
+ mode = CEPH_FILE_MODE_RD;
+
#ifdef O_LAZY
if (flags & O_LAZY)
- return CEPH_FILE_MODE_LAZY;
+ mode |= CEPH_FILE_MODE_LAZY;
#endif
- if ((flags & O_APPEND) == O_APPEND)
- flags |= O_WRONLY;
- flags &= O_ACCMODE;
- if ((flags & O_RDWR) == O_RDWR)
- return CEPH_FILE_MODE_RDWR;
- if ((flags & O_WRONLY) == O_WRONLY)
- return CEPH_FILE_MODE_WR;
- return CEPH_FILE_MODE_RD;
+ return mode;
}
int ceph_caps_for_mode(int mode)
{
- switch (mode) {
- case CEPH_FILE_MODE_PIN:
- return CEPH_CAP_PIN;
- case CEPH_FILE_MODE_RD:
- return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED |
+ int caps = CEPH_CAP_PIN;
+
+ if (mode & CEPH_FILE_MODE_RD)
+ caps |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
- case CEPH_FILE_MODE_RDWR:
- return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED |
- CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE |
- CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
- CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
- CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
- case CEPH_FILE_MODE_WR:
- return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED |
- CEPH_CAP_FILE_EXCL |
+ if (mode & CEPH_FILE_MODE_WR)
+ caps |= CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
- }
- return 0;
+ if (mode & CEPH_FILE_MODE_LAZY)
+ caps |= CEPH_CAP_FILE_LAZYIO;
+
+ return caps;
}
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 2fa992eaf7d..d5619ac8671 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -9,27 +9,13 @@
* LGPL2
*/
-#ifndef _FS_CEPH_CEPH_FS_H
-#define _FS_CEPH_CEPH_FS_H
+#ifndef CEPH_FS_H
+#define CEPH_FS_H
#include "msgr.h"
#include "rados.h"
/*
- * Ceph release version
- */
-#define CEPH_VERSION_MAJOR 0
-#define CEPH_VERSION_MINOR 20
-#define CEPH_VERSION_PATCH 0
-
-#define _CEPH_STRINGIFY(x) #x
-#define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x)
-#define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \
- "." CEPH_STRINGIFY(z)
-#define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \
- CEPH_VERSION_MINOR, CEPH_VERSION_PATCH)
-
-/*
* subprotocol versions. when specific messages types or high-level
* protocols change, bump the affected components. we keep rev
* internal cluster protocols separately from the public,
@@ -53,18 +39,10 @@
/*
* feature bits
*/
-#define CEPH_FEATURE_UID 1
-#define CEPH_FEATURE_NOSRCADDR 2
-#define CEPH_FEATURE_FLOCK 4
-
-#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
-#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID
-#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK
-#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID
-#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR
-#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID
-#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR
-#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_UID (1<<0)
+#define CEPH_FEATURE_NOSRCADDR (1<<1)
+#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
+#define CEPH_FEATURE_FLOCK (1<<3)
/*
@@ -96,6 +74,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
#define CEPH_CRYPTO_NONE 0x0
#define CEPH_CRYPTO_AES 0x1
+#define CEPH_AES_IV "cephsageyudagreg"
+
/* security/authentication protocols */
#define CEPH_AUTH_UNKNOWN 0x0
#define CEPH_AUTH_NONE 0x1
@@ -275,6 +255,7 @@ extern const char *ceph_mds_state_name(int s);
#define CEPH_LOCK_IDFT 512 /* dir frag tree */
#define CEPH_LOCK_INEST 1024 /* mds internal */
#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
/* client_session ops */
@@ -316,6 +297,8 @@ enum {
CEPH_MDS_OP_RMXATTR = 0x01106,
CEPH_MDS_OP_SETLAYOUT = 0x01107,
CEPH_MDS_OP_SETATTR = 0x01108,
+ CEPH_MDS_OP_SETFILELOCK= 0x01109,
+ CEPH_MDS_OP_GETFILELOCK= 0x00110,
CEPH_MDS_OP_MKNOD = 0x01201,
CEPH_MDS_OP_LINK = 0x01202,
@@ -386,6 +369,15 @@ union ceph_mds_request_args {
struct {
struct ceph_file_layout layout;
} __attribute__ ((packed)) setlayout;
+ struct {
+ __u8 rule; /* currently fcntl or flock */
+ __u8 type; /* shared, exclusive, remove*/
+ __le64 pid; /* process id requesting the lock */
+ __le64 pid_namespace;
+ __le64 start; /* initial location to lock */
+ __le64 length; /* num bytes to lock from start */
+ __u8 wait; /* will caller wait for lock to become available? */
+ } __attribute__ ((packed)) filelock_change;
} __attribute__ ((packed));
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
@@ -480,6 +472,23 @@ struct ceph_mds_reply_dirfrag {
__le32 dist[];
} __attribute__ ((packed));
+#define CEPH_LOCK_FCNTL 1
+#define CEPH_LOCK_FLOCK 2
+
+#define CEPH_LOCK_SHARED 1
+#define CEPH_LOCK_EXCL 2
+#define CEPH_LOCK_UNLOCK 4
+
+struct ceph_filelock {
+ __le64 start;/* file offset to start lock at */
+ __le64 length; /* num bytes to lock; 0 for all following start */
+ __le64 client; /* which client holds the lock */
+ __le64 pid; /* process id holding the lock on the client */
+ __le64 pid_namespace;
+ __u8 type; /* shared lock, exclusive lock, or unlock */
+} __attribute__ ((packed));
+
+
/* file access modes */
#define CEPH_FILE_MODE_PIN 0
#define CEPH_FILE_MODE_RD 1
@@ -508,9 +517,10 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_SAUTH 2
#define CEPH_CAP_SLINK 4
#define CEPH_CAP_SXATTR 6
-#define CEPH_CAP_SFILE 8 /* goes at the end (uses >2 cap bits) */
+#define CEPH_CAP_SFILE 8
+#define CEPH_CAP_SFLOCK 20
-#define CEPH_CAP_BITS 16
+#define CEPH_CAP_BITS 22
/* composed values */
#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
@@ -528,6 +538,9 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
+#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
+#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
+
/* cap masks (for getattr) */
#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
@@ -563,7 +576,8 @@ int ceph_flags_to_mode(int flags);
CEPH_CAP_FILE_EXCL)
#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
- CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN)
+ CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
+ CEPH_CAP_PIN)
#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
CEPH_LOCK_IXATTR)
@@ -653,12 +667,21 @@ struct ceph_mds_cap_reconnect {
__le64 cap_id;
__le32 wanted;
__le32 issued;
+ __le64 snaprealm;
+ __le64 pathbase; /* base ino for our path to this ino */
+ __le32 flock_len; /* size of flock state blob, if any */
+} __attribute__ ((packed));
+/* followed by flock blob */
+
+struct ceph_mds_cap_reconnect_v1 {
+ __le64 cap_id;
+ __le32 wanted;
+ __le32 issued;
__le64 size;
struct ceph_timespec mtime, atime;
__le64 snaprealm;
__le64 pathbase; /* base ino for our path to this ino */
} __attribute__ ((packed));
-/* followed by encoded string */
struct ceph_mds_snaprealm_reconnect {
__le64 ino; /* snap realm base */
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h
index 5ac470c433c..d099c3f9023 100644
--- a/fs/ceph/ceph_hash.h
+++ b/fs/ceph/ceph_hash.h
@@ -1,5 +1,5 @@
-#ifndef _FS_CEPH_HASH_H
-#define _FS_CEPH_HASH_H
+#ifndef FS_CEPH_HASH_H
+#define FS_CEPH_HASH_H
#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c
index 7503aee828c..c6179d3a26a 100644
--- a/fs/ceph/ceph_strings.c
+++ b/fs/ceph/ceph_strings.c
@@ -28,6 +28,7 @@ const char *ceph_osd_op_name(int op)
case CEPH_OSD_OP_TRUNCATE: return "truncate";
case CEPH_OSD_OP_ZERO: return "zero";
case CEPH_OSD_OP_WRITEFULL: return "writefull";
+ case CEPH_OSD_OP_ROLLBACK: return "rollback";
case CEPH_OSD_OP_APPEND: return "append";
case CEPH_OSD_OP_STARTSYNC: return "startsync";
@@ -129,6 +130,8 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LSSNAP: return "lssnap";
case CEPH_MDS_OP_MKSNAP: return "mksnap";
case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
+ case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
}
return "???";
}
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h
index dcd7e752370..97e435b191f 100644
--- a/fs/ceph/crush/crush.h
+++ b/fs/ceph/crush/crush.h
@@ -1,5 +1,5 @@
-#ifndef _CRUSH_CRUSH_H
-#define _CRUSH_CRUSH_H
+#ifndef CEPH_CRUSH_CRUSH_H
+#define CEPH_CRUSH_CRUSH_H
#include <linux/types.h>
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h
index ff48e110e4b..91e884230d5 100644
--- a/fs/ceph/crush/hash.h
+++ b/fs/ceph/crush/hash.h
@@ -1,5 +1,5 @@
-#ifndef _CRUSH_HASH_H
-#define _CRUSH_HASH_H
+#ifndef CEPH_CRUSH_HASH_H
+#define CEPH_CRUSH_HASH_H
#define CRUSH_HASH_RJENKINS1 0
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb654..a4eec133258 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h
index 98e90046fd9..c46b99c18bb 100644
--- a/fs/ceph/crush/mapper.h
+++ b/fs/ceph/crush/mapper.h
@@ -1,5 +1,5 @@
-#ifndef _CRUSH_MAPPER_H
-#define _CRUSH_MAPPER_H
+#ifndef CEPH_CRUSH_MAPPER_H
+#define CEPH_CRUSH_MAPPER_H
/*
* CRUSH functions for find rules and then mapping an input to an
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index f704b3b6242..a3e627f6329 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -75,10 +75,11 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
}
-const u8 *aes_iv = "cephsageyudagreg";
+static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
-int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len,
- const void *src, size_t src_len)
+static int ceph_aes_encrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
{
struct scatterlist sg_in[2], sg_out[1];
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
@@ -126,9 +127,10 @@ int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len,
return 0;
}
-int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len,
- const void *src1, size_t src1_len,
- const void *src2, size_t src2_len)
+static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
+ size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len)
{
struct scatterlist sg_in[3], sg_out[1];
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
@@ -179,8 +181,9 @@ int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len,
return 0;
}
-int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len,
- const void *src, size_t src_len)
+static int ceph_aes_decrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
{
struct scatterlist sg_in[1], sg_out[2];
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
@@ -238,10 +241,10 @@ int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len,
return 0;
}
-int ceph_aes_decrypt2(const void *key, int key_len,
- void *dst1, size_t *dst1_len,
- void *dst2, size_t *dst2_len,
- const void *src, size_t src_len)
+static int ceph_aes_decrypt2(const void *key, int key_len,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len)
{
struct scatterlist sg_in[1], sg_out[3];
struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index 40b502e6bd8..bdf38607323 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -42,7 +42,7 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret,
const void *src2, size_t src2_len);
/* armor.c */
-extern int ceph_armor(char *dst, const void *src, const void *end);
-extern int ceph_unarmor(void *dst, const char *src, const char *end);
+extern int ceph_armor(char *dst, const char *src, const char *end);
+extern int ceph_unarmor(char *dst, const char *src, const char *end);
#endif
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066c..360c4f22718 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
@@ -291,7 +291,7 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
return 0;
}
-#define DEFINE_SHOW_FUNC(name) \
+#define DEFINE_SHOW_FUNC(name) \
static int name##_open(struct inode *inode, struct file *file) \
{ \
struct seq_file *sf; \
@@ -361,8 +361,8 @@ int ceph_debugfs_client_init(struct ceph_client *client)
int ret = 0;
char name[80];
- snprintf(name, sizeof(name), FSID_FORMAT ".client%lld",
- PR_FSID(&client->fsid), client->monc.auth->global_id);
+ snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+ client->monc.auth->global_id);
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
if (!client->debugfs_dir)
@@ -432,11 +432,12 @@ int ceph_debugfs_client_init(struct ceph_client *client)
if (!client->debugfs_caps)
goto out;
- client->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb",
- 0600,
- client->debugfs_dir,
- client,
- &congestion_kb_fops);
+ client->debugfs_congestion_kb =
+ debugfs_create_file("writeback_congestion_kb",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &congestion_kb_fops);
if (!client->debugfs_congestion_kb)
goto out;
@@ -466,7 +467,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
debugfs_remove(client->debugfs_dir);
}
-#else // CONFIG_DEBUG_FS
+#else /* CONFIG_DEBUG_FS */
int __init ceph_debugfs_init(void)
{
@@ -486,4 +487,4 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
}
-#endif // CONFIG_DEBUG_FS
+#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h
index 65b3e022eaf..3d25415afe6 100644
--- a/fs/ceph/decode.h
+++ b/fs/ceph/decode.h
@@ -99,11 +99,13 @@ static inline void ceph_encode_timespec(struct ceph_timespec *tv,
*/
static inline void ceph_encode_addr(struct ceph_entity_addr *a)
{
- a->in_addr.ss_family = htons(a->in_addr.ss_family);
+ __be16 ss_family = htons(a->in_addr.ss_family);
+ a->in_addr.ss_family = *(__u16 *)&ss_family;
}
static inline void ceph_decode_addr(struct ceph_entity_addr *a)
{
- a->in_addr.ss_family = ntohs(a->in_addr.ss_family);
+ __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
+ a->in_addr.ss_family = ntohs(ss_family);
WARN_ON(a->in_addr.ss_family == 512);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f85719310db..67bbb41d552 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -27,7 +27,7 @@
const struct inode_operations ceph_dir_iops;
const struct file_operations ceph_dir_fops;
-struct dentry_operations ceph_dentry_ops;
+const struct dentry_operations ceph_dentry_ops;
/*
* Initialize ceph dentry state.
@@ -94,6 +94,8 @@ static unsigned fpos_off(loff_t p)
*/
static int __dcache_readdir(struct file *filp,
void *dirent, filldir_t filldir)
+ __releases(inode->i_lock)
+ __acquires(inode->i_lock)
{
struct inode *inode = filp->f_dentry->d_inode;
struct ceph_file_info *fi = filp->private_data;
@@ -266,6 +268,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_lock(&inode->i_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_opt(client, NOASYNCREADDIR) &&
+ ceph_snap(inode) != CEPH_SNAPDIR &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
err = __dcache_readdir(filp, dirent, filldir);
@@ -1013,18 +1016,22 @@ out_touch:
/*
* When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen.
+ * of the current dir gen or if this is in the snapshot namespace.
*/
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
struct inode *parent_inode = dentry->d_parent->d_inode;
+ u64 snapid = ceph_snap(parent_inode);
- if (parent_inode) {
+ dout("dentry_release %p parent %p\n", dentry, parent_inode);
+
+ if (parent_inode && snapid != CEPH_SNAPDIR) {
struct ceph_inode_info *ci = ceph_inode(parent_inode);
spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen) {
+ if (ci->i_shared_gen == di->lease_shared_gen ||
+ snapid <= CEPH_MAXSNAP) {
dout(" clearing %p complete (d_release)\n",
parent_inode);
ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
@@ -1234,14 +1241,16 @@ const struct inode_operations ceph_dir_iops = {
.create = ceph_create,
};
-struct dentry_operations ceph_dentry_ops = {
+const struct dentry_operations ceph_dentry_ops = {
.d_revalidate = ceph_d_revalidate,
.d_release = ceph_dentry_release,
};
-struct dentry_operations ceph_snapdir_dentry_ops = {
+const struct dentry_operations ceph_snapdir_dentry_ops = {
.d_revalidate = ceph_snapdir_d_revalidate,
+ .d_release = ceph_dentry_release,
};
-struct dentry_operations ceph_snap_dentry_ops = {
+const struct dentry_operations ceph_snap_dentry_ops = {
+ .d_release = ceph_dentry_release,
};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6251a1574b9..8c044a4f045 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file)
kmem_cache_free(ceph_file_cachep, cf);
/* wake up anyone waiting for caps on this inode */
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
@@ -317,7 +317,7 @@ void ceph_release_page_vector(struct page **pages, int num_pages)
/*
* allocate a vector new pages
*/
-struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
{
struct page **pages;
int i;
@@ -665,7 +665,7 @@ more:
* throw out any page cache pages in this range. this
* may block.
*/
- truncate_inode_pages_range(inode->i_mapping, pos,
+ truncate_inode_pages_range(inode->i_mapping, pos,
(pos+len) | (PAGE_CACHE_SIZE-1));
} else {
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
@@ -740,28 +740,32 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
+ struct ceph_file_info *fi = filp->private_data;
loff_t *ppos = &iocb->ki_pos;
size_t len = iov->iov_len;
struct inode *inode = filp->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- void *base = iov->iov_base;
+ void __user *base = iov->iov_base;
ssize_t ret;
- int got = 0;
+ int want, got = 0;
int checkeof = 0, read = 0;
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
inode, ceph_vinop(inode), pos, (unsigned)len, inode);
again:
__ceph_do_pending_vmtruncate(inode);
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE,
- &got, -1);
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_CACHE;
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
if (ret < 0)
goto out;
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)len,
ceph_cap_string(got));
- if ((got & CEPH_CAP_FILE_CACHE) == 0 ||
+ if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS))
/* hmm, this isn't really async... */
@@ -807,11 +811,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct ceph_file_info *fi = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
loff_t endoff = pos + iov->iov_len;
- int got = 0;
+ int want, got = 0;
int ret, err;
if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -824,8 +829,11 @@ retry_snap:
dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
inode->i_size);
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
- &got, endoff);
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0)
goto out;
@@ -833,7 +841,7 @@ retry_snap:
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got));
- if ((got & CEPH_CAP_FILE_BUFFER) == 0 ||
+ if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS)) {
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
@@ -930,6 +938,8 @@ const struct file_operations ceph_file_fops = {
.aio_write = ceph_aio_write,
.mmap = ceph_mmap,
.fsync = ceph_fsync,
+ .lock = ceph_lock,
+ .flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 226f5a50d36..5d893d31e39 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -442,8 +442,9 @@ int ceph_fill_file_size(struct inode *inode, int issued,
* the file is either opened or mmaped
*/
if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD|
- CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER|
- CEPH_CAP_FILE_EXCL)) ||
+ CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER|
+ CEPH_CAP_FILE_EXCL|
+ CEPH_CAP_FILE_LAZYIO)) ||
mapping_mapped(inode->i_mapping) ||
__ceph_caps_file_wanted(ci)) {
ci->i_truncate_pending++;
@@ -827,7 +828,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
@@ -854,8 +855,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1199,8 +1200,10 @@ retry_lookup:
goto out;
}
err = ceph_init_dentry(dn);
- if (err < 0)
+ if (err < 0) {
+ dput(dn);
goto out;
+ }
} else if (dn->d_inode &&
(ceph_ino(dn->d_inode) != vino.ino ||
ceph_snap(dn->d_inode) != vino.snap)) {
@@ -1234,18 +1237,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
@@ -1494,7 +1502,7 @@ retry:
if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index d085f07756b..76e307d2aba 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -143,6 +143,27 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
return 0;
}
+static long ceph_ioctl_lazyio(struct file *file)
+{
+ struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
+ spin_lock(&inode->i_lock);
+ ci->i_nr_by_mode[fi->fmode]--;
+ fi->fmode |= CEPH_FILE_MODE_LAZY;
+ ci->i_nr_by_mode[fi->fmode]++;
+ spin_unlock(&inode->i_lock);
+ dout("ioctl_layzio: file %p marked lazy\n", file);
+
+ ceph_check_caps(ci, 0, NULL);
+ } else {
+ dout("ioctl_layzio: file %p already lazy\n", file);
+ }
+ return 0;
+}
+
long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
@@ -155,6 +176,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case CEPH_IOC_GET_DATALOC:
return ceph_ioctl_get_dataloc(file, (void __user *)arg);
+
+ case CEPH_IOC_LAZYIO:
+ return ceph_ioctl_lazyio(file);
}
return -ENOTTY;
}
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index 25e4f1a9d05..88451a3b685 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -37,4 +37,6 @@ struct ceph_ioctl_dataloc {
#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
struct ceph_ioctl_dataloc)
+#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
+
#endif
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
new file mode 100644
index 00000000000..ae85af06454
--- /dev/null
+++ b/fs/ceph/locks.c
@@ -0,0 +1,256 @@
+#include "ceph_debug.h"
+
+#include <linux/file.h>
+#include <linux/namei.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "pagelist.h"
+
+/**
+ * Implement fcntl and flock locking functions.
+ */
+static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+ u64 pid, u64 pid_ns,
+ int cmd, u64 start, u64 length, u8 wait)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = igrab(inode);
+
+ dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
+ "length: %llu, wait: %d, type`: %d", (int)lock_type,
+ (int)operation, pid, start, length, wait, cmd);
+
+ req->r_args.filelock_change.rule = lock_type;
+ req->r_args.filelock_change.type = cmd;
+ req->r_args.filelock_change.pid = cpu_to_le64(pid);
+ /* This should be adjusted, but I'm not sure if
+ namespaces actually get id numbers*/
+ req->r_args.filelock_change.pid_namespace =
+ cpu_to_le64((u64)pid_ns);
+ req->r_args.filelock_change.start = cpu_to_le64(start);
+ req->r_args.filelock_change.length = cpu_to_le64(length);
+ req->r_args.filelock_change.wait = wait;
+
+ err = ceph_mdsc_do_request(mdsc, inode, req);
+ ceph_mdsc_put_request(req);
+ dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
+ "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
+ (int)operation, pid, start, length, wait, cmd, err);
+ return err;
+}
+
+/**
+ * Attempt to set an fcntl lock.
+ * For now, this just goes away to the server. Later it may be more awesome.
+ */
+int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u64 length;
+ u8 lock_cmd;
+ int err;
+ u8 wait = 0;
+ u16 op = CEPH_MDS_OP_SETFILELOCK;
+
+ fl->fl_nspid = get_pid(task_tgid(current));
+ dout("ceph_lock, fl_pid:%d", fl->fl_pid);
+
+ /* set wait bit as appropriate, then make command as Ceph expects it*/
+ if (F_SETLKW == cmd)
+ wait = 1;
+ if (F_GETLK == cmd)
+ op = CEPH_MDS_OP_GETFILELOCK;
+
+ if (F_RDLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ lock_cmd, fl->fl_start,
+ length, wait);
+ if (!err) {
+ dout("mds locked, locking locally");
+ err = posix_lock_file(file, fl, NULL);
+ if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ /* undo! This should only happen if the kernel detects
+ * local deadlock. */
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ CEPH_LOCK_UNLOCK, fl->fl_start,
+ length, 0);
+ dout("got %d on posix_lock_file, undid lock", err);
+ }
+ } else {
+ dout("mds returned error code %d", err);
+ }
+ return err;
+}
+
+int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u64 length;
+ u8 lock_cmd;
+ int err;
+ u8 wait = 1;
+
+ fl->fl_nspid = get_pid(task_tgid(current));
+ dout("ceph_flock, fl_pid:%d", fl->fl_pid);
+
+ /* set wait bit, then clear it out of cmd*/
+ if (cmd & LOCK_NB)
+ wait = 0;
+ cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN);
+ /* set command sequence that Ceph wants to see:
+ shared lock, exclusive lock, or unlock */
+ if (LOCK_SH == cmd)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (LOCK_EX == cmd)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+ /* mds requires start and length rather than start and end */
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
+ err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
+ file, (u64)fl->fl_pid, (u64)fl->fl_nspid,
+ lock_cmd, fl->fl_start,
+ length, wait);
+ if (!err) {
+ err = flock_lock_file_wait(file, fl);
+ if (err) {
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, (u64)fl->fl_pid,
+ (u64)fl->fl_nspid,
+ CEPH_LOCK_UNLOCK, fl->fl_start,
+ length, 0);
+ dout("got %d on flock_lock_file_wait, undid lock", err);
+ }
+ } else {
+ dout("mds error code %d", err);
+ }
+ return err;
+}
+
+/**
+ * Must be called with BKL already held. Fills in the passed
+ * counter variables, so you can prepare pagelist metadata before calling
+ * ceph_encode_locks.
+ */
+void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
+{
+ struct file_lock *lock;
+
+ *fcntl_count = 0;
+ *flock_count = 0;
+
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_POSIX)
+ ++(*fcntl_count);
+ else if (lock->fl_flags & FL_FLOCK)
+ ++(*flock_count);
+ }
+ dout("counted %d flock locks and %d fcntl locks",
+ *flock_count, *fcntl_count);
+}
+
+/**
+ * Encode the flock and fcntl locks for the given inode into the pagelist.
+ * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
+ * sequential flock locks.
+ * Must be called with BLK already held, and the lock numbers should have
+ * been gathered under the same lock holding window.
+ */
+int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ struct file_lock *lock;
+ struct ceph_filelock cephlock;
+ int err = 0;
+
+ dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+ num_fcntl_locks);
+ err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32));
+ if (err)
+ goto fail;
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_POSIX) {
+ err = lock_to_ceph_filelock(lock, &cephlock);
+ if (err)
+ goto fail;
+ err = ceph_pagelist_append(pagelist, &cephlock,
+ sizeof(struct ceph_filelock));
+ }
+ if (err)
+ goto fail;
+ }
+
+ err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32));
+ if (err)
+ goto fail;
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_FLOCK) {
+ err = lock_to_ceph_filelock(lock, &cephlock);
+ if (err)
+ goto fail;
+ err = ceph_pagelist_append(pagelist, &cephlock,
+ sizeof(struct ceph_filelock));
+ }
+ if (err)
+ goto fail;
+ }
+fail:
+ return err;
+}
+
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+int lock_to_ceph_filelock(struct file_lock *lock,
+ struct ceph_filelock *cephlock)
+{
+ int err = 0;
+
+ cephlock->start = cpu_to_le64(lock->fl_start);
+ cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+ cephlock->client = cpu_to_le64(0);
+ cephlock->pid = cpu_to_le64(lock->fl_pid);
+ cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid);
+
+ switch (lock->fl_type) {
+ case F_RDLCK:
+ cephlock->type = CEPH_LOCK_SHARED;
+ break;
+ case F_WRLCK:
+ cephlock->type = CEPH_LOCK_EXCL;
+ break;
+ case F_UNLCK:
+ cephlock->type = CEPH_LOCK_UNLOCK;
+ break;
+ default:
+ dout("Have unknown lock type %d", lock->fl_type);
+ err = -EINVAL;
+ }
+
+ return err;
+}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b49f12822cb..a75ddbf9fe3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3,6 +3,7 @@
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include "mds_client.h"
#include "mon_client.h"
@@ -37,6 +38,11 @@
* are no longer valid.
*/
+struct ceph_reconnect_state {
+ struct ceph_pagelist *pagelist;
+ bool flock;
+};
+
static void __wake_requests(struct ceph_mds_client *mdsc,
struct list_head *head);
@@ -449,7 +455,7 @@ void ceph_mdsc_release_request(struct kref *kref)
kfree(req->r_path1);
kfree(req->r_path2);
put_request_session(req);
- ceph_unreserve_caps(&req->r_caps_reservation);
+ ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
kfree(req);
}
@@ -512,7 +518,8 @@ static void __register_request(struct ceph_mds_client *mdsc,
{
req->r_tid = ++mdsc->last_tid;
if (req->r_num_caps)
- ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps);
+ ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+ req->r_num_caps);
dout("__register_request %p tid %lld\n", req, req->r_tid);
ceph_mdsc_get_request(req);
__insert_request(mdsc, req);
@@ -704,6 +711,51 @@ static int __open_session(struct ceph_mds_client *mdsc,
}
/*
+ * open sessions for any export targets for the given mds
+ *
+ * called under mdsc->mutex
+ */
+static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_info *mi;
+ struct ceph_mds_session *ts;
+ int i, mds = session->s_mds;
+ int target;
+
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return;
+ mi = &mdsc->mdsmap->m_info[mds];
+ dout("open_export_target_sessions for mds%d (%d targets)\n",
+ session->s_mds, mi->num_export_targets);
+
+ for (i = 0; i < mi->num_export_targets; i++) {
+ target = mi->export_targets[i];
+ ts = __ceph_lookup_mds_session(mdsc, target);
+ if (!ts) {
+ ts = register_session(mdsc, target);
+ if (IS_ERR(ts))
+ return;
+ }
+ if (session->s_state == CEPH_MDS_SESSION_NEW ||
+ session->s_state == CEPH_MDS_SESSION_CLOSING)
+ __open_session(mdsc, session);
+ else
+ dout(" mds%d target mds%d %p is %s\n", session->s_mds,
+ i, ts, session_state_name(ts->s_state));
+ ceph_put_mds_session(ts);
+ }
+}
+
+void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ mutex_lock(&mdsc->mutex);
+ __open_export_target_sessions(mdsc, session);
+ mutex_unlock(&mdsc->mutex);
+}
+
+/*
* session caps
*/
@@ -764,7 +816,7 @@ static int iterate_session_caps(struct ceph_mds_session *session,
last_inode = NULL;
}
if (old_cap) {
- ceph_put_cap(old_cap);
+ ceph_put_cap(session->s_mdsc, old_cap);
old_cap = NULL;
}
@@ -793,7 +845,7 @@ out:
if (last_inode)
iput(last_inode);
if (old_cap)
- ceph_put_cap(old_cap);
+ ceph_put_cap(session->s_mdsc, old_cap);
return ret;
}
@@ -868,7 +920,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
{
struct ceph_inode_info *ci = ceph_inode(inode);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (arg) {
spin_lock(&inode->i_lock);
ci->i_wanted_max_size = 0;
@@ -1066,16 +1118,17 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
- struct ceph_msg *msg;
+ struct ceph_msg *msg, *partial = NULL;
struct ceph_mds_cap_release *head;
int err = -ENOMEM;
+ int extra = mdsc->client->mount_args->cap_release_safety;
+ int num;
- if (extra < 0)
- extra = mdsc->client->mount_args->cap_release_safety;
+ dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
+ extra);
spin_lock(&session->s_cap_lock);
@@ -1084,9 +1137,14 @@ static int add_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_msg,
list_head);
head = msg->front.iov_base;
- extra += CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num);
+ num = le32_to_cpu(head->num);
+ if (num) {
+ dout(" partial %p with (%d/%d)\n", msg, num,
+ (int)CEPH_CAPS_PER_RELEASE);
+ extra += CEPH_CAPS_PER_RELEASE - num;
+ partial = msg;
+ }
}
-
while (session->s_num_cap_releases < session->s_nr_caps + extra) {
spin_unlock(&session->s_cap_lock);
msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
@@ -1103,19 +1161,14 @@ static int add_cap_releases(struct ceph_mds_client *mdsc,
session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE;
}
- if (!list_empty(&session->s_cap_releases)) {
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg,
- list_head);
- head = msg->front.iov_base;
- if (head->num) {
- dout(" queueing non-full %p (%d)\n", msg,
- le32_to_cpu(head->num));
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- session->s_num_cap_releases -=
- CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num);
- }
+ if (partial) {
+ head = partial->front.iov_base;
+ num = le32_to_cpu(head->num);
+ dout(" queueing partial %p with %d/%d\n", partial, num,
+ (int)CEPH_CAPS_PER_RELEASE);
+ list_move_tail(&partial->list_head,
+ &session->s_cap_releases_done);
+ session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num;
}
err = 0;
spin_unlock(&session->s_cap_lock);
@@ -1176,8 +1229,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1250,6 +1303,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
return ERR_PTR(-ENOMEM);
mutex_init(&req->r_fill_mutex);
+ req->r_mdsc = mdsc;
req->r_started = jiffies;
req->r_resend_mds = -1;
INIT_LIST_HEAD(&req->r_unsafe_dir_item);
@@ -1514,6 +1568,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
+ /* make note of release offset, in case we need to replay */
+ req->r_request_release_offset = p - msg->front.iov_base;
+
/* cap releases */
releases = 0;
if (req->r_inode_drop)
@@ -1561,7 +1618,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
if (req->r_callback)
req->r_callback(mdsc, req);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
}
/*
@@ -1577,9 +1634,44 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
req->r_mds = mds;
req->r_attempts++;
+ if (req->r_inode) {
+ struct ceph_cap *cap =
+ ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
+
+ if (cap)
+ req->r_sent_on_mseq = cap->mseq;
+ else
+ req->r_sent_on_mseq = -1;
+ }
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+ if (req->r_got_unsafe) {
+ /*
+ * Replay. Do not regenerate message (and rebuild
+ * paths, etc.); just use the original message.
+ * Rebuilding paths will break for renames because
+ * d_move mangles the src name.
+ */
+ msg = req->r_request;
+ rhead = msg->front.iov_base;
+
+ flags = le32_to_cpu(rhead->flags);
+ flags |= CEPH_MDS_FLAG_REPLAY;
+ rhead->flags = cpu_to_le32(flags);
+
+ if (req->r_target_inode)
+ rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+ rhead->num_retry = req->r_attempts - 1;
+
+ /* remove cap/dentry releases from message */
+ rhead->num_releases = 0;
+ msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+ msg->front.iov_len = req->r_request_release_offset;
+ return 0;
+ }
+
if (req->r_request) {
ceph_msg_put(req->r_request);
req->r_request = NULL;
@@ -1601,13 +1693,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
+ rhead->ino = 0;
dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
- if (req->r_target_inode && req->r_got_unsafe)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- else
- rhead->ino = 0;
return 0;
}
@@ -1889,25 +1977,44 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
result = le32_to_cpu(head->result);
/*
- * Tolerate 2 consecutive ESTALEs from the same mds.
- * FIXME: we should be looking at the cap migrate_seq.
+ * Handle an ESTALE
+ * if we're not talking to the authority, send to them
+ * if the authority has changed while we weren't looking,
+ * send to new authority
+ * Otherwise we just have to return an ESTALE
*/
if (result == -ESTALE) {
- req->r_direct_mode = USE_AUTH_MDS;
- req->r_num_stale++;
- if (req->r_num_stale <= 2) {
+ dout("got ESTALE on request %llu", req->r_tid);
+ if (!req->r_inode) {
+ /* do nothing; not an authority problem */
+ } else if (req->r_direct_mode != USE_AUTH_MDS) {
+ dout("not using auth, setting for that now");
+ req->r_direct_mode = USE_AUTH_MDS;
__do_request(mdsc, req);
mutex_unlock(&mdsc->mutex);
goto out;
+ } else {
+ struct ceph_inode_info *ci = ceph_inode(req->r_inode);
+ struct ceph_cap *cap =
+ ceph_get_cap_for_mds(ci, req->r_mds);;
+
+ dout("already using auth");
+ if ((!cap || cap != ci->i_auth_cap) ||
+ (cap->mseq != req->r_sent_on_mseq)) {
+ dout("but cap changed, so resending");
+ __do_request(mdsc, req);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
}
- } else {
- req->r_num_stale = 0;
+ dout("have to return ESTALE on request %llu", req->r_tid);
}
+
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete(&req->r_safe_completion);
+ complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -1922,7 +2029,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
- complete(&mdsc->safe_umount_waiters);
+ complete_all(&mdsc->safe_umount_waiters);
mutex_unlock(&mdsc->mutex);
goto out;
}
@@ -1960,7 +2067,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (err == 0) {
if (result == 0 && rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
- ceph_unreserve_caps(&req->r_caps_reservation);
+ ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
mutex_unlock(&req->r_fill_mutex);
@@ -1980,7 +2087,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2101,7 +2208,7 @@ static void handle_session(struct ceph_mds_session *session,
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
wake = 1; /* for good measure */
- complete(&mdsc->session_close_waiters);
+ complete_all(&mdsc->session_close_waiters);
kick_requests(mdsc, mds);
break;
@@ -2168,9 +2275,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
void *arg)
{
- struct ceph_mds_cap_reconnect rec;
+ union {
+ struct ceph_mds_cap_reconnect v2;
+ struct ceph_mds_cap_reconnect_v1 v1;
+ } rec;
+ size_t reclen;
struct ceph_inode_info *ci;
- struct ceph_pagelist *pagelist = arg;
+ struct ceph_reconnect_state *recon_state = arg;
+ struct ceph_pagelist *pagelist = recon_state->pagelist;
char *path;
int pathlen, err;
u64 pathbase;
@@ -2203,17 +2315,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
spin_lock(&inode->i_lock);
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
- rec.cap_id = cpu_to_le64(cap->cap_id);
- rec.pathbase = cpu_to_le64(pathbase);
- rec.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
- rec.issued = cpu_to_le32(cap->issued);
- rec.size = cpu_to_le64(inode->i_size);
- ceph_encode_timespec(&rec.mtime, &inode->i_mtime);
- ceph_encode_timespec(&rec.atime, &inode->i_atime);
- rec.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+
+ if (recon_state->flock) {
+ rec.v2.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v2.issued = cpu_to_le32(cap->issued);
+ rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.flock_len = 0;
+ reclen = sizeof(rec.v2);
+ } else {
+ rec.v1.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v1.issued = cpu_to_le32(cap->issued);
+ rec.v1.size = cpu_to_le64(inode->i_size);
+ ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec(&rec.v1.atime, &inode->i_atime);
+ rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v1.pathbase = cpu_to_le64(pathbase);
+ reclen = sizeof(rec.v1);
+ }
spin_unlock(&inode->i_lock);
- err = ceph_pagelist_append(pagelist, &rec, sizeof(rec));
+ if (recon_state->flock) {
+ int num_fcntl_locks, num_flock_locks;
+
+ lock_kernel();
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ rec.v2.flock_len = (2*sizeof(u32) +
+ (num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock));
+
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ if (!err)
+ err = ceph_encode_locks(inode, pagelist,
+ num_fcntl_locks,
+ num_flock_locks);
+ unlock_kernel();
+ }
out:
kfree(path);
@@ -2242,6 +2381,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
int mds = session->s_mds;
int err = -ENOMEM;
struct ceph_pagelist *pagelist;
+ struct ceph_reconnect_state recon_state;
pr_info("mds%d reconnect start\n", mds);
@@ -2276,7 +2416,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
if (err)
goto fail;
- err = iterate_session_caps(session, encode_caps_cb, pagelist);
+
+ recon_state.pagelist = pagelist;
+ recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
+ err = iterate_session_caps(session, encode_caps_cb, &recon_state);
if (err < 0)
goto fail;
@@ -2301,6 +2444,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
}
reply->pagelist = pagelist;
+ if (recon_state.flock)
+ reply->hdr.version = cpu_to_le16(2);
reply->hdr.data_len = cpu_to_le32(pagelist->length);
reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply);
@@ -2351,9 +2496,11 @@ static void check_new_map(struct ceph_mds_client *mdsc,
oldstate = ceph_mdsmap_get_state(oldmap, i);
newstate = ceph_mdsmap_get_state(newmap, i);
- dout("check_new_map mds%d state %s -> %s (session %s)\n",
+ dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
i, ceph_mds_state_name(oldstate),
+ ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
ceph_mds_state_name(newstate),
+ ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
session_state_name(s->s_state));
if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
@@ -2403,6 +2550,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
wake_up_session_caps(s, 1);
}
}
+
+ for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+ s = mdsc->sessions[i];
+ if (!s)
+ continue;
+ if (!ceph_mdsmap_is_laggy(newmap, i))
+ continue;
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG ||
+ s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout(" connecting to export targets of laggy mds%d\n",
+ i);
+ __open_export_target_sessions(mdsc, s);
+ }
+ }
}
@@ -2433,6 +2595,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2446,6 +2609,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2456,8 +2620,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2482,7 +2647,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2496,7 +2662,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2686,10 +2852,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -2735,6 +2901,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
spin_lock_init(&mdsc->dentry_lru_lock);
INIT_LIST_HEAD(&mdsc->dentry_lru);
+ ceph_caps_init(mdsc);
+ ceph_adjust_min_caps(mdsc, client->min_caps);
+
return 0;
}
@@ -2779,6 +2948,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
@@ -2924,6 +3099,7 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
if (mdsc->mdsmap)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
+ ceph_caps_finalize(mdsc);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f121..ab7e89f5e34 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -151,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
struct ceph_mds_request {
u64 r_tid; /* transaction id */
struct rb_node r_node;
+ struct ceph_mds_client *r_mdsc;
int r_op; /* mds op code */
int r_mds;
@@ -188,6 +189,7 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
int r_err;
@@ -206,8 +208,8 @@ struct ceph_mds_request {
int r_attempts; /* resend attempts */
int r_num_fwd; /* number of forward attempts */
- int r_num_stale;
int r_resend_mds; /* mds to resend to next, if any*/
+ u32 r_sent_on_mseq; /* cap mseq request was sent at*/
struct kref r_kref;
struct list_head r_wait;
@@ -266,6 +268,27 @@ struct ceph_mds_client {
spinlock_t cap_dirty_lock; /* protects above items */
wait_queue_head_t cap_flushing_wq;
+ /*
+ * Cap reservations
+ *
+ * Maintain a global pool of preallocated struct ceph_caps, referenced
+ * by struct ceph_caps_reservations. This ensures that we preallocate
+ * memory needed to successfully process an MDS response. (If an MDS
+ * sends us cap information and we fail to process it, we will have
+ * problems due to the client and MDS being out of sync.)
+ *
+ * Reservations are 'owned' by a ceph_cap_reservation context.
+ */
+ spinlock_t caps_list_lock;
+ struct list_head caps_list; /* unused (reserved or
+ unreserved) */
+ int caps_total_count; /* total caps allocated */
+ int caps_use_count; /* in use */
+ int caps_reserve_count; /* unused, reserved */
+ int caps_avail_count; /* unused, unreserved */
+ int caps_min_count; /* keep at least this many
+ (unreserved) */
+
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_file;
#endif
@@ -322,6 +345,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
@@ -336,4 +364,7 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
struct ceph_msg *msg);
+extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
#endif
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index c4c498e6dfe..040be6d1150 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -85,6 +85,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_entity_addr addr;
u32 num_export_targets;
void *pexport_targets = NULL;
+ struct ceph_timespec laggy_since;
ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad);
global_id = ceph_decode_64(p);
@@ -103,7 +104,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
state_seq = ceph_decode_64(p);
ceph_decode_copy(p, &addr, sizeof(addr));
ceph_decode_addr(&addr);
- *p += sizeof(struct ceph_timespec);
+ ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
*p += namelen;
@@ -122,6 +123,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_info[mds].global_id = global_id;
m->m_info[mds].state = state;
m->m_info[mds].addr = addr;
+ m->m_info[mds].laggy =
+ (laggy_since.tv_sec != 0 ||
+ laggy_since.tv_nsec != 0);
m->m_info[mds].num_export_targets = num_export_targets;
if (num_export_targets) {
m->m_info[mds].export_targets =
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h
index eacc131aa5c..4c5cb0880bb 100644
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -13,6 +13,7 @@ struct ceph_mds_info {
struct ceph_entity_addr addr;
s32 state;
int num_export_targets;
+ bool laggy;
u32 *export_targets;
};
@@ -47,6 +48,13 @@ static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
return m->m_info[w].state;
}
+static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
+{
+ if (w >= 0 && w < m->m_max_mds)
+ return m->m_info[w].laggy;
+ return false;
+}
+
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863..2502d76fcec 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
* nicely render a sockaddr as a string.
*/
#define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
int i;
char *s;
struct sockaddr_in *in4 = (void *)ss;
- unsigned char *quad = (void *)&in4->sin_addr.s_addr;
struct sockaddr_in6 *in6 = (void *)ss;
spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
switch (ss->ss_family) {
case AF_INET:
- sprintf(s, "%u.%u.%u.%u:%u",
- (unsigned int)quad[0],
- (unsigned int)quad[1],
- (unsigned int)quad[2],
- (unsigned int)quad[3],
- (unsigned int)ntohs(in4->sin_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+ (unsigned int)ntohs(in4->sin_port));
break;
case AF_INET6:
- sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
- in6->sin6_addr.s6_addr16[0],
- in6->sin6_addr.s6_addr16[1],
- in6->sin6_addr.s6_addr16[2],
- in6->sin6_addr.s6_addr16[3],
- in6->sin6_addr.s6_addr16[4],
- in6->sin6_addr.s6_addr16[5],
- in6->sin6_addr.s6_addr16[6],
- in6->sin6_addr.s6_addr16[7],
- (unsigned int)ntohs(in6->sin6_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+ (unsigned int)ntohs(in6->sin6_port));
break;
default:
@@ -120,7 +108,7 @@ void ceph_msgr_exit(void)
destroy_workqueue(ceph_msgr_wq);
}
-void ceph_msgr_flush()
+void ceph_msgr_flush(void)
{
flush_workqueue(ceph_msgr_wq);
}
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
*/
static struct socket *ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
if (ret)
return ERR_PTR(ret);
con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
- ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
@@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
- ',', &ipend)) {
+ delim, &ipend))
ss->ss_family = AF_INET;
- } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
- ',', &ipend)) {
+ else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+ delim, &ipend))
ss->ss_family = AF_INET6;
- } else {
+ else
goto bad;
- }
p = ipend;
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
/* port? */
if (p < end && *p == ':') {
port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
return 0;
bad:
- pr_err("parse_ips bad ip '%s'\n", c);
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
@@ -1078,11 +1081,11 @@ static int process_banner(struct ceph_connection *con)
sizeof(con->peer_addr)) != 0 &&
!(addr_is_blank(&con->actual_peer_addr.in_addr) &&
con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
- pr_warning("wrong peer, want %s/%lld, got %s/%lld\n",
+ pr_warning("wrong peer, want %s/%d, got %s/%d\n",
pr_addr(&con->peer_addr.in_addr),
- le64_to_cpu(con->peer_addr.nonce),
+ (int)le32_to_cpu(con->peer_addr.nonce),
pr_addr(&con->actual_peer_addr.in_addr),
- le64_to_cpu(con->actual_peer_addr.nonce));
+ (int)le32_to_cpu(con->actual_peer_addr.nonce));
con->error_msg = "wrong peer at address";
return -1;
}
@@ -1120,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con)
static int process_connect(struct ceph_connection *con)
{
- u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT;
- u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT;
+ u64 sup_feat = CEPH_FEATURE_SUPPORTED;
+ u64 req_feat = CEPH_FEATURE_REQUIRED;
u64 server_feat = le64_to_cpu(con->in_reply.features);
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1299,8 +1302,8 @@ static void process_ack(struct ceph_connection *con)
static int read_partial_message_section(struct ceph_connection *con,
- struct kvec *section, unsigned int sec_len,
- u32 *crc)
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
{
int left;
int ret;
@@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1429,7 +1434,8 @@ static int read_partial_message(struct ceph_connection *con)
/* middle */
if (m->middle) {
- ret = read_partial_message_section(con, &m->middle->vec, middle_len,
+ ret = read_partial_message_section(con, &m->middle->vec,
+ middle_len,
&con->in_middle_crc);
if (ret <= 0)
return ret;
@@ -1915,7 +1921,7 @@ out:
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
- */
+ */
if (con->auth_retry && con->ops->invalidate_authorizer) {
dout("calling invalidate_authorizer()\n");
con->ops->invalidate_authorizer(con);
@@ -2013,20 +2019,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p\n", con, msg);
+ dout("con_revoke %p msg %p - was on queue\n", con, msg);
list_del_init(&msg->list_head);
ceph_msg_put(msg);
msg->hdr.seq = 0;
- if (con->out_msg == msg) {
- ceph_msg_put(con->out_msg);
- con->out_msg = NULL;
- }
+ }
+ if (con->out_msg == msg) {
+ dout("con_revoke %p msg %p - was sending\n", con, msg);
+ con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- } else {
- dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
}
mutex_unlock(&con->mutex);
}
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 21c62e9b7d1..b2a5a3e4a67 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -345,11 +345,11 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
out:
mutex_unlock(&monc->mutex);
- wake_up(&client->auth_wq);
+ wake_up_all(&client->auth_wq);
}
/*
- * statfs
+ * generic requests (e.g., statfs, poolop)
*/
static struct ceph_mon_generic_request *__lookup_generic_req(
struct ceph_mon_client *monc, u64 tid)
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -440,6 +442,35 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
return m;
}
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ /* register request */
+ mutex_lock(&monc->mutex);
+ req->tid = ++monc->last_tid;
+ req->request->hdr.tid = cpu_to_le64(req->tid);
+ __insert_generic_request(monc, req);
+ monc->num_generic_requests++;
+ ceph_con_send(monc->con, ceph_msg_get(req->request));
+ mutex_unlock(&monc->mutex);
+
+ err = wait_for_completion_interruptible(&req->completion);
+
+ mutex_lock(&monc->mutex);
+ rb_erase(&req->node, &monc->generic_request_tree);
+ monc->num_generic_requests--;
+ mutex_unlock(&monc->mutex);
+
+ if (!err)
+ err = req->result;
+ return err;
+}
+
+/*
+ * statfs
+ */
static void handle_statfs_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
@@ -460,13 +491,13 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
}
mutex_unlock(&monc->mutex);
if (req) {
- complete(&req->completion);
+ complete_all(&req->completion);
put_generic_request(req);
}
return;
bad:
- pr_err("corrupt generic reply, no tid\n");
+ pr_err("corrupt generic reply, tid %llu\n", tid);
ceph_msg_dump(msg);
}
@@ -485,6 +516,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
kref_init(&req->kref);
req->buf = buf;
+ req->buf_len = sizeof(*buf);
init_completion(&req->completion);
err = -ENOMEM;
@@ -502,33 +534,134 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
h->monhdr.session_mon_tid = 0;
h->fsid = monc->monmap->fsid;
- /* register request */
- mutex_lock(&monc->mutex);
- req->tid = ++monc->last_tid;
- req->request->hdr.tid = cpu_to_le64(req->tid);
- __insert_generic_request(monc, req);
- monc->num_generic_requests++;
- mutex_unlock(&monc->mutex);
+ err = do_generic_request(monc, req);
- /* send request and wait */
- ceph_con_send(monc->con, ceph_msg_get(req->request));
- err = wait_for_completion_interruptible(&req->completion);
+out:
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+
+/*
+ * pool ops
+ */
+static int get_poolop_reply_buf(const char *src, size_t src_len,
+ char *dst, size_t dst_len)
+{
+ u32 buf_len;
+
+ if (src_len != sizeof(u32) + dst_len)
+ return -EINVAL;
+
+ buf_len = le32_to_cpu(*(u32 *)src);
+ if (buf_len != dst_len)
+ return -EINVAL;
+
+ memcpy(dst, src + sizeof(u32), dst_len);
+ return 0;
+}
+
+static void handle_poolop_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop_reply *reply = msg->front.iov_base;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ if (msg->front.iov_len < sizeof(*reply))
+ goto bad;
+ dout("handle_poolop_reply %p tid %llu\n", msg, tid);
mutex_lock(&monc->mutex);
- rb_erase(&req->node, &monc->generic_request_tree);
- monc->num_generic_requests--;
+ req = __lookup_generic_req(monc, tid);
+ if (req) {
+ if (req->buf_len &&
+ get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply),
+ msg->front.iov_len - sizeof(*reply),
+ req->buf, req->buf_len) < 0) {
+ mutex_unlock(&monc->mutex);
+ goto bad;
+ }
+ req->result = le32_to_cpu(reply->reply_code);
+ get_generic_request(req);
+ }
mutex_unlock(&monc->mutex);
+ if (req) {
+ complete(&req->completion);
+ put_generic_request(req);
+ }
+ return;
- if (!err)
- err = req->result;
+bad:
+ pr_err("corrupt generic reply, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Do a synchronous pool op.
+ */
+int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op,
+ u32 pool, u64 snapid,
+ char *buf, int len)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop *h;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = buf;
+ req->buf_len = len;
+ init_completion(&req->completion);
+
+ err = -ENOMEM;
+ req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS);
+ if (!req->request)
+ goto out;
+ req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS);
+ if (!req->reply)
+ goto out;
+
+ /* fill out request */
+ req->request->hdr.version = cpu_to_le16(2);
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+ h->pool = cpu_to_le32(pool);
+ h->op = cpu_to_le32(op);
+ h->auid = 0;
+ h->snapid = cpu_to_le64(snapid);
+ h->name_len = 0;
+
+ err = do_generic_request(monc, req);
out:
kref_put(&req->kref, release_generic_request);
return err;
}
+int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid)
+{
+ return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ pool, 0, (char *)snapid, sizeof(*snapid));
+
+}
+
+int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid)
+{
+ return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ pool, snapid, 0, 0);
+
+}
+
/*
- * Resend pending statfs requests.
+ * Resend pending generic requests.
*/
static void __resend_generic_request(struct ceph_mon_client *monc)
{
@@ -716,14 +849,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
monc->m_auth->front_max);
if (ret < 0) {
monc->client->auth_err = ret;
- wake_up(&monc->client->auth_wq);
+ wake_up_all(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
@@ -780,6 +914,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_statfs_reply(monc, msg);
break;
+ case CEPH_MSG_POOLOP_REPLY:
+ handle_poolop_reply(monc, msg);
+ break;
+
case CEPH_MSG_MON_MAP:
ceph_monc_handle_map(monc, msg);
break;
@@ -817,6 +955,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MON_SUBSCRIBE_ACK:
m = ceph_msg_get(monc->m_subscribe_ack);
break;
+ case CEPH_MSG_POOLOP_REPLY:
case CEPH_MSG_STATFS_REPLY:
return get_generic_reply(con, hdr, skip);
case CEPH_MSG_AUTH_REPLY:
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h
index 174d794321d..8e396f2c096 100644
--- a/fs/ceph/mon_client.h
+++ b/fs/ceph/mon_client.h
@@ -50,6 +50,7 @@ struct ceph_mon_generic_request {
struct rb_node node;
int result;
void *buf;
+ int buf_len;
struct completion completion;
struct ceph_msg *request; /* original request */
struct ceph_msg *reply; /* and reply */
@@ -111,6 +112,10 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc);
extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
+extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid);
+extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid);
#endif
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h
index 892a0298dfd..680d3d648ca 100644
--- a/fs/ceph/msgr.h
+++ b/fs/ceph/msgr.h
@@ -1,5 +1,5 @@
-#ifndef __MSGR_H
-#define __MSGR_H
+#ifndef CEPH_MSGR_H
+#define CEPH_MSGR_H
/*
* Data types for message passing layer used by Ceph.
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b..bed6391e52c 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
if (req->r_callback)
req->r_callback(req, msg);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
if (flags & CEPH_OSD_FLAG_ONDISK) {
if (req->r_safe_callback)
req->r_safe_callback(req, msg);
- complete(&req->r_safe_completion); /* fsync waiter */
+ complete_all(&req->r_safe_completion); /* fsync waiter */
}
done:
@@ -1083,7 +1083,7 @@ done:
if (newmap)
kick_requests(osdc, NULL);
up_read(&osdc->map_sem);
- wake_up(&osdc->client->auth_wq);
+ wake_up_all(&osdc->client->auth_wq);
return;
bad:
@@ -1276,8 +1276,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
/* it may be a short read due to an object boundary */
req->r_pages = pages;
- num_pages = calc_pages_for(off, *plen);
- req->r_num_pages = num_pages;
dout("readpages final extent is %llu~%llu (%d pages)\n",
off, *plen, req->r_num_pages);
@@ -1319,7 +1317,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
/* it may be a short write due to an object boundary */
req->r_pages = pages;
- req->r_num_pages = calc_pages_for(off, len);
dout("writepages %llu~%llu (%d pages)\n", off, len,
req->r_num_pages);
@@ -1344,7 +1341,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1356,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
@@ -1475,8 +1473,8 @@ static void put_osd_con(struct ceph_connection *con)
* authentication
*/
static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+ void **buf, int *len, int *proto,
+ void **reply_buf, int *reply_len, int force_new)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
@@ -1496,7 +1494,7 @@ static int get_authorizer(struct ceph_connection *con,
&o->o_authorizer_reply_buf,
&o->o_authorizer_reply_buf_len);
if (ret)
- return ret;
+ return ret;
}
*proto = ac->protocol;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c0..e31f118f139 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -424,12 +424,30 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
kfree(pi);
}
-void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
+static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
{
+ unsigned n, m;
+
ceph_decode_copy(p, &pi->v, sizeof(pi->v));
calc_pg_masks(pi);
- *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
+
+ /* num_snaps * snap_info_t */
+ n = le32_to_cpu(pi->v.num_snaps);
+ while (n--) {
+ ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) +
+ sizeof(struct ceph_timespec), bad);
+ *p += sizeof(u64) + /* key */
+ 1 + sizeof(u64) + /* u8, snapid */
+ sizeof(struct ceph_timespec);
+ m = ceph_decode_32(p); /* snap name */
+ *p += m;
+ }
+
*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+ return 0;
+
+bad:
+ return -EINVAL;
}
static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
@@ -568,9 +586,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION);
+ kfree(pi);
goto bad;
}
- __decode_pool(p, pi);
+ err = __decode_pool(p, end, pi);
+ if (err < 0)
+ goto bad;
__insert_pg_pool(&map->pg_pools, pi);
}
@@ -707,6 +728,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
@@ -758,7 +780,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
pi->id = pool;
__insert_pg_pool(&map->pg_pools, pi);
}
- __decode_pool(p, pi);
+ err = __decode_pool(p, end, pi);
+ if (err < 0)
+ goto bad;
}
if (version >= 5 && __decode_pool_names(p, end, map) < 0)
goto bad;
@@ -829,12 +853,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
/* remove any? */
while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
node)->pgid, pgid) <= 0) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
if (pglen) {
@@ -850,19 +875,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
for (j = 0; j < pglen; j++)
pg->osds[j] = ceph_decode_32(p);
err = __insert_pg_mapping(pg, &map->pg_temp);
- if (err)
+ if (err) {
+ kfree(pg);
goto bad;
+ }
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen);
}
}
while (rbp) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
/* ignore the rest */
@@ -1020,8 +1048,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
pool->v.type, pool->v.size);
if (ruleno < 0) {
- pr_err("no crush rule pool %d type %d size %d\n",
- poolid, pool->v.type, pool->v.size);
+ pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
+ poolid, pool->v.crush_ruleset, pool->v.type,
+ pool->v.size);
return NULL;
}
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 8fcc023056c..6d5247f2e81 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -1,5 +1,5 @@
-#ifndef __RADOS_H
-#define __RADOS_H
+#ifndef CEPH_RADOS_H
+#define CEPH_RADOS_H
/*
* Data types for the Ceph distributed object storage layer RADOS
@@ -203,6 +203,7 @@ enum {
CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
+ CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
/** attrs **/
/* read */
@@ -272,6 +273,10 @@ static inline int ceph_osd_op_mode_modify(int op)
return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
}
+/*
+ * note that the following tmap stuff is also defined in the ceph librados.h
+ * any modification here needs to be updated there
+ */
#define CEPH_OSD_TMAP_HDR 'h'
#define CEPH_OSD_TMAP_SET 's'
#define CEPH_OSD_TMAP_RM 'r'
@@ -297,6 +302,7 @@ enum {
CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */
CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */
+ CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
};
enum {
@@ -350,6 +356,9 @@ struct ceph_osd_op {
struct {
__le64 cookie, count;
} __attribute__ ((packed)) pgls;
+ struct {
+ __le64 snapid;
+ } __attribute__ ((packed)) snap;
};
__le32 payload_len;
} __attribute__ ((packed));
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e0bee240b9..9922628532b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -2,6 +2,7 @@
#include "ceph_debug.h"
#include <linux/backing-dev.h>
+#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/inet.h>
#include <linux/in6.h>
@@ -89,7 +90,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -101,12 +102,21 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
}
-static int ceph_syncfs(struct super_block *sb, int wait)
+static int ceph_sync_fs(struct super_block *sb, int wait)
{
- dout("sync_fs %d\n", wait);
+ struct ceph_client *client = ceph_sb_to_client(sb);
+
+ if (!wait) {
+ dout("sync_fs (non-blocking)\n");
+ ceph_flush_dirty_caps(&client->mdsc);
+ dout("sync_fs (non-blocking) done\n");
+ return 0;
+ }
+
+ dout("sync_fs (blocking)\n");
ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc);
ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc);
- dout("sync_fs %d done\n", wait);
+ dout("sync_fs (blocking) done\n");
return 0;
}
@@ -150,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
struct ceph_mount_args *args = client->mount_args;
if (args->flags & CEPH_OPT_FSID)
- seq_printf(m, ",fsidmajor=%llu,fsidminor%llu",
- le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]),
- le64_to_cpu(*(__le64 *)&args->fsid.fsid[8]));
+ seq_printf(m, ",fsid=%pU", &args->fsid);
if (args->flags & CEPH_OPT_NOSHARE)
seq_puts(m, ",noshare");
if (args->flags & CEPH_OPT_DIRSTAT)
@@ -279,7 +287,7 @@ static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
- .sync_fs = ceph_syncfs,
+ .sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
@@ -322,9 +330,6 @@ const char *ceph_msg_type_name(int type)
* mount options
*/
enum {
- Opt_fsidmajor,
- Opt_fsidminor,
- Opt_monport,
Opt_wsize,
Opt_rsize,
Opt_osdtimeout,
@@ -339,6 +344,7 @@ enum {
Opt_congestion_kb,
Opt_last_int,
/* int args above */
+ Opt_fsid,
Opt_snapdirname,
Opt_name,
Opt_secret,
@@ -355,9 +361,6 @@ enum {
};
static match_table_t arg_tokens = {
- {Opt_fsidmajor, "fsidmajor=%ld"},
- {Opt_fsidminor, "fsidminor=%ld"},
- {Opt_monport, "monport=%d"},
{Opt_wsize, "wsize=%d"},
{Opt_rsize, "rsize=%d"},
{Opt_osdtimeout, "osdtimeout=%d"},
@@ -371,6 +374,7 @@ static match_table_t arg_tokens = {
{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
{Opt_congestion_kb, "write_congestion_kb=%d"},
/* int args above */
+ {Opt_fsid, "fsid=%s"},
{Opt_snapdirname, "snapdirname=%s"},
{Opt_name, "name=%s"},
{Opt_secret, "secret=%s"},
@@ -386,6 +390,36 @@ static match_table_t arg_tokens = {
{-1, NULL}
};
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+ int i = 0;
+ char tmp[3];
+ int err = -EINVAL;
+ int d;
+
+ dout("parse_fsid '%s'\n", str);
+ tmp[2] = 0;
+ while (*str && i < 16) {
+ if (ispunct(*str)) {
+ str++;
+ continue;
+ }
+ if (!isxdigit(str[0]) || !isxdigit(str[1]))
+ break;
+ tmp[0] = str[0];
+ tmp[1] = str[1];
+ if (sscanf(tmp, "%x", &d) < 1)
+ break;
+ fsid->fsid[i] = d & 0xff;
+ i++;
+ str += 2;
+ }
+
+ if (i == 16)
+ err = 0;
+ dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ return err;
+}
static struct ceph_mount_args *parse_mount_args(int flags, char *options,
const char *dev_name,
@@ -469,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
dout("got token %d\n", token);
}
switch (token) {
- case Opt_fsidmajor:
- *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval);
- break;
- case Opt_fsidminor:
- *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval);
- break;
case Opt_ip:
err = ceph_parse_ips(argstr[0].from,
argstr[0].to,
@@ -485,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
args->flags |= CEPH_OPT_MYIP;
break;
+ case Opt_fsid:
+ err = parse_fsid(argstr[0].from, &args->fsid);
+ if (err == 0)
+ args->flags |= CEPH_OPT_FSID;
+ break;
case Opt_snapdirname:
kfree(args->snapdir_name);
args->snapdir_name = kstrndup(argstr[0].from,
@@ -515,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options,
case Opt_osdkeepalivetimeout:
args->osd_keepalive_timeout = intval;
break;
+ case Opt_osd_idle_ttl:
+ args->osd_idle_ttl = intval;
+ break;
case Opt_mount_timeout:
args->mount_timeout = intval;
break;
@@ -630,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args)
/* caps */
client->min_caps = args->max_readdir;
- ceph_adjust_min_caps(client->min_caps);
/* subsystems */
err = ceph_monc_init(&client->monc, client);
@@ -680,8 +715,6 @@ static void ceph_destroy_client(struct ceph_client *client)
ceph_monc_stop(&client->monc);
- ceph_adjust_min_caps(-client->min_caps);
-
ceph_debugfs_client_cleanup(client);
destroy_workqueue(client->wb_wq);
destroy_workqueue(client->pg_inv_wq);
@@ -706,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
{
if (client->have_fsid) {
if (ceph_fsid_compare(&client->fsid, fsid)) {
- pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT,
- PR_FSID(&client->fsid), PR_FSID(fsid));
+ pr_err("bad fsid, had %pU got %pU",
+ &client->fsid, fsid);
return -1;
}
} else {
- pr_info("client%lld fsid " FSID_FORMAT "\n",
- client->monc.auth->global_id, PR_FSID(fsid));
+ pr_info("client%lld fsid %pU\n", client->monc.auth->global_id,
+ fsid);
memcpy(&client->fsid, fsid, sizeof(*fsid));
ceph_debugfs_client_init(client);
client->have_fsid = true;
@@ -926,7 +959,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
@@ -1043,8 +1076,6 @@ static int __init init_ceph(void)
if (ret)
goto out_msgr;
- ceph_caps_init();
-
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_icache;
@@ -1069,7 +1100,6 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
- ceph_caps_finalize();
destroy_caches();
ceph_msgr_exit();
ceph_debugfs_cleanup();
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 10a4a406e88..2482d696f0d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -31,6 +31,12 @@
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
/*
+ * Supported features
+ */
+#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK
+#define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR
+
+/*
* mount options
*/
#define CEPH_OPT_FSID (1<<0)
@@ -560,11 +566,13 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
/* what the mds thinks we want */
extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
-extern void ceph_caps_init(void);
-extern void ceph_caps_finalize(void);
-extern void ceph_adjust_min_caps(int delta);
-extern int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need);
-extern int ceph_unreserve_caps(struct ceph_cap_reservation *ctx);
+extern void ceph_caps_init(struct ceph_mds_client *mdsc);
+extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
+extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
+extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need);
+extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx);
extern void ceph_reservation_status(struct ceph_client *client,
int *total, int *avail, int *used,
int *reserved, int *min);
@@ -738,13 +746,6 @@ extern struct kmem_cache *ceph_file_cachep;
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-#define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \
- "%02x%02x%02x%02x%02x%02x"
-#define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \
- (f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \
- (f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \
- (f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15]
-
/* inode.c */
extern const struct inode_operations ceph_file_iops;
@@ -806,13 +807,16 @@ static inline void ceph_remove_cap(struct ceph_cap *cap)
__ceph_remove_cap(cap);
spin_unlock(&inode->i_lock);
}
-extern void ceph_put_cap(struct ceph_cap *cap);
+extern void ceph_put_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap *cap);
extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
extern int ceph_fsync(struct file *file, int datasync);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
+extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
+ int mds);
extern int ceph_get_cap_mds(struct inode *inode);
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
@@ -857,7 +861,7 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages);
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct inode_operations ceph_dir_iops;
-extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
+extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
ceph_snapdir_dentry_ops;
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
@@ -888,6 +892,14 @@ extern void ceph_debugfs_cleanup(void);
extern int ceph_debugfs_client_init(struct ceph_client *client);
extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+/* locks.c */
+extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
+extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
+extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
+extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
+ int p_locks, int f_locks);
+extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
+
static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
{
if (dentry && dentry->d_parent)
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 68aeebc6968..097a2654c00 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -337,6 +337,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
}
static int __build_xattrs(struct inode *inode)
+ __releases(inode->i_lock)
+ __acquires(inode->i_lock)
{
u32 namelen;
u32 numattr = 0;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d6db933df2b..f80a4f25123 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -20,6 +20,7 @@
#include <linux/cdev.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/tty.h>
#include "internal.h"
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 80f35259680..917b7d449bb 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,7 +2,6 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
- select SLOW_WORK
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
@@ -71,14 +70,14 @@ config CIFS_WEAK_PW_HASH
If unsure, say N.
config CIFS_UPCALL
- bool "Kerberos/SPNEGO advanced session setup"
- depends on CIFS && KEYS
- help
- Enables an upcall mechanism for CIFS which accesses
- userspace helper utilities to provide SPNEGO packaged (RFC 4178)
- Kerberos tickets which are needed to mount to certain secure servers
- (for which more secure Kerberos authentication is required). If
- unsure, say N.
+ bool "Kerberos/SPNEGO advanced session setup"
+ depends on CIFS && KEYS
+ select DNS_RESOLVER
+ help
+ Enables an upcall mechanism for CIFS which accesses userspace helper
+ utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
+ which are needed to mount to certain secure servers (for which more
+ secure Kerberos authentication is required). If unsure, say N.
config CIFS_XATTR
bool "CIFS extended attributes"
@@ -122,6 +121,7 @@ config CIFS_DEBUG2
config CIFS_DFS_UPCALL
bool "DFS feature support"
depends on CIFS && KEYS
+ select DNS_RESOLVER
help
Distributed File System (DFS) support is used to access shares
transparently in an enterprise name space, even if the share
@@ -131,6 +131,15 @@ config CIFS_DFS_UPCALL
IP addresses) which is needed for implicit mounts of DFS junction
points. If unsure, say N.
+config CIFS_FSCACHE
+ bool "Provide CIFS client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
+ help
+ Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
+ to be cached locally on disk through the general filesystem cache
+ manager. If unsure, say N.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 9948c0030e8..adefa60a9bd 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -11,3 +11,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
+
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
diff --git a/fs/cifs/README b/fs/cifs/README
index a727b7cb075..7099a526f77 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -301,6 +301,16 @@ A partial list of the supported mount options follows:
gid Set the default gid for inodes (similar to above).
file_mode If CIFS Unix extensions are not supported by the server
this overrides the default mode for file inodes.
+ fsc Enable local disk caching using FS-Cache (off by default). This
+ option could be useful to improve performance on a slow link,
+ heavily loaded server and/or network where reading from the
+ disk is faster than reading from the server (over the network).
+ This could also impact scalability positively as the
+ number of calls to the server are reduced. However, local
+ caching is not suitable for all workloads for e.g. read-once
+ type workloads. So, you need to consider carefully your
+ workload/scenario before using this option. Currently, local
+ disk caching is functional for CIFS files opened as read-only.
dir_mode If CIFS Unix extensions are not supported by the server
this overrides the default mode for directory inodes.
port attempt to contact the server on this tcp port, before
@@ -568,8 +578,9 @@ module can be displayed via modinfo.
Misc /proc/fs/cifs Flags and Debug Info
=======================================
Informational pseudo-files:
-DebugData Displays information about active CIFS sessions
- and shares, as well as the cifs.ko version.
+DebugData Displays information about active CIFS sessions and
+ shares, features enabled as well as the cifs.ko
+ version.
Stats Lists summary resource usage information as well as per
share statistics, if CONFIG_CIFS_STATS in enabled
in the kernel configuration.
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
new file mode 100644
index 00000000000..224d7bbd1fc
--- /dev/null
+++ b/fs/cifs/cache.c
@@ -0,0 +1,331 @@
+/*
+ * fs/cifs/cache.c - CIFS filesystem cache index structure definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifs_debug.h"
+
+/*
+ * CIFS filesystem definition for FS-Cache
+ */
+struct fscache_netfs cifs_fscache_netfs = {
+ .name = "cifs",
+ .version = 0,
+};
+
+/*
+ * Register CIFS for caching with FS-Cache
+ */
+int cifs_fscache_register(void)
+{
+ return fscache_register_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Unregister CIFS for caching
+ */
+void cifs_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Key layout of CIFS server cache index object
+ */
+struct cifs_server_key {
+ uint16_t family; /* address family */
+ uint16_t port; /* IP port */
+ union {
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ } addr[0];
+};
+
+/*
+ * Server object keyed by {IPaddress,port,family} tuple
+ */
+static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct TCP_Server_Info *server = cookie_netfs_data;
+ const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
+ struct cifs_server_key *key = buffer;
+ uint16_t key_len = sizeof(struct cifs_server_key);
+
+ memset(key, 0, key_len);
+
+ /*
+ * Should not be a problem as sin_family/sin6_family overlays
+ * sa_family field
+ */
+ switch (sa->sa_family) {
+ case AF_INET:
+ key->family = server->addr.sockAddr.sin_family;
+ key->port = server->addr.sockAddr.sin_port;
+ key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
+ key_len += sizeof(key->addr[0].ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key->family = server->addr.sockAddr6.sin6_family;
+ key->port = server->addr.sockAddr6.sin6_port;
+ key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
+ key_len += sizeof(key->addr[0].ipv6_addr);
+ break;
+
+ default:
+ cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family);
+ key_len = 0;
+ break;
+ }
+
+ return key_len;
+}
+
+/*
+ * Server object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_server_index_def = {
+ .name = "CIFS.server",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_server_get_key,
+};
+
+/*
+ * Auxiliary data attached to CIFS superblock within the cache
+ */
+struct cifs_fscache_super_auxdata {
+ u64 resource_id; /* unique server resource id */
+};
+
+static char *extract_sharename(const char *treename)
+{
+ const char *src;
+ char *delim, *dst;
+ int len;
+
+ /* skip double chars at the beginning */
+ src = treename + 2;
+
+ /* share name is always preceded by '\\' now */
+ delim = strchr(src, '\\');
+ if (!delim)
+ return ERR_PTR(-EINVAL);
+ delim++;
+ len = strlen(delim);
+
+ /* caller has to free the memory */
+ dst = kstrndup(delim, len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ return dst;
+}
+
+/*
+ * Superblock object currently keyed by share name
+ */
+static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+ char *sharename;
+ uint16_t len;
+
+ sharename = extract_sharename(tcon->treeName);
+ if (IS_ERR(sharename)) {
+ cFYI(1, "CIFS: couldn't extract sharename\n");
+ sharename = NULL;
+ return 0;
+ }
+
+ len = strlen(sharename);
+ if (len > maxbuf)
+ return 0;
+
+ memcpy(buffer, sharename, len);
+
+ kfree(sharename);
+
+ return len;
+}
+
+static uint16_t
+cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * Superblock object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_super_index_def = {
+ .name = "CIFS.super",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_super_get_key,
+ .get_aux = cifs_fscache_super_get_aux,
+ .check_aux = cifs_fscache_super_check_aux,
+};
+
+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+ struct timespec last_write_time;
+ struct timespec last_change_time;
+ u64 eof;
+};
+
+static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ uint16_t keylen;
+
+ /* use the UniqueId as the key */
+ keylen = sizeof(cifsi->uniqueid);
+ if (keylen > maxbuf)
+ keylen = 0;
+ else
+ memcpy(buffer, &cifsi->uniqueid, keylen);
+
+ return keylen;
+}
+
+static void
+cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ *size = cifsi->vfs_inode.i_size;
+}
+
+static uint16_t
+cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ cFYI(1, "cifs inode 0x%p now uncached", cifsi);
+
+ for (;;) {
+ nr_pages = pagevec_lookup(&pvec,
+ cifsi->vfs_inode.i_mapping, first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+const struct fscache_cookie_def cifs_fscache_inode_object_def = {
+ .name = "CIFS.uniqueid",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = cifs_fscache_inode_get_key,
+ .get_attr = cifs_fscache_inode_get_attr,
+ .get_aux = cifs_fscache_inode_get_aux,
+ .check_aux = cifs_fscache_inode_check_aux,
+ .now_uncached = cifs_fscache_inode_now_uncached,
+};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 4fce6e61b34..eb1ba493489 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -119,6 +119,31 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"Display Internal CIFS Data Structures for Debugging\n"
"---------------------------------------------------\n");
seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
+ seq_printf(m, "Features: ");
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ seq_printf(m, "dfs");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_FSCACHE
+ seq_printf(m, "fscache");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+ seq_printf(m, "lanman");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_POSIX
+ seq_printf(m, "posix");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_UPCALL
+ seq_printf(m, "spnego");
+ seq_putc(m, ' ');
+#endif
+#ifdef CONFIG_CIFS_XATTR
+ seq_printf(m, "xattr");
+#endif
+ seq_putc(m, '\n');
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
seq_printf(m, "Servers:");
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ac19a6f3dae..d6ced7aa23c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,7 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
}
rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
- if (rc != 0) {
+ if (rc < 0) {
cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
__func__, *devname, rc);
goto compose_mount_options_err;
@@ -150,8 +150,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
* assuming that we have 'unc=' and 'ip=' in
* the original sb_mountdata
*/
- md_len = strlen(sb_mountdata) + strlen(srvIP) +
- strlen(ref->node_name) + 12;
+ md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
mountdata = kzalloc(md_len+1, GFP_KERNEL);
if (mountdata == NULL) {
rc = -ENOMEM;
@@ -230,28 +229,22 @@ compose_mount_options_err:
goto compose_mount_options_out;
}
-
-static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
- struct dentry *dentry, const struct dfs_info3_param *ref)
+/**
+ * cifs_dfs_do_refmount - mounts specified path using provided refferal
+ * @cifs_sb: parent/root superblock
+ * @fullpath: full path in UNC format
+ * @ref: server's referral
+ */
+static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+ const char *fullpath, const struct dfs_info3_param *ref)
{
- struct cifs_sb_info *cifs_sb;
struct vfsmount *mnt;
char *mountdata;
char *devname = NULL;
- char *fullpath;
-
- cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
- /*
- * this function gives us a path with a double backslash prefix. We
- * require a single backslash for DFS.
- */
- fullpath = build_path_from_dentry(dentry);
- if (!fullpath)
- return ERR_PTR(-ENOMEM);
+ /* strip first '\' from fullpath */
mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
fullpath + 1, ref, &devname);
- kfree(fullpath);
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
@@ -357,8 +350,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
rc = -EINVAL;
goto out_err;
}
- mnt = cifs_dfs_do_refmount(nd->path.mnt,
- nd->path.dentry, referrals + i);
+ mnt = cifs_dfs_do_refmount(cifs_sb,
+ full_path, referrals + i);
cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
referrals[i].node_name, mnt);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 246a167cb91..9e771450c3b 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -35,6 +35,7 @@
#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
+#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */
struct cifs_sb_info {
struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 379bd7d9c05..87044906cd1 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -84,6 +84,9 @@ struct key_type cifs_spnego_key_type = {
/* strlen of ";uid=0x" */
#define UID_KEY_LEN 7
+/* strlen of ";creduid=0x" */
+#define CREDUID_KEY_LEN 11
+
/* strlen of ";user=" */
#define USER_KEY_LEN 6
@@ -107,6 +110,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
IP_KEY_LEN + INET6_ADDRSTRLEN +
MAX_MECH_STR_LEN +
UID_KEY_LEN + (sizeof(uid_t) * 2) +
+ CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
USER_KEY_LEN + strlen(sesInfo->userName) +
PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
@@ -144,6 +148,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
sprintf(dp, ";uid=0x%x", sesInfo->linux_uid);
dp = description + strlen(description);
+ sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
+
+ dp = description + strlen(description);
sprintf(dp, ";user=%s", sesInfo->userName);
dp = description + strlen(description);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1..b7431afdd76 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -45,8 +45,8 @@
#include "cifs_fs_sb.h"
#include <linux/mm.h>
#include <linux/key-type.h>
-#include "dns_resolve.h"
#include "cifs_spnego.h"
+#include "fscache.h"
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
int cifsFYI = 0;
@@ -329,6 +329,14 @@ cifs_destroy_inode(struct inode *inode)
}
static void
+cifs_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
+ cifs_fscache_release_inode_cookie(inode);
+}
+
+static void
cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
{
seq_printf(s, ",addr=");
@@ -473,14 +481,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+static int cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ /* no serverino => unconditional eviction */
+ return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
+ generic_drop_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+ .evict_inode = cifs_evict_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
@@ -892,6 +910,10 @@ init_cifs(void)
cFYI(1, "cifs_max_pending set to max of 256");
}
+ rc = cifs_fscache_register();
+ if (rc)
+ goto out;
+
rc = cifs_init_inodecache();
if (rc)
goto out_clean_proc;
@@ -912,27 +934,13 @@ init_cifs(void)
if (rc)
goto out_unregister_filesystem;
#endif
-#ifdef CONFIG_CIFS_DFS_UPCALL
- rc = register_key_type(&key_type_dns_resolver);
- if (rc)
- goto out_unregister_key_type;
-#endif
- rc = slow_work_register_user(THIS_MODULE);
- if (rc)
- goto out_unregister_resolver_key;
return 0;
- out_unregister_resolver_key:
-#ifdef CONFIG_CIFS_DFS_UPCALL
- unregister_key_type(&key_type_dns_resolver);
- out_unregister_key_type:
-#endif
#ifdef CONFIG_CIFS_UPCALL
- unregister_key_type(&cifs_spnego_key_type);
out_unregister_filesystem:
-#endif
unregister_filesystem(&cifs_fs_type);
+#endif
out_destroy_request_bufs:
cifs_destroy_request_bufs();
out_destroy_mids:
@@ -941,6 +949,8 @@ init_cifs(void)
cifs_destroy_inodecache();
out_clean_proc:
cifs_proc_clean();
+ cifs_fscache_unregister();
+ out:
return rc;
}
@@ -949,9 +959,9 @@ exit_cifs(void)
{
cFYI(DBG2, "exit_cifs");
cifs_proc_clean();
+ cifs_fscache_unregister();
#ifdef CONFIG_CIFS_DFS_UPCALL
cifs_dfs_release_automount_timer();
- unregister_key_type(&key_type_dns_resolver);
#endif
#ifdef CONFIG_CIFS_UPCALL
unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a7eb65c84b1..d82f5fb4761 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.64"
+#define CIFS_VERSION "1.65"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a88479ceaad..0cdfb8c32ac 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -16,10 +16,13 @@
* the GNU Lesser General Public License for more details.
*
*/
+#ifndef _CIFS_GLOB_H
+#define _CIFS_GLOB_H
+
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/slab.h>
-#include <linux/slow-work.h>
+#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
/*
@@ -34,7 +37,7 @@
#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
termination then *2 for unicode versions */
-#define MAX_PASSWORD_SIZE 16
+#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
#define CIFS_MIN_RCV_POOL 4
@@ -80,8 +83,7 @@ enum statusEnum {
};
enum securityEnum {
- PLAINTXT = 0, /* Legacy with Plaintext passwords */
- LANMAN, /* Legacy LANMAN auth */
+ LANMAN = 0, /* Legacy LANMAN auth */
NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
@@ -142,7 +144,6 @@ struct TCP_Server_Info {
struct list_head pending_mid_q;
void *Server_NlsInfo; /* BB - placeholder for future NLS info */
unsigned short server_codepage; /* codepage for the server */
- unsigned long ip_address; /* IP addr for the server if known */
enum protocolEnum protocolType;
char versionMajor;
char versionMinor;
@@ -190,19 +191,9 @@ struct TCP_Server_Info {
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool sec_kerberosu2u; /* supports U2U Kerberos */
bool sec_ntlmssp; /* supports NTLMSSP */
-};
-
-/*
- * The following is our shortcut to user information. We surface the uid,
- * and name. We always get the password on the fly in case it
- * has changed. We also hang a list of sessions owned by this user off here.
- */
-struct cifsUidInfo {
- struct list_head userList;
- struct list_head sessionList; /* SMB sessions for this user */
- uid_t linux_uid;
- char user[MAX_USERNAME_SIZE + 1]; /* ascii name of user */
- /* BB may need ptr or callback for PAM or WinBind info */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache; /* client index cache cookie */
+#endif
};
/*
@@ -212,9 +203,6 @@ struct cifsSesInfo {
struct list_head smb_ses_list;
struct list_head tcon_list;
struct mutex session_mutex;
-#if 0
- struct cifsUidInfo *uidInfo; /* pointer to user info */
-#endif
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
enum statusEnum status;
@@ -226,7 +214,8 @@ struct cifsSesInfo {
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
int Suid; /* remote smb uid */
- uid_t linux_uid; /* local Linux uid */
+ uid_t linux_uid; /* overriding owner of files on the mount */
+ uid_t cred_uid; /* owner of credentials */
int capabilities;
char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
TCP names - will ipv6 and sctp addresses fit? */
@@ -311,6 +300,10 @@ struct cifsTconInfo {
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool need_reconnect:1; /* connection reset, tid now invalid */
+#ifdef CONFIG_CIFS_FSCACHE
+ u64 resource_id; /* server resource id */
+ struct fscache_cookie *fscache; /* cookie for share */
+#endif
/* BB add field for back pointer to sb struct(s)? */
};
@@ -363,7 +356,7 @@ struct cifsFileInfo {
atomic_t count; /* reference count */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
- struct slow_work oplock_break; /* slow_work job for oplock breaks */
+ struct work_struct oplock_break; /* work for oplock breaks */
};
/* Take a reference on the file private data */
@@ -398,6 +391,9 @@ struct cifsInodeInfo {
bool invalid_mapping:1; /* pagecache is invalid */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
struct inode vfs_inode;
};
@@ -732,4 +728,10 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
+void cifs_oplock_break(struct work_struct *work);
+void cifs_oplock_break_get(struct cifsFileInfo *cfile);
+void cifs_oplock_break_put(struct cifsFileInfo *cfile);
+
extern const struct slow_work_ops cifs_oplock_break_ops;
+
+#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb..1f545081408 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -86,7 +86,9 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
-extern int cifs_convert_address(char *src, void *dst);
+extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
+extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+ unsigned short int port);
extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
extern void header_assemble(struct smb_hdr *, char /* command */ ,
const struct cifsTconInfo *, int /* length of
@@ -106,7 +108,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2208f06e4c4..95c2ea67edf 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
#include "nterr.h"
#include "rfc1002pdu.h"
#include "cn_cifs.h"
+#include "fscache.h"
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -66,6 +67,7 @@ struct smb_vol {
char *iocharset; /* local code page for mapping to and from Unicode */
char source_rfc1001_name[16]; /* netbios name of client */
char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
+ uid_t cred_uid;
uid_t linux_uid;
gid_t linux_gid;
mode_t file_mode;
@@ -97,6 +99,7 @@ struct smb_vol {
bool noblocksnd:1;
bool noautotune:1;
bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
+ bool fsc:1; /* enable fscache */
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -830,7 +833,8 @@ cifs_parse_mount_options(char *options, const char *devname,
/* null target name indicates to use *SMBSERVR default called name
if we end up sending RFC1001 session initialize */
vol->target_rfc1001_name[0] = 0;
- vol->linux_uid = current_uid(); /* use current_euid() instead? */
+ vol->cred_uid = current_uid();
+ vol->linux_uid = current_uid();
vol->linux_gid = current_gid();
/* default to only allowing write access to owner of the mount */
@@ -1257,6 +1261,12 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if ((strnicmp(data, "nocase", 6) == 0) ||
(strnicmp(data, "ignorecase", 10) == 0)) {
vol->nocase = 1;
+ } else if (strnicmp(data, "mand", 4) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "nomand", 6) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "_netdev", 7) == 0) {
+ /* ignore */
} else if (strnicmp(data, "brl", 3) == 0) {
vol->nobrl = 0;
} else if ((strnicmp(data, "nobrl", 5) == 0) ||
@@ -1331,6 +1341,8 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_WARNING "CIFS: Mount option noac not "
"supported. Instead set "
"/proc/fs/cifs/LookupCacheEnabled to 0\n");
+ } else if (strnicmp(data, "fsc", 3) == 0) {
+ vol->fsc = true;
} else
printk(KERN_WARNING "CIFS: Unknown mount option %s\n",
data);
@@ -1380,18 +1392,92 @@ cifs_parse_mount_options(char *options, const char *devname,
return 0;
}
+static bool
+match_address(struct TCP_Server_Info *server, struct sockaddr *addr)
+{
+ struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (addr4->sin_addr.s_addr !=
+ server->addr.sockAddr.sin_addr.s_addr)
+ return false;
+ if (addr4->sin_port &&
+ addr4->sin_port != server->addr.sockAddr.sin_port)
+ return false;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal(&addr6->sin6_addr,
+ &server->addr.sockAddr6.sin6_addr))
+ return false;
+ if (addr6->sin6_scope_id !=
+ server->addr.sockAddr6.sin6_scope_id)
+ return false;
+ if (addr6->sin6_port &&
+ addr6->sin6_port != server->addr.sockAddr6.sin6_port)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool
+match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
+{
+ unsigned int secFlags;
+
+ if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
+ secFlags = vol->secFlg;
+ else
+ secFlags = global_secflags | vol->secFlg;
+
+ switch (server->secType) {
+ case LANMAN:
+ if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT)))
+ return false;
+ break;
+ case NTLMv2:
+ if (!(secFlags & CIFSSEC_MAY_NTLMV2))
+ return false;
+ break;
+ case NTLM:
+ if (!(secFlags & CIFSSEC_MAY_NTLM))
+ return false;
+ break;
+ case Kerberos:
+ if (!(secFlags & CIFSSEC_MAY_KRB5))
+ return false;
+ break;
+ case RawNTLMSSP:
+ if (!(secFlags & CIFSSEC_MAY_NTLMSSP))
+ return false;
+ break;
+ default:
+ /* shouldn't happen */
+ return false;
+ }
+
+ /* now check if signing mode is acceptible */
+ if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
+ (server->secMode & SECMODE_SIGN_REQUIRED))
+ return false;
+ else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
+ (server->secMode &
+ (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
+ return false;
+
+ return true;
+}
+
static struct TCP_Server_Info *
-cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
+cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
{
- struct list_head *tmp;
struct TCP_Server_Info *server;
- struct sockaddr_in *addr4 = (struct sockaddr_in *) addr;
- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &cifs_tcp_ses_list) {
- server = list_entry(tmp, struct TCP_Server_Info,
- tcp_ses_list);
+ list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
/*
* the demux thread can exit on its own while still in CifsNew
* so don't accept any sockets in that state. Since the
@@ -1401,37 +1487,11 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
if (server->tcpStatus == CifsNew)
continue;
- switch (addr->ss_family) {
- case AF_INET:
- if (addr4->sin_addr.s_addr ==
- server->addr.sockAddr.sin_addr.s_addr) {
- addr4->sin_port = htons(port);
- /* user overrode default port? */
- if (addr4->sin_port) {
- if (addr4->sin_port !=
- server->addr.sockAddr.sin_port)
- continue;
- }
- break;
- } else
- continue;
+ if (!match_address(server, addr))
+ continue;
- case AF_INET6:
- if (ipv6_addr_equal(&addr6->sin6_addr,
- &server->addr.sockAddr6.sin6_addr) &&
- (addr6->sin6_scope_id ==
- server->addr.sockAddr6.sin6_scope_id)) {
- addr6->sin6_port = htons(port);
- /* user overrode default port? */
- if (addr6->sin6_port) {
- if (addr6->sin6_port !=
- server->addr.sockAddr6.sin6_port)
- continue;
- }
- break;
- } else
- continue;
- }
+ if (!match_security(server, vol))
+ continue;
++server->srv_count;
write_unlock(&cifs_tcp_ses_lock);
@@ -1460,6 +1520,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
+ cifs_fscache_release_client_cookie(server);
+
task = xchg(&server->tsk, NULL);
if (task)
force_sig(SIGKILL, task);
@@ -1479,7 +1541,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
if (volume_info->UNCip && volume_info->UNC) {
- rc = cifs_convert_address(volume_info->UNCip, &addr);
+ rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
+ volume_info->UNCip,
+ strlen(volume_info->UNCip),
+ volume_info->port);
if (!rc) {
/* we failed translating address */
rc = -EINVAL;
@@ -1499,7 +1564,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
}
/* see if we already have a matching tcp_ses */
- tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
+ tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info);
if (tcp_ses)
return tcp_ses;
@@ -1543,12 +1608,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "attempting ipv6 connect");
/* BB should we allow ipv6 on port 139? */
/* other OS never observed in Wild doing 139 with v6 */
- sin_server6->sin6_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
sizeof(struct sockaddr_in6));
rc = ipv6_connect(tcp_ses);
} else {
- sin_server->sin_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr, sin_server,
sizeof(struct sockaddr_in));
rc = ipv4_connect(tcp_ses);
@@ -1577,6 +1640,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_client_cookie(tcp_ses);
+
return tcp_ses;
out_err:
@@ -1591,17 +1656,27 @@ out_err:
}
static struct cifsSesInfo *
-cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
+cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
{
- struct list_head *tmp;
struct cifsSesInfo *ses;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &server->smb_ses_list) {
- ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
- if (strncmp(ses->userName, username, MAX_USERNAME_SIZE))
- continue;
-
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ switch (server->secType) {
+ case Kerberos:
+ if (vol->cred_uid != ses->cred_uid)
+ continue;
+ break;
+ default:
+ /* anything else takes username/password */
+ if (strncmp(ses->userName, vol->username,
+ MAX_USERNAME_SIZE))
+ continue;
+ if (strlen(vol->username) != 0 &&
+ strncmp(ses->password, vol->password,
+ MAX_PASSWORD_SIZE))
+ continue;
+ }
++ses->ses_count;
write_unlock(&cifs_tcp_ses_lock);
return ses;
@@ -1643,7 +1718,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
xid = GetXid();
- ses = cifs_find_smb_ses(server, volume_info->username);
+ ses = cifs_find_smb_ses(server, volume_info);
if (ses) {
cFYI(1, "Existing smb sess found (status=%d)", ses->status);
@@ -1706,6 +1781,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses->domainName)
strcpy(ses->domainName, volume_info->domainname);
}
+ ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
ses->overrideSecFlg = volume_info->secFlg;
@@ -1773,6 +1849,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
CIFSSMBTDis(xid, tcon);
_FreeXid(xid);
+ cifs_fscache_release_super_cookie(tcon);
tconInfoFree(tcon);
cifs_put_smb_ses(ses);
}
@@ -1843,6 +1920,8 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
list_add(&tcon->tcon_list, &ses->tcon_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_super_cookie(tcon);
+
return tcon;
out_fail:
@@ -2397,6 +2476,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
if (pvolume_info->dynperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+ if (pvolume_info->fsc)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
if (pvolume_info->direct_io) {
cFYI(1, "mounting share using direct i/o");
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461c..578d88c5b46 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -129,12 +130,6 @@ cifs_bp_rename_retry:
return full_path;
}
-/*
- * When called with struct file pointer set to NULL, there is no way we could
- * update file->private_data, but getting it stuck on openFileList provides a
- * way to access it from cifs_fill_filedata and thereby set file->private_data
- * from cifs_open.
- */
struct cifsFileInfo *
cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -162,7 +157,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
atomic_set(&pCifsFile->count, 1);
- slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops);
+ INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
write_lock(&GlobalSMBSeslock);
list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
@@ -184,12 +179,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +254,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +281,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +321,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +328,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +458,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +623,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +691,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +721,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +760,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 4db2c5e7283..0eb87026cad 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -4,6 +4,8 @@
* Copyright (c) 2007 Igor Mammedov
* Author(s): Igor Mammedov (niallain@gmail.com)
* Steve French (sfrench@us.ibm.com)
+ * Wang Lei (wang840925@gmail.com)
+ * David Howells (dhowells@redhat.com)
*
* Contains the CIFS DFS upcall routines used for hostname to
* IP address translation.
@@ -24,145 +26,73 @@
*/
#include <linux/slab.h>
-#include <keys/user-type.h>
+#include <linux/dns_resolver.h>
#include "dns_resolve.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
-/* Checks if supplied name is IP address
- * returns:
- * 1 - name is IP
- * 0 - name is not IP
- */
-static int
-is_ip(char *name)
-{
- struct sockaddr_storage ss;
-
- return cifs_convert_address(name, &ss);
-}
-
-static int
-dns_resolver_instantiate(struct key *key, const void *data,
- size_t datalen)
-{
- int rc = 0;
- char *ip;
-
- ip = kmalloc(datalen + 1, GFP_KERNEL);
- if (!ip)
- return -ENOMEM;
-
- memcpy(ip, data, datalen);
- ip[datalen] = '\0';
-
- /* make sure this looks like an address */
- if (!is_ip(ip)) {
- kfree(ip);
- return -EINVAL;
- }
-
- key->type_data.x[0] = datalen;
- key->payload.data = ip;
-
- return rc;
-}
-
-static void
-dns_resolver_destroy(struct key *key)
-{
- kfree(key->payload.data);
-}
-
-struct key_type key_type_dns_resolver = {
- .name = "dns_resolver",
- .def_datalen = sizeof(struct in_addr),
- .describe = user_describe,
- .instantiate = dns_resolver_instantiate,
- .destroy = dns_resolver_destroy,
- .match = user_match,
-};
-
-/* Resolves server name to ip address.
- * input:
- * unc - server UNC
- * output:
- * *ip_addr - pointer to server ip, caller responcible for freeing it.
- * return 0 on success
+/**
+ * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
+ * @unc: UNC path specifying the server
+ * @ip_addr: Where to return the IP address.
+ *
+ * The IP address will be returned in string form, and the caller is
+ * responsible for freeing it.
+ *
+ * Returns length of result on success, -ve on error.
*/
int
dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
{
- int rc = -EAGAIN;
- struct key *rkey = ERR_PTR(-EAGAIN);
+ struct sockaddr_storage ss;
+ const char *hostname, *sep;
char *name;
- char *data = NULL;
- int len;
+ int len, rc;
if (!ip_addr || !unc)
return -EINVAL;
- /* search for server name delimiter */
len = strlen(unc);
if (len < 3) {
cFYI(1, "%s: unc is too short: %s", __func__, unc);
return -EINVAL;
}
+
+ /* Discount leading slashes for cifs */
len -= 2;
- name = memchr(unc+2, '\\', len);
- if (!name) {
+ hostname = unc + 2;
+
+ /* Search for server name delimiter */
+ sep = memchr(hostname, '\\', len);
+ if (sep)
+ len = sep - unc;
+ else
cFYI(1, "%s: probably server name is whole unc: %s",
- __func__, unc);
- } else {
- len = (name - unc) - 2/* leading // */;
- }
+ __func__, unc);
+
+ /* Try to interpret hostname as an IPv4 or IPv6 address */
+ rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len);
+ if (rc > 0)
+ goto name_is_IP_address;
+
+ /* Perform the upcall */
+ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
+ if (rc < 0)
+ cERROR(1, "%s: unable to resolve: %*.*s",
+ __func__, len, len, hostname);
+ else
+ cFYI(1, "%s: resolved: %*.*s to %s",
+ __func__, len, len, hostname, *ip_addr);
+ return rc;
- name = kmalloc(len+1, GFP_KERNEL);
- if (!name) {
- rc = -ENOMEM;
- return rc;
- }
- memcpy(name, unc+2, len);
+name_is_IP_address:
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+ memcpy(name, hostname, len);
name[len] = 0;
-
- if (is_ip(name)) {
- cFYI(1, "%s: it is IP, skipping dns upcall: %s",
- __func__, name);
- data = name;
- goto skip_upcall;
- }
-
- rkey = request_key(&key_type_dns_resolver, name, "");
- if (!IS_ERR(rkey)) {
- len = rkey->type_data.x[0];
- data = rkey->payload.data;
- } else {
- cERROR(1, "%s: unable to resolve: %s", __func__, name);
- goto out;
- }
-
-skip_upcall:
- if (data) {
- *ip_addr = kmalloc(len + 1, GFP_KERNEL);
- if (*ip_addr) {
- memcpy(*ip_addr, data, len + 1);
- if (!IS_ERR(rkey))
- cFYI(1, "%s: resolved: %s to %s", __func__,
- name,
- *ip_addr
- );
- rc = 0;
- } else {
- rc = -ENOMEM;
- }
- if (!IS_ERR(rkey))
- key_put(rkey);
- }
-
-out:
- kfree(name);
- return rc;
+ cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name);
+ *ip_addr = name;
+ return 0;
}
-
-
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 966e9288930..d3f5d27f4d0 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,6 @@
#define _DNS_RESOLVE_H
#ifdef __KERNEL__
-#include <linux/key-type.h>
-extern struct key_type key_type_dns_resolver;
extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
#endif /* KERNEL */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f1ff785b229..db11fdef0e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -40,6 +40,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -162,44 +163,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +182,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +224,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +238,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +261,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +269,23 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
+
+ cifs_fscache_set_inode_cookie(inode, file);
+
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,16 +366,18 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
+ cifs_fscache_set_inode_cookie(inode, file);
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
@@ -456,7 +433,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
__u16 netfid;
if (file->private_data)
- pCifsFile = (struct cifsFileInfo *)file->private_data;
+ pCifsFile = file->private_data;
else
return -EBADF;
@@ -513,8 +490,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -595,8 +571,7 @@ int cifs_close(struct inode *inode, struct file *file)
int xid, timeout;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pSMBFile = file->private_data;
xid = GetXid();
@@ -671,8 +646,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
{
int rc = 0;
int xid;
- struct cifsFileInfo *pCFileStruct =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pCFileStruct = file->private_data;
char *ptmp;
cFYI(1, "Closedir inode = 0x%p", inode);
@@ -893,8 +867,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
length, pfLock,
posix_lock_type, wait_flag);
} else {
- struct cifsFileInfo *fid =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *fid = file->private_data;
if (numLock) {
rc = CIFSSMBLock(xid, tcon, netfid, length,
@@ -995,7 +968,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *) file->private_data;
+ open_file = file->private_data;
rc = generic_write_checks(file, poffset, &write_size, 0);
if (rc)
@@ -1097,7 +1070,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
xid = GetXid();
@@ -1681,8 +1654,7 @@ int cifs_fsync(struct file *file, int datasync)
int xid;
int rc = 0;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *smbfile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *smbfile = file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
xid = GetXid();
@@ -1786,7 +1758,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1867,7 +1839,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1952,6 +1924,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
bytes_read -= PAGE_CACHE_SIZE;
continue;
}
+ page_cache_release(page);
target = kmap_atomic(page, KM_USER0);
@@ -1971,6 +1944,9 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
data += PAGE_CACHE_SIZE;
+
+ /* add page to FS-Cache */
+ cifs_readpage_to_fscache(mapping->host, page);
}
return;
}
@@ -1997,10 +1973,19 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
pTcon = cifs_sb->tcon;
+ /*
+ * Reads as many pages as possible from fscache. Returns -ENOBUFS
+ * immediately if the cookie is negative
+ */
+ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
+ &num_pages);
+ if (rc == 0)
+ goto read_complete;
+
cFYI(DBG2, "rpages: num pages %d", num_pages);
for (i = 0; i < num_pages; ) {
unsigned contig_pages;
@@ -2111,6 +2096,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
smb_read_data = NULL;
}
+read_complete:
FreeXid(xid);
return rc;
}
@@ -2121,6 +2107,11 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
char *read_data;
int rc;
+ /* Is the page cached? */
+ rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
+ if (rc == 0)
+ goto read_complete;
+
page_cache_get(page);
read_data = kmap(page);
/* for reads over a certain size could initiate async read ahead */
@@ -2140,11 +2131,17 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
flush_dcache_page(page);
SetPageUptodate(page);
+
+ /* send this page to the cache */
+ cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
+
rc = 0;
io_error:
kunmap(page);
page_cache_release(page);
+
+read_complete:
return rc;
}
@@ -2294,8 +2291,23 @@ out:
return rc;
}
-static void
-cifs_oplock_break(struct slow_work *work)
+static int cifs_release_page(struct page *page, gfp_t gfp)
+{
+ if (PagePrivate(page))
+ return 0;
+
+ return cifs_fscache_release_page(page, gfp);
+}
+
+static void cifs_invalidate_page(struct page *page, unsigned long offset)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
+
+ if (offset == 0)
+ cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+}
+
+void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
@@ -2332,33 +2344,30 @@ cifs_oplock_break(struct slow_work *work)
LOCKING_ANDX_OPLOCK_RELEASE, false);
cFYI(1, "Oplock release rc = %d", rc);
}
+
+ /*
+ * We might have kicked in before is_valid_oplock_break()
+ * finished grabbing reference for us. Make sure it's done by
+ * waiting for GlobalSMSSeslock.
+ */
+ write_lock(&GlobalSMBSeslock);
+ write_unlock(&GlobalSMBSeslock);
+
+ cifs_oplock_break_put(cfile);
}
-static int
-cifs_oplock_break_get(struct slow_work *work)
+void cifs_oplock_break_get(struct cifsFileInfo *cfile)
{
- struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
- oplock_break);
mntget(cfile->mnt);
cifsFileInfo_get(cfile);
- return 0;
}
-static void
-cifs_oplock_break_put(struct slow_work *work)
+void cifs_oplock_break_put(struct cifsFileInfo *cfile)
{
- struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
- oplock_break);
mntput(cfile->mnt);
cifsFileInfo_put(cfile);
}
-const struct slow_work_ops cifs_oplock_break_ops = {
- .get_ref = cifs_oplock_break_get,
- .put_ref = cifs_oplock_break_put,
- .execute = cifs_oplock_break,
-};
-
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -2367,6 +2376,8 @@ const struct address_space_operations cifs_addr_ops = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
@@ -2383,6 +2394,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
new file mode 100644
index 00000000000..9f3f5c4be16
--- /dev/null
+++ b/fs/cifs/fscache.c
@@ -0,0 +1,236 @@
+/*
+ * fs/cifs/fscache.c - CIFS filesystem cache interface
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+
+void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
+{
+ server->fscache =
+ fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
+ &cifs_fscache_server_index_def, server);
+ cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server,
+ server->fscache);
+}
+
+void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+{
+ cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server,
+ server->fscache);
+ fscache_relinquish_cookie(server->fscache, 0);
+ server->fscache = NULL;
+}
+
+void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ tcon->fscache =
+ fscache_acquire_cookie(server->fscache,
+ &cifs_fscache_super_index_def, tcon);
+ cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)",
+ server->fscache, tcon->fscache);
+}
+
+void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon)
+{
+ cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
+ fscache_relinquish_cookie(tcon->fscache, 0);
+ tcon->fscache = NULL;
+}
+
+static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifsi->fscache)
+ return;
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)",
+ cifs_sb->tcon->fscache, cifsi->fscache);
+}
+
+void cifs_fscache_release_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS releasing inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 0);
+ cifsi->fscache = NULL;
+ }
+}
+
+static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS disabling inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+ cifsi->fscache = NULL;
+ }
+}
+
+void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ cifs_fscache_disable_inode_cookie(inode);
+ else {
+ cifs_fscache_enable_inode_cookie(inode);
+ cFYI(1, "CIFS: fscache inode cookie set");
+ }
+}
+
+void cifs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct fscache_cookie *old = cifsi->fscache;
+
+ if (cifsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p",
+ cifsi->fscache, old);
+ }
+}
+
+int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ if (PageFsCache(page)) {
+ struct inode *inode = page->mapping->host;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ cFYI(1, "CIFS: fscache release page (0x%p/0x%p)",
+ page, cifsi->fscache);
+ if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
+ return 0;
+ }
+
+ return 1;
+}
+
+static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
+ int error)
+{
+ cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)",
+ page, error);
+ if (!error)
+ SetPageUptodate(page);
+ unlock_page(page);
+}
+
+/*
+ * Retrieve a page from FS-Cache
+ */
+int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ GFP_KERNEL);
+ switch (ret) {
+
+ case 0: /* page found in fscache, read submitted */
+ cFYI(1, "CIFS: readpage_from_fscache: submitted");
+ return ret;
+ case -ENOBUFS: /* page won't be cached */
+ case -ENODATA: /* page not in cache */
+ cFYI(1, "CIFS: readpage_from_fscache %d", ret);
+ return 1;
+
+ default:
+ cERROR(1, "unknown error ret = %d", ret);
+ }
+ return ret;
+}
+
+/*
+ * Retrieve a set of pages from FS-Cache
+ */
+int __cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ int ret;
+
+ cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)",
+ CIFS_I(inode)->fscache, *nr_pages, inode);
+ ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
+ pages, nr_pages,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ mapping_gfp_mask(mapping));
+ switch (ret) {
+ case 0: /* read submitted to the cache for all pages */
+ cFYI(1, "CIFS: readpages_from_fscache: submitted");
+ return ret;
+
+ case -ENOBUFS: /* some pages are not cached and can't be */
+ case -ENODATA: /* some pages are not cached */
+ cFYI(1, "CIFS: readpages_from_fscache: no page");
+ return 1;
+
+ default:
+ cFYI(1, "unknown error ret = %d", ret);
+ }
+
+ return ret;
+}
+
+void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
+ if (ret != 0)
+ fscache_uncache_page(CIFS_I(inode)->fscache, page);
+}
+
+void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie);
+ fscache_wait_on_page_write(cookie, page);
+ fscache_uncache_page(cookie, page);
+}
+
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
new file mode 100644
index 00000000000..31b88ec2341
--- /dev/null
+++ b/fs/cifs/fscache.h
@@ -0,0 +1,136 @@
+/*
+ * fs/cifs/fscache.h - CIFS filesystem cache interface definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _CIFS_FSCACHE_H
+#define _CIFS_FSCACHE_H
+
+#include <linux/fscache.h>
+
+#include "cifsglob.h"
+
+#ifdef CONFIG_CIFS_FSCACHE
+
+extern struct fscache_netfs cifs_fscache_netfs;
+extern const struct fscache_cookie_def cifs_fscache_server_index_def;
+extern const struct fscache_cookie_def cifs_fscache_super_index_def;
+extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
+
+extern int cifs_fscache_register(void);
+extern void cifs_fscache_unregister(void);
+
+/*
+ * fscache.c
+ */
+extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
+extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);
+
+extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void cifs_fscache_reset_inode_cookie(struct inode *);
+
+extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
+extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
+extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
+extern int __cifs_readpages_from_fscache(struct inode *,
+ struct address_space *,
+ struct list_head *,
+ unsigned *);
+
+extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_invalidate_page(page, inode);
+}
+
+static inline int cifs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpage_from_fscache(inode, page);
+
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpages_from_fscache(inode, mapping, pages,
+ nr_pages);
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_readpage_to_fscache(inode, page);
+}
+
+#else /* CONFIG_CIFS_FSCACHE */
+static inline int cifs_fscache_register(void) { return 0; }
+static inline void cifs_fscache_unregister(void) {}
+
+static inline void
+cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
+static inline void
+cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
+static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {}
+static inline void
+cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}
+
+static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ return 1; /* May release page */
+}
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode) {}
+static inline int
+cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page) {}
+
+#endif /* CONFIG_CIFS_FSCACHE */
+
+#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a5..4bc47e5b5f2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,7 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
@@ -288,7 +289,7 @@ int cifs_get_file_info_unix(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -515,7 +516,7 @@ int cifs_get_file_info(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -723,18 +724,17 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+ /* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
- /*
- * uh oh -- it's a directory. We can't use it since hardlinked dirs are
- * verboten. Disable serverino and return it as if it were found, the
- * caller can discard it, generate a uniqueid and retry the find
- */
- if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
+ return 0;
+
+ /* if it's not a directory or has no dentries, then flag it */
+ if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry))
fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
- cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
- }
return 1;
}
@@ -748,6 +748,27 @@ cifs_init_inode(struct inode *inode, void *opaque)
return 0;
}
+/*
+ * walk dentry list for an inode and report whether it has aliases that
+ * are hashed. We use this to determine if a directory inode can actually
+ * be used.
+ */
+static bool
+inode_has_hashed_dentries(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&dcache_lock);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
+ spin_unlock(&dcache_lock);
+ return true;
+ }
+ }
+ spin_unlock(&dcache_lock);
+ return false;
+}
+
/* Given fattrs, get a corresponding inode */
struct inode *
cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
@@ -763,12 +784,16 @@ retry_iget5_locked:
inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
if (inode) {
- /* was there a problematic inode number collision? */
+ /* was there a potentially problematic inode collision? */
if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
- iput(inode);
- fattr->cf_uniqueid = iunique(sb, ROOT_I);
fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
- goto retry_iget5_locked;
+
+ if (inode_has_hashed_dentries(inode)) {
+ cifs_autodisable_serverino(CIFS_SB(sb));
+ iput(inode);
+ fattr->cf_uniqueid = iunique(sb, ROOT_I);
+ goto retry_iget5_locked;
+ }
}
cifs_fattr_to_inode(inode, fattr);
@@ -776,6 +801,10 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
+#ifdef CONFIG_CIFS_FSCACHE
+ /* initialize per-inode cache cookie pointer */
+ CIFS_I(inode)->fscache = NULL;
+#endif
unlock_new_inode(inode);
}
}
@@ -807,6 +836,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
if (!inode)
return ERR_PTR(-ENOMEM);
+#ifdef CONFIG_CIFS_FSCACHE
+ /* populate tcon->resource_id */
+ cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid;
+#endif
+
if (rc && cifs_sb->tcon->ipc) {
cFYI(1, "ipc connection - fake read inode");
inode->i_mode |= S_IFDIR;
@@ -1401,6 +1435,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
@@ -1564,6 +1602,7 @@ cifs_invalidate_mapping(struct inode *inode)
cifs_i->write_behind_rc = rc;
}
invalidate_remote_inode(inode);
+ cifs_fscache_reset_inode_cookie(inode);
}
int cifs_revalidate_file(struct file *filp)
@@ -1659,26 +1698,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
return rc;
}
-static int cifs_vmtruncate(struct inode *inode, loff_t offset)
+static void cifs_setsize(struct inode *inode, loff_t offset)
{
loff_t oldsize;
- int err;
spin_lock(&inode->i_lock);
- err = inode_newsize_ok(inode, offset);
- if (err) {
- spin_unlock(&inode->i_lock);
- goto out;
- }
-
oldsize = inode->i_size;
i_size_write(inode, offset);
spin_unlock(&inode->i_lock);
+
truncate_pagecache(inode, oldsize, offset);
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
-out:
- return err;
}
static int
@@ -1751,7 +1780,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
if (rc == 0) {
cifsInode->server_eof = attrs->ia_size;
- rc = cifs_vmtruncate(inode, attrs->ia_size);
+ cifs_setsize(inode, attrs->ia_size);
cifs_truncate_page(inode->i_mapping, inode->i_size);
}
@@ -1776,14 +1805,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
xid = GetXid();
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
- /* check if we have permission to change attrs */
- rc = inode_change_ok(inode, attrs);
- if (rc < 0)
- goto out;
- else
- rc = 0;
- }
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+ attrs->ia_valid |= ATTR_FORCE;
+
+ rc = inode_change_ok(inode, attrs);
+ if (rc < 0)
+ goto out;
full_path = build_path_from_dentry(direntry);
if (full_path == NULL) {
@@ -1869,18 +1896,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
CIFS_MOUNT_MAP_SPECIAL_CHR);
}
- if (!rc) {
- rc = inode_setattr(inode, attrs);
+ if (rc)
+ goto out;
- /* force revalidate when any of these times are set since some
- of the fs types (eg ext3, fat) do not have fine enough
- time granularity to match protocol, and we do not have a
- a way (yet) to query the server fs's time granularity (and
- whether it rounds times down).
- */
- if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
- cifsInode->time = 0;
- }
+ if ((attrs->ia_valid & ATTR_SIZE) &&
+ attrs->ia_size != i_size_read(inode))
+ truncate_setsize(inode, attrs->ia_size);
+
+ setattr_copy(inode, attrs);
+ mark_inode_dirty(inode);
+
+ /* force revalidate when any of these times are set since some
+ of the fs types (eg ext3, fat) do not have fine enough
+ time granularity to match protocol, and we do not have a
+ a way (yet) to query the server fs's time granularity (and
+ whether it rounds times down).
+ */
+ if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ cifsInode->time = 0;
out:
kfree(args);
kfree(full_path);
@@ -1905,14 +1938,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
direntry->d_name.name, attrs->ia_valid);
- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
- /* check if we have permission to change attrs */
- rc = inode_change_ok(inode, attrs);
- if (rc < 0) {
- FreeXid(xid);
- return rc;
- } else
- rc = 0;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+ attrs->ia_valid |= ATTR_FORCE;
+
+ rc = inode_change_ok(inode, attrs);
+ if (rc < 0) {
+ FreeXid(xid);
+ return rc;
}
full_path = build_path_from_dentry(direntry);
@@ -2020,8 +2052,17 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
/* do not need local check to inode_check_ok since the server does
that */
- if (!rc)
- rc = inode_setattr(inode, attrs);
+ if (rc)
+ goto cifs_setattr_exit;
+
+ if ((attrs->ia_valid & ATTR_SIZE) &&
+ attrs->ia_size != i_size_read(inode))
+ truncate_setsize(inode, attrs->ia_size);
+
+ setattr_copy(inode, attrs);
+ mark_inode_dirty(inode);
+ return 0;
+
cifs_setattr_exit:
kfree(full_path);
FreeXid(xid);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 505926f1ee6..9d38a71c8e1 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -41,8 +41,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
__u64 ExtAttrMask = 0;
__u64 caps;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)filep->private_data;
+ struct cifsFileInfo *pSMBFile = filep->private_data;
#endif /* CONFIG_CIFS_POSIX */
xid = GetXid();
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1394aa37f26..3ccadc1326d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -498,7 +498,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
struct cifsTconInfo *tcon;
struct cifsInodeInfo *pCifsInode;
struct cifsFileInfo *netfile;
- int rc;
cFYI(1, "Checking for oplock break or dnotify response");
if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
@@ -583,13 +582,18 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
pCifsInode->clientCanCacheAll = false;
if (pSMB->OplockLevel == 0)
pCifsInode->clientCanCacheRead = false;
- rc = slow_work_enqueue(&netfile->oplock_break);
- if (rc) {
- cERROR(1, "failed to enqueue oplock "
- "break: %d\n", rc);
- } else {
- netfile->oplock_break_cancelled = false;
- }
+
+ /*
+ * cifs_oplock_break_put() can't be called
+ * from here. Get reference after queueing
+ * succeeded. cifs_oplock_break() will
+ * synchronize using GlobalSMSSeslock.
+ */
+ if (queue_work(system_nrt_wq,
+ &netfile->oplock_break))
+ cifs_oplock_break_get(netfile);
+ netfile->oplock_break_cancelled = false;
+
read_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
return true;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d35d52889cb..f97851119e6 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -61,6 +61,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRremcd, -EACCES},
{ERRdiffdevice, -EXDEV},
{ERRnofiles, -ENOENT},
+ {ERRwriteprot, -EROFS},
{ERRbadshare, -ETXTBSY},
{ERRlock, -EACCES},
{ERRunsup, -EINVAL},
@@ -139,17 +140,18 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
* Returns 0 on failure.
*/
static int
-cifs_inet_pton(const int address_family, const char *cp, void *dst)
+cifs_inet_pton(const int address_family, const char *cp, int len, void *dst)
{
int ret = 0;
/* calculate length by finding first slash or NULL */
if (address_family == AF_INET)
- ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL);
+ ret = in4_pton(cp, len, dst, '\\', NULL);
else if (address_family == AF_INET6)
- ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
+ ret = in6_pton(cp, len, dst , '\\', NULL);
- cFYI(DBG2, "address conversion returned %d for %s", ret, cp);
+ cFYI(DBG2, "address conversion returned %d for %*.*s",
+ ret, len, len, cp);
if (ret > 0)
ret = 1;
return ret;
@@ -164,43 +166,66 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
* Returns 0 on failure.
*/
int
-cifs_convert_address(char *src, void *dst)
+cifs_convert_address(struct sockaddr *dst, const char *src, int len)
{
- int rc;
- char *pct, *endp;
+ int rc, alen, slen;
+ const char *pct;
+ char *endp, scope_id[13];
struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
/* IPv4 address */
- if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
+ if (cifs_inet_pton(AF_INET, src, len, &s4->sin_addr.s_addr)) {
s4->sin_family = AF_INET;
return 1;
}
- /* temporarily terminate string */
- pct = strchr(src, '%');
- if (pct)
- *pct = '\0';
-
- rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
-
- /* repair temp termination (if any) and make pct point to scopeid */
- if (pct)
- *pct++ = '%';
+ /* attempt to exclude the scope ID from the address part */
+ pct = memchr(src, '%', len);
+ alen = pct ? pct - src : len;
+ rc = cifs_inet_pton(AF_INET6, src, alen, &s6->sin6_addr.s6_addr);
if (!rc)
return rc;
s6->sin6_family = AF_INET6;
if (pct) {
+ /* grab the scope ID */
+ slen = len - (alen + 1);
+ if (slen <= 0 || slen > 12)
+ return 0;
+ memcpy(scope_id, pct + 1, slen);
+ scope_id[slen] = '\0';
+
s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
- if (!*pct || *endp)
+ if (endp != scope_id + slen)
return 0;
}
return rc;
}
+int
+cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+ const unsigned short int port)
+{
+ if (!cifs_convert_address(dst, src, len))
+ return 0;
+
+ switch (dst->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)dst)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
/*****************************************************************************
convert a NT status code to a dos class/code
*****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index daf1753af67..d5e591fab47 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -847,6 +847,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
+ if (tmp_buf == NULL) {
+ rc = -ENOMEM;
+ break;
+ }
+
for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
if (current_entry == NULL) {
/* evaluate whether this case is an error */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2..0a57cb7db5d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index c5084d27db7..7f16cb825fe 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -76,6 +76,7 @@
#define ERRnofiles 18 /* A File Search command can find no
more files matching the specified
criteria. */
+#define ERRwriteprot 19 /* media is write protected */
#define ERRgeneral 31
#define ERRbadshare 32 /* The sharing mode specified for an
Open conflicts with existing FIDs on
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d97f9935a02..6526e6f21ec 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -35,7 +35,7 @@
#include "coda_int.h"
/* VFS super_block ops */
-static void coda_clear_inode(struct inode *);
+static void coda_evict_inode(struct inode *);
static void coda_put_super(struct super_block *);
static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations =
{
.alloc_inode = coda_alloc_inode,
.destroy_inode = coda_destroy_inode,
- .clear_inode = coda_clear_inode,
+ .evict_inode = coda_evict_inode,
.put_super = coda_put_super,
.statfs = coda_statfs,
.remount_fs = coda_remount,
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb)
printk("Coda: Bye bye.\n");
}
-static void coda_clear_inode(struct inode *inode)
+static void coda_evict_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
coda_cache_clear_inode(inode);
}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf79c5b..de89645777c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
nbytes = req->uc_outSize; /* don't have more space! */
}
if (copy_from_user(req->uc_data, buf, nbytes)) {
- req->uc_flags |= REQ_ABORT;
+ req->uc_flags |= CODA_REQ_ABORT;
wake_up(&req->uc_sleep);
retval = -EFAULT;
goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
retval = -EFAULT;
/* If request was not a signal, enqueue and don't free */
- if (!(req->uc_flags & REQ_ASYNC)) {
- req->uc_flags |= REQ_READ;
+ if (!(req->uc_flags & CODA_REQ_ASYNC)) {
+ req->uc_flags |= CODA_REQ_READ;
list_add_tail(&(req->uc_chain), &vcp->vc_processing);
goto out;
}
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
list_del(&req->uc_chain);
/* Async requests need to be freed here */
- if (req->uc_flags & REQ_ASYNC) {
+ if (req->uc_flags & CODA_REQ_ASYNC) {
CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
kfree(req);
continue;
}
- req->uc_flags |= REQ_ABORT;
+ req->uc_flags |= CODA_REQ_ABORT;
wake_up(&req->uc_sleep);
}
list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
list_del(&req->uc_chain);
- req->uc_flags |= REQ_ABORT;
+ req->uc_flags |= CODA_REQ_ABORT;
wake_up(&req->uc_sleep);
}
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed76f6..b8893ab6f9e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
(((r)->uc_opcode != CODA_CLOSE && \
(r)->uc_opcode != CODA_STORE && \
(r)->uc_opcode != CODA_RELEASE) || \
- (r)->uc_flags & REQ_READ))
+ (r)->uc_flags & CODA_REQ_READ))
static inline void coda_waitfor_upcall(struct upc_req *req)
{
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
set_current_state(TASK_UNINTERRUPTIBLE);
/* got a reply */
- if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+ if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
break;
if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
coda_waitfor_upcall(req);
/* Op went through, interrupt or not... */
- if (req->uc_flags & REQ_WRITE) {
+ if (req->uc_flags & CODA_REQ_WRITE) {
out = (union outputArgs *)req->uc_data;
/* here we map positive Venus errors to kernel errors */
error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
}
error = -EINTR;
- if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+ if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
printk(KERN_WARNING "coda: Unexpected interruption.\n");
goto exit;
}
/* Interrupted before venus read it. */
- if (!(req->uc_flags & REQ_READ))
+ if (!(req->uc_flags & CODA_REQ_READ))
goto exit;
/* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
sig_inputArgs->ih.opcode = CODA_SIGNAL;
sig_inputArgs->ih.unique = req->uc_unique;
- sig_req->uc_flags = REQ_ASYNC;
+ sig_req->uc_flags = CODA_REQ_ASYNC;
sig_req->uc_opcode = sig_inputArgs->ih.opcode;
sig_req->uc_unique = sig_inputArgs->ih.unique;
sig_req->uc_inSize = sizeof(struct coda_in_hdr);
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c5055..718c7062aec 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -8,13 +8,14 @@
* Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
- * Copyright (C) 2003 Pavel Machek (pavel@suse.cz)
+ * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/compat.h>
@@ -76,7 +77,8 @@ int compat_printk(const char *fmt, ...)
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
*/
-asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __user *t)
+asmlinkage long compat_sys_utime(const char __user *filename,
+ struct compat_utimbuf __user *t)
{
struct timespec tv[2];
@@ -90,7 +92,7 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
}
-asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags)
+asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags)
{
struct timespec tv[2];
@@ -105,7 +107,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st
return do_utimes(dfd, filename, t ? tv : NULL, flags);
}
-asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
+asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t)
{
struct timespec tv[2];
@@ -124,7 +126,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, st
return do_utimes(dfd, filename, t ? tv : NULL, 0);
}
-asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
+asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t)
{
return compat_sys_futimesat(AT_FDCWD, filename, t);
}
@@ -168,7 +170,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
return err;
}
-asmlinkage long compat_sys_newstat(char __user * filename,
+asmlinkage long compat_sys_newstat(const char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
@@ -180,7 +182,7 @@ asmlinkage long compat_sys_newstat(char __user * filename,
return cp_compat_stat(&stat, statbuf);
}
-asmlinkage long compat_sys_newlstat(char __user * filename,
+asmlinkage long compat_sys_newlstat(const char __user * filename,
struct compat_stat __user *statbuf)
{
struct kstat stat;
@@ -193,7 +195,8 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
}
#ifndef __ARCH_WANT_STAT64
-asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
+asmlinkage long compat_sys_newfstatat(unsigned int dfd,
+ const char __user *filename,
struct compat_stat __user *statbuf, int flag)
{
struct kstat stat;
@@ -266,7 +269,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
error = user_path(pathname, &path);
if (!error) {
struct kstatfs tmp;
- error = vfs_statfs(path.dentry, &tmp);
+ error = vfs_statfs(&path, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
path_put(&path);
@@ -284,7 +287,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
file = fget(fd);
if (!file)
goto out;
- error = vfs_statfs(file->f_path.dentry, &tmp);
+ error = vfs_statfs(&file->f_path, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
fput(file);
@@ -334,7 +337,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
error = user_path(pathname, &path);
if (!error) {
struct kstatfs tmp;
- error = vfs_statfs(path.dentry, &tmp);
+ error = vfs_statfs(&path, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
path_put(&path);
@@ -355,7 +358,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
file = fget(fd);
if (!file)
goto out;
- error = vfs_statfs(file->f_path.dentry, &tmp);
+ error = vfs_statfs(&file->f_path, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
fput(file);
@@ -378,7 +381,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
sb = user_get_super(new_decode_dev(dev));
if (!sb)
return -EINVAL;
- err = vfs_statfs(sb->s_root, &sbuf);
+ err = statfs_by_dentry(sb->s_root, &sbuf);
drop_super(sb);
if (err)
return err;
@@ -626,7 +629,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
tot_len += len;
if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
goto out;
- if (!access_ok(vrfy_dir(type), buf, len)) {
+ if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
@@ -836,9 +839,10 @@ static int do_nfs4_super_data_conv(void *raw_data)
#define NCPFS_NAME "ncpfs"
#define NFS4_NAME "nfs4"
-asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
- char __user * type, unsigned long flags,
- void __user * data)
+asmlinkage long compat_sys_mount(const char __user * dev_name,
+ const char __user * dir_name,
+ const char __user * type, unsigned long flags,
+ const void __user * data)
{
char *kernel_type;
unsigned long data_page;
@@ -891,8 +895,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
return retval;
}
-#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
-
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_offset;
@@ -981,7 +983,8 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
struct compat_linux_dirent __user * dirent;
struct compat_getdents_callback *buf = __buf;
compat_ulong_t d_ino;
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t));
+ int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
+ namlen + 2, sizeof(compat_long_t));
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
@@ -1068,8 +1071,8 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t
{
struct linux_dirent64 __user *dirent;
struct compat_getdents_callback64 *buf = __buf;
- int jj = NAME_OFFSET(dirent);
- int reclen = ALIGN(jj + namlen + 1, sizeof(u64));
+ int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
+ sizeof(u64));
u64 off;
buf->error = -EINVAL; /* only used if we fail.. */
@@ -1193,11 +1196,10 @@ out:
if (iov != iovstack)
kfree(iov);
if ((ret + (type == READ)) > 0) {
- struct dentry *dentry = file->f_path.dentry;
if (type == READ)
- fsnotify_access(dentry);
+ fsnotify_access(file);
else
- fsnotify_modify(dentry);
+ fsnotify_modify(file);
}
return ret;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae..03e59aa318e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -4,7 +4,7 @@
* Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
- * Copyright (C) 2003 Pavel Machek (pavel@suse.cz)
+ * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
*
* These routines maintain argument size conversion between 32bit and 64bit
* ioctls.
@@ -131,23 +131,6 @@ static int w_long(unsigned int fd, unsigned int cmd,
return err;
}
-static int rw_long(unsigned int fd, unsigned int cmd,
- compat_ulong_t __user *argp)
-{
- mm_segment_t old_fs = get_fs();
- int err;
- unsigned long val;
-
- if(get_user(val, argp))
- return -EFAULT;
- set_fs (KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&val);
- set_fs (old_fs);
- if (!err && put_user(val, argp))
- return -EFAULT;
- return err;
-}
-
struct compat_video_event {
int32_t type;
compat_time_t timestamp;
@@ -594,15 +577,12 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
return err;
}
-static int ioc_settimeout(unsigned int fd, unsigned int cmd,
- compat_ulong_t __user *argp)
-{
- return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
-}
-
/* Bluetooth ioctls */
-#define HCIUARTSETPROTO _IOW('U', 200, int)
-#define HCIUARTGETPROTO _IOR('U', 201, int)
+#define HCIUARTSETPROTO _IOW('U', 200, int)
+#define HCIUARTGETPROTO _IOR('U', 201, int)
+#define HCIUARTGETDEVICE _IOR('U', 202, int)
+#define HCIUARTSETFLAGS _IOW('U', 203, int)
+#define HCIUARTGETFLAGS _IOR('U', 204, int)
#define BNEPCONNADD _IOW('B', 200, int)
#define BNEPCONNDEL _IOW('B', 201, int)
@@ -966,6 +946,7 @@ COMPATIBLE_IOCTL(TIOCGPGRP)
COMPATIBLE_IOCTL(TIOCGPTN)
COMPATIBLE_IOCTL(TIOCSPTLCK)
COMPATIBLE_IOCTL(TIOCSERGETLSR)
+COMPATIBLE_IOCTL(TIOCSIG)
#ifdef TCGETS2
COMPATIBLE_IOCTL(TCGETS2)
COMPATIBLE_IOCTL(TCSETS2)
@@ -1281,13 +1262,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* AUTOFS */
-COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
/* Raw devices */
COMPATIBLE_IOCTL(RAW_SETBIND)
COMPATIBLE_IOCTL(RAW_GETBIND)
@@ -1328,6 +1302,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
COMPATIBLE_IOCTL(HCISETLINKMODE)
COMPATIBLE_IOCTL(HCISETACLMTU)
COMPATIBLE_IOCTL(HCISETSCOMTU)
+COMPATIBLE_IOCTL(HCIBLOCKADDR)
+COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
COMPATIBLE_IOCTL(HCIINQUIRY)
COMPATIBLE_IOCTL(HCIUARTSETPROTO)
COMPATIBLE_IOCTL(HCIUARTGETPROTO)
@@ -1552,9 +1528,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case RAW_GETBIND:
return raw_ioctl(fd, cmd, argp);
#endif
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
- case AUTOFS_IOC_SETTIMEOUT32:
- return ioc_settimeout(fd, cmd, argp);
/* One SMB ioctl needs translations. */
#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
case SMB_IOC_GETMOUNTUID_32:
@@ -1609,9 +1582,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case KDSKBMETA:
case KDSKBLED:
case KDSETLED:
- /* AUTOFS */
- case AUTOFS_IOC_READY:
- case AUTOFS_IOC_FAIL:
/* NBD */
case NBD_SET_SOCK:
case NBD_SET_BLKSIZE:
@@ -1729,8 +1699,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
goto out_fput;
}
- if (!filp->f_op ||
- (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl))
+ if (!filp->f_op || !filp->f_op->unlocked_ioctl)
goto do_ioctl;
break;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 41645142b88..cf78d44a8d6 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -72,10 +72,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (!sd)
return -EINVAL;
- error = simple_setattr(dentry, iattr);
- if (error)
- return error;
-
sd_iattr = sd->s_iattr;
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
@@ -89,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
-
/* attributes were changed atleast once in past */
+ error = simple_setattr(dentry, iattr);
+ if (error)
+ return error;
+
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index dd3634e4c96..a53b130b366 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex);
#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1)
#define OFFSET(x) ((x)->i_ino)
-
-static int cramfs_iget5_test(struct inode *inode, void *opaque)
-{
- struct cramfs_inode *cramfs_inode = opaque;
- return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
-}
-
-static int cramfs_iget5_set(struct inode *inode, void *opaque)
+static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
{
- struct cramfs_inode *cramfs_inode = opaque;
- inode->i_ino = CRAMINO(cramfs_inode);
- return 0;
+ static struct timespec zerotime;
+ inode->i_mode = cramfs_inode->mode;
+ inode->i_uid = cramfs_inode->uid;
+ inode->i_size = cramfs_inode->size;
+ inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+ inode->i_gid = cramfs_inode->gid;
+ /* Struct copy intentional */
+ inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+ /* inode->i_nlink is left 1 - arguably wrong for directories,
+ but it's the best we can do without reading the directory
+ contents. 1 yields the right result in GNU find, even
+ without -noleaf option. */
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = &generic_ro_fops;
+ inode->i_data.a_ops = &cramfs_aops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &cramfs_dir_inode_operations;
+ inode->i_fop = &cramfs_directory_operations;
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = &page_symlink_inode_operations;
+ inode->i_data.a_ops = &cramfs_aops;
+ } else {
+ init_special_inode(inode, inode->i_mode,
+ old_decode_dev(cramfs_inode->size));
+ }
}
static struct inode *get_cramfs_inode(struct super_block *sb,
struct cramfs_inode * cramfs_inode)
{
- struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
- cramfs_iget5_test, cramfs_iget5_set,
- cramfs_inode);
- static struct timespec zerotime;
-
- if (inode && (inode->i_state & I_NEW)) {
- inode->i_mode = cramfs_inode->mode;
- inode->i_uid = cramfs_inode->uid;
- inode->i_size = cramfs_inode->size;
- inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
- inode->i_gid = cramfs_inode->gid;
- /* Struct copy intentional */
- inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
- /* inode->i_nlink is left 1 - arguably wrong for directories,
- but it's the best we can do without reading the directory
- contents. 1 yields the right result in GNU find, even
- without -noleaf option. */
- if (S_ISREG(inode->i_mode)) {
- inode->i_fop = &generic_ro_fops;
- inode->i_data.a_ops = &cramfs_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &cramfs_dir_inode_operations;
- inode->i_fop = &cramfs_directory_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &page_symlink_inode_operations;
- inode->i_data.a_ops = &cramfs_aops;
- } else {
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(cramfs_inode->size));
+ struct inode *inode;
+ if (CRAMINO(cramfs_inode) == 1) {
+ inode = new_inode(sb);
+ if (inode) {
+ inode->i_ino = 1;
+ setup_inode(inode, cramfs_inode);
+ }
+ } else {
+ inode = iget_locked(sb, CRAMINO(cramfs_inode));
+ if (inode) {
+ setup_inode(inode, cramfs_inode);
+ unlock_new_inode(inode);
}
- unlock_new_inode(inode);
}
return inode;
}
-static void cramfs_drop_inode(struct inode *inode)
-{
- if (inode->i_ino == 1)
- generic_delete_inode(inode);
- else
- generic_drop_inode(inode);
-}
-
/*
* We have our own block cache: don't fill up the buffer cache
* with the rom-image, because the way the filesystem is set
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = {
.put_super = cramfs_put_super,
.remount_fs = cramfs_remount,
.statfs = cramfs_statfs,
- .drop_inode = cramfs_drop_inode,
};
static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a63..4d13bf50b7b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
*/
static void prune_dcache(int count)
{
- struct super_block *sb, *n;
+ struct super_block *sb, *p = NULL;
int w_count;
int unused = dentry_stat.nr_unused;
int prune_ratio;
@@ -550,7 +550,7 @@ static void prune_dcache(int count)
else
prune_ratio = unused / count;
spin_lock(&sb_lock);
- list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ list_for_each_entry(sb, &super_blocks, s_list) {
if (list_empty(&sb->s_instances))
continue;
if (sb->s_nr_dentry_unused == 0)
@@ -590,12 +590,16 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ if (p)
+ __put_super(p);
count -= pruned;
- __put_super(sb);
+ p = sb;
/* more work left to do? */
if (count <= 0)
break;
}
+ if (p)
+ __put_super(p);
spin_unlock(&sb_lock);
spin_unlock(&dcache_lock);
}
@@ -894,7 +898,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
@@ -1901,48 +1905,30 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
}
/**
- * __d_path - return the path of a dentry
+ * Prepend path string to a buffer
+ *
* @path: the dentry/vfsmount to report
* @root: root vfsmnt/dentry (may be modified by this function)
- * @buffer: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns a pointer into the buffer or an error code if the
- * path was too long.
+ * @buffer: pointer to the end of the buffer
+ * @buflen: pointer to buffer length
*
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * Caller holds the dcache_lock.
*
* If path is not reachable from the supplied root, then the value of
* root is changed (without modifying refcounts).
*/
-char *__d_path(const struct path *path, struct path *root,
- char *buffer, int buflen)
+static int prepend_path(const struct path *path, struct path *root,
+ char **buffer, int *buflen)
{
struct dentry *dentry = path->dentry;
struct vfsmount *vfsmnt = path->mnt;
- char *end = buffer + buflen;
- char *retval;
+ bool slash = false;
+ int error = 0;
spin_lock(&vfsmount_lock);
- prepend(&end, &buflen, "\0", 1);
- if (d_unlinked(dentry) &&
- (prepend(&end, &buflen, " (deleted)", 10) != 0))
- goto Elong;
-
- if (buflen < 1)
- goto Elong;
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
-
- for (;;) {
+ while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
- if (dentry == root->dentry && vfsmnt == root->mnt)
- break;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
/* Global root? */
if (vfsmnt->mnt_parent == vfsmnt) {
@@ -1954,28 +1940,88 @@ char *__d_path(const struct path *path, struct path *root,
}
parent = dentry->d_parent;
prefetch(parent);
- if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
- (prepend(&end, &buflen, "/", 1) != 0))
- goto Elong;
- retval = end;
+ error = prepend_name(buffer, buflen, &dentry->d_name);
+ if (!error)
+ error = prepend(buffer, buflen, "/", 1);
+ if (error)
+ break;
+
+ slash = true;
dentry = parent;
}
out:
+ if (!error && !slash)
+ error = prepend(buffer, buflen, "/", 1);
+
spin_unlock(&vfsmount_lock);
- return retval;
+ return error;
global_root:
- retval += 1; /* hit the slash */
- if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
- goto Elong;
+ /*
+ * Filesystems needing to implement special "root names"
+ * should do so with ->d_dname()
+ */
+ if (IS_ROOT(dentry) &&
+ (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
+ WARN(1, "Root dentry has weird name <%.*s>\n",
+ (int) dentry->d_name.len, dentry->d_name.name);
+ }
root->mnt = vfsmnt;
root->dentry = dentry;
goto out;
+}
-Elong:
- retval = ERR_PTR(-ENAMETOOLONG);
- goto out;
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry (may be modified by this function)
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive. Caller holds the dcache_lock.
+ *
+ * If path is not reachable from the supplied root, then the value of
+ * root is changed (without modifying refcounts).
+ */
+char *__d_path(const struct path *path, struct path *root,
+ char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, root, &res, &buflen);
+ if (error)
+ return ERR_PTR(error);
+
+ return res;
+}
+
+/*
+ * same as __d_path but appends "(deleted)" for unlinked files.
+ */
+static int path_with_deleted(const struct path *path, struct path *root,
+ char **buf, int *buflen)
+{
+ prepend(buf, buflen, "\0", 1);
+ if (d_unlinked(path->dentry)) {
+ int error = prepend(buf, buflen, " (deleted)", 10);
+ if (error)
+ return error;
+ }
+
+ return prepend_path(path, root, buf, buflen);
+}
+
+static int prepend_unreachable(char **buffer, int *buflen)
+{
+ return prepend(buffer, buflen, "(unreachable)", 13);
}
/**
@@ -1996,9 +2042,10 @@ Elong:
*/
char *d_path(const struct path *path, char *buf, int buflen)
{
- char *res;
+ char *res = buf + buflen;
struct path root;
struct path tmp;
+ int error;
/*
* We have various synthetic filesystems that never get mounted. On
@@ -2010,19 +2057,51 @@ char *d_path(const struct path *path, char *buf, int buflen)
if (path->dentry->d_op && path->dentry->d_op->d_dname)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
- read_lock(&current->fs->lock);
- root = current->fs->root;
- path_get(&root);
- read_unlock(&current->fs->lock);
+ get_fs_root(current->fs, &root);
spin_lock(&dcache_lock);
tmp = root;
- res = __d_path(path, &tmp, buf, buflen);
+ error = path_with_deleted(path, &tmp, &res, &buflen);
+ if (error)
+ res = ERR_PTR(error);
spin_unlock(&dcache_lock);
path_put(&root);
return res;
}
EXPORT_SYMBOL(d_path);
+/**
+ * d_path_with_unreachable - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * The difference from d_path() is that this prepends "(unreachable)"
+ * to paths which are unreachable from the current process' root.
+ */
+char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ struct path root;
+ struct path tmp;
+ int error;
+
+ if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+ get_fs_root(current->fs, &root);
+ spin_lock(&dcache_lock);
+ tmp = root;
+ error = path_with_deleted(path, &tmp, &res, &buflen);
+ if (!error && !path_equal(&tmp, &root))
+ error = prepend_unreachable(&res, &buflen);
+ spin_unlock(&dcache_lock);
+ path_put(&root);
+ if (error)
+ res = ERR_PTR(error);
+
+ return res;
+}
+
/*
* Helper function for dentry_operations.d_dname() members
*/
@@ -2047,16 +2126,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
char *end = buf + buflen;
char *retval;
- spin_lock(&dcache_lock);
prepend(&end, &buflen, "\0", 1);
- if (d_unlinked(dentry) &&
- (prepend(&end, &buflen, "//deleted", 9) != 0))
- goto Elong;
if (buflen < 1)
goto Elong;
/* Get '/' right */
@@ -2074,7 +2149,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
retval = end;
dentry = parent;
}
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+ char *p = NULL;
+ char *retval;
+
+ spin_lock(&dcache_lock);
+ if (d_unlinked(dentry)) {
+ p = buf + buflen;
+ if (prepend(&p, &buflen, "//deleted", 10) != 0)
+ goto Elong;
+ buflen++;
+ }
+ retval = __dentry_path(dentry, buf, buflen);
spin_unlock(&dcache_lock);
+ if (!IS_ERR(retval) && p)
+ *p = '/'; /* restore '/' overriden with '\0' */
return retval;
Elong:
spin_unlock(&dcache_lock);
@@ -2108,27 +2204,30 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
if (!page)
return -ENOMEM;
- read_lock(&current->fs->lock);
- pwd = current->fs->pwd;
- path_get(&pwd);
- root = current->fs->root;
- path_get(&root);
- read_unlock(&current->fs->lock);
+ get_fs_root_and_pwd(current->fs, &root, &pwd);
error = -ENOENT;
spin_lock(&dcache_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
- char * cwd;
+ char *cwd = page + PAGE_SIZE;
+ int buflen = PAGE_SIZE;
- cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
+ prepend(&cwd, &buflen, "\0", 1);
+ error = prepend_path(&pwd, &tmp, &cwd, &buflen);
spin_unlock(&dcache_lock);
- error = PTR_ERR(cwd);
- if (IS_ERR(cwd))
+ if (error)
goto out;
+ /* Unreachable from current root */
+ if (!path_equal(&tmp, &root)) {
+ error = prepend_unreachable(&cwd, &buflen);
+ if (error)
+ goto out;
+ }
+
error = -ERANGE;
len = PAGE_SIZE + page - cwd;
if (len <= size) {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..51f270b479b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
@@ -1134,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
return ret;
}
+/*
+ * This is a library function for use by filesystem drivers.
+ *
+ * The locking rules are governed by the flags parameter:
+ * - if the flags value contains DIO_LOCKING we use a fancy locking
+ * scheme for dumb filesystems.
+ * For writes this function is called under i_mutex and returns with
+ * i_mutex held, for reads, i_mutex is not held on entry, but it is
+ * taken and dropped again before returning.
+ * For reads and writes i_alloc_sem is taken in shared mode and released
+ * on I/O completion (which may happen asynchronously after returning to
+ * the caller).
+ *
+ * - if the flags value does NOT contain DIO_LOCKING we don't use any
+ * internal locking but rather rely on the filesystem to synchronize
+ * direct I/O reads/writes versus each other and truncate.
+ * For reads and writes both i_mutex and i_alloc_sem are not held on
+ * entry and are never taken.
+ */
ssize_t
-__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, int flags)
@@ -1231,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
out:
return retval;
}
-EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
-
-/*
- * This is a library function for use by filesystem drivers.
- *
- * The locking rules are governed by the flags parameter:
- * - if the flags value contains DIO_LOCKING we use a fancy locking
- * scheme for dumb filesystems.
- * For writes this function is called under i_mutex and returns with
- * i_mutex held, for reads, i_mutex is not held on entry, but it is
- * taken and dropped again before returning.
- * For reads and writes i_alloc_sem is taken in shared mode and released
- * on I/O completion (which may happen asynchronously after returning to
- * the caller).
- *
- * - if the flags value does NOT contain DIO_LOCKING we don't use any
- * internal locking but rather rely on the filesystem to synchronize
- * direct I/O reads/writes versus each other and truncate.
- * For reads and writes both i_mutex and i_alloc_sem are not held on
- * entry and are never taken.
- */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, const struct iovec *iov, loff_t offset,
- unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
- dio_submit_t submit_io, int flags)
-{
- ssize_t retval;
-
- retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
- offset, nr_segs, get_block, end_io, submit_io, flags);
- /*
- * In case of error extending write may have instantiated a few
- * blocks outside i_size. Trim these off again for DIO_LOCKING.
- * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
- * their own manner. This is a further example of where the old
- * truncate sequence is inadequate.
- *
- * NOTE: filesystems with their own locking have to handle this
- * on their own.
- */
- if (flags & DIO_LOCKING) {
- if (unlikely((rw & WRITE) && retval < 0)) {
- loff_t isize = i_size_read(inode);
- loff_t end = offset + iov_length(iov, nr_segs);
-
- if (end > isize)
- vmtruncate(inode, isize);
- }
- }
-
- return retval;
-}
EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index c0d35c62052..37a34c2c622 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -248,7 +248,7 @@ static struct connection *assoc2con(int assoc_id)
for (i = 0 ; i < CONN_HASH_SIZE; i++) {
hlist_for_each_entry(con, h, &connection_hash[i], list) {
- if (con && con->sctp_assoc == assoc_id) {
+ if (con->sctp_assoc == assoc_id) {
mutex_unlock(&connections_lock);
return con;
}
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 2c6ad518100..ef17e0169da 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -81,24 +81,11 @@ static struct genl_ops dlm_nl_ops = {
int __init dlm_netlink_init(void)
{
- int rv;
-
- rv = genl_register_family(&family);
- if (rv)
- return rv;
-
- rv = genl_register_ops(&family, &dlm_nl_ops);
- if (rv < 0)
- goto err;
- return 0;
- err:
- genl_unregister_family(&family);
- return rv;
+ return genl_register_family_with_ops(&family, &dlm_nl_ops, 1);
}
void dlm_netlink_exit(void)
{
- genl_unregister_ops(&family, &dlm_nl_ops);
genl_unregister_family(&family);
}
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 83c4f600786..2195c213ab2 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_lock(&inode_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
if (inode->i_mapping->nrpages == 0)
continue;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 1cc087635a5..a2e3b562e65 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -762,7 +762,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
/**
* ecryptfs_init_crypt_ctx
- * @crypt_stat: Uninitilized crypt stats structure
+ * @crypt_stat: Uninitialized crypt stats structure
*
* Initialize the crypto context.
*
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e8fcf4e2ed7..622c9514080 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
"the persistent file for the dentry with name "
"[%s]; rc = [%d]\n", __func__,
ecryptfs_dentry->d_name.name, rc);
- goto out;
+ goto out_free;
}
}
if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
@@ -207,7 +207,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
rc = -EPERM;
printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
"file must hence be opened RO\n", __func__);
- goto out;
+ goto out_free;
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -292,12 +292,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
return rc;
}
-static int ecryptfs_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
+static long
+ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct file *lower_file = NULL;
+ long rc = -ENOTTY;
+
+ if (ecryptfs_file_to_private(file))
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+ rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+ return rc;
+}
+
+#ifdef CONFIG_COMPAT
+static long
+ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct file *lower_file = NULL;
+ long rc = -ENOIOCTLCMD;
+
+ if (ecryptfs_file_to_private(file))
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+ rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+ return rc;
+}
+#endif
const struct file_operations ecryptfs_dir_fops = {
.readdir = ecryptfs_readdir,
- .ioctl = ecryptfs_ioctl,
+ .unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ecryptfs_compat_ioctl,
+#endif
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
@@ -313,7 +341,10 @@ const struct file_operations ecryptfs_main_fops = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.readdir = ecryptfs_readdir,
- .ioctl = ecryptfs_ioctl,
+ .unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ecryptfs_compat_ioctl,
+#endif
.mmap = generic_file_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
@@ -322,20 +353,3 @@ const struct file_operations ecryptfs_main_fops = {
.fasync = ecryptfs_fasync,
.splice_read = generic_file_splice_read,
};
-
-static int
-ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- int rc = 0;
- struct file *lower_file = NULL;
-
- if (ecryptfs_file_to_private(file))
- lower_file = ecryptfs_file_to_lower(file);
- if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
- rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
- lower_file, cmd, arg);
- else
- rc = -ENOTTY;
- return rc;
-}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef5252f0f..6c55113e722 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -264,7 +264,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
printk(KERN_ERR "%s: Out of memory whilst attempting "
"to allocate ecryptfs_dentry_info struct\n",
__func__);
- goto out_dput;
+ goto out_put;
}
ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
@@ -339,14 +339,85 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
out_free_kmem:
kmem_cache_free(ecryptfs_header_cache_2, page_virt);
goto out;
-out_dput:
+out_put:
dput(lower_dentry);
+ mntput(lower_mnt);
d_drop(ecryptfs_dentry);
out:
return rc;
}
/**
+ * ecryptfs_new_lower_dentry
+ * @ename: The name of the new dentry.
+ * @lower_dir_dentry: Parent directory of the new dentry.
+ * @nd: nameidata from last lookup.
+ *
+ * Create a new dentry or get it from lower parent dir.
+ */
+static struct dentry *
+ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
+ struct nameidata *nd)
+{
+ struct dentry *new_dentry;
+ struct dentry *tmp;
+ struct inode *lower_dir_inode;
+
+ lower_dir_inode = lower_dir_dentry->d_inode;
+
+ tmp = d_alloc(lower_dir_dentry, name);
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&lower_dir_inode->i_mutex);
+ new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
+ mutex_unlock(&lower_dir_inode->i_mutex);
+
+ if (!new_dentry)
+ new_dentry = tmp;
+ else
+ dput(tmp);
+
+ return new_dentry;
+}
+
+
+/**
+ * ecryptfs_lookup_one_lower
+ * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
+ * @lower_dir_dentry: lower parent directory
+ *
+ * Get the lower dentry from vfs. If lower dentry does not exist yet,
+ * create it.
+ */
+static struct dentry *
+ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
+ struct dentry *lower_dir_dentry)
+{
+ struct nameidata nd;
+ struct vfsmount *lower_mnt;
+ struct qstr *name;
+ int err;
+
+ name = &ecryptfs_dentry->d_name;
+ lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
+ ecryptfs_dentry->d_parent));
+ err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
+ mntput(lower_mnt);
+
+ if (!err) {
+ /* we dont need the mount */
+ mntput(nd.path.mnt);
+ return nd.path.dentry;
+ }
+ if (err != -ENOENT)
+ return ERR_PTR(err);
+
+ /* create a new lower dentry */
+ return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
+}
+
+/**
* ecryptfs_lookup
* @ecryptfs_dir_inode: The eCryptfs directory inode
* @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -373,14 +444,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
goto out_d_drop;
}
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
- lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
- lower_dir_dentry,
- ecryptfs_dentry->d_name.len);
- mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+
+ lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+ lower_dir_dentry);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
@@ -402,14 +471,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out_d_drop;
}
- mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
- lower_dentry = lookup_one_len(encrypted_and_encoded_name,
- lower_dir_dentry,
- encrypted_and_encoded_name_size - 1);
- mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+ lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+ lower_dir_dentry);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
@@ -804,10 +870,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
size_t num_zeros = (PAGE_CACHE_SIZE
- (ia->ia_size & ~PAGE_CACHE_MASK));
+
+ /*
+ * XXX(truncate) this should really happen at the begginning
+ * of ->setattr. But the code is too messy to that as part
+ * of a larger patch. ecryptfs is also totally missing out
+ * on the inode_change_ok check at the beginning of
+ * ->setattr while would include this.
+ */
+ rc = inode_newsize_ok(inode, ia->ia_size);
+ if (rc)
+ goto out;
+
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = simple_setsize(inode, ia->ia_size);
- if (rc)
- goto out;
+ truncate_setsize(inode, ia->ia_size);
lower_ia->ia_size = ia->ia_size;
lower_ia->ia_valid |= ATTR_SIZE;
goto out;
@@ -830,7 +906,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
goto out;
}
}
- simple_setsize(inode, ia->ia_size);
+ truncate_setsize(inode, ia->ia_size);
rc = ecryptfs_write_inode_size_to_metadata(inode);
if (rc) {
printk(KERN_ERR "Problem with "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 2d8dbce9d48..bcb68c0cb1f 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
static struct hlist_head *ecryptfs_daemon_hash;
struct mutex ecryptfs_daemon_hash_mux;
-static int ecryptfs_hash_buckets;
+static int ecryptfs_hash_bits;
#define ecryptfs_uid_hash(uid) \
- hash_long((unsigned long)uid, ecryptfs_hash_buckets)
+ hash_long((unsigned long)uid, ecryptfs_hash_bits)
static u32 ecryptfs_msg_counter;
static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -274,7 +274,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
struct user_namespace *user_ns, struct pid *pid,
u32 seq)
{
- struct ecryptfs_daemon *daemon;
+ struct ecryptfs_daemon *uninitialized_var(daemon);
struct ecryptfs_msg_ctx *msg_ctx;
size_t msg_size;
struct nsproxy *nsproxy;
@@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void)
}
mutex_init(&ecryptfs_daemon_hash_mux);
mutex_lock(&ecryptfs_daemon_hash_mux);
- ecryptfs_hash_buckets = 1;
- while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
- ecryptfs_hash_buckets++;
+ ecryptfs_hash_bits = 1;
+ while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
+ ecryptfs_hash_bits++;
ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
- * ecryptfs_hash_buckets), GFP_KERNEL);
+ * (1 << ecryptfs_hash_bits)),
+ GFP_KERNEL);
if (!ecryptfs_daemon_hash) {
rc = -ENOMEM;
printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out;
}
- for (i = 0; i < ecryptfs_hash_buckets; i++)
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++)
INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
mutex_unlock(&ecryptfs_daemon_hash_mux);
ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
@@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void)
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
- for (i = 0; i < ecryptfs_hash_buckets; i++) {
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
hlist_for_each_entry(daemon, elem,
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0435886e4a9..f7fc286a3aa 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -118,11 +118,15 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
*/
static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
- return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+
+ if (!lower_dentry->d_sb->s_op->statfs)
+ return -ENOSYS;
+ return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
}
/**
- * ecryptfs_clear_inode
+ * ecryptfs_evict_inode
* @inode - The ecryptfs inode
*
* Called by iput() when the inode reference count reached zero
@@ -131,8 +135,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* on the inode free list. We use this to drop out reference to the
* lower inode.
*/
-static void ecryptfs_clear_inode(struct inode *inode)
+static void ecryptfs_evict_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
iput(ecryptfs_inode_to_lower(inode));
}
@@ -184,6 +190,6 @@ const struct super_operations ecryptfs_sops = {
.drop_inode = generic_delete_inode,
.statfs = ecryptfs_statfs,
.remount_fs = NULL,
- .clear_inode = ecryptfs_clear_inode,
+ .evict_inode = ecryptfs_evict_inode,
.show_options = ecryptfs_show_options
};
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a8033..7761837e450 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -28,7 +28,6 @@
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -129,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
- fsnotify_open(file->f_path.dentry);
+ fsnotify_open(file);
error = -ENOEXEC;
if(file->f_op) {
@@ -653,6 +652,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
else
stack_base = vma->vm_start - stack_expand;
#endif
+ current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
if (ret)
ret = -EFAULT;
@@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
goto exit;
- fsnotify_open(file->f_path.dentry);
+ fsnotify_open(file);
err = deny_write_access(file);
if (err)
@@ -1891,13 +1891,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
*/
clear_thread_flag(TIF_SIGPENDING);
- /*
- * lock_kernel() because format_corename() is controlled by sysctl, which
- * uses lock_kernel()
- */
- lock_kernel();
ispipe = format_corename(corename, signr);
- unlock_kernel();
if (ispipe) {
int dump_count;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 22721b2fd89..2dc925fa101 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
}
/* inode.c */
-void exofs_truncate(struct inode *inode);
int exofs_setattr(struct dentry *, struct iattr *);
int exofs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -264,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
extern struct inode *exofs_iget(struct super_block *, unsigned long);
struct inode *exofs_new_inode(struct inode *, int);
extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
-extern void exofs_delete_inode(struct inode *);
+extern void exofs_evict_inode(struct inode *);
/* dir.c: */
int exofs_add_link(struct dentry *, struct inode *);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index fef6899be39..68cb23e3bb9 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -30,9 +30,6 @@
* along with exofs; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-
-#include <linux/buffer_head.h>
-
#include "exofs.h"
static int exofs_release_file(struct inode *inode, struct file *filp)
@@ -40,19 +37,27 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
return 0;
}
+/* exofs_file_fsync - flush the inode to disk
+ *
+ * Note, in exofs all metadata is written as part of inode, regardless.
+ * The writeout is synchronous
+ */
static int exofs_file_fsync(struct file *filp, int datasync)
{
int ret;
- struct address_space *mapping = filp->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = filp->f_mapping->host;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata-only; caller takes care of data */
+ };
struct super_block *sb;
- ret = filemap_write_and_wait(mapping);
- if (ret)
- return ret;
+ if (!(inode->i_state & I_DIRTY))
+ return 0;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return 0;
- /* sync the inode attributes */
- ret = write_inode_now(inode, 1);
+ ret = sync_inode(inode, &wbc);
/* This is a good place to write the sb */
/* TODO: Sechedule an sb-sync on create */
@@ -65,9 +70,9 @@ static int exofs_file_fsync(struct file *filp, int datasync)
static int exofs_flush(struct file *file, fl_owner_t id)
{
- exofs_file_fsync(file, 1);
+ int ret = vfs_fsync(file, 0);
/* TODO: Flush the OSD target */
- return 0;
+ return ret;
}
const struct file_operations exofs_file_operations = {
@@ -86,6 +91,5 @@ const struct file_operations exofs_file_operations = {
};
const struct inode_operations exofs_file_inode_operations = {
- .truncate = exofs_truncate,
.setattr = exofs_setattr,
};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4bb6ef822e4..eb7368ebd8c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -32,9 +32,6 @@
*/
#include <linux/slab.h>
-#include <linux/writeback.h>
-#include <linux/buffer_head.h>
-#include <scsi/scsi_device.h>
#include "exofs.h"
@@ -697,6 +694,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
return write_exec(&pcol);
}
+/* i_mutex held using inode->i_size directly */
+static void _write_failed(struct inode *inode, loff_t to)
+{
+ if (to > inode->i_size)
+ truncate_pagecache(inode, to, inode->i_size);
+}
+
int exofs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -710,7 +714,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
fsdata);
if (ret) {
EXOFS_DBGMSG("simple_write_begin faild\n");
- return ret;
+ goto out;
}
page = *pagep;
@@ -725,6 +729,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
EXOFS_DBGMSG("__readpage_filler faild\n");
}
}
+out:
+ if (unlikely(ret))
+ _write_failed(mapping->host, pos + len);
return ret;
}
@@ -750,6 +757,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
int ret;
ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+ if (unlikely(ret))
+ _write_failed(inode, pos + len);
+
+ /* TODO: once simple_write_end marks inode dirty remove */
if (i_size != inode->i_size)
mark_inode_dirty(inode);
return ret;
@@ -759,15 +770,13 @@ static int exofs_releasepage(struct page *page, gfp_t gfp)
{
EXOFS_DBGMSG("page 0x%lx\n", page->index);
WARN_ON(1);
- return try_to_free_buffers(page);
+ return 0;
}
static void exofs_invalidatepage(struct page *page, unsigned long offset)
{
- EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page));
+ EXOFS_DBGMSG("page 0x%lx offset 0x%lx\n", page->index, offset);
WARN_ON(1);
-
- block_invalidatepage(page, offset);
}
const struct address_space_operations exofs_aops = {
@@ -808,87 +817,55 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
}
-/*
- * get_block_t - Fill in a buffer_head
- * An OSD takes care of block allocation so we just fake an allocation by
- * putting in the inode's sector_t in the buffer_head.
- * TODO: What about the case of create==0 and @iblock does not exist in the
- * object?
- */
-static int exofs_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- map_bh(bh_result, inode->i_sb, iblock);
- return 0;
-}
-
const struct osd_attr g_attr_logical_length = ATTR_DEF(
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-static int _do_truncate(struct inode *inode)
+static int _do_truncate(struct inode *inode, loff_t newsize)
{
struct exofs_i_info *oi = exofs_i(inode);
- loff_t isize = i_size_read(inode);
int ret;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+ ret = exofs_oi_truncate(oi, (u64)newsize);
+ if (likely(!ret))
+ truncate_setsize(inode, newsize);
- ret = exofs_oi_truncate(oi, (u64)isize);
- EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
+ EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n",
+ inode->i_ino, newsize, ret);
return ret;
}
/*
- * Truncate a file to the specified size - all we have to do is set the size
- * attribute. We make sure the object exists first.
- */
-void exofs_truncate(struct inode *inode)
-{
- struct exofs_i_info *oi = exofs_i(inode);
- int ret;
-
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
- || S_ISLNK(inode->i_mode)))
- return;
- if (exofs_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
-
- /* if we are about to truncate an object, and it hasn't been
- * created yet, wait
- */
- if (unlikely(wait_obj_created(oi)))
- goto fail;
-
- ret = _do_truncate(inode);
- if (ret)
- goto fail;
-
-out:
- mark_inode_dirty(inode);
- return;
-fail:
- make_bad_inode(inode);
- goto out;
-}
-
-/*
- * Set inode attributes - just call generic functions.
+ * Set inode attributes - update size attribute on OSD if needed,
+ * otherwise just call generic functions.
*/
int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
int error;
+ /* if we are about to modify an object, and it hasn't been
+ * created yet, wait
+ */
+ error = wait_obj_created(exofs_i(inode));
+ if (unlikely(error))
+ return error;
+
error = inode_change_ok(inode, iattr);
- if (error)
+ if (unlikely(error))
return error;
- error = inode_setattr(inode, iattr);
- return error;
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(inode)) {
+ error = _do_truncate(inode, iattr->ia_size);
+ if (unlikely(error))
+ return error;
+ }
+
+ setattr_copy(inode, iattr);
+ mark_inode_dirty(inode);
+ return 0;
}
static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
@@ -1325,7 +1302,7 @@ static void delete_done(struct exofs_io_state *ios, void *p)
* from the OSD here. We make sure the object was created before we try and
* delete it.
*/
-void exofs_delete_inode(struct inode *inode)
+void exofs_evict_inode(struct inode *inode)
{
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
@@ -1335,30 +1312,27 @@ void exofs_delete_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
- if (is_bad_inode(inode))
+ /* TODO: should do better here */
+ if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
- mark_inode_dirty(inode);
- exofs_update_inode(inode, inode_needs_sync(inode));
-
inode->i_size = 0;
- if (inode->i_blocks)
- exofs_truncate(inode);
+ end_writeback(inode);
- clear_inode(inode);
+ /* if we are deleting an obj that hasn't been created yet, wait */
+ if (!obj_created(oi)) {
+ BUG_ON(!obj_2bcreated(oi));
+ wait_event(oi->i_wq, obj_created(oi));
+ /* ignore the error attempt a remove anyway */
+ }
+ /* Now Remove the OSD objects */
ret = exofs_get_io_state(&sbi->layout, &ios);
if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
return;
}
- /* if we are deleting an obj that hasn't been created yet, wait */
- if (!obj_created(oi)) {
- BUG_ON(!obj_2bcreated(oi));
- wait_event(oi->i_wq, obj_created(oi));
- }
-
ios->obj.id = exofs_oi_objno(oi);
ios->done = delete_done;
ios->private = sbi;
@@ -1374,5 +1348,5 @@ void exofs_delete_inode(struct inode *inode)
return;
no_delete:
- clear_inode(inode);
+ end_writeback(inode);
}
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 4337cad7777..6550bf70e41 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -305,8 +305,6 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
struct _striping_info {
u64 obj_offset;
u64 group_length;
- u64 total_group_length;
- u64 Major;
unsigned dev;
unsigned unit_off;
};
@@ -343,8 +341,6 @@ static void _calc_stripe_info(struct exofs_io_st