author Dave Chinner <dchinner@redhat.com> 2010-11-30 15:16:02 +1100
committer Alex Elder <aelder@sgi.com> 2010-12-01 07:40:20 -0600
commit 309c848002052edbec650075a1eb098b17c17f35 (patch)
tree 7e3e38c9ebcfa539716298c0f8a0000b45cffd8e /fs
parent 90810b9e82a36c3c57c1aeb8b2918b242a130b26 (diff)
xfs: delayed alloc blocks beyond EOF are valid after writeback
There is an assumption in parts of XFS that flushing a dirty file will make all the delayed allocation blocks disappear from an inode. That is, that after calling xfs_flush_pages(), ip->i_delayed_blks will be zero. This is an invalid assumption, as we may have speculative preallocation beyond EOF, and those blocks are recorded in ip->i_delayed_blks. A flush of the dirty pages of an inode will not change the state of these blocks beyond EOF, so a non-zero delalloc block count after a flush is valid. The bmap code has an invalid ASSERT() that needs to be removed, and the swapext code has a bug in that while it swaps the data forks around, it fails to swap the i_delayed_blks counter associated with the fork and hence can get the block accounting wrong. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs')
2 files changed, 20 insertions, 2 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 08b179fa9e8..4111cd3966c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
if (error)
goto out_unlock_iolock;
- ASSERT(ip->i_delayed_blks == 0);
+ /*
+ * even after flushing the inode, there can still be delalloc
+ * blocks on the inode beyond EOF due to speculative
+ * preallocation. These are not removed until the release
+ * function is called or the inode is inactivated. Hence we
+ * cannot assert here that ip->i_delayed_blks == 0.
+ */
lock = xfs_ilock_map_shared(ip);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a2..e60490bc00a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
+ /*
+ * The extents in the source inode could still contain speculative
+ * preallocation beyond EOF (e.g. the file is open but not modified
+ * while defrag is in progress). In that case, we need to copy over the
+ * number of delalloc blocks the data fork in the source inode is
+ * tracking beyond EOF so that when the fork is truncated away when the
+ * temporary inode is unlinked we don't underrun the i_delayed_blks
+ * counter on that inode.
+ */
+ ASSERT(tip->i_delayed_blks == 0);
+ tip->i_delayed_blks = ip->i_delayed_blks;
+ ip->i_delayed_blks = 0;
ilf_fields = XFS_ILOG_CORE;
switch(ip->i_d.di_format) {