]> git.neil.brown.name Git - LaFS.git/commitdiff
Remove need to hold i_mutex when flushing data into inode
authorNeilBrown <neilb@suse.de>
Thu, 19 Mar 2009 01:47:37 +0000 (12:47 +1100)
committerNeilBrown <neilb@suse.de>
Thu, 19 Mar 2009 01:47:37 +0000 (12:47 +1100)
Holding i_mutex while flush data into the inode is a problem because
it can triggered deadlocks if something hold i_mutex and waits for
checkpoint to finish - checkpoint might do the flush.

So check the i_size after clearing B_Dirty.  If it changes after this,
we will just do another write-out and fix things up.

cluster.c

index b358a43822f168713ec2bd2f30178946e76738b2..3235ba2e924f91458d71d99b80536a91369807b3 100644 (file)
--- a/cluster.c
+++ b/cluster.c
@@ -422,7 +422,7 @@ static int flush_data_to_inode(struct block *b)
         */
 
        char *ibuf, *dbuf;
-       loff_t size = b->inode->i_size;
+       loff_t size = i_size_read(b->inode);
        struct super_block *sb = b->inode->i_sb;
        struct lafs_inode *lai = LAFSI(b->inode);
        struct datablock *db;
@@ -443,19 +443,16 @@ static int flush_data_to_inode(struct block *b)
                    lafs_leaf_next(lai->iblock, 1) != 0xffffffff)
                        return 0;
        }
-       lai->depth = 0;
-       lai->iblock->depth = 0;
-
-       /* safe to reference ->dblock as b is a dirty child */
-       db = getdref(lai->dblock, MKREF(flush2db));
-       ibuf = map_dblock(db);
-       dbuf = map_dblock_2(dblk(b));
-       if (test_bit(B_Phase1, &b->flags))
-               set_bit(B_WritePhase1, &b->flags);
-       else
-               clear_bit(B_WritePhase1, &b->flags);
-
        if (test_and_clear_bit(B_Dirty, &b->flags)) {
+               /* Check size again, just in case it changed */
+               size = i_size_read(b->inode);
+               if (size + lai->metadata_size > sb->s_blocksize) {
+                       /* Lost a race, better give up restoring Dirty big */
+                       if (test_and_set_bit(B_Dirty, &b->flags))
+                               if (test_and_set_bit(B_Credit, &b->flags))
+                                       lafs_space_return(fs, 1);
+                       return 0;
+               }
                if (test_and_set_bit(B_Credit, &lai->iblock->b.flags))
                        lafs_space_return(fs, 1);
                lafs_dirty_iblock(lai->iblock);
@@ -475,6 +472,18 @@ static int flush_data_to_inode(struct block *b)
                printk("Wasn't dirty or realloc: %s\n", strblk(b));
                BUG();
        }
+       lai->depth = 0;
+       lai->iblock->depth = 0;
+
+       /* safe to reference ->dblock as b is a dirty child */
+       db = getdref(lai->dblock, MKREF(flush2db));
+       ibuf = map_dblock(db);
+       dbuf = map_dblock_2(dblk(b));
+       if (test_bit(B_Phase1, &b->flags))
+               set_bit(B_WritePhase1, &b->flags);
+       else
+               clear_bit(B_WritePhase1, &b->flags);
+
        if (!test_and_clear_bit(B_UnincCredit, &b->flags))
                BUG();
        /* 1 from Dirty or Realloc.  1 from UninCredit */
@@ -529,30 +538,22 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
            b->parent == lai->iblock &&                 /* No indexing */
            lai->type >= TypeBase &&                    /* 'size' is meaningful */
            size + lai->metadata_size <= sb->s_blocksize) {
-
-               mutex_lock(&b->inode->i_mutex);
-               /* need to recheck size after getting the lock */
-               size = i_size_read(b->inode);
-               if (size + lai->metadata_size <= sb->s_blocksize) {
-                       int success = flush_data_to_inode(b);
-                       mutex_unlock(&b->inode->i_mutex);
-                       if (success) {
-                               lafs_iounlock_block(b, B_IOLock);
-                               return 0;
-                       }
-                       /* There are conflicting blocks in the index tree */
-                       if (test_and_clear_bit(B_Realloc, &b->flags))
-                               lafs_space_return(fs, 1);
-                       if (!test_bit(B_Dirty, &b->flags) ||
-                           !test_bit(B_Pinned, &b->flags)) {
-                               /* It is safe to just leave this for later */
-                               lafs_iounlock_block(b, B_IOLock);
-                               return 0;
-                       }
-                       /* We really need to write this, so use a full block. */
-                       WARN_ON(1);
-               } else
-                       mutex_unlock(&b->inode->i_mutex);
+               int success = flush_data_to_inode(b);
+               if (success) {
+                       lafs_iounlock_block(b, B_IOLock);
+                       return 0;
+               }
+               /* There are conflicting blocks in the index tree */
+               if (test_and_clear_bit(B_Realloc, &b->flags))
+                       lafs_space_return(fs, 1);
+               if (!test_bit(B_Dirty, &b->flags) ||
+                   !test_bit(B_Pinned, &b->flags)) {
+                       /* It is safe to just leave this for later */
+                       lafs_iounlock_block(b, B_IOLock);
+                       return 0;
+               }
+               /* We really need to write this, so use a full block. */
+               WARN_ON(1);
        }
 
        /* The symbiosis between the datablock and the indexblock for an