]> git.neil.brown.name Git - LaFS.git/commitdiff
Simplify writeout rules for inode data block.
authorNeilBrown <neilb@suse.de>
Sat, 15 Aug 2009 07:09:22 +0000 (17:09 +1000)
committerNeilBrown <neilb@suse.de>
Sat, 15 Aug 2009 07:09:22 +0000 (17:09 +1000)
Previously we did not write an inode data block until the
InoIdx was ready.
This is not good if we need to sync an inode well before checkpoint
runs.
So just write an inde data block when we find it, but ensure not to
send an inode data block during checkpoint until the InoIdx block is
ready.

Note: it is now clear why an inode has two sets of credits, one on the
data block and one on the index block.  The first set may be needed to
sync the inode metadata.  The second may be need to update the
indexing information - they are copied across to the data block for
this purpose.

checkpoint.c
cluster.c
index.c

index 686d2cce21d5f161417a72de3230ab0bf10dd2c4..e0d5e93cb31483efdb355a69919560c181533e76 100644 (file)
@@ -294,6 +294,8 @@ static int prepare_checkpoint(struct fs *fs)
 struct block *lafs_get_flushable(struct fs *fs, int phase)
 {
        struct block *b;
+       struct inode *in;
+       struct lafs_inode *lai;
 
  retry:
        spin_lock(&fs->lock);
@@ -303,6 +305,31 @@ struct block *lafs_get_flushable(struct fs *fs, int phase)
                b = list_entry(fs->phase_leafs[phase].next, struct block, lru);
        else
                b = NULL;
+
+       /* If this is an inode data block and the InoIdx block is
+        * also flushable (or will be soon), then de-pin this block.
+        * The InoIdx will hold and eventually pin it.
+        */
+       if (b && !test_bit(B_Index, &b->flags) &&
+           LAFSI(b->inode)->type == TypeInodeFile &&
+           (in = dblk(b)->my_inode) &&
+           (lai = LAFSI(in)) &&
+           lai->iblock != NULL &&
+           test_bit(B_Pinned, &lai->iblock->b.flags) &&
+           (phase == !!test_bit(B_Phase1, &lai->iblock->b.flags) ||
+            (phase == -1 && test_bit(B_Realloc, &lai->iblock->b.flags)))
+               ) {
+               /* Don't really want this to be pinned yet. When the
+                * InoIdx block gets proceed we will get pinned again.
+                */
+               list_del_init(&b->lru);
+               clear_bit(B_Pinned, &b->flags);
+               if (!test_bit(B_Root, &b->flags))
+                       atomic_dec(&b->parent->pincnt[!!test_bit(B_Phase1, &b->flags)]);
+               spin_unlock(&fs->lock);
+               putref(b, MKREF(leaf));
+               goto retry;
+       }
        if (b)
                /* the list counted a reference.  Now we hold it */
                list_del_init(&b->lru);
@@ -349,13 +376,6 @@ static void do_checkpoint(void *data)
                                    test_bit(B_Realloc, &b->flags))) {
                                lafs_cluster_allocate(b, 0);
                                unlock = 0;
-                       } else if (test_bit(B_InoIdx, &b->flags) &&
-                                  (test_bit(B_Dirty, &LAFSI(b->inode)->dblock->b.flags) ||
-                                   test_bit(B_Realloc, &LAFSI(b->inode)->dblock->b.flags)) && 
-                                  !test_bit(B_Pinned, &LAFSI(b->inode)->dblock->b.flags)) {
-                               /* inode data block is dirty/realloc, so allocate it */
-                               lafs_cluster_allocate(b, 0);
-                               unlock = 0;
                        } else if (test_bit(B_Index, &b->flags)) {
                                if (atomic_read(&iblk(b)->pincnt[oldphase])
                                    == 0) {
index 7dd6aa807f34800bda19a200ac2faa13de4b5a50..bc80310a8ddaea10adb5a361cd8e0e5cc559fb33 100644 (file)
--- a/cluster.c
+++ b/cluster.c
@@ -511,7 +511,6 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
        struct wc *wc = &fs->wc[cnum];
        struct lafs_inode *lai;
        loff_t size;
-       struct indexblock *inob;
        int used;
 
 
@@ -553,48 +552,26 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
 
        /* The symbiosis between the datablock and the indexblock for an
         * inode needs to be dealt with here.
-        * We don't want to write the datablock until the indexblock is
-        * ready (there may be incorporation still to happen)
-        * But when it is ready, we want to write the datablock, not the
-        * indexblock (as the datablock knows where the address needs to
-        * be incorporated).
-        * So if this is the datablock we flush the inode if appropriate
-        */
-
-       /* FIXME do I need any locking here?  I don't think so
-        * but it is hard to be sure
+        * The data block will not normally be written until the InoIdx
+        * block is unpinned or phase-flipped.  However if it is, as the
+        * result of some 'sync' style operation, then we write it even
+        * though the index info might not be uptodate.  The rest of the
+        * content will be safe for roll-forward to pick up, and the rest
+        * will be written when the InoIdx block is ready.
+        *
+        * When the InoIdx block is ready, we don't want to write it as
+        * there is nowhere for it to be incorporated in to.  Rather we
+        * move the pinning and credits across to the Data block
+        * which will subsequently be written.
         */
-       if (!test_bit(B_Index, &b->flags) &&
-           lai->type == TypeInodeFile &&
-           dblk(b)->my_inode != NULL &&
-           !test_bit(B_Pinned, &b->flags) /* Once the data block is pinned,
-                                             we write it */
-               ) {
-               spin_lock(&dblk(b)->my_inode->i_data.private_lock);
-               inob = LAFSI(dblk(b)->my_inode)->iblock;
-               if (inob != NULL &&
-                   test_bit(B_Pinned, &inob->b.flags) &&
-                   test_bit(B_Phase1, &inob->b.flags) ==
-                   test_bit(B_Phase1, &b->flags)
-                       ) {
-                       /* Don't allocate yet, until index block is ready */
 
-                       spin_unlock(&dblk(b)->my_inode->i_data.private_lock);
-                       lafs_iounlock_block(b, 0);
-                       return 0;
-               }
-               spin_unlock(&dblk(b)->my_inode->i_data.private_lock);
-       }
-       /* and if this is the InoIdx block, we make sure the datablock
-        * is ready to be written.
-        */
        if (test_bit(B_InoIdx, &b->flags)) {
                struct block *b2;
                int credits = 0;
                /* FIXME should I inode_fillblock here?? */
 
-               if (!test_bit(B_Pinned, &lai->dblock->b.flags) &&
-                   test_bit(B_Pinned, &b->flags)) {
+               BUG_ON(!test_bit(B_Pinned, &b->flags));
+               if (!test_bit(B_Pinned, &lai->dblock->b.flags)) {
                        /* index block pinned, data block not,
                         * carry the pinning across
                         */
diff --git a/index.c b/index.c
index bbbf4051bd272515bcd6535fb503fdb91f0e440a..260a6f8ea4fdb79890fd349ddb992c27a68c515f 100644 (file)
--- a/index.c
+++ b/index.c
@@ -753,12 +753,8 @@ void lafs_refile(struct block *b, int dec)
                      iblk(b)->uninc_next == NULL &&
                      atomic_read(&b->refcnt) == dec+onlru &&
                      atomic_read(&iblk(b)->pincnt[0]) == 0 &&
-                     atomic_read(&iblk(b)->pincnt[1]) == 0)) &&
-                   (!test_bit(B_InoIdx, &b->flags) ||
-                    !(test_bit(B_PinPending, &LAFSI(b->inode)->dblock->b.flags)
-                      || test_bit(B_Dirty, &LAFSI(b->inode)->dblock->b.flags)
-                      || test_bit(B_Realloc, &LAFSI(b->inode)->dblock->b.flags))
-                           )) {
+                     atomic_read(&iblk(b)->pincnt[1]) == 0))
+                       ) {
                        /* Don't need to be Pinned any more */
                        fs = fs_from_inode(b->inode);
                        lafs_checkpoint_lock(fs);