]> git.neil.brown.name Git - LaFS.git/commitdiff
Change flushing of space-accounting blocks.
author NeilBrown <neilb@suse.de>
Sat, 26 Jun 2010 01:16:10 +0000 (11:16 +1000)
committer NeilBrown <neilb@suse.de>
Sun, 27 Jun 2010 23:15:02 +0000 (09:15 +1000)
Space-accounting blocks need to be flushed very late in the
checkpoint.

We were special casing these, but in an awkward way.

Change it so that these blocks are pinned, but that a checkpoint
doesn't handle them straight away but rather performs a phase_flip
and then queues them for later handling.

This means that we get more consistent behaviour of pinned data blocks
and writepage doesn't need to special-case the flushing of segment
usage blocks.

Signed-off-by: NeilBrown <neilb@suse.de>
checkpoint.c
file.c
index.c
lafs.h
quota.c
segments.c
state.h
super.c

index ef49f2bd98be1153c1cd39463c7142006867e876..79b9de74b3b546c93016aca474007634b5589aaa 100644 (file)
@@ -223,6 +223,13 @@ int lafs_print_tree(struct block *b, int depth)
                }
                j++;
        }
+       list_for_each_entry(b2, &dfs->account_leafs, lru) {
+               if (b2 == b) {
+                       printk(" Account(%d) ", j);
+                       break;
+               }
+               j++;
+       }
        list_for_each_entry(b2, &freelist.lru, lru)
                if (b2 == b) {
                        printk(" on free ");
@@ -380,7 +387,24 @@ again:
 
                LAFS_BUG(!!test_bit(B_Phase1, &b->flags) != oldphase, b);
 
-               if (test_bit(B_Index, &b->flags) &&
+               if (test_bit(B_PinPending, &b->flags) &&
+                   !test_bit(B_Index, &b->flags) &&
+                   (LAFSI(b->inode)->type == TypeSegmentMap ||
+                    LAFSI(b->inode)->type == TypeQuota)) {
+                       /* Need to delay handling of this block until
+                        * the phase change has finished, to just
+                        * before we finish the checkpoint.
+                        * Note that we don't check if they are dirty -
+                        * they might not be yet - the whole point of the
+                        * delay is that changes are still arriving.
+                        */
+                       lafs_flip_dblock(dblk(b));
+                       /* access to account_leafs is single-threaded
+                        * by the cleaner thread so no locking needed
+                        */
+                       getref(b, MKREF(accounting));
+                       list_add(&b->lru, &fs->account_leafs);
+               } else if (test_bit(B_Index, &b->flags) &&
                    (iblk(b)->uninc_table.pending_cnt ||
                     iblk(b)->uninc)) {
                        lafs_incorporate(fs, iblk(b));
@@ -433,14 +457,23 @@ again:
        lafs_clusters_done(fs);
 }
 
+static void flush_accounting(struct fs *fs)
+{
+       while (!list_empty(&fs->account_leafs)) {
+               struct block *b = list_first_entry(&fs->account_leafs,
+                                                  struct block,
+                                                  lru);
+               list_del_init(&b->lru);
+               lafs_iolock_block(b);
+               lafs_cluster_allocate(b, 0);
+               putref(b, MKREF(accounting));
+       }
+       fs->qphase = fs->phase;
+}
+
 static void finish_checkpoint(struct fs *fs, int youth)
 {
-       set_bit(CheckpointFlushing, &fs->fsstate);
-       lafs_seg_flush_all(fs);
-       lafs_quota_flush(fs);
-       clear_bit(CheckpointFlushing, &fs->fsstate);
-       if (!test_bit(FinalCheckpoint, &fs->fsstate))
-               lafs_seg_apply_all(fs);
+       flush_accounting(fs);
 
        /* if we are creating a snapshot, special handling is needed */
        if (LAFSI(fs->ss[0].root)->md.fs.usagetable > 1) {
@@ -454,6 +487,9 @@ static void finish_checkpoint(struct fs *fs, int youth)
        dprintk("FinalFlush %d\n", fs->seq);
        lafs_cluster_flush(fs, 0);
 
+       if (!test_bit(FinalCheckpoint, &fs->fsstate))
+               lafs_seg_apply_all(fs);
+
        lafs_write_state(fs);
        dprintk("State written, all done %d\n", fs->seq);
 
diff --git a/file.c b/file.c
index 83ded7a891f99f83409d873622983a7f90fed897..277c02c5cb85d988e0f827a6bbb89ea2ddb52a90 100644 (file)
--- a/file.c
+++ b/file.c
@@ -212,10 +212,6 @@ lafs_writepage(struct page *page, struct writeback_control *wbc)
                if (test_bit(B_Dirty, &b->b.flags)) {
                        if (test_bit(B_PinPending, &b->b.flags))
                                redirty = 1;
-                       else if (LAFSI(ino)->type == TypeSegmentMap &&
-                                !test_bit(CheckpointFlushing, &fs->fsstate))
-                               /* FIXME or quota ?? */
-                               redirty = 1;
                        else if (LAFSI(b->b.inode)->type == TypeInodeFile &&
                                 b->my_inode &&
                                 LAFSI(b->my_inode)->iblock)
diff --git a/index.c b/index.c
index 2870d6765f0875a7c2d192dbc75efadd5f07724d..5884e799f0d6003997cfd92d25d943514c4280be 100644 (file)
--- a/index.c
+++ b/index.c
@@ -455,7 +455,8 @@ static void flip_phase(struct block *b)
 {
        struct indexblock *p;
        int oldphase = !!test_bit(B_Phase1, &b->flags);
-       
+
+       LAFS_BUG(!test_bit(B_Pinned, &b->flags), b);
        if (oldphase)
                clear_bit(B_Phase1, &b->flags);
        else
@@ -479,20 +480,46 @@ static void flip_phase(struct block *b)
                set_bit(B_ICredit, &b->flags);
 }
 
+/* When the pinning of a block needs to be carried across a
+ * checkpoint, we need to 'flip' the phase.
+ * This only applies to blocks that can be pinned by a block that
+ * may not be written until the next phase.
+ * This includes any index block (as it may have some children in one
+ * phase and some in the next) and any space-accounting block
+ * for the same reason.
+ * So indexblocks will need to flip, and use lafs_phase_flip.
+ * TypeSegmentMap and TypeQuota also need to flip and use lafs_flip_dblock.
+ * TypeInodeFile don't need to be phase_flipped, though their InoIdx
+ * block might.  InodeFile blocks are only pinned by metadata transactions
+ * which happen inside a checkpoint lock.
+ */
+
+void lafs_flip_dblock(struct datablock *db)
+{
+       /* This is an accounting block (SegmentMap or Quota)
+        * which we need to write out after ->phase has changed
+        * in the tail of the checkpoint.
+        * We always flip the phase and reallocate from AccountSpace.
+        * If all references get dropped, it will then get unpinned
+        * before the next phase finished - we don't unpin here
+        * (unlike lafs_phase_flip for index blocks).
+        */
+       flip_phase(&db->b);
+       lafs_prealloc(&db->b, AccountSpace);
+       /* Parent might need to be on a leaflist now */
+       lafs_refile(&db->b.parent->b, 0);
+}
+
 void lafs_phase_flip(struct fs *fs, struct indexblock *ib)
 {
        /* We are performing a checkpoint, this block has been written
         * out and now needs to be flipped into the next phase.
-        * This only makes sense for an index block.  Datablocks are
-        * simply unpinned at phase change.
+        *
         * It involves.
         *  - Processing all uninc_next blocks into uninc_table.
         *  - adjusting counts on parent
         *  - moving credits from 'next' to 'this' phase.
         *  - update block counts to included phase-delayed updates.
-        *NO: This is done when 'writing' the InoIdx.
-        * For InoIdx, we transfer the pinning and Credits to the
-        *  Data block rather than release them.
         */
        int oldphase = !!test_bit(B_Phase1, &ib->b.flags);
        struct block *ulist;
diff --git a/lafs.h b/lafs.h
index 75dcfef9549a0e5aaa429cab16204b89f598cdaf..bd937a395c688fbdbb85d658c452ad871f898feb 100644 (file)
--- a/lafs.h
+++ b/lafs.h
@@ -655,6 +655,7 @@ static inline void lafs_pin_block(struct block *b)
 }
 
 int lafs_add_block_address(struct fs *fs, struct block *blk);
+void lafs_flip_dblock(struct datablock *db);
 void lafs_phase_flip(struct fs *fs, struct indexblock *ib);
 struct indexblock * __must_check
 lafs_make_iblock(struct inode *ino, int adopt, int async, REFARG);
@@ -667,7 +668,6 @@ void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt,
 void lafs_write_block(struct fs *fs, struct block *b, int dev, struct wc *wc);
 
 /* quota.c */
-void lafs_quota_flush(struct fs *fs);
 int lafs_quota_allocate(struct fs *fs, struct inode *ino, int diff);
 
 #define __wait_event_lock(wq, condition, lock)                         \
diff --git a/quota.c b/quota.c
index 5f74f06994f06c381bff005c501af2c521f48033..478423ff601ce9801d3a237aa4436f5e295edc0d 100644 (file)
--- a/quota.c
+++ b/quota.c
@@ -5,11 +5,6 @@ void lafs_qcommit(struct fs *fs, struct inode *ino, int diff, int phase)
 {
 }
 
-void lafs_quota_flush(struct fs *fs)
-{
-       fs->qphase = fs->phase;
-}
-
 int lafs_quota_allocate(struct fs *fs, struct inode *ino, int diff)
 {
        return 0;
index d698cc80c97490bcd1fbba264887a36995ff2901..ada9ea62914ed95ba890768b9a6c1bc18920f04d 100644 (file)
@@ -393,22 +393,6 @@ void lafs_seg_move(struct fs *fs, u64 oldaddr, u64 newaddr,
        }
 }
 
-/* lafs_seg_flush_all
- * All segment usage tables should be flushed to storage.
- * This is called towards the end of performing a checkpoint, after
- * the entire phase tree has been committed.  The blocks written
- * here record the status of the finishing phase, but they themselves
- * become part of the next phase.  They can be found during roll-forward
- * as their write-clusters are still flagged as being part of a checkpoint.
- */
-void lafs_seg_flush_all(struct fs *fs)
-{
-       int d;
-       for (d = 0; d < fs->devices ; d++)
-               write_inode_now(fs->devs[d].segsum, 0);
-       for (d = 0; d < fs->devices ; d++)
-               write_inode_now(fs->devs[d].segsum, 1);
-}
 
 static void seg_apply(struct fs *fs, struct segsum *ss)
 {
diff --git a/state.h b/state.h
index ad80ff951d5dbb5accac66298f4390a6cd2331e4..fc32c293a8be2660cf9dbaabd4d2d37c857ff243 100644 (file)
--- a/state.h
+++ b/state.h
@@ -89,8 +89,7 @@ struct fs {
 #define FinalCheckpoint 3
 #define CleanerDisabled 4
 #define OrphansRunning 5
-#define CheckpointFlushing 6  /* We are writing the segusage blocks */
-#define CleanerBlocks 7        /* One or more threads is blocked waiting for the
+#define CleanerBlocks 6        /* One or more threads is blocked waiting for the
                         * cleaner to progress - cleaner.need blocks are
                         * needed.
                         */
@@ -160,6 +159,12 @@ struct fs {
                                                 * have no pinned children
                                                 * and are being cleaned
                                                 */
+       struct list_head        account_leafs;  /* list of accounting blocks
+                                                * that we need to write after
+                                                * the checkpoint is done.
+                                                * They are now pinned to the
+                                                * next phase.
+                                                */
 
        /* Youth management */
        int     youth_next;     /* number to assign to next segment */
@@ -328,7 +333,7 @@ struct block {
                                    * reachability-set as this block
                                    */
 
-       struct list_head        lru; /* phase_leafs, clean_leafs,
+       struct list_head        lru; /* phase_leafs, clean_leafs, account_leafs,
                                      * clhead, pending_blocks */
 
        struct list_head        peers;  /* other blocks that use the same location
diff --git a/super.c b/super.c
index 5bc8ceeedd6f5d46c30e17d5d21675154e0acc75..0819f83920aee602b89e7240ac0cf25032409a0e 100644 (file)
--- a/super.c
+++ b/super.c
@@ -527,6 +527,7 @@ lafs_load(struct options *op, int newest)
        INIT_LIST_HEAD(&fs->phase_leafs[0]);
        INIT_LIST_HEAD(&fs->phase_leafs[1]);
        INIT_LIST_HEAD(&fs->clean_leafs);
+       INIT_LIST_HEAD(&fs->account_leafs);
        atomic_set(&fs->sb_writes_pending, 0);
        init_waitqueue_head(&fs->sb_writes_wait);
        init_waitqueue_head(&fs->async_complete);