}
j++;
}
+ list_for_each_entry(b2, &dfs->account_leafs, lru) {
+ if (b2 == b) {
+ printk(" Account(%d) ", j);
+ break;
+ }
+ j++;
+ }
list_for_each_entry(b2, &freelist.lru, lru)
if (b2 == b) {
printk(" on free ");
LAFS_BUG(!!test_bit(B_Phase1, &b->flags) != oldphase, b);
- if (test_bit(B_Index, &b->flags) &&
+ if (test_bit(B_PinPending, &b->flags) &&
+ !test_bit(B_Index, &b->flags) &&
+ (LAFSI(b->inode)->type == TypeSegmentMap ||
+ LAFSI(b->inode)->type == TypeQuota)) {
+ /* Need to delay handling of this block until
+ * the phase change has finished, to just
+ * before we finish the checkpoint.
+ * Note that we don't check if they are dirty -
+ * they might not be yet - the whole point of the
+ * delay is that changes are still arriving.
+ */
+ lafs_flip_dblock(dblk(b));
+ /* access to account_leafs is single-threaded
+ * by the cleaner thread so no locking needed
+ */
+ getref(b, MKREF(accounting));
+ list_add(&b->lru, &fs->account_leafs);
+ } else if (test_bit(B_Index, &b->flags) &&
(iblk(b)->uninc_table.pending_cnt ||
iblk(b)->uninc)) {
lafs_incorporate(fs, iblk(b));
lafs_clusters_done(fs);
}
+static void flush_accounting(struct fs *fs)
+{
+	/* Called just before the checkpoint completes: write out the
+	 * accounting blocks (TypeSegmentMap / TypeQuota) that were
+	 * phase-flipped via lafs_flip_dblock while the checkpoint was
+	 * running.  Each block is handed to lafs_cluster_allocate and
+	 * the 'accounting' reference taken when it was queued on
+	 * account_leafs is dropped.  Finally qphase catches up with
+	 * phase so subsequent quota/segusage updates belong to the new
+	 * phase (this replaces the old lafs_quota_flush).
+	 * account_leafs needs no locking here - see the single-threaded
+	 * note where blocks are added to the list.
+	 */
+	while (!list_empty(&fs->account_leafs)) {
+		struct block *b = list_first_entry(&fs->account_leafs,
+						   struct block,
+						   lru);
+		list_del_init(&b->lru);
+		lafs_iolock_block(b);
+		lafs_cluster_allocate(b, 0);
+		putref(b, MKREF(accounting));
+	}
+	fs->qphase = fs->phase;
+}
+
static void finish_checkpoint(struct fs *fs, int youth)
{
- set_bit(CheckpointFlushing, &fs->fsstate);
- lafs_seg_flush_all(fs);
- lafs_quota_flush(fs);
- clear_bit(CheckpointFlushing, &fs->fsstate);
- if (!test_bit(FinalCheckpoint, &fs->fsstate))
- lafs_seg_apply_all(fs);
+ flush_accounting(fs);
/* if we are creating a snapshot, special handling is needed */
if (LAFSI(fs->ss[0].root)->md.fs.usagetable > 1) {
dprintk("FinalFlush %d\n", fs->seq);
lafs_cluster_flush(fs, 0);
+ if (!test_bit(FinalCheckpoint, &fs->fsstate))
+ lafs_seg_apply_all(fs);
+
lafs_write_state(fs);
dprintk("State written, all done %d\n", fs->seq);
if (test_bit(B_Dirty, &b->b.flags)) {
if (test_bit(B_PinPending, &b->b.flags))
redirty = 1;
- else if (LAFSI(ino)->type == TypeSegmentMap &&
- !test_bit(CheckpointFlushing, &fs->fsstate))
- /* FIXME or quota ?? */
- redirty = 1;
else if (LAFSI(b->b.inode)->type == TypeInodeFile &&
b->my_inode &&
LAFSI(b->my_inode)->iblock)
{
struct indexblock *p;
int oldphase = !!test_bit(B_Phase1, &b->flags);
-
+
+ LAFS_BUG(!test_bit(B_Pinned, &b->flags), b);
if (oldphase)
clear_bit(B_Phase1, &b->flags);
else
set_bit(B_ICredit, &b->flags);
}
+/* When the pinning of a block needs to be carried across a
+ * checkpoint, we need to 'flip' the phase.
+ * This only applies to blocks that can be pinned by a block that
+ * may not be written until the next phase.
+ * This includes any index block (as it may have some children in one
+ * phase and some in the next) and any space-accounting block
+ * for the same reason.
+ * So indexblocks will need to flip, and use lafs_phase_flip.
+ * TypeSegmentMap and TypeQuota also need to flip and use lafs_flip_dblock.
+ * TypeInodeFile don't need to be phase_flipped, though their InoIdx
+ * block might. InodeFile blocks are only pinned by metadata transactions
+ * which happen inside a checkpoint lock.
+ */
+
+void lafs_flip_dblock(struct datablock *db)
+{
+	/* This is an accounting block (SegmentMap or Quota)
+	 * which we need to write out after ->phase has changed
+	 * in the tail of the checkpoint.
+	 * We always flip the phase and reallocate from AccountSpace.
+	 * If all references get dropped, it will then get unpinned
+	 * before the next phase finishes - we don't unpin here
+	 * (unlike lafs_phase_flip for index blocks).
+	 */
+	flip_phase(&db->b);
+	lafs_prealloc(&db->b, AccountSpace);
+	/* Parent might need to be on a leaflist now */
+	lafs_refile(&db->b.parent->b, 0);
+}
+
void lafs_phase_flip(struct fs *fs, struct indexblock *ib)
{
/* We are performing a checkpoint, this block has been written
* out and now needs to be flipped into the next phase.
- * This only makes sense for an index block. Datablocks are
- * simply unpinned at phase change.
+ *
* It involves.
* - Processing all uninc_next blocks into uninc_table.
* - adjusting counts on parent
* - moving credits from 'next' to 'this' phase.
* - update block counts to included phase-delayed updates.
- *NO: This is done when 'writing' the InoIdx.
- * For InoIdx, we transfer the pinning and Credits to the
- * Data block rather than release them.
*/
int oldphase = !!test_bit(B_Phase1, &ib->b.flags);
struct block *ulist;
}
int lafs_add_block_address(struct fs *fs, struct block *blk);
+void lafs_flip_dblock(struct datablock *db);
void lafs_phase_flip(struct fs *fs, struct indexblock *ib);
struct indexblock * __must_check
lafs_make_iblock(struct inode *ino, int adopt, int async, REFARG);
void lafs_write_block(struct fs *fs, struct block *b, int dev, struct wc *wc);
/* quota.c */
-void lafs_quota_flush(struct fs *fs);
int lafs_quota_allocate(struct fs *fs, struct inode *ino, int diff);
#define __wait_event_lock(wq, condition, lock) \
{
}
-void lafs_quota_flush(struct fs *fs)
-{
- fs->qphase = fs->phase;
-}
-
int lafs_quota_allocate(struct fs *fs, struct inode *ino, int diff)
{
return 0;
}
}
-/* lafs_seg_flush_all
- * All segment usage tables should be flushed to storage.
- * This is called towards the end of performing a checkpoint, after
- * the entire phase tree has been committed. The blocks written
- * here record the status of the finishing phase, but they themselves
- * become part of the next phase. They can be found during roll-forward
- * as their write-clusters are still flagged as being part of a checkpoint.
- */
-void lafs_seg_flush_all(struct fs *fs)
-{
- int d;
- for (d = 0; d < fs->devices ; d++)
- write_inode_now(fs->devs[d].segsum, 0);
- for (d = 0; d < fs->devices ; d++)
- write_inode_now(fs->devs[d].segsum, 1);
-}
static void seg_apply(struct fs *fs, struct segsum *ss)
{
#define FinalCheckpoint 3
#define CleanerDisabled 4
#define OrphansRunning 5
-#define CheckpointFlushing 6 /* We are writing the segusage blocks */
-#define CleanerBlocks 7 /* One or more threads is blocked waiting for the
+#define CleanerBlocks 6 /* One or more threads is blocked waiting for the
* cleaner to progress - cleaner.need blocks are
* needed.
*/
* have no pinned children
* and are being cleaned
*/
	struct list_head account_leafs; /* list of accounting blocks
+ * that we need to write after
+ * the checkpoint is done.
+ * They are now pinned to the
+ * next phase.
+ */
/* Youth management */
int youth_next; /* number to assign to next segment */
* reachability-set as this block
*/
- struct list_head lru; /* phase_leafs, clean_leafs,
+ struct list_head lru; /* phase_leafs, clean_leafs, account_leafs,
* clhead, pending_blocks */
struct list_head peers; /* other blocks that use the same location
INIT_LIST_HEAD(&fs->phase_leafs[0]);
INIT_LIST_HEAD(&fs->phase_leafs[1]);
INIT_LIST_HEAD(&fs->clean_leafs);
+ INIT_LIST_HEAD(&fs->account_leafs);
atomic_set(&fs->sb_writes_pending, 0);
init_waitqueue_head(&fs->sb_writes_wait);
init_waitqueue_head(&fs->async_complete);