From a37ad14465745f376aa2f2c973d9b0836ce4d854 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 28 Jun 2010 18:10:51 +1000 Subject: [PATCH] Revise space allocation for cleaning. We prefer to allocate whole segments for cleaning, but can only do that if there is enough space. If we cannot allocate whole segments, then just cleaning to the main segment is perfectly acceptable. So allow a 'clean_reserved' number which is a number of blocks that have been reserved for cleaning - normally some number of segments. The cleaner writes whole segments while this number is big enough, then gives up so the remainder will go to the main segment and not create partial clean segments. CleanSpace now never fails. The next patch will cause the cleaner to be more reserved in how much it asks for. Signed-off-by: NeilBrown --- block.c | 6 +----- clean.c | 7 ++++--- cluster.c | 49 +++++++++++++++++++++++++++++++++++++------------ lafs.h | 2 +- segments.c | 44 ++++++++++++++++++-------------------------- state.h | 3 +-- super.c | 6 +++--- 7 files changed, 65 insertions(+), 52 deletions(-) diff --git a/block.c b/block.c index 3fc0bdf..1cfc456 100644 --- a/block.c +++ b/block.c @@ -414,11 +414,7 @@ lafs_reserve_block(struct block *b, int alloc_type) if (err == 0) return 0; - /* FIXME maybe CleanSpace should return -EAGAIN if there - * is a good chance that the cleaner will help out soon?? - * I wonder how "soon" can be defined. 
- */ - if (alloc_type == CleanSpace || alloc_type == NewSpace) + if (alloc_type == NewSpace) return -ENOSPC; if (alloc_type == ReleaseSpace) return -EAGAIN; diff --git a/clean.c b/clean.c index 2e3f45a..cd38706 100644 --- a/clean.c +++ b/clean.c @@ -225,8 +225,10 @@ static struct block *first_in_seg(struct block *b, struct fs *fs, static void cleaner_flush(struct fs *fs) { struct block *b; + int err = 0; dprintk("Start cleaner_flush\n"); - while ((b = lafs_get_flushable(fs, -1)) != NULL) { + while (!err && + (b = lafs_get_flushable(fs, -1)) != NULL) { int unlock = 1; dprintk("cleaning %s\n", strblk(b)); @@ -252,13 +254,12 @@ static void cleaner_flush(struct fs *fs) iblk(b)->uninc_table.pending_cnt)) { lafs_incorporate(fs, iblk(b)); } else { - lafs_cluster_allocate(b, 1); + err = lafs_cluster_allocate(b, 1); unlock = 0; } if (unlock) lafs_iounlock_block(b); putref(b, MKREF(leaf)); - } lafs_cluster_flush(fs, 1); } diff --git a/cluster.c b/cluster.c index 188f3fc..0dcce8f 100644 --- a/cluster.c +++ b/cluster.c @@ -369,8 +369,11 @@ static u64 seg_addr(struct fs *fs, struct segpos *seg) return addr; } -static void new_segment(struct fs *fs, int cnum) +static int new_segment(struct fs *fs, int cnum) { + /* new_segment can fail if cnum > 0 and there is no + * clean_reserved + */ struct wc *wc = fs->wc + cnum; u64 p; unsigned int dev; @@ -385,7 +388,9 @@ static void new_segment(struct fs *fs, int cnum) lafs_seg_deref(fs, p, 0); } - /* FIXME */ + if (cnum && + fs->clean_reserved < fs->max_segment) + return -ENOSPC; /* This gets a reference on the 'segsum' */ lafs_free_get(fs, &dev, &seg, 0); wc->seg.dev = dev; @@ -393,6 +398,7 @@ static void new_segment(struct fs *fs, int cnum) seg_setpos(fs, &wc->seg, (p = segtovirt(fs, dev, seg))); wc->remaining = seg_remainder(fs, &wc->seg); + return 0; } /*------------------------------------------------------------------------- @@ -526,7 +532,7 @@ give_up: return 0; } -unsigned long long lafs_cluster_allocate(struct block *b, int cnum) 
+int lafs_cluster_allocate(struct block *b, int cnum) { struct super_block *sb = b->inode->i_sb; struct fs *fs = sb->s_fs_info; @@ -534,6 +540,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum) struct lafs_inode *lai; loff_t size; int used; + int err = 0; LAFS_BUG(test_bit(B_Index, &b->flags) && iblk(b)->uninc_table.pending_cnt > 0, b); @@ -671,7 +678,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum) !test_bit(B_Realloc, &b->flags)) { /* block just got truncated, so don't write it. */ lafs_iounlock_block(b); - return wc->cluster_seq; + return 0; } if (!test_bit(B_Index, &b->flags) && @@ -711,13 +718,21 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum) putref(b, MKREF(leaf)); } - while (wc->remaining < 1) { + while (!err && wc->remaining < 2) { // printk("Call flush - remaining = %d\n", wc->remaining); if (wc->slhead.b == NULL) - new_segment(fs, cnum); + err = new_segment(fs, cnum); else cluster_flush(fs, cnum); } + if (err) { + /* No cleaner segments - this will have to go + * out with a checkpoint + */ + lafs_iounlock_block(b); + mutex_unlock(&wc->lock); + return -ENOSPC; + } if (test_and_set_bit(B_Writeback, &b->flags)) LAFS_BUG(1, b); @@ -740,7 +755,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum) lafs_space_return(fs, credits); lafs_writeback_done(b); mutex_unlock(&wc->lock); - return wc->cluster_seq; /* FIXME is this really needed - or right */ + return 0; } /* insert into list ensuring there is enough space * in cluster head @@ -788,7 +803,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum) if (wc->remaining == 0) cluster_flush(fs, cnum); mutex_unlock(&wc->lock); - return wc->cluster_seq; /* FIXME is this really needed - or right */ + return 0; } /*------------------------------------------------------------------------- @@ -1145,7 +1160,10 @@ static void cluster_flush(struct fs *fs, int cnum) segend = wc->seg; /* We may write zeros from here */ 
seg_step(fs, &wc->seg); wc->remaining = seg_remainder(fs, &wc->seg); - if (wc->remaining < 2) + /* Need to make sure out ->next_addr gets set properly + * for non-cleaning segments + */ + if (wc->remaining < 2 && cnum == 0) new_segment(fs, cnum); /* Fill in the cluster header */ @@ -1174,7 +1192,12 @@ static void cluster_flush(struct fs *fs, int cnum) wc->cluster_seq++; wc->chead->Hlength = cpu_to_le16(wc->chead_size); wc->chead->Clength = cpu_to_le16(cluster_size); - fs->free_blocks -= cluster_size; + spin_lock(&fs->lock); + if (cnum) + fs->clean_reserved -= cluster_size; + else + fs->free_blocks -= cluster_size; + spin_unlock(&fs->lock); /* FIXME if this is just a header, no data blocks, * then use VerifyNull. Alternately if there is * no-one waiting for a sync to complete (how do we @@ -1494,10 +1517,12 @@ int lafs_cluster_init(struct fs *fs, int cnum, u64 addr, u64 prev, u64 seq) wc->remaining = 0; else wc->remaining = seg_remainder(fs, &wc->seg) - 1;/*1 for header*/ - if (prev) + if (prev && cnum == 0) { /* if prev == 0 this is a brand new segment for cleaning */ + spin_lock(&fs->lock); fs->free_blocks += wc->remaining+1; - + spin_unlock(&fs->lock); + } wc->cnum = cnum; return 0; diff --git a/lafs.h b/lafs.h index 5060639..93b7dd3 100644 --- a/lafs.h +++ b/lafs.h @@ -640,7 +640,7 @@ void lafs_wake_cleaner(struct fs *fs); void lafs_unclean(struct datablock *db); /* cluster.c */ -unsigned long long lafs_cluster_allocate(struct block *b, int cnum); +int lafs_cluster_allocate(struct block *b, int cnum); void lafs_cluster_flush(struct fs *fs, int cnum); int lafs_calc_cluster_csum(struct cluster_head *head); int lafs_cluster_init(struct fs *fs, int cnum, u64 addr, u64 prev, u64 seq); diff --git a/segments.c b/segments.c index c8f2194..85e91ad 100644 --- a/segments.c +++ b/segments.c @@ -579,8 +579,7 @@ int lafs_space_alloc(struct fs *fs, int credits, int why) * exist, so doing this will eventually free up space. This must * never fail, but can block. 
* CleanSpace means we want to write a block to relocate it to - * a 'cleaning' segment. This may fail (e.g. if we need a checkpoint - * first to release from empty segments) but may not block. + * a 'cleaning' segment. This may never fail. * AccountSpace means we absolutely need this block now, and it is * a BUG is there is no space available. */ @@ -593,39 +592,33 @@ int lafs_space_alloc(struct fs *fs, int credits, int why) watermark += 1 * fs->max_segment; /* FALL THROUGH */ case ReleaseSpace: - if (fs->clean_reserved >= 3 * fs->max_segment) - watermark += fs->clean_reserved; - else - watermark += 3 * fs->max_segment; + watermark += 3 * fs->max_segment; /* FALL THROUGH */ case CleanSpace: - /* Minimal watermark for clean space? - * Just need a few blocks for a checkpoint ? - */ - watermark += 20; - /* FALL THROUGH */ case AccountSpace: /* Definitely no water mark here. */ break; } - if (fs->rolled) { + if (fs->rolled && watermark) { /* We cannot account properly before roll-forward has * completed. FIXME once it has completed we need to * check and invalidate the FS if there was a problem. */ - if (fs->free_segs < fs->allocated_blocks + if (fs->free_blocks < fs->allocated_blocks + credits + watermark) credits = 0; /* Sorry, no room */ } - - if (credits == 0) { - if (why == AccountSpace) - /* FIXME I should switch to READ-ONLY here, - * not BUG. 
- */ + if (fs->rolled && watermark == 0) { + /* When including the clean_reserved space, there should + * be room for these controlled allocations + */ + if (fs->free_blocks + fs->clean_reserved < + fs->allocated_blocks + credits) BUG(); + } + if (credits == 0) { if (!test_bit(CleanerBlocks, &fs->fsstate) || fs->cleaner.need > watermark + fs->max_segment) { fs->cleaner.need = watermark + fs->max_segment; @@ -635,7 +628,7 @@ int lafs_space_alloc(struct fs *fs, int credits, int why) } fs->allocated_blocks += credits; - BUG_ON(fs->free_blocks < fs->allocated_blocks); + BUG_ON(fs->free_blocks + fs->clean_reserved < fs->allocated_blocks); spin_unlock(&fs->alloc_lock); return credits; } @@ -1088,7 +1081,6 @@ again: fs->segtrack->free.cnt--; segdelete(fs->segtrack, ss); - fs->free_segs -= fs->devs[*dev].segment_size; spin_unlock(&fs->lock); /* now need to reserve/dirty/reference the youth and @@ -1288,8 +1280,9 @@ static void clean_free(struct fs *fs) struct datablock *db; int err; ss = segfollow(fs->segtrack, ssn); - fs->free_blocks += fs->devs[ss->dev].segment_size; // FIXME locking?? - fs->free_segs += fs->devs[ss->dev].segment_size; // FIXME locking?? + spin_lock(&fs->lock); + fs->free_blocks += fs->devs[ss->dev].segment_size; + spin_unlock(&fs->lock); db = lafs_get_block(fs->devs[ss->dev].segsum, ss->segment >> (fs->prime_sb->s_blocksize_bits-1), NULL, GFP_KERNEL | __GFP_NOFAIL, @@ -1740,12 +1733,11 @@ unsigned long lafs_scan_seg(struct fs *fs) for (i = 0; i < segments ; i++) if (yp[i] == cpu_to_le16(0)) { if (fs->scan.first_free_pass) { + spin_lock(&fs->lock); fs->free_blocks += fs->devs[fs->scan.free_dev] .segment_size; - fs->free_segs += - fs->devs[fs->scan.free_dev] - .segment_size; + spin_unlock(&fs->lock); } if (add_free(fs, fs->scan.free_dev, firstseg + i, &yp[i])) { diff --git a/state.h b/state.h index fc32c29..085fc19 100644 --- a/state.h +++ b/state.h @@ -134,9 +134,8 @@ struct fs { /* counters for (pre)allocating space. 
*/ spinlock_t alloc_lock; u64 free_blocks; /* initialised from free segment info */ - u64 free_segs; /* counts blocks in completely free segments */ u64 allocated_blocks; /* Blocks that have been (pre)allocated */ - u64 clean_reserved; /* Blocks reserved for cleaning */ + u64 clean_reserved; /* Blocks reserved for cleaner segments */ u64 max_segment; /* largest segment size */ u64 total_free; /* free space found in all segments this scan */ u64 total_free_prev; /* " " " in previous scan */ diff --git a/super.c b/super.c index dc46b76..80699b4 100644 --- a/super.c +++ b/super.c @@ -1016,13 +1016,13 @@ static int lafs_statfs(struct dentry *de, struct kstatfs *buf) /* "bavail" is "blocks we could succeed in adding to the filesystem". * "bfree" is effectively total blocks - used blocks */ - buf->f_bavail = fs->free_blocks - fs->allocated_blocks; + buf->f_bavail = fs->free_blocks + fs->clean_reserved - fs->allocated_blocks; buf->f_bfree = buf->f_blocks - (root->md.fs.cblocks_used + root->md.fs.pblocks_used + root->md.fs.ablocks_used); - dprintk("df: tot=%ld free=%ld avail=%ld(%ld-%ld) cb=%ld pb=%ld ab=%ld\n", + dprintk("df: tot=%ld free=%ld avail=%ld(%ld-%ld-%ld) cb=%ld pb=%ld ab=%ld\n", (long)buf->f_blocks, (long)buf->f_bfree, (long)buf->f_bavail, - (long)fs->free_blocks, (long)fs->allocated_blocks, + (long)fs->free_blocks, (long)fs->clean_reserved, (long)fs->allocated_blocks, (long)root->md.fs.cblocks_used, (long)root->md.fs.pblocks_used, (long)root->md.fs.ablocks_used); -- 2.39.5