git.neil.brown.name Git - LaFS.git/commitdiff
Revise space allocation for cleaning.
author: NeilBrown <neilb@suse.de>
Mon, 28 Jun 2010 08:10:51 +0000 (18:10 +1000)
committer: NeilBrown <neilb@suse.de>
Mon, 28 Jun 2010 08:10:51 +0000 (18:10 +1000)
We prefer to allocate whole segments for cleaning, but can only
do that if there is enough space.
If we cannot allocate whole segments, then just cleaning to the
main segment is perfectly acceptable.

So allow a 'clean_reserved' number which is a number of blocks that
have been reserved for cleaning - normally some number of segments.
The cleaner writes whole segments while this number is big enough,
then gives up so the remainder will go to the main segment and not
create partial clean segments.

CleanSpace now never fails.  The next patch will cause the cleaner
to be more reserved in how much it asks for.

Signed-off-by: NeilBrown <neilb@suse.de>
block.c
clean.c
cluster.c
lafs.h
segments.c
state.h
super.c

diff --git a/block.c b/block.c
index 3fc0bdfe5dc808d64dc9b9815799561c5610656a..1cfc4569cd7381a5a80fd043d749a189dff0673f 100644 (file)
--- a/block.c
+++ b/block.c
@@ -414,11 +414,7 @@ lafs_reserve_block(struct block *b, int alloc_type)
 
        if (err == 0)
                return 0;
-       /* FIXME maybe CleanSpace should return -EAGAIN if there
-        * is a good chance that the cleaner will help out soon??
-        * I wonder how "soon" can be defined.
-        */
-       if (alloc_type == CleanSpace || alloc_type == NewSpace)
+       if (alloc_type == NewSpace)
                return -ENOSPC;
        if (alloc_type == ReleaseSpace)
                return -EAGAIN;
diff --git a/clean.c b/clean.c
index 2e3f45a5a445cff6e8c740122ffbbb72ac6db14b..cd387065d7534549ecf31fc16db3f02fda87d057 100644 (file)
--- a/clean.c
+++ b/clean.c
@@ -225,8 +225,10 @@ static struct block *first_in_seg(struct block *b, struct fs *fs,
 static void cleaner_flush(struct fs *fs)
 {
        struct block *b;
+       int err = 0;
        dprintk("Start cleaner_flush\n");
-       while ((b = lafs_get_flushable(fs, -1)) != NULL) {
+       while (!err &&
+              (b = lafs_get_flushable(fs, -1)) != NULL) {
                int unlock = 1;
 
                dprintk("cleaning %s\n", strblk(b));
@@ -252,13 +254,12 @@ static void cleaner_flush(struct fs *fs)
                            iblk(b)->uninc_table.pending_cnt)) {
                        lafs_incorporate(fs, iblk(b));
                } else {
-                       lafs_cluster_allocate(b, 1);
+                       err = lafs_cluster_allocate(b, 1);
                        unlock = 0;
                }
                if (unlock)
                        lafs_iounlock_block(b);
                putref(b, MKREF(leaf));
-
        }
        lafs_cluster_flush(fs, 1);
 }
index 188f3fcba53f274b3791e8d5c62a3faa7c26e743..0dcce8fd6178cf687c39d7c6db203af3c4065152 100644 (file)
--- a/cluster.c
+++ b/cluster.c
@@ -369,8 +369,11 @@ static u64 seg_addr(struct fs *fs, struct segpos *seg)
        return addr;
 }
 
-static void new_segment(struct fs *fs, int cnum)
+static int new_segment(struct fs *fs, int cnum)
 {
+       /* new_segment can fail if cnum > 0 and there is no
+        * clean_reserved
+        */
        struct wc *wc = fs->wc + cnum;
        u64 p;
        unsigned int dev;
@@ -385,7 +388,9 @@ static void new_segment(struct fs *fs, int cnum)
                lafs_seg_deref(fs, p, 0);
        }
 
-       /* FIXME */
+       if (cnum &&
+           fs->clean_reserved < fs->max_segment)
+                       return -ENOSPC;
        /* This gets a reference on the 'segsum' */
        lafs_free_get(fs, &dev, &seg, 0);
        wc->seg.dev = dev;
@@ -393,6 +398,7 @@ static void new_segment(struct fs *fs, int cnum)
        seg_setpos(fs, &wc->seg, (p = segtovirt(fs, dev, seg)));
 
        wc->remaining = seg_remainder(fs, &wc->seg);
+       return 0;
 }
 
 /*-------------------------------------------------------------------------
@@ -526,7 +532,7 @@ give_up:
        return 0;
 }
 
-unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
+int lafs_cluster_allocate(struct block *b, int cnum)
 {
        struct super_block *sb = b->inode->i_sb;
        struct fs *fs = sb->s_fs_info;
@@ -534,6 +540,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
        struct lafs_inode *lai;
        loff_t size;
        int used;
+       int err = 0;
 
        LAFS_BUG(test_bit(B_Index, &b->flags) &&
                 iblk(b)->uninc_table.pending_cnt > 0, b);
@@ -671,7 +678,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
            !test_bit(B_Realloc, &b->flags)) {
                /* block just got truncated, so don't write it. */
                lafs_iounlock_block(b);
-               return wc->cluster_seq;
+               return 0;
        }
 
        if (!test_bit(B_Index, &b->flags) &&
@@ -711,13 +718,21 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
                        putref(b, MKREF(leaf));
        }
 
-       while (wc->remaining < 1) {
+       while (!err && wc->remaining < 2) {
                // printk("Call flush - remaining = %d\n", wc->remaining);
                if (wc->slhead.b == NULL)
-                       new_segment(fs, cnum);
+                       err = new_segment(fs, cnum);
                else
                        cluster_flush(fs, cnum);
        }
+       if (err) {
+               /* No cleaner segments - this will have to go
+                * out with a checkpoint
+                */
+               lafs_iounlock_block(b);
+               mutex_unlock(&wc->lock);
+               return -ENOSPC;
+       }
 
        if (test_and_set_bit(B_Writeback, &b->flags))
                LAFS_BUG(1, b);
@@ -740,7 +755,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
                lafs_space_return(fs, credits);
                lafs_writeback_done(b);
                mutex_unlock(&wc->lock);
-               return wc->cluster_seq; /* FIXME is this really needed - or right */
+               return 0;
        }
        /* insert into list ensuring there is enough space
         * in cluster head
@@ -788,7 +803,7 @@ unsigned long long lafs_cluster_allocate(struct block *b, int cnum)
        if (wc->remaining == 0)
                cluster_flush(fs, cnum);
        mutex_unlock(&wc->lock);
-       return wc->cluster_seq; /* FIXME is this really needed - or right */
+       return 0;
 }
 
 /*-------------------------------------------------------------------------
@@ -1145,7 +1160,10 @@ static void cluster_flush(struct fs *fs, int cnum)
        segend = wc->seg; /* We may write zeros from here */
        seg_step(fs, &wc->seg);
        wc->remaining = seg_remainder(fs, &wc->seg);
-       if (wc->remaining < 2)
+       /* Need to make sure out ->next_addr gets set properly
+        * for non-cleaning segments
+        */
+       if (wc->remaining < 2 && cnum == 0)
                new_segment(fs, cnum);
 
        /* Fill in the cluster header */
@@ -1174,7 +1192,12 @@ static void cluster_flush(struct fs *fs, int cnum)
        wc->cluster_seq++;
        wc->chead->Hlength = cpu_to_le16(wc->chead_size);
        wc->chead->Clength = cpu_to_le16(cluster_size);
-       fs->free_blocks -= cluster_size;
+       spin_lock(&fs->lock);
+       if (cnum)
+               fs->clean_reserved -= cluster_size;
+       else
+               fs->free_blocks -= cluster_size;
+       spin_unlock(&fs->lock);
        /* FIXME if this is just a header, no data blocks,
         * then use VerifyNull.  Alternately if there is
         * no-one waiting for a sync to complete (how do we
@@ -1494,10 +1517,12 @@ int lafs_cluster_init(struct fs *fs, int cnum, u64 addr, u64 prev, u64 seq)
                wc->remaining = 0;
        else
                wc->remaining = seg_remainder(fs, &wc->seg) - 1;/*1 for header*/
-       if (prev)
+       if (prev && cnum == 0) {
                /* if prev == 0 this is a brand new segment for cleaning */
+               spin_lock(&fs->lock);
                fs->free_blocks += wc->remaining+1;
-
+               spin_unlock(&fs->lock);
+       }
        wc->cnum = cnum;
 
        return 0;
diff --git a/lafs.h b/lafs.h
index 5060639facacc73fcf56f3dfe00763748d04f52f..93b7dd3f1c59f6188df9448c77867e8c8729c2b7 100644 (file)
--- a/lafs.h
+++ b/lafs.h
@@ -640,7 +640,7 @@ void lafs_wake_cleaner(struct fs *fs);
 void lafs_unclean(struct datablock *db);
 
 /* cluster.c */
-unsigned long long lafs_cluster_allocate(struct block *b, int cnum);
+int lafs_cluster_allocate(struct block *b, int cnum);
 void lafs_cluster_flush(struct fs *fs, int cnum);
 int lafs_calc_cluster_csum(struct cluster_head *head);
 int lafs_cluster_init(struct fs *fs, int cnum, u64 addr, u64 prev, u64 seq);
index c8f219486f7771eedb08a5ce167408d36311747e..85e91ad5171e4a738e25a479a7f3423f829fae21 100644 (file)
@@ -579,8 +579,7 @@ int lafs_space_alloc(struct fs *fs, int credits, int why)
         *   exist, so doing this will eventually free up space.  This must
         *   never fail, but can block.
         * CleanSpace means we want to write a block to relocate it to
-        *   a 'cleaning' segment.  This may fail (e.g. if we need a checkpoint
-        *   first to release from empty segments) but may not block.
+        *   a 'cleaning' segment.   This may never fail.
         * AccountSpace means we absolutely need this block now, and it is
         *   a BUG is there is no space available.
         */
@@ -593,39 +592,33 @@ int lafs_space_alloc(struct fs *fs, int credits, int why)
                watermark += 1 * fs->max_segment;
                /* FALL THROUGH */
        case ReleaseSpace:
-               if (fs->clean_reserved >= 3 * fs->max_segment)
-                       watermark += fs->clean_reserved;
-               else
-                       watermark += 3 * fs->max_segment;
+               watermark += 3 * fs->max_segment;
                /* FALL THROUGH */
        case CleanSpace:
-               /* Minimal watermark for clean space?
-                * Just need a few blocks for a checkpoint ?
-                */
-               watermark += 20;
-               /* FALL THROUGH */
        case AccountSpace:
                /* Definitely no water mark here. */
                break;
        }
 
-       if (fs->rolled) {
+       if (fs->rolled && watermark) {
                /* We cannot account properly before roll-forward has
                 * completed. FIXME once it has completed we need to
                 * check and invalidate the FS if there was a problem.
                 */
-               if (fs->free_segs < fs->allocated_blocks
+               if (fs->free_blocks < fs->allocated_blocks
                    + credits + watermark)
                        credits = 0; /* Sorry, no room */
        }
-
-       if (credits == 0) {
-               if (why == AccountSpace)
-                       /* FIXME I should switch to READ-ONLY here,
-                        * not BUG.
-                        */
+       if (fs->rolled && watermark == 0) {
+               /* When including the clean_reserved space, there should
+                * be room for these controlled allocations
+                */
+               if (fs->free_blocks + fs->clean_reserved <
+                   fs->allocated_blocks + credits)
                        BUG();
+       }
 
+       if (credits == 0) {
                if (!test_bit(CleanerBlocks, &fs->fsstate) ||
                    fs->cleaner.need > watermark + fs->max_segment) {
                        fs->cleaner.need = watermark + fs->max_segment;
@@ -635,7 +628,7 @@ int lafs_space_alloc(struct fs *fs, int credits, int why)
        }
 
        fs->allocated_blocks += credits;
-       BUG_ON(fs->free_blocks < fs->allocated_blocks);
+       BUG_ON(fs->free_blocks + fs->clean_reserved < fs->allocated_blocks);
        spin_unlock(&fs->alloc_lock);
        return credits;
 }
@@ -1088,7 +1081,6 @@ again:
                fs->segtrack->free.cnt--;
                segdelete(fs->segtrack, ss);
 
-               fs->free_segs -= fs->devs[*dev].segment_size;
                spin_unlock(&fs->lock);
 
                /* now need to reserve/dirty/reference the youth and
@@ -1288,8 +1280,9 @@ static void clean_free(struct fs *fs)
                struct datablock *db;
                int err;
                ss = segfollow(fs->segtrack, ssn);
-               fs->free_blocks += fs->devs[ss->dev].segment_size; // FIXME locking??
-               fs->free_segs += fs->devs[ss->dev].segment_size; // FIXME locking??
+               spin_lock(&fs->lock);
+               fs->free_blocks += fs->devs[ss->dev].segment_size;
+               spin_unlock(&fs->lock);
                db = lafs_get_block(fs->devs[ss->dev].segsum,
                                    ss->segment >> (fs->prime_sb->s_blocksize_bits-1),
                                    NULL, GFP_KERNEL | __GFP_NOFAIL,
@@ -1740,12 +1733,11 @@ unsigned long lafs_scan_seg(struct fs *fs)
                for (i = 0; i < segments ; i++)
                        if (yp[i] == cpu_to_le16(0)) {
                                if (fs->scan.first_free_pass) {
+                                       spin_lock(&fs->lock);
                                        fs->free_blocks +=
                                                fs->devs[fs->scan.free_dev]
                                                .segment_size;
-                                       fs->free_segs +=
-                                               fs->devs[fs->scan.free_dev]
-                                               .segment_size;
+                                       spin_unlock(&fs->lock);
                                }
                                if (add_free(fs, fs->scan.free_dev, firstseg + i,
                                             &yp[i])) {
diff --git a/state.h b/state.h
index fc32c293a8be2660cf9dbaabd4d2d37c857ff243..085fc197264d60fc4b8391438144b387323a2457 100644 (file)
--- a/state.h
+++ b/state.h
@@ -134,9 +134,8 @@ struct fs {
        /* counters for (pre)allocating space. */
        spinlock_t alloc_lock;
        u64     free_blocks; /* initialised from free segment info */
-       u64     free_segs; /* counts blocks in completely free segments */
        u64     allocated_blocks; /* Blocks that have been (pre)allocated */
-       u64     clean_reserved; /* Blocks reserved for cleaning */
+       u64     clean_reserved; /* Blocks reserved for cleaner segments */
        u64     max_segment; /* largest segment size */
        u64     total_free;  /* free space found in all segments this scan */
        u64     total_free_prev; /* "  "   " in previous scan */
diff --git a/super.c b/super.c
index dc46b76ea81362dc1bc2a2617ec34357b9f54d9e..80699b4cf7209cfb9006b82c85637218c440b94d 100644 (file)
--- a/super.c
+++ b/super.c
@@ -1016,13 +1016,13 @@ static int lafs_statfs(struct dentry *de, struct kstatfs *buf)
        /* "bavail" is "blocks we could succeed in adding to the filesystem".
         * "bfree" is effectively total blocks - used blocks
         */
-       buf->f_bavail = fs->free_blocks - fs->allocated_blocks;
+       buf->f_bavail = fs->free_blocks + fs->clean_reserved - fs->allocated_blocks;
        buf->f_bfree = buf->f_blocks - (root->md.fs.cblocks_used +
                                        root->md.fs.pblocks_used +
                                        root->md.fs.ablocks_used);
-       dprintk("df: tot=%ld free=%ld avail=%ld(%ld-%ld) cb=%ld pb=%ld ab=%ld\n",
+       dprintk("df: tot=%ld free=%ld avail=%ld(%ld-%ld-%ld) cb=%ld pb=%ld ab=%ld\n",
                (long)buf->f_blocks, (long)buf->f_bfree, (long)buf->f_bavail,
-               (long)fs->free_blocks, (long)fs->allocated_blocks,
+               (long)fs->free_blocks, (long)fs->clean_reserved, (long)fs->allocated_blocks,
                (long)root->md.fs.cblocks_used, (long)root->md.fs.pblocks_used,
                (long)root->md.fs.ablocks_used);