From b997e5008541577b6203b2c32f4c0bb0dc042e78 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 4 May 2011 13:15:00 +1000 Subject: [PATCH] FORMAT CHANGE use 32bit block counts in segusage file. This allows much bigger segments which can be useful. Continue to use 16bit youth numbers. It isn't clear that this is needed, but condensing the range for old segments seems to make sense. Signed-off-by: NeilBrown --- layout.h | 20 ++++++---- roll.c | 2 - segments.c | 107 ++++++++++++++++++++++++++++++----------------------- state.h | 6 +-- 4 files changed, 76 insertions(+), 59 deletions(-) diff --git a/layout.h b/layout.h index ceb1d1c..afa81ed 100644 --- a/layout.h +++ b/layout.h @@ -46,14 +46,13 @@ struct lafs_state { u8 uuid[16]; u32 levels; u32 devices; - u32 nonlog_segment; /* segment number and */ - u16 nonlog_dev; /* device number of active non-logged segment */ - u16 nonlog_offset; /* offset into above segment of next non-logged + u32 nonlog_offset; /* offset into following segment of next non-logged * block to allocate */ - u32 maxsnapshot; + u32 nonlog_segment; /* segment number and */ + u16 nonlog_dev; /* device number of active non-logged segment */ u16 nextyouth; - u16 pad0; + u32 maxsnapshot; u64 checkpointcluster; /* (array block) */ u64 root_inodes[0]; /* (array block) */ @@ -95,11 +94,10 @@ struct cluster_head { u8 uuid[16]; u64 seq; u32 flags; + u16 verify_type; u16 Hlength; /* header length - (bytes) */ - u16 Clength; /* cluster length including header - (blocks) */ + u32 Clength; /* cluster length including header - (blocks) */ u32 checksum; /* over Hlength bytes */ - u16 verify_type; - u16 pad0; u8 verify_data[16]; u64 next_addr; /* (Array block) */ u64 this_addr; /* (array block) */ @@ -284,3 +282,9 @@ static int inline decay_undo(int y) { return y + 16384; } + +/* seg usage uses 4 bytes - so shift is 2 + * youth uses 2 bytes - so shift - 1 + */ +#define USAGE_SHIFT 2 +#define YOUTH_SHIFT 1 diff --git a/roll.c b/roll.c index 11acf65..e8ebbba 100644 
--- a/roll.c +++ b/roll.c @@ -73,8 +73,6 @@ roll_valid(struct fs *fs, struct cluster_head *ch, unsigned long long addr) default: return 0; } - if (ch->pad0 != 0) - return 0; - if (le16_to_cpu(ch->Clength) > fs->max_segment) + if (le32_to_cpu(ch->Clength) > fs->max_segment) return 0; return 1; diff --git a/segments.c b/segments.c index ec64737..66dc920 100644 --- a/segments.c +++ b/segments.c @@ -63,7 +63,7 @@ * we cannot load the segusage block for each snapshot and then parse them * in parallel. Instead we allocate space to store a max usage and * merge each block one at a time into that max. We then combine the - * max with the youth to get a 32bit weight... I wonder if that is good. + * max with the youth to get a 64bit weight... I wonder if that is good. * */ @@ -185,13 +185,14 @@ retry: INIT_HLIST_NODE(&new->hash); dv = fs->devs + devnum; addr = LAFSI(fs->ss[ssnum].root)->md.fs.usagetable * dv->tablesize; - addr += segnum >> (fs->blocksize_bits-1); + addr += segnum >> (fs->blocksize_bits - USAGE_SHIFT); new->ssblk = lafs_get_block(dv->segsum, addr, NULL, GFP_KERNEL, MKREF(ss)); if (ssnum == 0) new->youthblk = lafs_get_block(dv->segsum, - segnum >> (fs->blocksize_bits-1), + segnum >> (fs->blocksize_bits + - YOUTH_SHIFT), NULL, GFP_KERNEL, MKREF(ssyouth)); @@ -393,17 +394,17 @@ static void seg_inc(struct fs *fs, struct segsum *ss, int diff, int in_phase) if (!in_phase) atomic_add(diff, &ss->delayed); else { - u16 *b, *p; + u32 *b, *p; b = map_dblock(ss->ssblk); spin_lock(&fs->stable_lock); - p = &b[ss->segnum & ((fs->blocksize-1)>>1)]; - //BUG_ON(diff < 0 && le16_to_cpu(*p) < -diff); - if (diff < 0 && le16_to_cpu(*p) < -diff) { - printk("diff=%d p=%d segnum=%d\n", diff, le16_to_cpu(*p), + p = &b[ss->segnum & ((fs->blocksize-1)>>USAGE_SHIFT)]; + //BUG_ON(diff < 0 && le32_to_cpu(*p) < -diff); + if (diff < 0 && le32_to_cpu(*p) < -diff) { + printk("diff=%d p=%d segnum=%d\n", diff, le32_to_cpu(*p), ss->segnum); BUG(); } - *p = cpu_to_le16(le16_to_cpu(*p) + diff); + *p = cpu_to_le32(le32_to_cpu(*p) + diff); 
spin_unlock(&fs->stable_lock); unmap_dblock(ss->ssblk, b); lafs_dirty_dblock(ss->ssblk); @@ -460,7 +461,7 @@ static void set_youth(struct fs *fs, struct segsum *ss) y = decay_undo(y); ybuf = map_dblock(ss->youthblk); youthp = ybuf + (ss->segnum & ((1 << (fs->blocksize_bits - - 1)) - 1)); + - YOUTH_SHIFT)) - 1)); if (le16_to_cpu(*youthp) < 8) { *youthp = cpu_to_le16(y); fs->youth_next++; @@ -792,17 +793,17 @@ int lafs_space_alloc(struct fs *fs, int credits, int why) * again and remove it properly so it can become cleanable later. */ -#define SCORE_MAX 0xFFFFFFFC /* Maximum normal score */ -#define SCORE_ACTIVE 0xFFFFFFFD /* This segment is being written to */ -#define SCORE_CLEANING 0xFFFFFFFE /* This segment in being cleaned */ -#define SCORE_DEAD 0xFFFFFFFF /* This segment is to be removed */ +#define SCORE_MAX 0xFFFFFFFFFFFFFFFCULL /* Maximum normal score */ +#define SCORE_ACTIVE 0xFFFFFFFFFFFFFFFDULL /* This segment is being written to */ +#define SCORE_CLEANING 0xFFFFFFFFFFFFFFFEULL /* This segment in being cleaned */ +#define SCORE_DEAD 0xFFFFFFFFFFFFFFFFULL /* This segment is to be removed */ struct segstat { u16 next; u16 dev; u32 segment; - u32 score; - u16 usage; + u64 score; + u32 usage; u16 skip[0]; /* or larger... 
*/ }; @@ -1157,7 +1158,7 @@ static u16 seg_pop(struct segtracker *st, struct slist *which) } static struct segstat *seg_add_new(struct segtracker *st, struct slist *which, int atend, - int dev, u32 seg, int score, int usage, + int dev, u32 seg, long long score, int usage, u16 *where[SEG_NUM_HEIGHTS]) { int ssn; @@ -1468,7 +1469,8 @@ void lafs_clean_free(struct fs *fs) fs->free_blocks += fs->devs[ss->dev].segment_size; spin_unlock(&fs->lock); db = lafs_get_block(fs->devs[ss->dev].segsum, - ss->segment >> (fs->blocksize_bits-1), + ss->segment >> (fs->blocksize_bits + - YOUTH_SHIFT), NULL, GFP_KERNEL | __GFP_NOFAIL, MKREF(cleanfree)); err = lafs_read_block(db); @@ -1477,7 +1479,7 @@ void lafs_clean_free(struct fs *fs) if (err == 0) { u16 *b = map_dblock(db); spin_lock(&fs->stable_lock); - b[ss->segment & ((fs->blocksize-1)>>1)] = 0; + b[ss->segment & ((fs->blocksize-1)>>YOUTH_SHIFT)] = 0; spin_unlock(&fs->stable_lock); unmap_dblock(db, b); lafs_dirty_dblock(db); @@ -1520,7 +1522,7 @@ void lafs_dump_cleanable(void) for (ssn = st->cleanable.first; ssn != 0xffff; ssn = ss->next) { ss = segfollow(st, ssn); - printk("%3d: %3d/%-4d %5d %d\n", + printk("%3d: %3d/%-4d %5d %lld\n", i, ss->dev, ss->segment, ss->usage, @@ -1533,7 +1535,7 @@ void lafs_dump_cleanable(void) for (ssn = st->cleanable.first; ssn != 0xffff; ssn = ss->next) { ss = segfollow(st, ssn); - printk("%3d: %3d/%-4d %5d %d\n", + printk("%3d: %3d/%-4d %5d %lld\n", i, ss->dev, ss->segment, ss->usage, @@ -1546,7 +1548,7 @@ void lafs_dump_cleanable(void) for (ssn = st->free.first; ssn != 0xffff; ssn = ss->next) { ss = segfollow(st, ssn); - printk("%3d: %3d/%-4d %5d %d\n", + printk("%3d: %3d/%-4d %5d %lld\n", ssn, ss->dev, ss->segment, ss->usage, @@ -1559,7 +1561,7 @@ void lafs_dump_cleanable(void) for (ssn = st->clean.first; ssn != 0xffff; ssn = ss->next) { ss = segfollow(st, ssn); - printk("%3d: %3d/%-4d %5d %d\n", + printk("%3d: %3d/%-4d %5d %lld\n", ssn, ss->dev, ss->segment, ss->usage, @@ -1620,8 +1622,8 @@ 
retry: *dev = ss->dev; *seg = ss->segment; - dprintk("SEG: cleanable %d/%d score=%d usage=%d\n", - ss->dev, ss->segment, ss->score, ss->usage); + dprintk("SEG: cleanable %d/%d score=%llu usage=%d\n", + ss->dev, ss->segment, (unsigned long long)ss->score, ss->usage); ss->score = SCORE_CLEANING; if (ss->usage == 0) { @@ -1643,9 +1645,9 @@ retry: } static int add_cleanable(struct fs *fs, unsigned int dev, u32 seg, - u16 youth, u16 usage) + u16 youth, u32 usage) { - u32 score; + u64 score; struct segstat *ss; u32 segsize; u16 *where[SEG_NUM_HEIGHTS]; @@ -1670,10 +1672,13 @@ static int add_cleanable(struct fs *fs, unsigned int dev, u32 seg, if (test_bit(EmergencyClean, &fs->fsstate)) score = usage; - else - /* 0x10000 is to ensure this score is always + else { + /* 0x100000000 is to ensure this score is always * more than the above score */ - score = youth * usage / segsize + 0x10000; + score = (u64)youth * usage; + do_div(score, segsize); + score += 0x100000000; + } spin_lock(&fs->lock); if (score > SCORE_MAX) @@ -1740,19 +1745,20 @@ static int add_cleanable(struct fs *fs, unsigned int dev, u32 seg, return 1; } -static void merge_usage(struct fs *fs, u16 *d) +static void merge_usage(struct fs *fs, u32 *d) { - u16 *u = fs->scan.free_usages; - int segperblk = fs->blocksize / 2; + u32 *u = fs->scan.free_usages; + int segperblk = fs->blocksize >> USAGE_SHIFT; int i; for (i = 0; i < segperblk; i++) - if (le16_to_cpu(d[i]) > le16_to_cpu(u[i])) + if (le32_to_cpu(d[i]) > le32_to_cpu(u[i])) u[i] = d[i]; } unsigned long lafs_scan_seg(struct fs *fs) { + /* FIXME this comment is very out-dated */ /* Process one block of youth or segment-usage data. We * collect free segments (youth==0) into a table that is kept * sorted to ensure against duplicates. 
It is treated like a @@ -1803,6 +1809,7 @@ unsigned long lafs_scan_seg(struct fs *fs) */ int dev = fs->scan.free_dev; int block = fs->scan.free_block + 1; + int youthblock = block >> (USAGE_SHIFT - YOUTH_SHIFT); int err; while (dev < 0 || @@ -1822,7 +1829,7 @@ unsigned long lafs_scan_seg(struct fs *fs) } } if (fs->scan.youth_db) - if (fs->scan.youth_db->b.fileaddr != block || + if (fs->scan.youth_db->b.fileaddr != youthblock || dev < 0 || fs->scan.youth_db->b.inode != fs->devs[dev].segsum) { putdref(fs->scan.youth_db, MKREF(youth_scan)); @@ -1837,7 +1844,7 @@ unsigned long lafs_scan_seg(struct fs *fs) if (fs->scan.youth_db == NULL) fs->scan.youth_db = lafs_get_block(fs->devs[dev].segsum, - block, + youthblock, NULL, GFP_KERNEL, MKREF(youth_scan)); if (!fs->scan.youth_db) { printk("EEEEEKKKKK get_block failed\n"); @@ -1864,10 +1871,11 @@ unsigned long lafs_scan_seg(struct fs *fs) spin_lock(&fs->lock); fs->scan.free_block = block; fs->scan.free_dev = dev; - if (!err && fs->scan.do_decay) { + if (!err && fs->scan.do_decay && + youthblock << (USAGE_SHIFT - YOUTH_SHIFT) == block) { u16 *yp = map_dblock(fs->scan.youth_db); int i; - int segperblk = fs->blocksize / 2; + int segperblk = fs->blocksize >> YOUTH_SHIFT; for (i = 0 ; i < segperblk ; i++) { int y = le16_to_cpu(yp[i]); @@ -1891,11 +1899,11 @@ unsigned long lafs_scan_seg(struct fs *fs) */ struct datablock *db; char *d; - u16 *yp; + u16 *yp, *yp0; int i; int firstseg; - int segperblk = fs->blocksize / 2; + int segperblk = fs->blocksize >> USAGE_SHIFT; int segments = segperblk; int segcount; int blks; @@ -1933,7 +1941,10 @@ unsigned long lafs_scan_seg(struct fs *fs) segments = segcount % segperblk; firstseg = fs->scan.free_block * segperblk; - yp = map_dblock(fs->scan.youth_db); + yp0 = yp = map_dblock(fs->scan.youth_db); + yp += (fs->scan.free_block - + (fs->scan.youth_db->b.fileaddr << (USAGE_SHIFT - YOUTH_SHIFT))) + * segperblk; for (i = 0; i < segments ; i++) if (yp[i] == cpu_to_le16(0)) { if 
(fs->scan.first_free_pass) { @@ -1955,7 +1966,7 @@ unsigned long lafs_scan_seg(struct fs *fs) fs->devs[fs->scan.free_dev] .segment_size /*- 1*/; } - unmap_dblock(fs->scan.youth_db, yp); + unmap_dblock(fs->scan.youth_db, yp0); fs->scan.usage0_db = db; fs->scan.free_stage = 2; @@ -1964,7 +1975,7 @@ unsigned long lafs_scan_seg(struct fs *fs) while (fs->scan.free_stage > 1 && fs->scan.free_stage < fs->maxsnapshot + 1) { struct datablock *db; - u16 *d; + u32 *d; if (fs->ss[fs->scan.free_stage-1].root == NULL) { fs->scan.free_stage++; @@ -2011,15 +2022,19 @@ unsigned long lafs_scan_seg(struct fs *fs) * cleanable segments now */ u16 *yp = map_dblock(fs->scan.youth_db); - u16 *up = fs->scan.free_usages; + u16 *yp0 = yp; + u32 *up = fs->scan.free_usages; int i; - int segperblk = fs->blocksize / 2; + int segperblk = fs->blocksize >> USAGE_SHIFT; int segments = segperblk; int segcount = fs->devs[fs->scan.free_dev].segment_count; int blks = segcount / segments; if (fs->scan.free_block == blks) segments = segcount % segperblk; + yp += (fs->scan.free_block - + (fs->scan.youth_db->b.fileaddr << (USAGE_SHIFT - YOUTH_SHIFT))) + * segperblk; for (i = 0; i < segments; i++) if (add_cleanable(fs, fs->scan.free_dev, i + fs->scan.free_block * segperblk, @@ -2032,7 +2047,7 @@ unsigned long lafs_scan_seg(struct fs *fs) (void)getdref(fs->scan.usage0_db, MKREF(intable)); } - unmap_dblock(fs->scan.youth_db, yp); + unmap_dblock(fs->scan.youth_db, yp0); putdref(fs->scan.usage0_db, MKREF(usage0)); fs->scan.usage0_db = NULL; fs->scan.free_stage = 0; diff --git a/state.h b/state.h index 765b295..be8b204 100644 --- a/state.h +++ b/state.h @@ -66,7 +66,7 @@ struct fs { u32 nonlog_segment; unsigned short nonlog_dev; - u16 nonlog_offset; + u32 nonlog_offset; u32 maxsnapshot; u64 checkpointcluster; @@ -217,7 +217,7 @@ struct fs { } unused, free, cleanable, clean; unsigned short head[SEG_NUM_HEIGHTS]; /* head of skiplist */ int total; - int max_score; + long long max_score; int sorted_size; } 
segtrack[1]; @@ -231,7 +231,7 @@ struct fs { int first_free_pass; /* true the first time */ int done, do_decay; /* cleared on each checkpoint */ struct datablock *youth_db, *usage0_db; - u16 *free_usages; /* This is an allocated page */ + u32 *free_usages; /* This is an allocated page */ int trace; } scan; -- 2.39.5