From: NeilBrown Date: Fri, 17 Sep 2010 05:38:10 +0000 (+1000) Subject: lafs_refile: lots of locking and refcount changes. X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=62a5530b5189dcd44aedf52267a389e7137cf16f;p=LaFS.git lafs_refile: lots of locking and refcount changes. See the comments added to lafs.h for more detail. There are really several changes and fixes in here but I don't think it is worth the effort to separate them all out. Signed-off-by: NeilBrown --- diff --git a/block.c b/block.c index c883c9c..2ac721b 100644 --- a/block.c +++ b/block.c @@ -159,7 +159,10 @@ lafs_get_block(struct inode *ino, unsigned long index, struct page *p, b = (struct datablock *)p->private; b += index & ((1<i_data.private_lock); getdref_locked(b, REF); + spin_unlock(&ino->i_data.private_lock); if (unlock) { unlock_page(p); @@ -200,7 +203,10 @@ void lafs_invalidate_page(struct page *page, unsigned long offset) * wait for any pending IO to complete (so page can be freed) */ for (i = 0; i < (1<i_data.private_lock); + (void)getdref_locked(&b[i], MKREF(inval)); + spin_unlock(&ino->i_data.private_lock); + if (b_start >= offset && test_and_clear_bit(B_Async, &b[i].b.flags)) putdref(&b[i], MKREF(Async)); @@ -531,24 +537,37 @@ erase_dblock_locked(struct datablock *b) * block lives in. * Need private_lock to be allowed to dereference ->iblock * though if b was dirty we shouldn't.... FIXME. + * We need to hold the ref to idb for the getiref_locked_needsync to + * be safe. */ struct indexblock *ib; - spin_lock(&b->b.inode->i_data.private_lock); + struct datablock *idb = lafs_inode_dblock(b->b.inode, SYNC, MKREF(erase)); + if (IS_ERR(idb)) { + /* not much we can do here */ + BUG(); + goto skip; + } + spin_lock(&lafs_hash_lock); ib = LAFSI(b->b.inode)->iblock; if (ib) - getiref_locked(ib, MKREF(erasedblock)); - spin_unlock(&b->b.inode->i_data.private_lock); + getiref_locked_needsync(ib, MKREF(erasedblock)); + spin_unlock(&lafs_hash_lock); + sync_ref(&ib->b); + putdref(idb, MKREF(erase)); if (ib) { lafs_iolock_written(&ib->b); if (ib->depth == 0) { LAFS_BUG(LAFSI(b->b.inode)->depth != ib->depth, &b->b); + ib->depth = 1; + LAFSI(b->b.inode)->depth = 1; lafs_clear_index(ib); clear_bit(B_PhysValid, &b->b.flags); } lafs_iounlock_block(&ib->b); putiref(ib, MKREF(erasedblock)); } + skip:; } if (LAFSI(b->b.inode)->type == TypeInodeFile) { diff --git a/checkpoint.c b/checkpoint.c index 7eaa664..4701938 100644 --- a/checkpoint.c +++ b/checkpoint.c @@ -337,19 +337,16 @@ retry: b = list_entry(fs->clean_leafs.next, struct block, lru); else if (phase >= 0 && !list_empty(&fs->phase_leafs[phase])) b = list_entry(fs->phase_leafs[phase].next, struct block, lru); - else - b = NULL; - - if (b) { - if (test_bit(B_InoIdx, &b->flags)) - getiref_locked(iblk(b), MKREF(flushable)); - else - getref_locked(b, MKREF(flushable)); - list_del_init(&b->lru); + else { + spin_unlock(&fs->lock); + return NULL; } + + getref_locked_needsync(b, MKREF(flushable)); + list_del_init(&b->lru); spin_unlock(&fs->lock); - if (!b) - return b; + + sync_ref(b); /* Need an iolock, but if the block is in writeback, * or unpinned or otherwise not a leaf, we don't want it. diff --git a/clean.c b/clean.c index cf4bc1e..f135cf5 100644 --- a/clean.c +++ b/clean.c @@ -86,6 +86,9 @@ static struct block *first_in_seg(struct block *b, struct fs *fs, } if (p && test_bit(B_InoIdx, &p->flags)) { struct datablock *db = LAFSI(p->inode)->dblock; + spin_unlock(&as->private_lock); + as = &db->b.inode->i_data; + spin_lock(&as->private_lock); p = &db->b; } diff --git a/index.c b/index.c index 87b617b..a68823c 100644 --- a/index.c +++ b/index.c @@ -60,12 +60,6 @@ static int lafs_shrinker(int nr_to_scan, /*gfp_t*/unsigned int gfp_mask) struct indexblock, b.lru); /* Check if it is really free */ - if (test_bit(B_InoIdx, &ib->b.flags)) { - spin_lock(&ib->b.inode->i_data.private_lock); - if (atomic_read(&ib->b.refcnt) == 0) - LAFSI(ib->b.inode)->iblock = NULL; - spin_unlock(&ib->b.inode->i_data.private_lock); - } if (atomic_read(&ib->b.refcnt)) { list_del_init(&ib->b.lru); @@ -73,6 +67,9 @@ static int lafs_shrinker(int nr_to_scan, /*gfp_t*/unsigned int gfp_mask) freelist.freecnt--; continue; } + if (test_bit(B_InoIdx, &ib->b.flags)) + LAFSI(ib->b.inode)->iblock = NULL; + /* Any pinned iblock must have children, be * refcounted, or be on a 'leaf' lru, which gives * a refcount. So this unreffed block cannot be @@ -109,7 +106,7 @@ void lafs_release_index(struct list_head *head) * They need to both obey the same locking rules, and ensure * blocks are removed from each of siblings, lru, and hash. * This list is an inode's free_index and are linked on - * b.siblings. There are all on the freelist.lru + * b.siblings. They are all on the freelist.lru */ LIST_HEAD(togo); spin_lock(&lafs_hash_lock); @@ -231,17 +228,19 @@ ihash_lookup(struct inode *inode, faddr_t addr, int depth, ib = NULL; } if (ib) - getiref_locked(ib, REF); + getiref_locked_needsync(ib, REF); spin_unlock(&lafs_hash_lock); + if (ib) + sync_ref(&ib->b); return ib; } -struct indexblock *getiref_locked(struct indexblock *ib, REFARG) +struct indexblock *lafs_getiref_locked(struct indexblock *ib) { - struct datablock *db; int ok = 0; if (!ib) return ib; + LAFS_BUG(!test_bit(B_InoIdx, &ib->b.flags), &ib->b); if (spin_is_locked(&ib->b.inode->i_data.private_lock)) ok = 1; if (spin_is_locked(&lafs_hash_lock)) @@ -250,19 +249,19 @@ struct indexblock *getiref_locked(struct indexblock *ib, REFARG) ok = 1; LAFS_BUG(!ok, &ib->b); - if (!test_bit(B_InoIdx, &ib->b.flags)) { - getref_locked(&ib->b, REF); - return ib; - } - if (atomic_read(&ib->b.refcnt) == 0) { + if (atomic_inc_return(&ib->b.refcnt) == 1) { /* First reference to an InoIdx block implies a reference * to the dblock + * If two different threads come here under different locks, one could + * exit before the ref on the dblock is taken. However as each + * thread must own an indirect reference to be here, the last indirect ref to + * the dblock cannot disappear before all threads have exited this function, + * by which time this direct ref will be active. */ - db = getdref_locked(LAFSI(ib->b.inode)->dblock, MKREF(iblock)); + struct datablock *db; + db = getdref(LAFSI(ib->b.inode)->dblock, MKREF(iblock)); LAFS_BUG(db == NULL, &ib->b); } - add_ref(&ib->b, REF, __FILE__, __LINE__); - __getref(&ib->b); return ib; } @@ -366,6 +365,7 @@ block_adopt(struct block *blk, struct indexblock *parent) blk->parent = parent; getiref(parent, MKREF(child)); /* Remove from the per-inode free list */ + spin_lock(&lafs_hash_lock); if (!test_bit(B_InoIdx, &blk->flags)) list_move(&blk->siblings, &parent->children); else { @@ -378,6 +378,7 @@ block_adopt(struct block *blk, struct indexblock *parent) */ (void)getdref(LAFSI(blk->inode)->dblock, MKREF(pdblock)); } + spin_unlock(&lafs_hash_lock); } spin_unlock(&as->private_lock); } @@ -432,11 +433,11 @@ void lafs_pin_block_ph(struct block *b, int ph) static void pin_all_children(struct indexblock *ib) { - /* If we cannot get new credits are phase_flip, we need to + /* If we cannot get new credits for phase_flip, we need to * pin all dirty data-block children so that they get * written and don't remain to require a phase_flip * on the next checkpoint (at which point there will be - * no credit left + * no credit left) * So if any dirty child is found, set phase. We don't * need PinPending as it is already Dirty. All other * requirement for pinning will already be met. @@ -459,6 +460,7 @@ restart: spin_lock(&ib->b.inode->i_data.private_lock); b = list_prepare_entry(b, &ib->children, siblings); list_for_each_entry_continue(b, &ib->children, siblings) { + /* FIXME I don't descend through inode blocks to the file below! */ if (test_bit(B_Index, &b->flags)) { /* restart down */ depth++; @@ -725,6 +727,7 @@ int lafs_is_leaf(struct block *b, int ph) */ struct inode *myi; struct lafs_inode *lai; + int rv = 1; if (!test_bit(B_Pinned, &b->flags) || test_bit(B_Writeback, &b->flags)) @@ -749,17 +752,15 @@ int lafs_is_leaf(struct block *b, int ph) ph = !!test_bit(B_Phase1, &b->flags); if (lai->iblock && test_bit(B_Pinned, &lai->iblock->b.flags) && - !!test_bit(B_Phase1, &lai->iblock->b.flags) == ph) { + !!test_bit(B_Phase1, &lai->iblock->b.flags) == ph) /* iblock still pinned in the same phase, so * this block isn't a leaf */ - spin_unlock(&lai->vfs_inode.i_data.private_lock); - rcu_iput(myi); - return 0; - } + rv = 0; + spin_unlock(&lai->vfs_inode.i_data.private_lock); rcu_iput(myi); - return 1; + return rv; } /* @@ -848,18 +849,21 @@ static void set_lru(struct block *b) */ if (test_bit(B_OnFree, &b->flags)) { spin_lock(&lafs_hash_lock); - if (test_and_clear_bit(B_OnFree, &b->flags)) + if (test_and_clear_bit(B_OnFree, &b->flags)) { list_del_init(&b->lru); + freelist.freecnt--; + } spin_unlock(&lafs_hash_lock); } fs = fs_from_inode(b->inode); spin_lock(&fs->lock); if (list_empty(&b->lru)) { - ph = !!test_bit(B_Phase1, &b->flags); if (test_bit(B_Realloc, &b->flags)) list_add(&b->lru, &fs->clean_leafs); - else + else { + ph = !!test_bit(B_Phase1, &b->flags); list_add(&b->lru, &fs->phase_leafs[ph]); + } } spin_unlock(&fs->lock); @@ -924,10 +928,7 @@ void lafs_refile(struct block *b, int dec) lafs_checkpoint_unlock(fs); } - - - if (dec && _putref_norefile(b)) { - spin_lock(&lafs_hash_lock); + if (dec && atomic_dec_and_lock(&b->refcnt, &b->inode->i_data.private_lock)) { /* PinPending disappears with the last non-lru reference, * (or possibly at other times). */ @@ -935,22 +936,15 @@ void lafs_refile(struct block *b, int dec) ph = !!test_bit(B_Phase1, &b->flags); /* See if we still need to be pinned */ - /* FIXME need some locking here ... */ if (test_bit(B_Pinned, &b->flags) && - !test_bit(B_PinPending, &b->flags) && !test_bit(B_Dirty, &b->flags) && !test_bit(B_Realloc, &b->flags) && (!test_bit(B_Index, &b->flags) || - (iblk(b)->uninc_table.pending_cnt == 0 && - iblk(b)->uninc == NULL && - iblk(b)->uninc_next == NULL && - atomic_read(&iblk(b)->pincnt[0]) == 0 && - atomic_read(&iblk(b)->pincnt[1]) == 0)) + iblk(b)->uninc_table.pending_cnt == 0) ) { /* Don't need to be Pinned any more */ - lafs_checkpoint_lock(fs); if (test_and_clear_bit(B_Pinned, &b->flags)) { - if (test_bit(B_Index, &b->flags) && !list_empty_careful(&b->lru)) { + if (!list_empty_careful(&b->lru)) { spin_lock(&fs->lock); list_del_init(&b->lru); spin_unlock(&fs->lock); @@ -972,7 +966,6 @@ void lafs_refile(struct block *b, int dec) } } } - lafs_checkpoint_unlock(fs); } /* check the ->parent link */ @@ -1019,10 +1012,10 @@ void lafs_refile(struct block *b, int dec) } if (test_and_clear_bit(B_SegRef, &b->flags)) physref = b->physaddr; - spin_lock(&b->inode->i_data.private_lock); + LAFS_BUG(test_bit(B_PrimaryRef, &b->flags), b); list_del_init(&b->siblings); - spin_unlock(&b->inode->i_data.private_lock); + if (test_bit(B_Index, &b->flags)) list_add(&b->siblings, &LAFSI(b->inode)->free_index); @@ -1057,21 +1050,30 @@ void lafs_refile(struct block *b, int dec) printk("Problem: %s\n", strblk(b)); LAFS_BUG(b->parent != NULL, b); /* put it on the lru */ + spin_lock(&lafs_hash_lock); if (!test_and_set_bit(B_OnFree, &b->flags)) { LAFS_BUG(!list_empty(&b->lru), b); list_move(&b->lru, &freelist.lru); + freelist.freecnt++; } - freelist.freecnt++; + spin_unlock(&lafs_hash_lock); } /* last reference to a dblock with no page * requires special handling * The first block on a page must be freed, * the other blocks hold a reference on that * first block which must be dropped. + * However it is possible that a new reference was taken + * via _leafs. If so we have now cleared Pinned so + * get_flushable will immediately put this again. + * so we should leave this cleanup to later. + * Unlike other tests, this one isn't idempotent, so + * need the check on refcnt */ db = dblk(b); if (!test_bit(B_Index, &b->flags) && - !PagePrivate(db->page)) { + !PagePrivate(db->page) && + atomic_read(&b->refcnt) == 0) { int bits = (PAGE_SHIFT - b->inode->i_blkbits); int mask = (1<flags)) { - spin_lock(&b->inode->i_data - .private_lock); LAFS_BUG(LAFSI(b->inode)->iblock != iblk(b), b); LAFS_BUG(next, next); next = &LAFSI(b->inode)->dblock->b; del_ref(next, MKREF(iblock), __FILE__, __LINE__); - spin_unlock(&b->inode->i_data - .private_lock); } /* Free a delayed-release inode */ @@ -1110,12 +1110,18 @@ void lafs_refile(struct block *b, int dec) test_bit(I_Destroyed, &LAFSI(myi)->iflags))) { dblk(b)->my_inode = NULL; LAFSI(myi)->dblock = NULL; + /* Don't need lafs_hash_lock to clear iblock as + * new refs on iblock are only taken while holding + * dblock, and we know dblock has no references. + * You would need an iget to get a ref on dblock now, + * and because I_Destroyed, we know that isn't possible. + */ LAFSI(myi)->iblock = NULL; - spin_unlock(&lafs_hash_lock); + spin_unlock(&b->inode->i_data.private_lock); if (test_bit(I_Destroyed, &LAFSI(myi)->iflags)) lafs_destroy_inode(myi); } else - spin_unlock(&lafs_hash_lock); + spin_unlock(&b->inode->i_data.private_lock); } rcu_iput(myi); @@ -1162,19 +1168,20 @@ lafs_make_iblock(struct inode *ino, int adopt, int async, REFARG) BUG_ON(lai->dblock == NULL); BUG_ON(atomic_read(&lai->dblock->b.refcnt) == 0); retry: - spin_lock(&lai->vfs_inode.i_data.private_lock); + spin_lock(&lafs_hash_lock); if (lai->iblock) - ib = getiref_locked(lai->iblock, REF); + ib = getiref_locked_needsync(lai->iblock, REF); else if (new) { lai->iblock = new; ib = new; new = NULL; (void)getdref(lai->dblock, MKREF(iblock)); } - spin_unlock(&lai->vfs_inode.i_data.private_lock); + spin_unlock(&lafs_hash_lock); if (new) lafs_iblock_free(new); if (ib) { + sync_ref(&ib->b); if (adopt) { err = setparent(lai->dblock, async); if (err == 0) diff --git a/inode.c b/inode.c index 02a5356..489560c 100644 --- a/inode.c +++ b/inode.c @@ -472,15 +472,32 @@ void lafs_inode_checkpin(struct inode *ino) } } -struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG) +struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG) { - struct datablock *db = NULL; - int err; + struct datablock *db; spin_lock(&ino->i_data.private_lock); - if (LAFSI(ino)->dblock) - db = getdref_locked(LAFSI(ino)->dblock, REF); + db = LAFSI(ino)->dblock; + if (db) { + if (db->b.inode == ino) + getdref_locked(db, REF); + else { + spin_lock_nested(&db->b.inode->i_data.private_lock, 1); + getdref_locked(db, REF); + spin_unlock(&db->b.inode->i_data.private_lock); + } + } spin_unlock(&ino->i_data.private_lock); + return db; +} + +struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG) +{ + struct datablock *db; + int err; + + db = lafs_inode_get_dblock(ino, REF); + if (!db) db = lafs_get_block(ino_from_sb(ino->i_sb), ino->i_ino, NULL, GFP_KERNEL, REF); @@ -661,14 +678,7 @@ void lafs_delete_inode(struct inode *ino) */ BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags)); - spin_lock(&ino->i_data.private_lock); - b = LAFSI(ino)->dblock; - /* FIXME we use b unconditionally, so either we don't - * need the test, or we need to make the block if it - * doesn't exist.. */ - if (b) - getdref_locked(b, MKREF(delete_inode)); - spin_unlock(&ino->i_data.private_lock); + b = lafs_inode_dblock(ino, SYNC, MKREF(delete_inode)); i_size_write(ino, 0); truncate_inode_pages(&ino->i_data, 0); @@ -676,14 +686,13 @@ void lafs_delete_inode(struct inode *ino) set_bit(I_Trunc, &LAFSI(ino)->iflags); set_bit(I_Deleting, &LAFSI(ino)->iflags); - set_bit(B_Claimed, &b->b.flags); - lafs_add_orphan(fs, b); - + if (!IS_ERR(b)) { + set_bit(B_Claimed, &b->b.flags); + lafs_add_orphan(fs, b); + dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr); + putdref(b, MKREF(delete_inode)); + } inode_map_free(fs, ino->i_sb, ino->i_ino); - // FIXME what to do on error (-ENOMEM ) - - dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr); - putdref(b, MKREF(delete_inode)); clear_inode(ino); } diff --git a/lafs.h b/lafs.h index c1a0e9a..a8e5951 100644 --- a/lafs.h +++ b/lafs.h @@ -45,7 +45,7 @@ extern void lafs_dump_tree(void); #define lafs_make_iblock(a,b,c,d) _lafs_make_iblock(a,b,c,d) #define lafs_leaf_find(a,b,c,d,e,f) _lafs_leaf_find(a,b,c,d,e,f) #define lafs_inode_dblock(a,b,c) _lafs_inode_dblock(a,b,c) -#define getiref_locked(a,b) _lafs_getiref_locked(a,b) +#define lafs_inode_get_dblock(a,b) _lafs_inode_get_dblock(a,b) #else #define REFARG void #define REF 0 @@ -64,7 +64,7 @@ extern void lafs_dump_tree(void); #define lafs_make_iblock(a,b,c,d) _lafs_make_iblock(a,b,c) #define lafs_leaf_find(a,b,c,d,e,f) _lafs_leaf_find(a,b,c,d,e) #define lafs_inode_dblock(a,b,c) _lafs_inode_dblock(a,b) -#define getiref_locked(a,b) _lafs_getiref_locked(a) +#define lafs_inode_get_dblock(a,b) _lafs_inode_get_dblock(a) #endif #define strblk(a) lafs_strblk(a) @@ -151,6 +151,7 @@ struct inode *lafs_new_inode(struct fs *fs, struct super_block *sb, int lafs_lock_inode(struct inode *ino); void lafs_inode_fillblock(struct inode *ino); struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG); +struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG); int lafs_inode_handle_orphan(struct datablock *b); struct datablock *lafs_get_block(struct inode *ino, unsigned long index, @@ -421,6 +422,8 @@ static inline struct inode *iget_my_inode(struct datablock *db) int lafs_is_leaf(struct block *b, int ph); void lafs_refile(struct block *b, int dec); +extern struct indexblock *lafs_getiref_locked(struct indexblock *ib); + extern spinlock_t lafs_hash_lock; static inline struct block *__getref(struct block *b) @@ -436,55 +439,38 @@ static inline struct block *_getref(struct block *b) return __getref(b); } +static inline struct datablock *_getdref_locked(struct datablock *b) +{ + LAFS_BUG(!spin_is_locked(&b->b.inode->i_data.private_lock), &b->b); + __getref(&b->b); + return b; +} + static inline struct block *_getref_locked(struct block *b) { - int ok = 0; - if (test_bit(B_Index, &b->flags)) { - /* InoIdx must use getiref_locked which goes straight to - * __getref - */ - BUG_ON(test_bit(B_InoIdx, &b->flags)); - - if (spin_is_locked(&b->inode->i_data.private_lock)) - ok = 1; - if (spin_is_locked(&lafs_hash_lock)) - ok = 1; - // FIXME more tests - just when is this allowed? - } else { - struct datablock *db = dblk(b); - struct inode *ino = NULL; - - if (db->page && - (PageLocked(db->page) || - ((ino = rcu_my_inode(db)) != NULL && - spin_is_locked(&ino->i_data.private_lock)) - )) - ok = 1; - rcu_iput(ino); ino = NULL; - if ((ino = rcu_my_inode(db)) != NULL && - spin_is_locked(&ino->i_data.private_lock)) - ok = 1; - rcu_iput(ino); - if (spin_is_locked(&b->inode->i_data.private_lock)) - ok = 1; - } - if (test_bit(B_Dirty, &b->flags) || test_bit(B_Realloc, &b->flags)) - if (spin_is_locked(&fs_from_inode(b->inode)->lock)) - ok = 1; - if(spin_is_locked(&fs_from_inode(b->inode)->lock)) - /* Removing from leaf list */ - ok = 1; - if (!ok) - printk("Not locked: %s\n", strblk(b)); - BUG_ON(!ok); + LAFS_BUG(!spin_is_locked(&b->inode->i_data.private_lock), b); + if (test_bit(B_InoIdx, &b->flags)) + return &lafs_getiref_locked(iblk(b))->b; + __getref(b); + return b; +} - return __getref(b); +static inline struct indexblock *_getiref_locked_needsync(struct indexblock *b) +{ + LAFS_BUG(!spin_is_locked(&lafs_hash_lock), &b->b); + if (test_bit(B_InoIdx, &b->b.flags)) + return lafs_getiref_locked(b); + __getref(&b->b); + return b; } -static inline int _putref_norefile(struct block *b) +static inline struct block *_getref_locked_needsync(struct block *b) { - BUG_ON(atomic_read(&b->refcnt)==0); - return atomic_dec_and_test(&b->refcnt); + LAFS_BUG(!spin_is_locked(&fs_from_inode(b->inode)->lock), b); + if (test_bit(B_InoIdx, &b->flags)) + return &lafs_getiref_locked(iblk(b))->b; + __getref(b); + return b; } static inline void _putref(struct block *b) @@ -495,25 +481,70 @@ static inline void _putref(struct block *b) lafs_refile(b, 1); } -struct indexblock *getiref_locked(struct indexblock *ib, REFARG); - #if DEBUG_REF == 0 #define _reflog(c,blk,ref,add) (c(blk)) #else #define _reflog(c,blk,ref,add) ({ !blk ? 0 : \ add? add_ref(blk,ref,__FILE__,__LINE__) : \ del_ref(blk,ref, __FILE__,__LINE__); c(blk); }) +#define _refxlog(c,blk,ref,add) ({ \ + add? add_ref(&(blk)->b,ref,__FILE__,__LINE__) : \ + del_ref(&(blk)->b,ref, __FILE__,__LINE__); c(blk); }) #endif #define getref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->refcnt));}), _reflog(_getref, blk,r,1)) #define getref_locked(blk, r) _reflog(_getref_locked, blk,r,1) -#define putref_norefile(blk, r) _reflog(_putref_norefile, blk,r,0) +#define getdref_locked(blk, r) _refxlog(_getdref_locked, blk,r,1) +#define getref_locked_needsync(blk, r) _reflog(_getref_locked_needsync, blk,r,1) +#define getiref_locked_needsync(blk, r) _refxlog(_getiref_locked_needsync, blk,r,1) #define putref(blk, r) _reflog(_putref, blk,r,0) #define getdref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->b.refcnt));}), _reflog(_getref, (&(blk)->b),r,1),blk) #define getiref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->b.refcnt));}), _reflog(_getref, (&(blk)->b),r,1),blk) -#define getdref_locked(blk, r) (_reflog(_getref_locked, (&(blk)->b),r,1),blk) #define putdref(blk, r) ({ if (blk) _reflog(_putref, (&(blk)->b),r,0);}) #define putiref(blk, r) ({ if (blk) _reflog(_putref, (&(blk)->b),r,0);}) +/* When we get a ref on a block other than under b->inode->i_data.private_lock, we + * need to call this to ensure that lafs_refile isn't still handling a refcnt->0 transition. + */ +static inline void sync_ref(struct block *b) +{ + spin_lock(&b->inode->i_data.private_lock); + spin_unlock(&b->inode->i_data.private_lock); +} + +/* + * Notes on locking and block references. + * There are several places that can hold uncounted references to blocks. + * When we try to convert such a reference to a counted reference we need to be careful. + * We must increment the refcount under a lock that protects the particular reference, and + * then must call sync_ref above to ensure our new reference is safe. + * When the last counted reference on a block is dropped (in lafs_refile) we hold private_lock + * while cleaning up and sync_ref assures that cleanup is finished. + * Before we can free a block we must synchronise with these other locks and + * then ensure that the refcount is still zero. + * For index blocks that means taking lafs_hash_lock and ensuring the count is still zero. + * For most data blocks we need to remove from the lru list using fs->lock, and check the count is + * still zero. + * For inode data blocks we need to follow the inode and break the ->dblock lock under the inodes + * own lock - but only if the refcount is still zero. + * + * The list of non-counted references and the locks which protect them is: + * + * index hash table lafs_hash_lock + * index freelist lafs_hash_lock + * phase_leafs / clean_leafs fs->lock + * inode->iblock lafs_hash_lock + * inode->dblock inode->i_data.private_lock + * inode->free_index lafs_hash_lock + * page->private page lock + * + * Lock ordering among these is: + * inode->i_data.private_lock + * LAFSI(inode)->dblock->inode->i_data.private_lock + * fs->lock + * lafs_hash_lock + * + */ + int __must_check lafs_setparent(struct datablock *blk); /* diff --git a/modify.c b/modify.c index 3067858..424feba 100644 --- a/modify.c +++ b/modify.c @@ -390,7 +390,9 @@ void lafs_clear_index(struct indexblock *ib) *(u16 *)(buf+offset) = cpu_to_le16(IBLK_INDIRECT); else if (ib->depth > 1) *(u16 *)(buf+offset) = cpu_to_le16(IBLK_INDEX); - + else + LAFS_BUG(1, &ib->b); + if (offset) { /* just to be on the safe side ... */ ((struct la_inode*)(buf))->depth = ib->depth; diff --git a/orphan.c b/orphan.c index 30f18b8..ac538f7 100644 --- a/orphan.c +++ b/orphan.c @@ -498,6 +498,8 @@ long lafs_run_orphans(struct fs *fs) struct datablock, orphans); list_move(&db->orphans, &done); + getdref(db, MKREF(run_orphans)); + spin_unlock(&fs->lock); ino = orphan_inode(db); if (!ino && !test_bit(B_Claimed, &db->b.flags)) /* OK not to have an inode */; @@ -508,10 +510,10 @@ long lafs_run_orphans(struct fs *fs) */ orphan_iput(ino); timeout = msecs_to_jiffies(500); + putdref(db, MKREF(run_orphans)); + spin_lock(&fs->lock); continue; } - getdref(db, MKREF(run_orphans)); - spin_unlock(&fs->lock); if (!ino) lafs_orphan_forget(fs, db); diff --git a/super.c b/super.c index 064e960..3f968f9 100644 --- a/super.c +++ b/super.c @@ -1232,17 +1232,12 @@ static void kfree_inode(struct rcu_head *head) } void lafs_destroy_inode(struct inode *inode) { - struct datablock *db = NULL; + struct datablock *db; BUG_ON(!list_empty(&inode->i_sb_list)); // Cannot test i_list as dispose_list just does list_del - if (LAFSI(inode)->dblock) { - spin_lock(&inode->i_data.private_lock); - if (LAFSI(inode)->dblock) - db = getdref_locked(LAFSI(inode)->dblock, - MKREF(destroy)); - spin_unlock(&inode->i_data.private_lock); - } + db = lafs_inode_get_dblock(inode, MKREF(destroy)); + if (db) { set_bit(I_Destroyed, &LAFSI(inode)->iflags); putdref(db, MKREF(destroy)); @@ -1317,20 +1312,20 @@ static int lafs_statfs(struct dentry *de, struct kstatfs *buf) return 0; } +/* FIXME we hold inode_lock while calling drop_inode, so + * extra locking isn't really welcome....??? + */ static void lafs_drop_inode(struct inode *inode) { struct fs *fs = fs_from_inode(inode); - struct datablock *db = NULL; + struct datablock *db; /* This lock that we now hold on the inode could prevent * the cleaner from getting the inode. So after * the complete the drop we might need to wake the cleaner. */ - spin_lock(&inode->i_data.private_lock); - if (LAFSI(inode)->dblock) - db = getdref_locked(LAFSI(inode)->dblock, MKREF(drop)); - spin_unlock(&inode->i_data.private_lock); + db = lafs_inode_get_dblock(inode, MKREF(drop)); generic_drop_inode(inode); if (db && test_bit(B_Async, &db->b.flags))