git.neil.brown.name Git - LaFS.git/commitdiff
lafs_refile: lots of locking and refcount changes.
author NeilBrown <neilb@suse.de>
Fri, 17 Sep 2010 05:38:10 +0000 (15:38 +1000)
committer NeilBrown <neilb@suse.de>
Fri, 17 Sep 2010 05:38:10 +0000 (15:38 +1000)
See the comments added to lafs.h for more detail.

There are really several changes and fixes in here but I don't think
it is worth the effort to separate them all out.

Signed-off-by: NeilBrown <neilb@suse.de>
block.c
checkpoint.c
clean.c
index.c
inode.c
lafs.h
modify.c
orphan.c
super.c

diff --git a/block.c b/block.c
index c883c9cff1a0061fd87de48ae2cd3741c0096e6d..2ac721bfc794176c4a8da722fe6c8c0f953e7fd5 100644 (file)
--- a/block.c
+++ b/block.c
@@ -159,7 +159,10 @@ lafs_get_block(struct inode *ino, unsigned long index, struct page *p,
 
        b = (struct datablock *)p->private;
        b += index & ((1<<bits)-1);
+       /* spinlock is just to sync with lafs_refile */
+       spin_lock(&ino->i_data.private_lock);
        getdref_locked(b, REF);
+       spin_unlock(&ino->i_data.private_lock);
 
        if (unlock) {
                unlock_page(p);
@@ -200,7 +203,10 @@ void lafs_invalidate_page(struct page *page, unsigned long offset)
                 *   wait for any pending IO to complete (so page can be freed)
                 */
                for (i = 0; i < (1<<bits); i++) {
-                       getref_locked(&b[i].b, MKREF(inval));
+                       spin_lock(&ino->i_data.private_lock);
+                       (void)getdref_locked(&b[i], MKREF(inval));
+                       spin_unlock(&ino->i_data.private_lock);
+
                        if (b_start >= offset &&
                            test_and_clear_bit(B_Async, &b[i].b.flags))
                                putdref(&b[i], MKREF(Async));
@@ -531,24 +537,37 @@ erase_dblock_locked(struct datablock *b)
                 * block lives in.
                 * Need private_lock to be allowed to dereference ->iblock
                 * though if b was dirty we shouldn't.... FIXME.
+                * We need to hold the ref to idb for the getiref_locked_needsync to
+                * be safe.
                 */
                struct indexblock *ib;
-               spin_lock(&b->b.inode->i_data.private_lock);
+               struct datablock *idb = lafs_inode_dblock(b->b.inode, SYNC, MKREF(erase));
+               if (IS_ERR(idb)) {
+                       /* not much we can do here */
+                       BUG();
+                       goto skip;
+               }
+               spin_lock(&lafs_hash_lock);
                ib = LAFSI(b->b.inode)->iblock;
                if (ib)
-                       getiref_locked(ib, MKREF(erasedblock));
-               spin_unlock(&b->b.inode->i_data.private_lock);
+                       getiref_locked_needsync(ib, MKREF(erasedblock));
+               spin_unlock(&lafs_hash_lock);
+               sync_ref(&ib->b);
+               putdref(idb, MKREF(erase));
                if (ib) {
                        lafs_iolock_written(&ib->b);
                        if (ib->depth == 0) {
                                LAFS_BUG(LAFSI(b->b.inode)->depth !=
                                         ib->depth, &b->b);
+                               ib->depth = 1;
+                               LAFSI(b->b.inode)->depth = 1;
                                lafs_clear_index(ib);
                                clear_bit(B_PhysValid, &b->b.flags);
                        }
                        lafs_iounlock_block(&ib->b);
                        putiref(ib, MKREF(erasedblock));
                }
+       skip:;
        }
 
        if (LAFSI(b->b.inode)->type == TypeInodeFile) {
index 7eaa664e344908421b3f3f3596b45524e0ac89c8..47019380cb4acb5f9647b28b946a1a34333ffc9e 100644 (file)
@@ -337,19 +337,16 @@ retry:
                b = list_entry(fs->clean_leafs.next, struct block, lru);
        else if (phase >= 0 && !list_empty(&fs->phase_leafs[phase]))
                b = list_entry(fs->phase_leafs[phase].next, struct block, lru);
-       else
-               b = NULL;
-
-       if (b) {
-               if (test_bit(B_InoIdx, &b->flags))
-                       getiref_locked(iblk(b), MKREF(flushable));
-               else
-                       getref_locked(b, MKREF(flushable));
-               list_del_init(&b->lru);
+       else {
+               spin_unlock(&fs->lock);
+               return NULL;
        }
+
+       getref_locked_needsync(b, MKREF(flushable));
+       list_del_init(&b->lru);
        spin_unlock(&fs->lock);
-       if (!b)
-               return b;
+
+       sync_ref(b);
 
        /* Need an iolock, but if the block is in writeback,
         * or unpinned or otherwise not a leaf, we don't want it.
diff --git a/clean.c b/clean.c
index cf4bc1e123fe6f5de18cf96c051e0f5a622cb607..f135cf5a4c2abcc187bc22e2ac602f668ddc3e69 100644 (file)
--- a/clean.c
+++ b/clean.c
@@ -86,6 +86,9 @@ static struct block *first_in_seg(struct block *b, struct fs *fs,
        }
        if (p && test_bit(B_InoIdx, &p->flags)) {
                struct datablock *db = LAFSI(p->inode)->dblock;
+               spin_unlock(&as->private_lock);
+               as = &db->b.inode->i_data;
+               spin_lock(&as->private_lock);
                p = &db->b;
        }
 
diff --git a/index.c b/index.c
index 87b617bba9b19623f7a0da6b7a837a44d39900d7..a68823cd32410f48cc21b46b97fc49e4830d6a72 100644 (file)
--- a/index.c
+++ b/index.c
@@ -60,12 +60,6 @@ static int lafs_shrinker(int nr_to_scan, /*gfp_t*/unsigned int gfp_mask)
                                                           struct indexblock,
                                                           b.lru);
                        /* Check if it is really free */
-                       if (test_bit(B_InoIdx, &ib->b.flags)) {
-                               spin_lock(&ib->b.inode->i_data.private_lock);
-                               if (atomic_read(&ib->b.refcnt) == 0)
-                                       LAFSI(ib->b.inode)->iblock = NULL;
-                               spin_unlock(&ib->b.inode->i_data.private_lock);
-                       }
 
                        if (atomic_read(&ib->b.refcnt)) {
                                list_del_init(&ib->b.lru);
@@ -73,6 +67,9 @@ static int lafs_shrinker(int nr_to_scan, /*gfp_t*/unsigned int gfp_mask)
                                freelist.freecnt--;
                                continue;
                        }
+                       if (test_bit(B_InoIdx, &ib->b.flags))
+                               LAFSI(ib->b.inode)->iblock = NULL;
+
                        /* Any pinned iblock must have children, be
                         * refcounted, or be on a 'leaf' lru, which gives
                         * a refcount.  So this unreffed block cannot be
@@ -109,7 +106,7 @@ void lafs_release_index(struct list_head *head)
         * They need to both obey the same locking rules, and ensure
         * blocks are removed from each of siblings, lru, and hash.
         * This list is an inode's free_index and are linked on
-        * b.siblings.  There are all on the freelist.lru
+        * b.siblings.  They are all on the freelist.lru
         */
        LIST_HEAD(togo);
        spin_lock(&lafs_hash_lock);
@@ -231,17 +228,19 @@ ihash_lookup(struct inode *inode, faddr_t addr, int depth,
                        ib = NULL;
        }
        if (ib)
-               getiref_locked(ib, REF);
+               getiref_locked_needsync(ib, REF);
        spin_unlock(&lafs_hash_lock);
+       if (ib)
+               sync_ref(&ib->b);
        return ib;
 }
 
-struct indexblock *getiref_locked(struct indexblock *ib, REFARG)
+struct indexblock *lafs_getiref_locked(struct indexblock *ib)
 {
-       struct datablock *db;
        int ok = 0;
        if (!ib)
                return ib;
+       LAFS_BUG(!test_bit(B_InoIdx, &ib->b.flags), &ib->b);
        if (spin_is_locked(&ib->b.inode->i_data.private_lock))
                ok = 1;
        if (spin_is_locked(&lafs_hash_lock))
@@ -250,19 +249,19 @@ struct indexblock *getiref_locked(struct indexblock *ib, REFARG)
                ok = 1;
        LAFS_BUG(!ok, &ib->b);
 
-       if (!test_bit(B_InoIdx, &ib->b.flags)) {
-               getref_locked(&ib->b, REF);
-               return ib;
-       }
-       if (atomic_read(&ib->b.refcnt) == 0) {
+       if (atomic_inc_return(&ib->b.refcnt) == 1) {
                /* First reference to an InoIdx block implies a reference
                 * to the dblock
+                * If two different threads come here under different locks, one could
+                * exit before the ref on the dblock is taken.  However as each
+                * thread must own an indirect reference to be here, the last indirect ref to
+                * the dblock cannot disappear before all threads have exited this function,
+                * by which time this direct ref will be active.
                 */
-               db = getdref_locked(LAFSI(ib->b.inode)->dblock, MKREF(iblock));
+               struct datablock *db;
+               db = getdref(LAFSI(ib->b.inode)->dblock, MKREF(iblock));
                LAFS_BUG(db == NULL, &ib->b);
        }
-       add_ref(&ib->b, REF, __FILE__, __LINE__);
-       __getref(&ib->b);
        return ib;
 }
 
@@ -366,6 +365,7 @@ block_adopt(struct block *blk, struct indexblock *parent)
                blk->parent = parent;
                getiref(parent, MKREF(child));
                /* Remove from the per-inode free list */
+               spin_lock(&lafs_hash_lock);
                if (!test_bit(B_InoIdx, &blk->flags))
                        list_move(&blk->siblings, &parent->children);
                else {
@@ -378,6 +378,7 @@ block_adopt(struct block *blk, struct indexblock *parent)
                         */
                        (void)getdref(LAFSI(blk->inode)->dblock, MKREF(pdblock));
                }
+               spin_unlock(&lafs_hash_lock);
        }
        spin_unlock(&as->private_lock);
 }
@@ -432,11 +433,11 @@ void lafs_pin_block_ph(struct block *b, int ph)
 
 static void pin_all_children(struct indexblock *ib)
 {
-       /* If we cannot get new credits are phase_flip, we need to
+       /* If we cannot get new credits for phase_flip, we need to
         * pin all dirty data-block children so that they get
         * written and don't remain to require a phase_flip
         * on the next checkpoint (at which point there will be
-        * no credit left
+        * no credit left)
         * So if any dirty child is found, set phase.  We don't
         * need PinPending as it is already Dirty.  All other
         * requirement for pinning will already be met.
@@ -459,6 +460,7 @@ restart:
        spin_lock(&ib->b.inode->i_data.private_lock);
        b = list_prepare_entry(b, &ib->children, siblings);
        list_for_each_entry_continue(b, &ib->children, siblings) {
+               /* FIXME I don't descend through inode blocks to the file below! */
                if (test_bit(B_Index, &b->flags)) {
                        /* restart down */
                        depth++;
@@ -725,6 +727,7 @@ int lafs_is_leaf(struct block *b, int ph)
         */
        struct inode *myi;
        struct lafs_inode *lai;
+       int rv = 1;
 
        if (!test_bit(B_Pinned, &b->flags) ||
            test_bit(B_Writeback, &b->flags))
@@ -749,17 +752,15 @@ int lafs_is_leaf(struct block *b, int ph)
                ph = !!test_bit(B_Phase1, &b->flags);
        if (lai->iblock &&
            test_bit(B_Pinned, &lai->iblock->b.flags) &&
-           !!test_bit(B_Phase1, &lai->iblock->b.flags) == ph) {
+           !!test_bit(B_Phase1, &lai->iblock->b.flags) == ph)
                /* iblock still pinned in the same phase, so
                 * this block isn't a leaf
                 */
-               spin_unlock(&lai->vfs_inode.i_data.private_lock);
-               rcu_iput(myi);
-               return 0;
-       }
+               rv = 0;
+
        spin_unlock(&lai->vfs_inode.i_data.private_lock);
        rcu_iput(myi);
-       return 1;
+       return rv;
 }
 
 /*
@@ -848,18 +849,21 @@ static void set_lru(struct block *b)
         */
        if (test_bit(B_OnFree, &b->flags)) {
                spin_lock(&lafs_hash_lock);
-               if (test_and_clear_bit(B_OnFree, &b->flags))
+               if (test_and_clear_bit(B_OnFree, &b->flags)) {
                        list_del_init(&b->lru);
+                       freelist.freecnt--;
+               }
                spin_unlock(&lafs_hash_lock);
        }
        fs = fs_from_inode(b->inode);
        spin_lock(&fs->lock);
        if (list_empty(&b->lru)) {
-               ph = !!test_bit(B_Phase1, &b->flags);
                if (test_bit(B_Realloc, &b->flags))
                        list_add(&b->lru, &fs->clean_leafs);
-               else
+               else {
+                       ph = !!test_bit(B_Phase1, &b->flags);
                        list_add(&b->lru, &fs->phase_leafs[ph]);
+               }
 
        }
        spin_unlock(&fs->lock);
@@ -924,10 +928,7 @@ void lafs_refile(struct block *b, int dec)
                        lafs_checkpoint_unlock(fs);
                }
 
-
-
-               if (dec && _putref_norefile(b)) {
-                       spin_lock(&lafs_hash_lock);
+               if (dec && atomic_dec_and_lock(&b->refcnt, &b->inode->i_data.private_lock)) {
                        /* PinPending disappears with the last non-lru reference,
                         * (or possibly at other times).
                         */
@@ -935,22 +936,15 @@ void lafs_refile(struct block *b, int dec)
 
                        ph = !!test_bit(B_Phase1, &b->flags);
                        /* See if we still need to be pinned */
-                       /* FIXME need some locking here ... */
                        if (test_bit(B_Pinned, &b->flags) &&
-                           !test_bit(B_PinPending, &b->flags) &&
                            !test_bit(B_Dirty, &b->flags) &&
                            !test_bit(B_Realloc, &b->flags) &&
                            (!test_bit(B_Index, &b->flags) ||
-                            (iblk(b)->uninc_table.pending_cnt == 0 &&
-                             iblk(b)->uninc == NULL &&
-                             iblk(b)->uninc_next == NULL &&
-                             atomic_read(&iblk(b)->pincnt[0]) == 0 &&
-                             atomic_read(&iblk(b)->pincnt[1]) == 0))
+                            iblk(b)->uninc_table.pending_cnt == 0)
                                ) {
                                /* Don't need to be Pinned any more */
-                               lafs_checkpoint_lock(fs);
                                if (test_and_clear_bit(B_Pinned, &b->flags)) {
-                                       if (test_bit(B_Index, &b->flags) && !list_empty_careful(&b->lru)) {
+                                       if (!list_empty_careful(&b->lru)) {
                                                spin_lock(&fs->lock);
                                                list_del_init(&b->lru);
                                                spin_unlock(&fs->lock);
@@ -972,7 +966,6 @@ void lafs_refile(struct block *b, int dec)
                                                }
                                        }
                                }
-                               lafs_checkpoint_unlock(fs);
                        }
 
                        /* check the ->parent link */
@@ -1019,10 +1012,10 @@ void lafs_refile(struct block *b, int dec)
                                }
                                if (test_and_clear_bit(B_SegRef, &b->flags))
                                        physref = b->physaddr;
-                               spin_lock(&b->inode->i_data.private_lock);
+
                                LAFS_BUG(test_bit(B_PrimaryRef, &b->flags), b);
                                list_del_init(&b->siblings);
-                               spin_unlock(&b->inode->i_data.private_lock);
+
                                if (test_bit(B_Index, &b->flags))
                                        list_add(&b->siblings,
                                                 &LAFSI(b->inode)->free_index);
@@ -1057,21 +1050,30 @@ void lafs_refile(struct block *b, int dec)
                                        printk("Problem: %s\n", strblk(b));
                                LAFS_BUG(b->parent != NULL, b);
                                /* put it on the lru */
+                               spin_lock(&lafs_hash_lock);
                                if (!test_and_set_bit(B_OnFree, &b->flags)) {
                                        LAFS_BUG(!list_empty(&b->lru), b);
                                        list_move(&b->lru, &freelist.lru);
+                                       freelist.freecnt++;
                                }
-                               freelist.freecnt++;
+                               spin_unlock(&lafs_hash_lock);
                        }
                        /* last reference to a dblock with no page
                         * requires special handling
                         * The first block on a page must be freed,
                         * the other blocks hold a reference on that
                         * first block which must be dropped.
+                        * However it is possible that a new reference was taken
+                        * via _leafs. If so we have now cleared Pinned so
+                        * get_flushable will immediately put this again.
+                        * So we should leave this cleanup until later.
+                        * Unlike other tests, this one isn't idempotent, so
+                        * need the check on refcnt
                         */
                        db = dblk(b);
                        if (!test_bit(B_Index, &b->flags) &&
-                           !PagePrivate(db->page)) {
+                           !PagePrivate(db->page) &&
+                           atomic_read(&b->refcnt) == 0) {
                                int bits = (PAGE_SHIFT -
                                            b->inode->i_blkbits);
                                int mask = (1<<bits) - 1;
@@ -1088,19 +1090,17 @@ void lafs_refile(struct block *b, int dec)
                                }
                        }
                        /* last reference to an iblock requires that we
-                        * deref the dblock
+                        * deref the dblock.  We don't need to re-check
+                        * refcnt here as a racing getiref_locked will take an
+                        * extra dblock reference to it.
                         */
                        if (test_bit(B_InoIdx, &b->flags)) {
-                               spin_lock(&b->inode->i_data
-                                         .private_lock);
                                LAFS_BUG(LAFSI(b->inode)->iblock
                                         != iblk(b), b);
                                LAFS_BUG(next, next);
                                next = &LAFSI(b->inode)->dblock->b;
                                del_ref(next, MKREF(iblock),
                                        __FILE__, __LINE__);
-                               spin_unlock(&b->inode->i_data
-                                           .private_lock);
                        }
 
                        /* Free a delayed-release inode */
@@ -1110,12 +1110,18 @@ void lafs_refile(struct block *b, int dec)
                             test_bit(I_Destroyed, &LAFSI(myi)->iflags))) {
                                dblk(b)->my_inode = NULL;
                                LAFSI(myi)->dblock = NULL;
+                               /* Don't need lafs_hash_lock to clear iblock as
+                                * new refs on iblock are only taken while holding
+                                * dblock, and we know dblock has no references.
+                                * You would need an iget to get a ref on dblock now,
+                                * and because I_Destroyed, we know that isn't possible.
+                                */
                                LAFSI(myi)->iblock = NULL;
-                               spin_unlock(&lafs_hash_lock);
+                               spin_unlock(&b->inode->i_data.private_lock);
                                if (test_bit(I_Destroyed, &LAFSI(myi)->iflags))
                                        lafs_destroy_inode(myi);
                        } else
-                               spin_unlock(&lafs_hash_lock);
+                               spin_unlock(&b->inode->i_data.private_lock);
                }
                rcu_iput(myi);
 
@@ -1162,19 +1168,20 @@ lafs_make_iblock(struct inode *ino, int adopt, int async, REFARG)
        BUG_ON(lai->dblock == NULL);
        BUG_ON(atomic_read(&lai->dblock->b.refcnt) == 0);
 retry:
-       spin_lock(&lai->vfs_inode.i_data.private_lock);
+       spin_lock(&lafs_hash_lock);
        if (lai->iblock)
-               ib = getiref_locked(lai->iblock, REF);
+               ib = getiref_locked_needsync(lai->iblock, REF);
        else if (new) {
                lai->iblock = new;
                ib = new;
                new = NULL;
                (void)getdref(lai->dblock, MKREF(iblock));
        }
-       spin_unlock(&lai->vfs_inode.i_data.private_lock);
+       spin_unlock(&lafs_hash_lock);
        if (new)
                lafs_iblock_free(new);
        if (ib) {
+               sync_ref(&ib->b);
                if (adopt) {
                        err = setparent(lai->dblock, async);
                        if (err == 0)
diff --git a/inode.c b/inode.c
index 02a535670bc6418ae497925f68419b9acd4b0a57..489560cca916c3bd3a61ed8e73fc4f8d2b6b7de5 100644 (file)
--- a/inode.c
+++ b/inode.c
@@ -472,15 +472,32 @@ void lafs_inode_checkpin(struct inode *ino)
        }
 }
 
-struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
+struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG)
 {
-       struct datablock *db = NULL;
-       int err;
+       struct datablock *db;
 
        spin_lock(&ino->i_data.private_lock);
-       if (LAFSI(ino)->dblock)
-               db = getdref_locked(LAFSI(ino)->dblock, REF);
+       db = LAFSI(ino)->dblock;
+       if (db) {
+               if (db->b.inode == ino)
+                       getdref_locked(db, REF);
+               else {
+                       spin_lock_nested(&db->b.inode->i_data.private_lock, 1);
+                       getdref_locked(db, REF);
+                       spin_unlock(&db->b.inode->i_data.private_lock);
+               }
+       }
        spin_unlock(&ino->i_data.private_lock);
+       return db;
+}
+
+struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
+{
+       struct datablock *db;
+       int err;
+
+       db = lafs_inode_get_dblock(ino, REF);
+
        if (!db)
                db = lafs_get_block(ino_from_sb(ino->i_sb), ino->i_ino, NULL,
                                    GFP_KERNEL, REF);
@@ -661,14 +678,7 @@ void lafs_delete_inode(struct inode *ino)
         */
        BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags));
 
-       spin_lock(&ino->i_data.private_lock);
-       b = LAFSI(ino)->dblock;
-       /* FIXME we use b unconditionally, so either we don't
-        * need the test, or we need to make the block if it
-        * doesn't exist.. */
-       if (b)
-               getdref_locked(b, MKREF(delete_inode));
-       spin_unlock(&ino->i_data.private_lock);
+       b = lafs_inode_dblock(ino, SYNC, MKREF(delete_inode));
 
        i_size_write(ino, 0);
        truncate_inode_pages(&ino->i_data, 0);
@@ -676,14 +686,13 @@ void lafs_delete_inode(struct inode *ino)
        set_bit(I_Trunc, &LAFSI(ino)->iflags);
 
        set_bit(I_Deleting, &LAFSI(ino)->iflags);
-       set_bit(B_Claimed, &b->b.flags);
-       lafs_add_orphan(fs, b);
-
+       if (!IS_ERR(b)) {
+               set_bit(B_Claimed, &b->b.flags);
+               lafs_add_orphan(fs, b);
+               dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
+               putdref(b, MKREF(delete_inode));
+       }
        inode_map_free(fs, ino->i_sb,  ino->i_ino);
-       // FIXME what to do on error (-ENOMEM )
-
-       dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
-       putdref(b, MKREF(delete_inode));
 
        clear_inode(ino);
 }
diff --git a/lafs.h b/lafs.h
index c1a0e9a4a0a1881eadcc68b950bf8d1e2c3cd04b..a8e59510fea3253e299d534d5bb84d63d7c97141 100644 (file)
--- a/lafs.h
+++ b/lafs.h
@@ -45,7 +45,7 @@ extern void lafs_dump_tree(void);
 #define lafs_make_iblock(a,b,c,d) _lafs_make_iblock(a,b,c,d)
 #define lafs_leaf_find(a,b,c,d,e,f) _lafs_leaf_find(a,b,c,d,e,f)
 #define lafs_inode_dblock(a,b,c) _lafs_inode_dblock(a,b,c)
-#define        getiref_locked(a,b) _lafs_getiref_locked(a,b)
+#define lafs_inode_get_dblock(a,b) _lafs_inode_get_dblock(a,b)
 #else
 #define REFARG void
 #define REF    0
@@ -64,7 +64,7 @@ extern void lafs_dump_tree(void);
 #define lafs_make_iblock(a,b,c,d) _lafs_make_iblock(a,b,c)
 #define lafs_leaf_find(a,b,c,d,e,f) _lafs_leaf_find(a,b,c,d,e)
 #define lafs_inode_dblock(a,b,c) _lafs_inode_dblock(a,b)
-#define        getiref_locked(a,b) _lafs_getiref_locked(a)
+#define lafs_inode_get_dblock(a,b) _lafs_inode_get_dblock(a)
 #endif
 #define strblk(a) lafs_strblk(a)
 
@@ -151,6 +151,7 @@ struct inode *lafs_new_inode(struct fs *fs, struct super_block *sb,
 int lafs_lock_inode(struct inode *ino);
 void lafs_inode_fillblock(struct inode *ino);
 struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG);
+struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG);
 int lafs_inode_handle_orphan(struct datablock *b);
 
 struct datablock *lafs_get_block(struct inode *ino, unsigned long index,
@@ -421,6 +422,8 @@ static inline struct inode *iget_my_inode(struct datablock *db)
 int lafs_is_leaf(struct block *b, int ph);
 void lafs_refile(struct block *b, int dec);
 
+extern struct indexblock *lafs_getiref_locked(struct indexblock *ib);
+
 extern spinlock_t lafs_hash_lock;
 
 static inline struct block *__getref(struct block *b)
@@ -436,55 +439,38 @@ static inline struct block *_getref(struct block *b)
        return __getref(b);
 }
 
+static inline struct datablock *_getdref_locked(struct datablock *b)
+{
+       LAFS_BUG(!spin_is_locked(&b->b.inode->i_data.private_lock), &b->b);
+       __getref(&b->b);
+       return b;
+}
+
 static inline struct block *_getref_locked(struct block *b)
 {
-       int ok = 0;
-       if (test_bit(B_Index, &b->flags)) {
-               /* InoIdx must use getiref_locked which goes straight to
-                * __getref
-                */
-               BUG_ON(test_bit(B_InoIdx, &b->flags));
-
-               if (spin_is_locked(&b->inode->i_data.private_lock))
-                       ok = 1;
-               if (spin_is_locked(&lafs_hash_lock))
-                       ok = 1;
-               // FIXME more tests - just when is this allowed?
-       } else {
-               struct datablock *db = dblk(b);
-               struct inode *ino = NULL;
-
-               if (db->page &&
-                   (PageLocked(db->page) ||
-                    ((ino = rcu_my_inode(db)) != NULL &&
-                     spin_is_locked(&ino->i_data.private_lock))
-                           ))
-                       ok = 1;
-               rcu_iput(ino); ino = NULL;
-               if ((ino = rcu_my_inode(db)) != NULL &&
-                   spin_is_locked(&ino->i_data.private_lock))
-                       ok = 1;
-               rcu_iput(ino);
-               if (spin_is_locked(&b->inode->i_data.private_lock))
-                       ok = 1;
-       }
-       if (test_bit(B_Dirty, &b->flags) || test_bit(B_Realloc, &b->flags))
-               if (spin_is_locked(&fs_from_inode(b->inode)->lock))
-                       ok = 1;
-       if(spin_is_locked(&fs_from_inode(b->inode)->lock))
-               /* Removing from leaf list */
-               ok = 1;
-       if (!ok)
-               printk("Not locked: %s\n", strblk(b));
-       BUG_ON(!ok);
+       LAFS_BUG(!spin_is_locked(&b->inode->i_data.private_lock), b);
+       if (test_bit(B_InoIdx, &b->flags))
+               return &lafs_getiref_locked(iblk(b))->b;
+       __getref(b);
+       return b;
+}
 
-       return __getref(b);
+static inline struct indexblock *_getiref_locked_needsync(struct indexblock *b)
+{
+       LAFS_BUG(!spin_is_locked(&lafs_hash_lock), &b->b);
+       if (test_bit(B_InoIdx, &b->b.flags))
+               return lafs_getiref_locked(b);
+       __getref(&b->b);
+       return b;
 }
 
-static inline int _putref_norefile(struct block *b)
+static inline struct block *_getref_locked_needsync(struct block *b)
 {
-       BUG_ON(atomic_read(&b->refcnt)==0);
-       return atomic_dec_and_test(&b->refcnt);
+       LAFS_BUG(!spin_is_locked(&fs_from_inode(b->inode)->lock), b);
+       if (test_bit(B_InoIdx, &b->flags))
+               return &lafs_getiref_locked(iblk(b))->b;
+       __getref(b);
+       return b;
 }
 
 static inline void _putref(struct block *b)
@@ -495,25 +481,70 @@ static inline void _putref(struct block *b)
        lafs_refile(b, 1);
 }
 
-struct indexblock *getiref_locked(struct indexblock *ib, REFARG);
-
 #if DEBUG_REF == 0
 #define _reflog(c,blk,ref,add) (c(blk))
 #else
 #define _reflog(c,blk,ref,add) ({ !blk ? 0 : \
                        add? add_ref(blk,ref,__FILE__,__LINE__) : \
                        del_ref(blk,ref, __FILE__,__LINE__); c(blk); })
+#define _refxlog(c,blk,ref,add) ({ \
+                       add? add_ref(&(blk)->b,ref,__FILE__,__LINE__) : \
+                       del_ref(&(blk)->b,ref, __FILE__,__LINE__); c(blk); })
 #endif
 #define getref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->refcnt));}), _reflog(_getref, blk,r,1))
 #define getref_locked(blk, r) _reflog(_getref_locked, blk,r,1)
-#define putref_norefile(blk, r) _reflog(_putref_norefile, blk,r,0)
+#define getdref_locked(blk, r) _refxlog(_getdref_locked, blk,r,1)
+#define getref_locked_needsync(blk, r) _reflog(_getref_locked_needsync, blk,r,1)
+#define getiref_locked_needsync(blk, r) _refxlog(_getiref_locked_needsync, blk,r,1)
 #define putref(blk, r) _reflog(_putref, blk,r,0)
 #define getdref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->b.refcnt));}), _reflog(_getref, (&(blk)->b),r,1),blk)
 #define getiref(blk, r) ( ({BUG_ON((blk) && ! atomic_read(&(blk)->b.refcnt));}), _reflog(_getref, (&(blk)->b),r,1),blk)
-#define getdref_locked(blk, r) (_reflog(_getref_locked, (&(blk)->b),r,1),blk)
 #define putdref(blk, r) ({ if (blk) _reflog(_putref, (&(blk)->b),r,0);})
 #define putiref(blk, r) ({ if (blk) _reflog(_putref, (&(blk)->b),r,0);})
 
+/* When we get a ref on a block other than under b->inode->i_data.private_lock, we
+ * need to call this to ensure that lafs_refile isn't still handling a refcnt->0 transition.
+ */
+static inline void sync_ref(struct block *b)
+{
+       spin_lock(&b->inode->i_data.private_lock);
+       spin_unlock(&b->inode->i_data.private_lock);
+}
+
+/*
+ * Notes on locking and block references.
+ * There are several places that can hold uncounted references to blocks.
+ * When we try to convert such a reference to a counted reference we need to be careful.
+ * We must increment the refcount under a lock that protects the particular reference, and
+ * then must call sync_ref above to ensure our new reference is safe.
+ * When the last counted reference on a block is dropped (in lafs_refile) we hold private_lock
+ * while cleaning up and sync_ref assures that cleanup is finished.
+ * Before we can free a block we must synchronise with these other locks and
+ * then ensure that the refcount is still zero.
+ * For index blocks that means taking lafs_hash_lock and ensuring the count is still zero.
+ * For most data blocks we need to remove from the lru list using fs->lock, and check the count is
+ * still zero.
+ * For inode data blocks we need to follow the inode and break the ->dblock lock under the inodes
+ * own lock - but only if the refcount is still zero.
+ *
+ * The list of non-counted references and the locks which protect them is:
+ *
+ *         index hash table                      lafs_hash_lock
+ *         index freelist                        lafs_hash_lock
+ *         phase_leafs / clean_leafs             fs->lock
+ *         inode->iblock                         lafs_hash_lock
+ *         inode->dblock                         inode->i_data.private_lock
+ *         inode->free_index                     lafs_hash_lock
+ *         page->private                         page lock
+ *
+ * Lock ordering among these is:
+ *   inode->i_data.private_lock
+ *      LAFSI(inode)->dblock->inode->i_data.private_lock
+ *         fs->lock
+ *            lafs_hash_lock
+ *
+ */
+
 int __must_check lafs_setparent(struct datablock *blk);
 
 /*
index 30678581251471e3db5b9e8071a4b66dfdc6b409..424febaba374e875177b0c9d64d12a90a3bd5e61 100644 (file)
--- a/modify.c
+++ b/modify.c
@@ -390,7 +390,9 @@ void lafs_clear_index(struct indexblock *ib)
                *(u16 *)(buf+offset) = cpu_to_le16(IBLK_INDIRECT);
        else if (ib->depth > 1)
                *(u16 *)(buf+offset) = cpu_to_le16(IBLK_INDEX);
-
+       else
+               LAFS_BUG(1, &ib->b);
+       
        if (offset) {
                /* just to be on the safe side ... */
                ((struct la_inode*)(buf))->depth = ib->depth;
index 30f18b8b474e1a416bc5dc1cc2656e0d6fe3c2d5..ac538f7bcb28aef2bfedb3326a9b42209aede497 100644 (file)
--- a/orphan.c
+++ b/orphan.c
@@ -498,6 +498,8 @@ long lafs_run_orphans(struct fs *fs)
                                struct datablock,
                                orphans);
                list_move(&db->orphans, &done);
+               getdref(db, MKREF(run_orphans));
+               spin_unlock(&fs->lock);
                ino = orphan_inode(db);
                if (!ino && !test_bit(B_Claimed, &db->b.flags))
                        /* OK not to have an inode */;
@@ -508,10 +510,10 @@ long lafs_run_orphans(struct fs *fs)
                         */
                        orphan_iput(ino);
                        timeout = msecs_to_jiffies(500);
+                       putdref(db, MKREF(run_orphans));
+                       spin_lock(&fs->lock);
                        continue;
                }
-               getdref(db, MKREF(run_orphans));
-               spin_unlock(&fs->lock);
 
                if (!ino)
                        lafs_orphan_forget(fs, db);
diff --git a/super.c b/super.c
index 064e96073207c38210b4c40b21f5a884416cf2fe..3f968f902de9c238d7e55bdcb27da0ca351713cf 100644 (file)
--- a/super.c
+++ b/super.c
@@ -1232,17 +1232,12 @@ static void kfree_inode(struct rcu_head *head)
 }
 void lafs_destroy_inode(struct inode *inode)
 {
-       struct datablock *db = NULL;
+       struct datablock *db;
 
        BUG_ON(!list_empty(&inode->i_sb_list));
        // Cannot test i_list as dispose_list just does list_del
-       if (LAFSI(inode)->dblock) {
-               spin_lock(&inode->i_data.private_lock);
-               if (LAFSI(inode)->dblock)
-                       db = getdref_locked(LAFSI(inode)->dblock,
-                                           MKREF(destroy));
-               spin_unlock(&inode->i_data.private_lock);
-       }
+       db = lafs_inode_get_dblock(inode, MKREF(destroy));
+
        if (db) {
                set_bit(I_Destroyed, &LAFSI(inode)->iflags);
                putdref(db, MKREF(destroy));
@@ -1317,20 +1312,20 @@ static int lafs_statfs(struct dentry *de, struct kstatfs *buf)
        return 0;
 }
 
+/* FIXME we hold inode_lock while calling drop_inode, so
+ * extra locking isn't really welcome....???
+ */
 static void lafs_drop_inode(struct inode *inode)
 {
        struct fs *fs = fs_from_inode(inode);
-       struct datablock *db = NULL;
+       struct datablock *db;
 
        /* This lock that we now hold on the inode could prevent
         * the cleaner from getting the inode.  So after
         * we complete the drop we might need to wake the cleaner.
         */
 
-       spin_lock(&inode->i_data.private_lock);
-       if (LAFSI(inode)->dblock)
-               db = getdref_locked(LAFSI(inode)->dblock, MKREF(drop));
-       spin_unlock(&inode->i_data.private_lock);
+       db = lafs_inode_get_dblock(inode, MKREF(drop));
 
        generic_drop_inode(inode);
        if (db && test_bit(B_Async, &db->b.flags))