]> git.neil.brown.name Git - LaFS.git/commitdiff
rcu locking protection for ->my_inode
authorNeilBrown <neilb@suse.de>
Sun, 1 Aug 2010 03:53:23 +0000 (13:53 +1000)
committerNeilBrown <neilb@suse.de>
Mon, 9 Aug 2010 02:01:43 +0000 (12:01 +1000)
We use rcu to free inodes, and use rcu locking to protect
access to ->my_inode.

Part of this required that once I_Deleting is set it stays set,
so remove the pointless clearing of it.

Signed-off-by: NeilBrown <neilb@suse.de>
block.c
checkpoint.c
cluster.c
file.c
index.c
inode.c
lafs.h
orphan.c
segments.c
state.h
super.c

diff --git a/block.c b/block.c
index 107048d96eed6b5f8e8a553c8924650d6e28bb6e..0c771e2ea3c37a69878e7a05ec4b4443aad4fe11 100644 (file)
--- a/block.c
+++ b/block.c
@@ -441,6 +441,7 @@ lafs_pin_dblock(struct datablock *b, int alloc_type)
        int err;
        struct fs *fs = fs_from_inode(b->b.inode);
        struct block *blk;
+       struct inode *ino = NULL;
 
        /* We don't pin a datablock of an inode if there is an
         * InoIdx block. We pin the InoIdx block instead.
@@ -448,10 +449,9 @@ lafs_pin_dblock(struct datablock *b, int alloc_type)
         * only when the index block has swapped phase and the
         * data block is waiting to be written.
         */
-       if (LAFSI(b->b.inode)->type == TypeInodeFile &&
-           b->my_inode &&
-           LAFSI(b->my_inode)->iblock) {
-               struct indexblock *ib = lafs_make_iblock(b->my_inode, ADOPT, SYNC,
+       if ((ino = rcu_my_inode(b)) != NULL &&
+           LAFSI(ino)->iblock) {
+               struct indexblock *ib = lafs_make_iblock(ino, ADOPT, SYNC,
                                                         MKREF(pindb));
                if (IS_ERR(ib))
                        blk = getref(&b->b, MKREF(pindb));
@@ -459,6 +459,7 @@ lafs_pin_dblock(struct datablock *b, int alloc_type)
                        blk = &ib->b;
        } else
                blk = getref(&b->b, MKREF(pindb));
+       rcu_iput(ino);
 
        LAFS_BUG(!test_bit(B_PinPending, &b->b.flags), &b->b);
        if (LAFSI(b->b.inode)->type != TypeSegmentMap) {
@@ -539,10 +540,13 @@ lafs_erase_dblock(struct datablock *b)
                }
        }
 
-       if (LAFSI(b->b.inode)->type == TypeInodeFile &&
-           LAFSI(b->my_inode)->iblock)
-               LAFS_BUG(LAFSI(b->my_inode)->iblock->depth > 1,
-                        &b->b);
+       if (LAFSI(b->b.inode)->type == TypeInodeFile) {
+               struct inode *ino = rcu_my_inode(b);
+               if (ino && LAFSI(ino)->iblock)
+                       LAFS_BUG(LAFSI(ino)->iblock->depth > 1,
+                                &b->b);
+               rcu_iput(ino);
+       }
 
        clear_bit(B_Valid, &b->b.flags);
        lafs_unclean(b);
index ebcbed1f1955c5f87f889794733c9944d29721a4..4ab6b2c4b162f21cf9027795437fdd8c346f1829 100644 (file)
@@ -188,6 +188,7 @@ int lafs_print_tree(struct block *b, int depth)
        int j;
        struct block *b2;
        int credits = 0;
+       struct inode *ino = NULL;
 
        if (depth > 20) {
                printk("... aborting at %d\n", depth);
@@ -276,16 +277,16 @@ int lafs_print_tree(struct block *b, int depth)
                        LAFS_BUG(b->parent != ib, b);
                        credits += lafs_print_tree(b, depth+2);
                }
-       } else if (LAFSI(b->inode)->type == TypeInodeFile &&
-                  dblk(b)->my_inode &&
-                  LAFSI(dblk(b)->my_inode)->iblock &&
-                  LAFSI(dblk(b)->my_inode)->iblock->b.parent
+       } else if ((ino = rcu_my_inode(dblk(b))) != NULL &&
+                  LAFSI(ino)->iblock &&
+                  LAFSI(ino)->iblock->b.parent
                ) {
-               LAFS_BUG(LAFSI(dblk(b)->my_inode)->iblock->b.parent != b->parent,
+               LAFS_BUG(LAFSI(ino)->iblock->b.parent != b->parent,
                         b);
-               credits += lafs_print_tree(&LAFSI(dblk(b)->my_inode)->iblock->b,
+               credits += lafs_print_tree(&LAFSI(ino)->iblock->b,
                                           depth+1);
        }
+       rcu_iput(ino);
        return credits;
 }
 
index d5757320ead054c9d5a85baa603f0c424831f98f..da9643ab0a172d1a77c5ab0ca790520a50eb4d64 100644 (file)
--- a/cluster.c
+++ b/cluster.c
@@ -740,11 +740,11 @@ int lafs_cluster_allocate(struct block *b, int cnum)
                return 0;
        }
 
-       if (!test_bit(B_Index, &b->flags) &&
-           LAFSI(b->inode)->type == TypeInodeFile) {
-               struct inode *myi = dblk(b)->my_inode;
+       if (!test_bit(B_Index, &b->flags)) {
+               struct inode *myi = rcu_my_inode(dblk(b));
                if (myi && test_and_clear_bit(I_Dirty, &LAFSI(myi)->iflags))
                        lafs_inode_fillblock(myi);
+               rcu_iput(myi);
        }
 
        mutex_lock(&wc->lock);
diff --git a/file.c b/file.c
index 437be731c4117870e1ae8cfdb13fba7b49424557..60c0c58f8eea620d71e041c5b260f5469a49f26b 100644 (file)
--- a/file.c
+++ b/file.c
@@ -316,13 +316,14 @@ lafs_writepage(struct page *page, struct writeback_control *wbc)
                 * Need to be careful with inodes too.
                 */
                if (test_bit(B_Dirty, &b->b.flags)) {
+                       struct inode *myi = NULL;
                        if (test_bit(B_PinPending, &b->b.flags))
                                redirty = 1;
-                       else if (LAFSI(b->b.inode)->type == TypeInodeFile &&
-                                b->my_inode &&
-                                LAFSI(b->my_inode)->iblock)
+                       else if ((myi = rcu_my_inode(b)) != NULL &&
+                                LAFSI(myi)->iblock)
                                redirty = 1;
                        else {
+                               rcu_iput(myi); myi = NULL;
                                lafs_iolock_written(&b->b);
                                /* block might have been invalidated,
                                 * or Pinned, while we waited */
@@ -332,14 +333,16 @@ lafs_writepage(struct page *page, struct writeback_control *wbc)
                                if (!test_bit(B_Dirty, &b->b.flags))
                                        lafs_iounlock_block(&b->b);
                                else if (test_bit(B_PinPending, &b->b.flags) ||
-                                        (LAFSI(b->b.inode)->type == TypeInodeFile &&
-                                         b->my_inode &&
-                                         LAFSI(b->my_inode)->iblock)) {
+                                        ((myi = rcu_my_inode(b)) != NULL &&
+                                         LAFSI(myi)->iblock)) {
                                        redirty = 1;
                                        lafs_iounlock_block(&b->b);
-                               } else
+                               } else {
+                                       rcu_iput(myi); myi = NULL;
                                        lafs_cluster_allocate(&b->b, 0);
+                               }
                        }
+                       rcu_iput(myi);
                }
                putdref(b, MKREF(writepage));
        }
diff --git a/index.c b/index.c
index ceda44320d44205ba7814ab00db2870af1bb88ec..60583af9c6b5487300d12a9931529f50e2d7b766 100644 (file)
--- a/index.c
+++ b/index.c
@@ -713,6 +713,7 @@ int lafs_is_leaf(struct block *b, int ph)
        /* This is pinned and not iolocked.  Should it be on a leaf
         * list?
         */
+       struct inode *myi;
        struct lafs_inode *lai;
 
        if (!test_bit(B_Pinned, &b->flags) ||
@@ -727,12 +728,12 @@ int lafs_is_leaf(struct block *b, int ph)
        }
 
        /* This is a data block */
-       if (LAFSI(b->inode)->type != TypeInodeFile ||
-           dblk(b)->my_inode == NULL)
+       myi = rcu_my_inode(dblk(b));
+       if (!myi) {
                /* Non-inode data blocks are always leafs */
                return 1;
-
-       lai = LAFSI(dblk(b)->my_inode);
+       }
+       lai = LAFSI(myi);
        spin_lock(&lai->vfs_inode.i_data.private_lock);
        if (ph < 0)
                ph = !!test_bit(B_Phase1, &b->flags);
@@ -743,9 +744,11 @@ int lafs_is_leaf(struct block *b, int ph)
                 * this block isn't a leaf
                 */
                spin_unlock(&lai->vfs_inode.i_data.private_lock);
+               rcu_iput(myi);
                return 0;
        }
        spin_unlock(&lai->vfs_inode.i_data.private_lock);
+       rcu_iput(myi);
        return 1;
 }
 
@@ -753,7 +756,6 @@ void lafs_refile(struct block *b, int dec)
 {
        struct block *next = NULL, *next_parent = NULL;
        struct fs *fs = NULL;
-       struct inode *in;
        u64 physref = 0;
 
        if (!b)
@@ -767,23 +769,22 @@ void lafs_refile(struct block *b, int dec)
                struct block *cb;
                c[0] = c[1] = 0;
                list_for_each_entry(cb, &iblk(b)->children, siblings) {
+                       struct inode *myi = NULL;
                        if (test_bit(B_Pinned, &cb->flags)) {
                                int pp = !!test_bit(B_Phase1, &cb->flags);
                                c[pp]++;
                        }
                        /* InoIdx block might be pinned but is not a direct
                         * child */
-                       if (LAFSI(cb->inode)->type == TypeInodeFile &&
-                           !test_bit(B_Index, &cb->flags) &&
-                           dblk(cb)->my_inode &&
-                           LAFSI(dblk(cb)->my_inode)->iblock &&
-                           test_bit(B_Pinned, &(LAFSI(dblk(cb)->my_inode)
-                                                ->iblock->b.flags))) {
+                       if (!test_bit(B_Index, &cb->flags) &&
+                           (myi = rcu_my_inode(dblk(cb))) != NULL &&
+                           LAFSI(myi)->iblock &&
+                           test_bit(B_Pinned, &LAFSI(myi)->iblock->b.flags)) {
                                int pp = !!test_bit(B_Phase1,
-                                                   &LAFSI(dblk(cb)->my_inode)
-                                                   ->iblock->b.flags);
+                                                   &LAFSI(myi)->iblock->b.flags);
                                c[pp]++;
                        }
+                       rcu_iput(myi);
                }
                if (c[0] != atomic_read(&iblk(b)->pincnt[0]) ||
                    c[1] != atomic_read(&iblk(b)->pincnt[1])) {
@@ -799,23 +800,24 @@ void lafs_refile(struct block *b, int dec)
                struct block *cb;
                c[0] = c[1] = 0;
                list_for_each_entry(cb, &ib->children, siblings) {
+                       struct inode *myi = NULL;
                        if (test_bit(B_Pinned, &cb->flags)) {
                                int pp = !!test_bit(B_Phase1, &cb->flags);
                                c[pp]++;
                        }
                        /* InoIdx block might be pinned but is not a direct
                         * child */
-                       if (LAFSI(cb->inode)->type == TypeInodeFile &&
-                           !test_bit(B_Index, &cb->flags) &&
-                           dblk(cb)->my_inode &&
-                           LAFSI(dblk(cb)->my_inode)->iblock &&
-                           test_bit(B_Pinned, &(LAFSI(dblk(cb)->my_inode)
+                       if (!test_bit(B_Index, &cb->flags) &&
+                           (myi = rcu_my_inode(dblk(cb))) != NULL &&
+                           LAFSI(myi)->iblock &&
+                           test_bit(B_Pinned, &(LAFSI(myi)
                                                 ->iblock->b.flags))) {
                                int pp = !!test_bit(B_Phase1,
-                                                   &LAFSI(dblk(cb)->my_inode)
+                                                   &LAFSI(myi)
                                                    ->iblock->b.flags);
                                c[pp]++;
                        }
+                       rcu_iput(myi);
                }
                if (c[0] != atomic_read(&ib->pincnt[0]) ||
                    c[1] != atomic_read(&ib->pincnt[1])) {
@@ -837,6 +839,8 @@ void lafs_refile(struct block *b, int dec)
                int free_me = 0;
                int onlru = 0;
                struct inode *checkpin = NULL;
+               struct inode *myi = NULL;
+
                spin_lock(&lafs_hash_lock);
 
                if (!list_empty(&b->lru) &&
@@ -1047,18 +1051,18 @@ void lafs_refile(struct block *b, int dec)
                /* Free a delayed-release inode */
                if (atomic_read(&b->refcnt) == 0 &&
                    !test_bit(B_Index, &b->flags) &&
-                   LAFSI(b->inode)->type == TypeInodeFile &&
-                   (in = dblk(b)->my_inode) != NULL &&
+                   (myi = rcu_my_inode(dblk(b))) != NULL &&
                    (!PagePrivate(dblk(b)->page) ||
-                    test_bit(I_Destroyed, &LAFSI(in)->iflags))) {
+                    test_bit(I_Destroyed, &LAFSI(myi)->iflags))) {
                        dblk(b)->my_inode = NULL;
-                       LAFSI(in)->dblock = NULL;
-                       LAFSI(in)->iblock = NULL;
+                       LAFSI(myi)->dblock = NULL;
+                       LAFSI(myi)->iblock = NULL;
                        spin_unlock(&lafs_hash_lock);
-                       if (test_bit(I_Destroyed, &LAFSI(in)->iflags))
-                               lafs_destroy_inode(in);
+                       if (test_bit(I_Destroyed, &LAFSI(myi)->iflags))
+                               lafs_destroy_inode(myi);
                } else
                        spin_unlock(&lafs_hash_lock);
+               rcu_iput(myi);
 
                if (physref) {
                        lafs_seg_deref(fs, physref, 0);
diff --git a/inode.c b/inode.c
index fc978f1cc2f25f12477610e58c8449e743acee45..60bcd8493c2be47e7e2b1f94aa9755c2a8b5c322 100644 (file)
--- a/inode.c
+++ b/inode.c
@@ -156,7 +156,7 @@ lafs_iget(struct super_block *sb, ino_t inum, int async)
        if (err)
                goto err;
 
-       oldino = b->my_inode;
+       oldino = rcu_my_inode(b);
        if (oldino) {
                /* The inode is new, but the block thinks it has an
                 * old inode, so we must be in the process of destroying
@@ -171,10 +171,11 @@ lafs_iget(struct super_block *sb, ino_t inum, int async)
                        LAFSI(oldino)->iblock = NULL;
                }
                spin_unlock(&oldino->i_data.private_lock);
-               if (b->my_inode) {
-                       err = -ENOENT;
-                       goto err;
-               }
+       }
+       rcu_iput(oldino);
+       if (b->my_inode) {
+               err = -ENOENT;
+               goto err;
        }
 
        err = lafs_import_inode(ino, b);
@@ -397,7 +398,7 @@ lafs_import_inode(struct inode *ino, struct datablock *b)
         * other when freed
         */
        li->dblock = b;
-       b->my_inode = ino;
+       rcu_assign_pointer(b->my_inode, ino);
 
 out:
        if (err && li->type)
@@ -458,7 +459,7 @@ struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
                return db;
 
        LAFSI(ino)->dblock = db;
-       db->my_inode = ino;
+       rcu_assign_pointer(db->my_inode, ino);
        if (async)
                err = lafs_read_block_async(db);
        else
@@ -590,7 +591,7 @@ void lafs_clear_inode(struct inode *ino)
        li->type = 0;
 
        /* Now is a good time to break the linkage between
-        * inode and dblock - but node if the file is
+        * inode and dblock - but not if the file is
         * being deleted
         */
        if (!test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
@@ -723,6 +724,9 @@ static int prune_some(void *data, u32 addr, u64 paddr, int len)
 
 int lafs_inode_handle_orphan(struct datablock *b)
 {
+       /* Don't need rcu protection for my_inode run_orphan
+        * holds a reference
+        */
        struct indexblock *ib, *ib2;
        struct inode *ino = b->my_inode;
        struct fs *fs = fs_from_inode(ino);
@@ -751,7 +755,6 @@ int lafs_inode_handle_orphan(struct datablock *b)
                               LAFSI(ino)->ciblocks +
                               LAFSI(ino)->piblocks);
                        lafs_erase_dblock(b);
-                       clear_bit(I_Deleting, &LAFSI(ino)->iflags);
                        lafs_orphan_release(fs, b, NULL);
                } else if (ino->i_nlink || LAFSI(ino)->type == 0)
                        lafs_orphan_release(fs, b, NULL);
diff --git a/lafs.h b/lafs.h
index 675635bb01fa37ca8b8092325d7fdd49120db461..554211ea4c112fe5b8497268478e47d7be5b5f2d 100644 (file)
--- a/lafs.h
+++ b/lafs.h
@@ -365,6 +365,39 @@ static inline int set_phase(struct block *b, int ph)
                return 0;
 }
 
+/*
+ * db->my_inode is protected by rcu. We can 'get' it and
+ * remain rcu_protected, or 'iget' it and be protected by a
+ * refcount
+ */
+static inline struct inode *rcu_my_inode(struct datablock *db)
+{
+       struct inode *ino;
+       if (db == NULL)
+               return NULL;
+       if (LAFSI(db->b.inode)->type != TypeInodeFile)
+               return NULL;
+       rcu_read_lock();
+       ino = rcu_dereference(db->my_inode);
+       if (ino)
+               return ino;
+       rcu_read_unlock();
+       return NULL;
+}
+static inline void rcu_iput(struct inode *ino)
+{
+       if (ino)
+               rcu_read_unlock();
+}
+
+static inline struct inode *iget_my_inode(struct datablock *db)
+{
+       struct inode *ino = rcu_my_inode(db);
+       struct inode *rv = igrab(ino);
+       rcu_iput(ino);
+       return rv;
+}
+
 /*
  * blocks (data and index) are reference counted.
  * 'getref' increments the reference count, and could remove the
@@ -408,17 +441,19 @@ static inline struct block *_getref_locked(struct block *b)
                // FIXME more tests - just when is this allowed?
        } else {
                struct datablock *db = dblk(b);
+               struct inode *ino = NULL;
+
                if (db->page &&
                    (PageLocked(db->page) ||
-                    (LAFSI(db->b.inode)->type == TypeInodeFile &&
-                     db->my_inode &&
-                     spin_is_locked(&db->my_inode->i_data.private_lock))
+                    ((ino = rcu_my_inode(db)) != NULL &&
+                     spin_is_locked(&ino->i_data.private_lock))
                            ))
                        ok = 1;
-               if (LAFSI(db->b.inode)->type == TypeInodeFile &&
-                   db->my_inode &&
-                   spin_is_locked(&db->my_inode->i_data.private_lock))
+               rcu_iput(ino); ino = NULL;
+               if ((ino = rcu_my_inode(db)) != NULL &&
+                   spin_is_locked(&ino->i_data.private_lock))
                        ok = 1;
+               rcu_iput(ino);
                if (spin_is_locked(&b->inode->i_data.private_lock))
                        ok = 1;
        }
index ed009e7b8c21dcd922ffd4eedbde77e67cf4f1ba..47963c1e6fb3db6fe88172897dc5fc3b86ada3c1 100644 (file)
--- a/orphan.c
+++ b/orphan.c
@@ -375,7 +375,7 @@ void lafs_orphan_release(struct fs *fs, struct datablock *b, struct inode *ino)
 
                bino = lafs_iget_fs(fs,
                                    le32_to_cpu(last.filesys),
-                                   le32_to_cpu(last.inum));
+                                   le32_to_cpu(last.inum), ASYNC);
                if (bino)
                        bbl = lafs_get_block(bino,
                                             le32_to_cpu(last.addr),
@@ -433,12 +433,20 @@ out:
 
 static struct inode *orphan_inode(struct datablock *db)
 {
+       struct inode *ino, *rv = NULL;
        switch (LAFSI(db->b.inode)->type) {
        case TypeInodeFile:
-               if (db->my_inode &&
-                   test_bit(I_Deleting, &LAFSI(db->my_inode)->iflags))
-                       return db->my_inode;
-               return igrab(db->my_inode);
+               ino = rcu_my_inode(db);
+               if (ino &&
+                   test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
+                       /* don't need rcu while I_Deleting is set */
+                       rcu_iput(ino);
+                       return ino;
+               }
+               if (ino)
+                       rv = igrab(ino);
+               rcu_iput(ino);
+               return rv;
        case TypeDir:
                return igrab(db->b.inode);
        default:
index 0398b470e183e497054969e4a4df928eca7e32d5..e878339078400588efe704e63ede175d5c3c228b 100644 (file)
@@ -498,6 +498,7 @@ static int count_credits(struct block *b)
 {
        int credits = 0;
        struct indexblock *ib = iblk(b);
+       struct inode *ino = NULL;
 
        if (test_bit(B_Credit, &b->flags))
                credits++;
@@ -520,14 +521,14 @@ static int count_credits(struct block *b)
                        credits += count_credits(b);
                }
                credits += ib->uninc_table.credits;
-       } else if (LAFSI(b->inode)->type == TypeInodeFile &&
-                  dblk(b)->my_inode &&
-                  LAFSI(dblk(b)->my_inode)->iblock &&
-                  LAFSI(dblk(b)->my_inode)->iblock->b.parent
+       } else if ((ino = rcu_my_inode(dblk(b))) != NULL &&
+                  LAFSI(ino)->iblock &&
+                  LAFSI(ino)->iblock->b.parent
                ) {
-               LAFS_BUG(LAFSI(dblk(b)->my_inode)->iblock->b.parent != b->parent, b);
-               credits += count_credits(&LAFSI(dblk(b)->my_inode)->iblock->b);
+               LAFS_BUG(LAFSI(ino)->iblock->b.parent != b->parent, b);
+               credits += count_credits(&LAFSI(ino)->iblock->b);
        }
+       rcu_iput(ino);
        return credits;
 }
 
diff --git a/state.h b/state.h
index 66e903e83800af440e7eb2b9597c0351a47aedaa..fc74870e2f759ee46218a7c6393ad21d9ac6e361 100644 (file)
--- a/state.h
+++ b/state.h
@@ -601,6 +601,11 @@ struct lafs_inode {
                        u32     gracetime; /* multiplier for graceunits */
                        u32     graceunits; /* seconds per unit */
                } quota;
+
+               /* We free inodes using rcu, by which time any
+                * metadata is irrelevant and the type is zero
+                */
+               struct rcu_head rcu;
        } md;
 };
 
diff --git a/super.c b/super.c
index 256d05f68f426714c15e5c4725890325c9c4901a..046d76f7f923da62c915e0193c510222f6f2df7d 100644 (file)
--- a/super.c
+++ b/super.c
@@ -667,9 +667,11 @@ static int show_orphans(struct fs *fs)
        printk("Orphans:\n");
        list_for_each_entry(db, &fs->pending_orphans,
                            orphans) {
+               struct inode *ino = iget_my_inode(db);
                printk("orphan=%s\n", strblk(&db->b));
-               if (LAFSI(db->b.inode)->type == TypeInodeFile)
-                       lafs_print_tree(&LAFSI(db->my_inode)->iblock->b, 0);
+               if (ino)
+                       lafs_print_tree(&LAFSI(ino)->iblock->b, 0);
+               iput(ino);
        }
        printk("cleaner active: %d %d\n", fs->cleaner.active,
               fs->scan.done);
@@ -1167,6 +1169,12 @@ static struct inode *lafs_alloc_inode(struct super_block *sb)
        return &li->vfs_inode;
 }
 
+static void kfree_inode(struct rcu_head *head)
+{
+       struct lafs_inode *lai = container_of(head, struct lafs_inode,
+                                             md.rcu);
+       kfree(lai);
+}
 void lafs_destroy_inode(struct inode *inode)
 {
        struct datablock *db = NULL;
@@ -1185,7 +1193,8 @@ void lafs_destroy_inode(struct inode *inode)
                putdref(db, MKREF(destroy));
        } else {
                lafs_release_index(&LAFSI(inode)->free_index);
-               kfree(LAFSI(inode));
+               call_rcu(&LAFSI(inode)->md.rcu,
+                        kfree_inode);
        }
 }