From: NeilBrown Date: Sun, 1 Aug 2010 03:53:23 +0000 (+1000) Subject: rcu locking protection for ->my_inode X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=7d8a42806c511a0fbb0627c8e9449016886213e7;p=LaFS.git rcu locking protection for ->my_inode We use rcu to free inodes, and use rcu locking to protect access to ->my_inode. Part of this required that once I_Deleting is set it stays set, so remove the pointless clearing of it. Signed-off-by: NeilBrown --- diff --git a/block.c b/block.c index 107048d..0c771e2 100644 --- a/block.c +++ b/block.c @@ -441,6 +441,7 @@ lafs_pin_dblock(struct datablock *b, int alloc_type) int err; struct fs *fs = fs_from_inode(b->b.inode); struct block *blk; + struct inode *ino = NULL; /* We don't pin a datablock of an inode if there is an * InoIdx block. We pin the InoIdx block instead. @@ -448,10 +449,9 @@ lafs_pin_dblock(struct datablock *b, int alloc_type) * only when the index block has swapped phase and the * data block is waiting to be written. */ - if (LAFSI(b->b.inode)->type == TypeInodeFile && - b->my_inode && - LAFSI(b->my_inode)->iblock) { - struct indexblock *ib = lafs_make_iblock(b->my_inode, ADOPT, SYNC, + if ((ino = rcu_my_inode(b)) != NULL && + LAFSI(ino)->iblock) { + struct indexblock *ib = lafs_make_iblock(ino, ADOPT, SYNC, MKREF(pindb)); if (IS_ERR(ib)) blk = getref(&b->b, MKREF(pindb)); @@ -459,6 +459,7 @@ lafs_pin_dblock(struct datablock *b, int alloc_type) blk = &ib->b; } else blk = getref(&b->b, MKREF(pindb)); + rcu_iput(ino); LAFS_BUG(!test_bit(B_PinPending, &b->b.flags), &b->b); if (LAFSI(b->b.inode)->type != TypeSegmentMap) { @@ -539,10 +540,13 @@ lafs_erase_dblock(struct datablock *b) } } - if (LAFSI(b->b.inode)->type == TypeInodeFile && - LAFSI(b->my_inode)->iblock) - LAFS_BUG(LAFSI(b->my_inode)->iblock->depth > 1, - &b->b); + if (LAFSI(b->b.inode)->type == TypeInodeFile) { + struct inode *ino = rcu_my_inode(b); + if (ino && LAFSI(ino)->iblock) + LAFS_BUG(LAFSI(ino)->iblock->depth > 1, + &b->b); + rcu_iput(ino); + } clear_bit(B_Valid, &b->b.flags); lafs_unclean(b); diff --git a/checkpoint.c b/checkpoint.c index ebcbed1..4ab6b2c 100644 --- a/checkpoint.c +++ b/checkpoint.c @@ -188,6 +188,7 @@ int lafs_print_tree(struct block *b, int depth) int j; struct block *b2; int credits = 0; + struct inode *ino = NULL; if (depth > 20) { printk("... aborting at %d\n", depth); @@ -276,16 +277,16 @@ int lafs_print_tree(struct block *b, int depth) LAFS_BUG(b->parent != ib, b); credits += lafs_print_tree(b, depth+2); } - } else if (LAFSI(b->inode)->type == TypeInodeFile && - dblk(b)->my_inode && - LAFSI(dblk(b)->my_inode)->iblock && - LAFSI(dblk(b)->my_inode)->iblock->b.parent + } else if ((ino = rcu_my_inode(dblk(b))) != NULL && + LAFSI(ino)->iblock && + LAFSI(ino)->iblock->b.parent ) { - LAFS_BUG(LAFSI(dblk(b)->my_inode)->iblock->b.parent != b->parent, + LAFS_BUG(LAFSI(ino)->iblock->b.parent != b->parent, b); - credits += lafs_print_tree(&LAFSI(dblk(b)->my_inode)->iblock->b, + credits += lafs_print_tree(&LAFSI(ino)->iblock->b, depth+1); } + rcu_iput(ino); return credits; } diff --git a/cluster.c b/cluster.c index d575732..da9643a 100644 --- a/cluster.c +++ b/cluster.c @@ -740,11 +740,11 @@ int lafs_cluster_allocate(struct block *b, int cnum) return 0; } - if (!test_bit(B_Index, &b->flags) && - LAFSI(b->inode)->type == TypeInodeFile) { - struct inode *myi = dblk(b)->my_inode; + if (!test_bit(B_Index, &b->flags)) { + struct inode *myi = rcu_my_inode(dblk(b)); if (myi && test_and_clear_bit(I_Dirty, &LAFSI(myi)->iflags)) lafs_inode_fillblock(myi); + rcu_iput(myi); } mutex_lock(&wc->lock); diff --git a/file.c b/file.c index 437be73..60c0c58 100644 --- a/file.c +++ b/file.c @@ -316,13 +316,14 @@ lafs_writepage(struct page *page, struct writeback_control *wbc) * Need to be careful with inodes too. */ if (test_bit(B_Dirty, &b->b.flags)) { + struct inode *myi = NULL; if (test_bit(B_PinPending, &b->b.flags)) redirty = 1; - else if (LAFSI(b->b.inode)->type == TypeInodeFile && - b->my_inode && - LAFSI(b->my_inode)->iblock) + else if ((myi = rcu_my_inode(b)) != NULL && + LAFSI(myi)->iblock) redirty = 1; else { + rcu_iput(myi); myi = NULL; lafs_iolock_written(&b->b); /* block might have been invalidated, * or Pinned, while we waited */ @@ -332,14 +333,16 @@ lafs_writepage(struct page *page, struct writeback_control *wbc) if (!test_bit(B_Dirty, &b->b.flags)) lafs_iounlock_block(&b->b); else if (test_bit(B_PinPending, &b->b.flags) || - (LAFSI(b->b.inode)->type == TypeInodeFile && - b->my_inode && - LAFSI(b->my_inode)->iblock)) { + ((myi = rcu_my_inode(b)) != NULL && + LAFSI(myi)->iblock)) { redirty = 1; lafs_iounlock_block(&b->b); - } else + } else { + rcu_iput(myi); myi = NULL; lafs_cluster_allocate(&b->b, 0); + } } + rcu_iput(myi); } putdref(b, MKREF(writepage)); } diff --git a/index.c b/index.c index ceda443..60583af 100644 --- a/index.c +++ b/index.c @@ -713,6 +713,7 @@ int lafs_is_leaf(struct block *b, int ph) /* This is pinned and not iolocked. Should it be on a leaf * list? */ + struct inode *myi; struct lafs_inode *lai; if (!test_bit(B_Pinned, &b->flags) || @@ -727,12 +728,12 @@ int lafs_is_leaf(struct block *b, int ph) } /* This is a data block */ - if (LAFSI(b->inode)->type != TypeInodeFile || - dblk(b)->my_inode == NULL) + myi = rcu_my_inode(dblk(b)); + if (!myi) { /* Non-inode data blocks are always leafs */ return 1; - - lai = LAFSI(dblk(b)->my_inode); + } + lai = LAFSI(myi); spin_lock(&lai->vfs_inode.i_data.private_lock); if (ph < 0) ph = !!test_bit(B_Phase1, &b->flags); @@ -743,9 +744,11 @@ int lafs_is_leaf(struct block *b, int ph) * this block isn't a leaf */ spin_unlock(&lai->vfs_inode.i_data.private_lock); + rcu_iput(myi); return 0; } spin_unlock(&lai->vfs_inode.i_data.private_lock); + rcu_iput(myi); return 1; } @@ -753,7 +756,6 @@ void lafs_refile(struct block *b, int dec) { struct block *next = NULL, *next_parent = NULL; struct fs *fs = NULL; - struct inode *in; u64 physref = 0; if (!b) @@ -767,23 +769,22 @@ void lafs_refile(struct block *b, int dec) struct block *cb; c[0] = c[1] = 0; list_for_each_entry(cb, &iblk(b)->children, siblings) { + struct inode *myi = NULL; if (test_bit(B_Pinned, &cb->flags)) { int pp = !!test_bit(B_Phase1, &cb->flags); c[pp]++; } /* InoIdx block might be pinned but is not a direct * child */ - if (LAFSI(cb->inode)->type == TypeInodeFile && - !test_bit(B_Index, &cb->flags) && - dblk(cb)->my_inode && - LAFSI(dblk(cb)->my_inode)->iblock && - test_bit(B_Pinned, &(LAFSI(dblk(cb)->my_inode) - ->iblock->b.flags))) { + if (!test_bit(B_Index, &cb->flags) && + (myi = rcu_my_inode(dblk(cb))) != NULL && + LAFSI(myi)->iblock && + test_bit(B_Pinned, &LAFSI(myi)->iblock->b.flags)) { int pp = !!test_bit(B_Phase1, - &LAFSI(dblk(cb)->my_inode) - ->iblock->b.flags); + &LAFSI(myi)->iblock->b.flags); c[pp]++; } + rcu_iput(myi); } if (c[0] != atomic_read(&iblk(b)->pincnt[0]) || c[1] != atomic_read(&iblk(b)->pincnt[1])) { @@ -799,23 +800,24 @@ void lafs_refile(struct block *b, int dec) struct block *cb; c[0] = c[1] = 0; list_for_each_entry(cb, &ib->children, siblings) { + struct inode *myi = NULL; if (test_bit(B_Pinned, &cb->flags)) { int pp = !!test_bit(B_Phase1, &cb->flags); c[pp]++; } /* InoIdx block might be pinned but is not a direct * child */ - if (LAFSI(cb->inode)->type == TypeInodeFile && - !test_bit(B_Index, &cb->flags) && - dblk(cb)->my_inode && - LAFSI(dblk(cb)->my_inode)->iblock && - test_bit(B_Pinned, &(LAFSI(dblk(cb)->my_inode) + if (!test_bit(B_Index, &cb->flags) && + (myi = rcu_my_inode(dblk(cb))) != NULL && + LAFSI(myi)->iblock && + test_bit(B_Pinned, &(LAFSI(myi) ->iblock->b.flags))) { int pp = !!test_bit(B_Phase1, - &LAFSI(dblk(cb)->my_inode) + &LAFSI(myi) ->iblock->b.flags); c[pp]++; } + rcu_iput(myi); } if (c[0] != atomic_read(&ib->pincnt[0]) || c[1] != atomic_read(&ib->pincnt[1])) { @@ -837,6 +839,8 @@ void lafs_refile(struct block *b, int dec) int free_me = 0; int onlru = 0; struct inode *checkpin = NULL; + struct inode *myi = NULL; + spin_lock(&lafs_hash_lock); if (!list_empty(&b->lru) && @@ -1047,18 +1051,18 @@ void lafs_refile(struct block *b, int dec) /* Free a delayed-release inode */ if (atomic_read(&b->refcnt) == 0 && !test_bit(B_Index, &b->flags) && - LAFSI(b->inode)->type == TypeInodeFile && - (in = dblk(b)->my_inode) != NULL && + (myi = rcu_my_inode(dblk(b))) != NULL && (!PagePrivate(dblk(b)->page) || - test_bit(I_Destroyed, &LAFSI(in)->iflags))) { + test_bit(I_Destroyed, &LAFSI(myi)->iflags))) { dblk(b)->my_inode = NULL; - LAFSI(in)->dblock = NULL; - LAFSI(in)->iblock = NULL; + LAFSI(myi)->dblock = NULL; + LAFSI(myi)->iblock = NULL; spin_unlock(&lafs_hash_lock); - if (test_bit(I_Destroyed, &LAFSI(in)->iflags)) - lafs_destroy_inode(in); + if (test_bit(I_Destroyed, &LAFSI(myi)->iflags)) + lafs_destroy_inode(myi); } else spin_unlock(&lafs_hash_lock); + rcu_iput(myi); if (physref) { lafs_seg_deref(fs, physref, 0); diff --git a/inode.c b/inode.c index fc978f1..60bcd84 100644 --- a/inode.c +++ b/inode.c @@ -156,7 +156,7 @@ lafs_iget(struct super_block *sb, ino_t inum, int async) if (err) goto err; - oldino = b->my_inode; + oldino = rcu_my_inode(b); if (oldino) { /* The inode is new, but the block thinks it has an * old inode, so we must be in the process of destroying @@ -171,10 +171,11 @@ lafs_iget(struct super_block *sb, ino_t inum, int async) LAFSI(oldino)->iblock = NULL; } spin_unlock(&oldino->i_data.private_lock); - if (b->my_inode) { - err = -ENOENT; - goto err; - } + } + rcu_iput(oldino); + if (b->my_inode) { + err = -ENOENT; + goto err; } err = lafs_import_inode(ino, b); @@ -397,7 +398,7 @@ lafs_import_inode(struct inode *ino, struct datablock *b) * other when freed */ li->dblock = b; - b->my_inode = ino; + rcu_assign_pointer(b->my_inode, ino); out: if (err && li->type) @@ -458,7 +459,7 @@ struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG) return db; LAFSI(ino)->dblock = db; - db->my_inode = ino; + rcu_assign_pointer(db->my_inode, ino); if (async) err = lafs_read_block_async(db); else @@ -590,7 +591,7 @@ void lafs_clear_inode(struct inode *ino) li->type = 0; /* Now is a good time to break the linkage between - * inode and dblock - but node if the file is + * inode and dblock - but not if the file is * being deleted */ if (!test_bit(I_Deleting, &LAFSI(ino)->iflags)) { @@ -723,6 +724,9 @@ static int prune_some(void *data, u32 addr, u64 paddr, int len) int lafs_inode_handle_orphan(struct datablock *b) { + /* Don't need rcu protection for my_inode run_orphan + * holds a reference + */ struct indexblock *ib, *ib2; struct inode *ino = b->my_inode; struct fs *fs = fs_from_inode(ino); @@ -751,7 +755,6 @@ int lafs_inode_handle_orphan(struct datablock *b) LAFSI(ino)->ciblocks + LAFSI(ino)->piblocks); lafs_erase_dblock(b); - clear_bit(I_Deleting, &LAFSI(ino)->iflags); lafs_orphan_release(fs, b, NULL); } else if (ino->i_nlink || LAFSI(ino)->type == 0) lafs_orphan_release(fs, b, NULL); diff --git a/lafs.h b/lafs.h index 675635b..554211e 100644 --- a/lafs.h +++ b/lafs.h @@ -365,6 +365,39 @@ static inline int set_phase(struct block *b, int ph) return 0; } +/* + * db->my_inode is protected by rcu. We can 'get' it and + * remain rcu_protected, or 'iget' it and be protected by a + * refcount + */ +static inline struct inode *rcu_my_inode(struct datablock *db) +{ + struct inode *ino; + if (db == NULL) + return NULL; + if (LAFSI(db->b.inode)->type != TypeInodeFile) + return NULL; + rcu_read_lock(); + ino = rcu_dereference(db->my_inode); + if (ino) + return ino; + rcu_read_unlock(); + return NULL; +} +static inline void rcu_iput(struct inode *ino) +{ + if (ino) + rcu_read_unlock(); +} + +static inline struct inode *iget_my_inode(struct datablock *db) +{ + struct inode *ino = rcu_my_inode(db); + struct inode *rv = igrab(ino); + rcu_iput(ino); + return rv; +} + /* * blocks (data and index) are reference counted. * 'getref' increments the reference count, and could remove the @@ -408,17 +441,19 @@ static inline struct block *_getref_locked(struct block *b) // FIXME more tests - just when is this allowed? } else { struct datablock *db = dblk(b); + struct inode *ino = NULL; + if (db->page && (PageLocked(db->page) || - (LAFSI(db->b.inode)->type == TypeInodeFile && - db->my_inode && - spin_is_locked(&db->my_inode->i_data.private_lock)) + ((ino = rcu_my_inode(db)) != NULL && + spin_is_locked(&ino->i_data.private_lock)) )) ok = 1; - if (LAFSI(db->b.inode)->type == TypeInodeFile && - db->my_inode && - spin_is_locked(&db->my_inode->i_data.private_lock)) + rcu_iput(ino); ino = NULL; + if ((ino = rcu_my_inode(db)) != NULL && + spin_is_locked(&ino->i_data.private_lock)) ok = 1; + rcu_iput(ino); if (spin_is_locked(&b->inode->i_data.private_lock)) ok = 1; } diff --git a/orphan.c b/orphan.c index ed009e7..47963c1 100644 --- a/orphan.c +++ b/orphan.c @@ -375,7 +375,7 @@ void lafs_orphan_release(struct fs *fs, struct datablock *b, struct inode *ino) bino = lafs_iget_fs(fs, le32_to_cpu(last.filesys), - le32_to_cpu(last.inum)); + le32_to_cpu(last.inum), ASYNC); if (bino) bbl = lafs_get_block(bino, le32_to_cpu(last.addr), @@ -433,12 +433,20 @@ out: static struct inode *orphan_inode(struct datablock *db) { + struct inode *ino, *rv = NULL; switch (LAFSI(db->b.inode)->type) { case TypeInodeFile: - if (db->my_inode && - test_bit(I_Deleting, &LAFSI(db->my_inode)->iflags)) - return db->my_inode; - return igrab(db->my_inode); + ino = rcu_my_inode(db); + if (ino && + test_bit(I_Deleting, &LAFSI(ino)->iflags)) { + /* don't need rcu while I_Deleting is set */ + rcu_iput(ino); + return ino; + } + if (ino) + rv = igrab(ino); + rcu_iput(ino); + return rv; case TypeDir: return igrab(db->b.inode); default: diff --git a/segments.c b/segments.c index 0398b47..e878339 100644 --- a/segments.c +++ b/segments.c @@ -498,6 +498,7 @@ static int count_credits(struct block *b) { int credits = 0; struct indexblock *ib = iblk(b); + struct inode *ino = NULL; if (test_bit(B_Credit, &b->flags)) credits++; @@ -520,14 +521,14 @@ static int count_credits(struct block *b) credits += count_credits(b); } credits += ib->uninc_table.credits; - } else if (LAFSI(b->inode)->type == TypeInodeFile && - dblk(b)->my_inode && - LAFSI(dblk(b)->my_inode)->iblock && - LAFSI(dblk(b)->my_inode)->iblock->b.parent + } else if ((ino = rcu_my_inode(dblk(b))) != NULL && + LAFSI(ino)->iblock && + LAFSI(ino)->iblock->b.parent ) { - LAFS_BUG(LAFSI(dblk(b)->my_inode)->iblock->b.parent != b->parent, b); - credits += count_credits(&LAFSI(dblk(b)->my_inode)->iblock->b); + LAFS_BUG(LAFSI(ino)->iblock->b.parent != b->parent, b); + credits += count_credits(&LAFSI(ino)->iblock->b); } + rcu_iput(ino); return credits; } diff --git a/state.h b/state.h index 66e903e..fc74870 100644 --- a/state.h +++ b/state.h @@ -601,6 +601,11 @@ struct lafs_inode { u32 gracetime; /* multiplier for graceunits */ u32 graceunits; /* seconds per unit */ } quota; + + /* We free inodes using rcu, by which time any + * metadata is irrelevant and the type is zero + */ + struct rcu_head rcu; } md; }; diff --git a/super.c b/super.c index 256d05f..046d76f 100644 --- a/super.c +++ b/super.c @@ -667,9 +667,11 @@ static int show_orphans(struct fs *fs) printk("Orphans:\n"); list_for_each_entry(db, &fs->pending_orphans, orphans) { + struct inode *ino = iget_my_inode(db); printk("orphan=%s\n", strblk(&db->b)); - if (LAFSI(db->b.inode)->type == TypeInodeFile) - lafs_print_tree(&LAFSI(db->my_inode)->iblock->b, 0); + if (ino) + lafs_print_tree(&LAFSI(ino)->iblock->b, 0); + iput(ino); } printk("cleaner active: %d %d\n", fs->cleaner.active, fs->scan.done); @@ -1167,6 +1169,12 @@ static struct inode *lafs_alloc_inode(struct super_block *sb) return &li->vfs_inode; } +static void kfree_inode(struct rcu_head *head) +{ + struct lafs_inode *lai = container_of(head, struct lafs_inode, + md.rcu); + kfree(lai); +} void lafs_destroy_inode(struct inode *inode) { struct datablock *db = NULL; @@ -1185,7 +1193,8 @@ void lafs_destroy_inode(struct inode *inode) putdref(db, MKREF(destroy)); } else { lafs_release_index(&LAFSI(inode)->free_index); - kfree(LAFSI(inode)); + call_rcu(&LAFSI(inode)->md.rcu, + kfree_inode); } }