]> git.neil.brown.name Git - LaFS.git/commitdiff
Multiple improvements to inode dirtying
authorNeilBrown <neilb@suse.de>
Thu, 3 Sep 2009 05:28:22 +0000 (15:28 +1000)
committerNeilBrown <neilb@suse.de>
Thu, 3 Sep 2009 05:28:22 +0000 (15:28 +1000)
- in a few places we didn't called dirty_inode when we should have
  done.
- after inode_{inc,dec}_link_count we don't need to dirty the inode
  because it has already been done.
- handle I_Dirty properly rather than unpinning the block
- allocate NCredits for inodes so that I_Dirty can succeed.

README
checkpoint.c
dir.c
index.c
inode.c
lafs.h
modify.c
roll.c

diff --git a/README b/README
index 97c055787af8fa99bfddc4a27ca94b2d391e630a..da4fab52cd2d0a31aef54cff91985c3b9a292698 100644 (file)
--- a/README
+++ b/README
@@ -3600,3 +3600,74 @@ FIXED orphans don't get cleaned up.  It seems a 'create' fails and leaves
  These dir blocks are currently fully populated with numbers.
  This seems to be the pattern with all non-7 blocks.
 
+
+ 02Sep2009
+  Found a problem, possibly related to the dir blocks not being
+  cleaned up.
+  When lafs_incorporate sets ->depth to 1 it doesn't dirty the inode,
+  so that fact is never copied in to the datablock.
+  On further exploration, the I_Dirty bit is set but never used, which
+  isn't good.
+  So: exactly when do we copy inode into datablock, and what do we do
+  when dirty_inode is call (if anything).
+  We could just set I_Dirty when dirty_inode is called, checking that
+  the block is Pinned which it usually will be.
+  Then we copy inode to data just before writing data block.
+  However that defeats transactional properties.  We to copy in the
+  same transaction, and that means either straight away, or when
+  the data block's phase changes.
+  So dirty_inode either copies to the block, or sets I_Dirty.
+  When lafs_refile unpins an inode data block, it need to check
+  I_Dirty and possibly re-dirty it.
+
+  To redirty it we must steal the NCredits.  Any further dirty attempt
+  will have to allocate more.
+  The stealing is done automatically by dirty_dblock, so we just flip
+  the phase and call dirty_inode ... making sure it doesn't try to
+  prealloc too hard.
+
+  Need to review when inodes get dirtied.
+    - commit_write only sets I_Dirty !
+
+    We call lafs_dirty_inode:
+      dir_create_commit - a child of inode is PinPending
+      lafs_create - ditto
+      lafs_link - before dir_create_commit
+      lafs_unlink, lafs_rmdir - data block is pinned
+      lafs_symlink - before create_commit
+      lafs_mkdir - before create_commit, or block pinned
+      lafs_mknod - before create_commit
+      lafs_rename - (moved to) before create_commit/update_commit
+                     or data block is pinned
+      lafs_dir_handle_orphan - (assured that) child is pinned.
+      choose_free_inum - child is pinned
+      lafs_incorporate - block is pinned
+
+    So either the data block is pinned, or the index block is pinned.
+    In either case it is OK to set something to Dirty.
+
+    (the new) lafs_dirty_vfs_inode gets called by mark_dirty_inode{,_sync}
+    this is called from:
+        inode_inc_link_count
+       inode_dec_link_count
+       ..various quota ops...
+       inode_setattr
+       __set_page_dirty (Which we don't use)
+       other buffer stuff
+       other quota stuff we won't use
+       touch_atime
+       file_update_time
+       page_symlink
+
+    only the time updates are interesting.  Others we have locking
+    for.
+    file_update_time is called from generic_file_aio_write_nlock etc
+    before ->prepare_write/->commit_write.  So they can pick up the
+    change.
+    Similarly before set_page_dirty is called.
+    touch_atime is called from do_follow_link and readlink and
+    file_accessed which is called all over the place.
+
+    So what to do?
+    If block is pinned, then dirty it to ensure writeout.
+    If not, don't.  But copy data in any case.
index 80d03c04d88158eeba2340e78751f48593a89fe4..264f6dc4d84c3144bb7104f2ae56b3bcde49131e 100644 (file)
@@ -325,7 +325,7 @@ struct block *lafs_get_flushable(struct fs *fs, int phase)
             (phase == -1 && test_bit(B_Realloc, &lai->iblock->b.flags)))
                ) {
                /* Don't really want this to be pinned yet. When the
-                * InoIdx block gets proceed we will get pinned again.
+                * InoIdx block gets processed we will get pinned again.
                 */
                list_del_init(&b->lru);
                clear_bit(B_Pinned, &b->flags);
diff --git a/dir.c b/dir.c
index 3dc2a8148c16a5ca6e45a9c2eb1a771d42077873..cfd23a7a9c1c508ff831ee7fd0b49d4f56082704 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -314,9 +314,11 @@ dir_create_commit(struct dirop_handle *doh,
                kfree(doh->temp);
                doh->temp = NULL;
                lafs_dirty_dblock(doh->new);
-               if (((doh->new->b.fileaddr+1) << dir->i_blkbits) > dir->i_size)
+               if (((doh->new->b.fileaddr+1) << dir->i_blkbits) > dir->i_size) {
                        i_size_write(dir, ((doh->new->b.fileaddr+1)
                                           << dir->i_blkbits));
+                       lafs_dirty_inode(dir);
+               }
                clear_bit(B_PinPending, &doh->new->b.flags);
                putdref(doh->new, MKREF(dir_new));
        } else
@@ -663,7 +665,6 @@ lafs_link(struct dentry *from, struct inode *dir, struct dentry *to)
                goto abort_unlock;
 
        inode_inc_link_count(inode);
-       lafs_dirty_inode(inode);
        lafs_inode_checkpin(inode);
        clear_bit(B_PinPending, &inodb->b.flags);
        putdref(inodb, MKREF(inode_update));
@@ -729,7 +730,6 @@ lafs_unlink(struct inode *dir, struct dentry *de)
                       DIROP_UNLINK, NULL);
        dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
        lafs_checkpoint_unlock(fs);
-       lafs_dirty_inode(inode);
        lafs_inode_checkpin(inode);
        clear_bit(B_PinPending, &inodb->b.flags);
        putdref(inodb, MKREF(inode_update));
@@ -813,10 +813,8 @@ lafs_rmdir(struct inode *dir, struct dentry *de)
        dir_log_commit(&uh, fs, dir, &de->d_name, inode->i_ino,
                       DIROP_UNLINK, NULL);
        dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
-       lafs_dirty_inode(inode);
        clear_bit(B_PinPending, &inodb->b.flags);
        putdref(inodb, MKREF(inode_update));
-       lafs_dirty_inode(dir);
        lafs_inode_checkpin(inode);
        lafs_inode_checkpin(dir);
        lafs_checkpoint_unlock(fs);
@@ -890,7 +888,7 @@ lafs_symlink(struct inode *dir, struct dentry *de,
        lafs_dirty_dblock(b);
        clear_bit(B_PinPending, &b->b.flags);
        putdref(b, MKREF(symlink));
-       ino->i_size = l;
+       i_size_write(ino, l);
        lafs_dirty_inode(ino);
        lafs_inode_checkpin(ino);
 
@@ -953,7 +951,6 @@ lafs_mkdir(struct inode *dir, struct dentry *de, int mode)
        lai->md.file.parent = dir->i_ino;
        inode_inc_link_count(dir);
        ino->i_nlink = 2; /* From parent, and from '.' */
-       lafs_dirty_inode(dir);
        lafs_dirty_inode(ino);
        lafs_inode_checkpin(dir);
        lafs_inode_checkpin(ino);
@@ -1151,29 +1148,26 @@ lafs_rename(struct inode *old_dir, struct dentry *old_dentry,
                       0,
                       new_inode ? DIROP_REN_OLD_TARGET : DIROP_REN_NEW_TARGET,
                       &renhandle);
-       if (new_inode) {
+       if (S_ISDIR(old_inode->i_mode)) {
+               inode_dec_link_count(old_dir);
+               if (!new_inode)
+                       inode_inc_link_count(new_dir);
+       }
+       if (new_inode)
                dir_update_commit(fs, old_inode->i_ino,
                                  mode_to_dt(old_inode->i_mode),
                                  &new_doh);
-       else
+       else
                dir_create_commit(&new_doh, fs, new_dir,
                                  new_dentry->d_name.name,
                                  new_dentry->d_name.len,
                                  old_inode->i_ino,
                                  mode_to_dt(old_inode->i_mode));
-       if (S_ISDIR(old_inode->i_mode)) {
-               inode_dec_link_count(old_dir);
-               if (!new_inode)
-                       inode_inc_link_count(new_dir);
-               lafs_dirty_inode(old_dir);
-               lafs_dirty_inode(new_dir);
-       }
        LAFSI(old_inode)->md.file.parent = new_dir->i_ino;
        if (new_inode) {
                if (S_ISDIR(new_inode->i_mode))
                        inode_dec_link_count(new_inode);
                inode_dec_link_count(new_inode);
-               lafs_dirty_inode(new_inode);
                lafs_inode_checkpin(new_inode);
        }
        lafs_dirty_inode(old_inode);
@@ -1345,6 +1339,10 @@ void lafs_dir_handle_orphan(struct datablock *b)
                loff_t bnum;
                unmap_dblock(b, buf);
 
+               err = lafs_pin_dblock(b, ReleaseSpace);
+               if (err)
+                       goto abort;
+
                bnum = 1;
                err = lafs_find_next(dir, &bnum);
                if (err < 0)
@@ -1369,6 +1367,7 @@ void lafs_dir_handle_orphan(struct datablock *b)
                        lafs_dirty_inode(dir);
                }
                lafs_erase_dblock(b);
+               clear_bit(B_PinPending, &b->b.flags);
        } else {
                unmap_dblock(b, buf);
        }
diff --git a/index.c b/index.c
index 28690d16c09909ae07647de7e73d298b1628ff94..a35ce623d5819e6d8004549577c19d1b3c0d2082 100644 (file)
--- a/index.c
+++ b/index.c
@@ -468,18 +468,32 @@ void lafs_phase_flip(struct fs *fs, struct block *b)
            !test_bit(B_PinPending, &b->flags) &&
            !test_bit(B_Dirty, &b->flags) &&
            !test_bit(B_Realloc, &b->flags) &&
-           iblk(b)->uninc_table.pending_cnt == 0 &&
-           iblk(b)->uninc == NULL &&
-           iblk(b)->uninc_next == NULL &&
            atomic_read(&b->refcnt) == 1 && /* This is us */
-           atomic_read(&iblk(b)->pincnt[0]) == 0 &&
-           atomic_read(&iblk(b)->pincnt[1]) == 0 &&
+           (!test_bit(B_Index, &b->flags) ||
+            (iblk(b)->uninc_table.pending_cnt == 0 &&
+             iblk(b)->uninc == NULL &&
+             iblk(b)->uninc_next == NULL &&
+             atomic_read(&iblk(b)->pincnt[0]) == 0 &&
+             atomic_read(&iblk(b)->pincnt[1]) == 0)) &&
            (!test_bit(B_InoIdx, &b->flags) ||
             !(test_bit(B_PinPending, &LAFSI(b->inode)->dblock->b.flags)
               || test_bit(B_Dirty, &LAFSI(b->inode)->dblock->b.flags)
               || test_bit(B_Realloc, &LAFSI(b->inode)->dblock->b.flags)))
                ) {
-               if (test_and_clear_bit(B_Pinned, &b->flags)) {
+               if (!test_bit(B_Index, &b->flags) &&
+                   LAFSI(b->inode)->type == TypeInodeFile &&
+                   test_and_clear_bit(I_Dirty, &LAFSI(dblk(b)->my_inode)->iflags)) {
+                       /* Need to keep pinned in new phase and make new
+                        * changes.
+                        */
+                       atomic_inc(&b->parent->pincnt[1-oldphase]);
+                       atomic_dec(&b->parent->pincnt[oldphase]);
+                       if (oldphase)
+                               clear_bit(B_Phase1, &b->flags);
+                       else
+                               set_bit(B_Phase1, &b->flags);
+                       lafs_dirty_inode(b->inode);
+               } else if (test_and_clear_bit(B_Pinned, &b->flags)) {
                        if (!test_bit(B_Root, &b->flags)) {
                                atomic_dec(&b->parent->pincnt[oldphase]);
                                lafs_refile(&b->parent->b, 0);
diff --git a/inode.c b/inode.c
index 6a81ec1ef119a177bd1d7677efc20533168d5f3b..74e71832975339093aa4a2b2acc44af31e2f2364 100644 (file)
--- a/inode.c
+++ b/inode.c
@@ -815,15 +815,21 @@ void lafs_inode_handle_orphan(struct datablock *b)
 
 void lafs_dirty_inode(struct inode *ino)
 {
-       /* This inode must be pinned to a phase.
-        * If it is 'this' phase, copy data into block.
-        * If not, just set I_Dirty.
+       /* this is called in one of three cases:
+        * 1/ by lafs internally when dblock or iblock is pinned and
+        *    ready to be dirties
+        * 2/ by writeout before requesting a write - to update mtime
+        * 3/ by read to update atime
+        *
+        * In 1/ we are free to dirty the block.
+        * In 2/ we can just update the block as it will get dirtied soon
+        * In 3/ we just punt for now and worry about it later
         */
-       struct datablock *db;
        struct block *b;
-       struct fs *fs = fs_from_inode(ino);
+       struct datablock *db;
+       int nodirty = 0;
        int ino_phase;
-
+       struct fs *fs = fs_from_inode(ino);
        spin_lock(&ino->i_data.private_lock);
        if (LAFSI(ino)->iblock)
                b = &getiref_locked(LAFSI(ino)->iblock,
@@ -835,41 +841,23 @@ void lafs_dirty_inode(struct inode *ino)
        db = LAFSI(ino)->dblock;
        spin_unlock(&ino->i_data.private_lock);
 
-       lafs_checkpoint_lock(fs);
-       if (b == NULL || !test_bit(B_Pinned, &b->flags)) {
-               /* must be a timestamp update.  No need to
-                * journal those - they'll eventually get
-                * processed ... or lost (for atime if we are too busy).;
-                */
-               //WARN_ON(1);
-               set_bit(I_Dirty, &LAFSI(ino)->iflags);
-               lafs_checkpoint_unlock(fs);
-               if (b)
-                       putref(b, MKREF(dirty_inode));
-               return;
-       }
+       BUG_ON(b==NULL);
 
-       if (lafs_prealloc(b, ReleaseSpace)) {
-               /* Very tight on space... this is probably an atime
-                * update when we just happen to be pinned.  Any other
-                * update should prealloc before proceeding
-                */
-               set_bit(I_Dirty, &LAFSI(ino)->iflags);
-               lafs_checkpoint_unlock(fs);
-               putref(b, MKREF(dirty_inode));
-               return;
-       }
-       if (b != &db->b &&
-           test_bit(B_Pinned, &db->b.flags))
+       if (test_bit(B_Pinned, &db->b.flags))
                ino_phase = !!test_bit(B_Phase1, &db->b.flags);
-       else
+       else if (test_bit(B_Pinned, &b->flags))
                ino_phase = !!test_bit(B_Phase1, &b->flags);
+       else {
+               ino_phase = fs->phase;
+               nodirty = 1;
+       }
 
-       if (ino_phase == fs->phase)
+       if (ino_phase == fs->phase) {
                lafs_inode_fillblock(ino);
-       else
+               if (!nodirty)
+                       lafs_dirty_dblock(db);
+       } else
                set_bit(I_Dirty, &LAFSI(ino)->iflags);
-       lafs_checkpoint_unlock(fs);
        putref(b, MKREF(dirty_inode));
 }
 
@@ -1056,7 +1044,6 @@ void lafs_inode_fillblock(struct inode *ino)
        }
        }
        unmap_dblock(db, lai);
-       lafs_dirty_dblock(db);
 }
 
 /*-----------------------------------------------------------------------
@@ -1135,8 +1122,8 @@ choose_free_inum(struct fs *fs, struct inode *filesys, u32 *inump,
                        memset(buf, 0xff, filesys->i_sb->s_blocksize);
                        unmap_dblock(b, buf);
                        set_bit(B_Valid, &b->b.flags);
-                       lafs_inode_fillblock(im);
                        LAFSI(im)->md.inodemap.size = bnum+1;
+                       lafs_dirty_inode(im);
                        lafs_dirty_dblock(b);
                        clear_bit(B_PinPending, &b->b.flags);
                        lafs_checkpoint_unlock(fs);
diff --git a/lafs.h b/lafs.h
index 2e6bfb85925577746fb6152e812450ed7cb90d52..8474b3cbced02cde0ed3b049633bf27b2770aaaf 100644 (file)
--- a/lafs.h
+++ b/lafs.h
@@ -129,6 +129,7 @@ void lafs_inode_checkpin(struct inode *ino);
 void lafs_clear_inode(struct inode *ino);
 void lafs_delete_inode(struct inode *ino);
 void lafs_dirty_inode(struct inode *ino);
+void lafs_dirty_vfs_inode(struct inode *ino);
 int lafs_write_inode(struct inode *ino, int wait);
 struct inode *lafs_new_inode(struct fs *fs, struct inode *dir, int type,
                             int inum, int mode, struct datablock **inodbp);
index 712174b59d34320c9bb03427131071490d19fee9..b4a21ec2e62546d404542731530a537d0f3bb462 100644 (file)
--- a/modify.c
+++ b/modify.c
@@ -444,6 +444,10 @@ static void grow_index_tree(struct indexblock *ib, struct indexblock *new)
        ibuf = map_iblock(new);
        memcpy(ib->data, ibuf + offset, blksize - offset);
        memset(ib->data + blksize - offset, 0, offset);
+       new->depth = ib->depth + 1;
+       LAFSI(new->b.inode)->depth++;
+       ((struct la_inode*)(ibuf))->depth = LAFSI(new->b.inode)->depth;
+       
        unmap_iblock(new, ibuf);
        set_bit(B_Valid, &new->b.flags);
 
@@ -452,8 +456,6 @@ static void grow_index_tree(struct indexblock *ib, struct indexblock *new)
        list_add(&ib->b.siblings, &new->children);
        INIT_LIST_HEAD(&new->b.siblings);
 
-       new->depth = ib->depth + 1;
-       LAFSI(new->b.inode)->depth++;
        LAFS_BUG(new->depth != LAFSI(new->b.inode)->depth, &ib->b);
        lafs_clear_index(new);
 
@@ -1564,6 +1566,7 @@ void lafs_incorporate(struct fs *fs, struct indexblock *ib)
                                memset(buf, 0, blocksize - offset);
                                *(u16 *)(buf) = cpu_to_le16(IBLK_EXTENT);
                                LAFSI(ib->b.inode)->depth = 1;
+                               ((struct la_inode*)(buf-offset))->depth = 1;
                                ib->depth = 1;
                        }
                } else
@@ -1796,6 +1799,7 @@ int __must_check lafs_prealloc(struct block *blk, int why)
                                credits--;
                }
                if (test_bit(B_Index, &b->flags) ||
+                   LAFSI(b->inode)->type == TypeInodeFile ||
                    LAFSI(b->inode)->type == TypeOrphanList ||
                    LAFSI(b->inode)->type == TypeQuota ||
                    LAFSI(b->inode)->type == TypeSegmentMap) {
diff --git a/roll.c b/roll.c
index 1c51337656bac4689f0eeff6ffc0e20106a29483..4418f638c15198cd6b643a45186a2c827bbf136d 100644 (file)
--- a/roll.c
+++ b/roll.c
@@ -327,6 +327,9 @@ roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
                if (err)
                        break;
 
+               /* FIXME do I need to dirty the inode to flush
+                * this change into the datablock?
+                */
                if (li->type >= TypeBase &&
                    inode->i_size <= (bnum << inode->i_blkbits))
                        inode->i_size = ((bnum) << inode->i_blkbits) + type;