From: NeilBrown Date: Mon, 12 Jul 2010 03:26:13 +0000 (+1000) Subject: Fix up writeout and flushing. X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=f90959e6f492b66fbaf04bfe337fa079415e480c;p=LaFS.git Fix up writeout and flushing. writepage should never flush. sync_page should, if any block is dirty cluster_flush should tell the backing dev to start writing write_block and related functions don't need or use 'dev' arg. Signed-off-by: NeilBrown --- diff --git a/cluster.c b/cluster.c index 484c3ae..f2371ec 100644 --- a/cluster.c +++ b/cluster.c @@ -1301,7 +1301,7 @@ static void cluster_flush(struct fs *fs, int cnum) lafs_write_head(fs, page_address(wc->page[wc->pending_next]) + i*sb->s_blocksize, - head_addr[i], segend.dev, wc); + head_addr[i], wc); while (!list_empty(&wc->clhead)) { int credits = 0; @@ -1342,7 +1342,7 @@ static void cluster_flush(struct fs *fs, int cnum) list_del(&b->lru); list_add(&b->lru, &wc->pending_blocks[(wc->pending_next+3)%4]); spin_unlock(&fs->lock); - lafs_write_block(fs, b, segend.dev, wc); + lafs_write_block(fs, b, wc); lafs_refile(b, 0); } skip_discard(wc->slhead.next[0]); @@ -1376,6 +1376,12 @@ static void cluster_flush(struct fs *fs, int cnum) } dprintk("D %d\n", wake); + if (wc->pending_vfy_type[wc->pending_next] == VerifyNull || + wc->pending_vfy_type[(wc->pending_next+3)%4] == VerifyNext || + wc->pending_vfy_type[(wc->pending_next+2)%4] == VerifyNext2) + blk_run_backing_dev(fs->devs[segend.dev].sb->s_bdev->bd_inode_backing_dev_info, NULL); + + wc->pending_next = (wc->pending_next+1) % 4; /* now re-initialise the cluster information */ cluster_reset(fs, wc); diff --git a/file.c b/file.c index 65b1425..38a079a 100644 --- a/file.c +++ b/file.c @@ -191,7 +191,6 @@ static int lafs_writepage(struct page *page, struct writeback_control *wbc) { struct inode *ino = page->mapping->host; - struct fs *fs = fs_from_inode(ino); struct datablock *b0 = NULL; int blocks = PAGE_SIZE >> ino->i_blkbits; int i; @@ -205,7 +204,7 @@ 
lafs_writepage(struct page *page, struct writeback_control *wbc) if (i == 0) b0 = getdref(b, MKREF(writepage0)); - /* We need to check PinPending, otherwise we must be called + /* We need to check PinPending, otherwise we might be called * to flush out a page that is currently part of a transaction. * Need to be careful with inodes too. */ @@ -223,13 +222,16 @@ lafs_writepage(struct page *page, struct writeback_control *wbc) /* FIXME do I need to test if the iblock has appeared * while we waited too?? */ - if (test_bit(B_PinPending, &b->b.flags)) { + if (!test_bit(B_Dirty, &b->b.flags)) + lafs_iounlock_block(&b->b); + else if (test_bit(B_PinPending, &b->b.flags) || + (LAFSI(b->b.inode)->type == TypeInodeFile && + b->my_inode && + LAFSI(b->my_inode)->iblock)) { redirty = 1; lafs_iounlock_block(&b->b); - } else if (test_bit(B_Dirty, &b->b.flags)) + } else lafs_cluster_allocate(&b->b, 0); - else - lafs_iounlock_block(&b->b); } } putdref(b, MKREF(writepage)); @@ -245,15 +247,14 @@ lafs_writepage(struct page *page, struct writeback_control *wbc) } unlock_page(page); - if (b0) - putdref(b0, MKREF(writepage0)); + putdref(b0, MKREF(writepage0)); if (!b0 || redirty) return 0; if (wbc->for_writepages && LAFSI(ino)->depth == 0) { /* We really want the data to be safe soon, not just - * the page to be clean. - * so write the inode. + * the page to be clean. And the data is in the inode. + * So write the inode. */ struct datablock *b = lafs_inode_dblock(ino, SYNC, MKREF(writepageflush)); @@ -267,9 +268,6 @@ lafs_writepage(struct page *page, struct writeback_control *wbc) } putdref(b, MKREF(writepageflush)); } -// FIXME need to make sure a cluster_flush happens some time!!! 
if (wbc->sync_mode != WB_SYNC_NONE) - lafs_cluster_flush(fs, 0); - dprintk("WRITEPAGE flush\n"); return 0; } @@ -281,14 +279,31 @@ static void lafs_sync_page(struct page *page) struct inode *ino; struct address_space *mapping; struct fs *fs; + int bits; + int i; mapping = page->mapping; if (!mapping) return; ino = mapping->host; fs = fs_from_inode(ino); + bits = PAGE_SHIFT - ino->i_blkbits; - lafs_cluster_flush(fs, 0); + for (i = 0; i < (1<b.flags)) { + putdref(b, MKREF(sync_page)); + lafs_cluster_flush(fs, 0); + break; + } + putdref(b, MKREF(sync_page)); + } } const struct file_operations lafs_file_file_operations = { diff --git a/io.c b/io.c index a1b53f9..c962562 100644 --- a/io.c +++ b/io.c @@ -558,11 +558,12 @@ lafs_read_block_async(struct datablock *b) */ static void write_block(struct fs *fs, struct page *p, int offset, - u64 virt, int dev, struct wc *wc, int head) + u64 virt, struct wc *wc, int head) { struct bio *bio = bio_alloc(GFP_NOIO, 1); sector_t uninitialized_var(sect); int which = wc->pending_next; + int dev; virttophys(fs, virt, &dev, &sect); @@ -588,19 +589,19 @@ static void write_block(struct fs *fs, struct page *p, int offset, } void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt, - int dev, struct wc *wc) + struct wc *wc) { write_block(fs, virt_to_page(head), offset_in_page(head), - virt, dev, wc, 1); + virt, wc, 1); } -void lafs_write_block(struct fs *fs, struct block *b, int dev, struct wc *wc) +void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc) { if (test_bit(B_Index, &b->flags)) write_block(fs, virt_to_page(iblk(b)->data), offset_in_page(iblk(b)->data), - b->physaddr, dev, wc, 0); + b->physaddr, wc, 0); else write_block(fs, dblk(b)->page, dblock_offset(dblk(b)), - b->physaddr, dev, wc, 0); + b->physaddr, wc, 0); } diff --git a/lafs.h b/lafs.h index eb54ef0..690df19 100644 --- a/lafs.h +++ b/lafs.h @@ -669,8 +669,8 @@ lafs_iblock_get(struct inode *ino, faddr_t addr, int depth, paddr_t phys, REFARG /* io.c */ 
void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt, - int dev, struct wc *wc); -void lafs_write_block(struct fs *fs, struct block *b, int dev, struct wc *wc); + struct wc *wc); +void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc); /* quota.c */ int lafs_quota_allocate(struct fs *fs, struct inode *ino, int diff); diff --git a/rules.doc b/rules.doc index acc9159..746974f 100644 --- a/rules.doc +++ b/rules.doc @@ -131,3 +131,27 @@ Lock ordering: ->private_lock fs->lock phase_flip + +-------------------------- +my_inode points from datablock in inode file to the inode. +LAFSI(inode)->dblock points back. +These are not reference counted; rather, whichever object is destroyed +first breaks both links. + +If ->iblock is set, then a ref is held on ->block. So if we hold +a ref on a block with a parent, then we can access ->inode->dblock +without locking. +Otherwise we need private_lock. + +We only clear ->my_inode when the refcount on the block reaches +zero, so if we have a refcount on the dblock, and my_inode is not NULL, +we can dereference it safely. + +Used: + to find iblock from dblock - that should be locked - sometimes just testing + to find inode that might need to have inode_fillblock called + to find inode to truncate in orphan handling + +But wait: we don't destroy an inode with a dblock until dblock +refcount reaches 0. So if we hold a dblock, it is always safe to +test/deref my_inode. \ No newline at end of file