}
static int
-ext2_writepages(struct address_space *mapping, int *nr_to_write)
+ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
int ret;
int err;
ret = write_mapping_buffers(mapping);
- err = mpage_writepages(mapping, nr_to_write, ext2_get_block);
+ err = mpage_writepages(mapping, wbc, ext2_get_block);
if (!ret)
ret = err;
return ret;
/* For writeback mode, we can use mpage_writepages() */
static int
-ext3_writepages(struct address_space *mapping, int *nr_to_write)
+ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
int ret;
int err;
ret = write_mapping_buffers(mapping);
- err = mpage_writepages(mapping, nr_to_write, ext3_get_block);
+ err = mpage_writepages(mapping, wbc, ext3_get_block);
if (!ret)
ret = err;
return ret;
/*
* Write a single inode's dirty pages and inode data out to disk.
* If `sync' is set, wait on the writeout.
- * If `nr_to_write' is not NULL, subtract the number of written pages
- * from *nr_to_write.
+ * Subtract the number of written pages from nr_to_write.
*
* Normally it is not legal for a single process to lock more than one
* page at a time, due to ab/ba deadlock problems. But writepages()
*
* Called under inode_lock.
*/
-static void __sync_single_inode(struct inode *inode, int wait, int *nr_to_write)
+static void
+__sync_single_inode(struct inode *inode, int wait,
+ struct writeback_control *wbc)
{
unsigned dirty;
unsigned long orig_dirtied_when;
mapping->dirtied_when = 0; /* assume it's whole-file writeback */
spin_unlock(&inode_lock);
- do_writepages(mapping, nr_to_write);
+ do_writepages(mapping, wbc);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
* Write out an inode's dirty pages. Called under inode_lock.
*/
static void
-__writeback_single_inode(struct inode *inode, int sync, int *nr_to_write)
+__writeback_single_inode(struct inode *inode, int sync,
+ struct writeback_control *wbc)
{
if (current_is_pdflush() && (inode->i_state & I_LOCK))
return;
iput(inode);
spin_lock(&inode_lock);
}
- __sync_single_inode(inode, sync, nr_to_write);
+ __sync_single_inode(inode, sync, wbc);
}
/*
 * throttled threads: we don't want them all piling up on __wait_on_inode.
*/
static void
-sync_sb_inodes(struct backing_dev_info *single_bdi, struct super_block *sb,
- int sync_mode, int *nr_to_write, unsigned long *older_than_this)
+sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
{
struct list_head *tmp;
struct list_head *head;
struct backing_dev_info *bdi;
int really_sync;
- if (single_bdi && mapping->backing_dev_info != single_bdi) {
+ if (wbc->bdi && mapping->backing_dev_info != wbc->bdi) {
if (sb != blockdev_superblock)
break; /* inappropriate superblock */
list_move(&inode->i_list, &sb->s_dirty);
if (time_after(mapping->dirtied_when, start))
break;
- if (older_than_this &&
- time_after(mapping->dirtied_when, *older_than_this))
+ if (wbc->older_than_this && time_after(mapping->dirtied_when,
+ *wbc->older_than_this))
goto out;
bdi = mapping->backing_dev_info;
if (current_is_pdflush() && !writeback_acquire(bdi))
break;
- really_sync = (sync_mode == WB_SYNC_ALL);
- if ((sync_mode == WB_SYNC_LAST) && (head->prev == head))
+ really_sync = (wbc->sync_mode == WB_SYNC_ALL);
+ if ((wbc->sync_mode == WB_SYNC_LAST) && (head->prev == head))
really_sync = 1;
BUG_ON(inode->i_state & I_FREEING);
__iget(inode);
list_move(&inode->i_list, &sb->s_dirty);
- __writeback_single_inode(inode, really_sync, nr_to_write);
- if (sync_mode == WB_SYNC_HOLD) {
+ __writeback_single_inode(inode, really_sync, wbc);
+ if (wbc->sync_mode == WB_SYNC_HOLD) {
mapping->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
}
spin_unlock(&inode_lock);
iput(inode);
spin_lock(&inode_lock);
- if (nr_to_write && *nr_to_write <= 0)
+ if (wbc->nr_to_write <= 0)
break;
}
out:
}
/*
+ * Start writeback of dirty pagecache data against all unlocked inodes.
+ *
+ * Note:
+ * We don't need to grab a reference to superblock here. If it has non-empty
+ * ->s_dirty it hasn't been killed yet and kill_super() won't proceed
+ * past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are
+ * empty. Since __sync_single_inode() regains inode_lock before it finally moves
+ * inode from superblock lists we are OK.
+ *
+ * If `older_than_this' is non-zero then only flush inodes which have a
+ * flushtime older than *older_than_this.
+ *
* If `bdi' is non-zero then we will scan the first inode against each
* superblock until we find the matching ones. One group will be the dirty
* inodes against a filesystem. Then when we hit the dummy blockdev superblock,
 * sync_sb_inodes will seek out the blockdev which matches `bdi'. Maybe not
* super-efficient but we're about to do a ton of I/O...
*/
-static void
-__writeback_unlocked_inodes(struct backing_dev_info *bdi, int *nr_to_write,
- enum writeback_sync_modes sync_mode,
- unsigned long *older_than_this)
+void
+writeback_inodes(struct writeback_control *wbc)
{
struct super_block *sb;
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) {
spin_unlock(&sb_lock);
- sync_sb_inodes(bdi, sb, sync_mode, nr_to_write,
- older_than_this);
+ sync_sb_inodes(sb, wbc);
spin_lock(&sb_lock);
}
- if (nr_to_write && *nr_to_write <= 0)
+ if (wbc->nr_to_write <= 0)
break;
}
spin_unlock(&sb_lock);
spin_unlock(&inode_lock);
}
-/*
- * Start writeback of dirty pagecache data against all unlocked inodes.
- *
- * Note:
- * We don't need to grab a reference to superblock here. If it has non-empty
- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are
- * empty. Since __sync_single_inode() regains inode_lock before it finally moves
- * inode from superblock lists we are OK.
- *
- * If `older_than_this' is non-zero then only flush inodes which have a
- * flushtime older than *older_than_this.
- *
- * This is a "memory cleansing" operation, not a "data integrity" operation.
- */
-void writeback_unlocked_inodes(int *nr_to_write,
- enum writeback_sync_modes sync_mode,
- unsigned long *older_than_this)
-{
- __writeback_unlocked_inodes(NULL, nr_to_write,
- sync_mode, older_than_this);
-}
-/*
- * Perform writeback of dirty data against a particular queue.
- *
- * This is for writer throttling. We don't want processes to write back
- * other process's data, espsecially when the other data belongs to a
- * different spindle.
- */
-void writeback_backing_dev(struct backing_dev_info *bdi, int *nr_to_write,
- enum writeback_sync_modes sync_mode,
- unsigned long *older_than_this)
-{
- __writeback_unlocked_inodes(bdi, nr_to_write,
- sync_mode, older_than_this);
-}
-
/*
* writeback and wait upon the filesystem's dirty inodes. The caller will
* do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is
void sync_inodes_sb(struct super_block *sb, int wait)
{
struct page_state ps;
- int nr_to_write;
+ struct writeback_control wbc = {
+ .bdi = NULL,
+ .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+ .older_than_this = NULL,
+ .nr_to_write = 0,
+ };
get_page_state(&ps);
- nr_to_write = ps.nr_dirty + ps.nr_dirty / 4;
-
+ wbc.nr_to_write = ps.nr_dirty + ps.nr_dirty / 4;
spin_lock(&inode_lock);
- sync_sb_inodes(NULL, sb, wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
- &nr_to_write, NULL);
+ sync_sb_inodes(sb, &wbc);
spin_unlock(&inode_lock);
}
void write_inode_now(struct inode *inode, int sync)
{
+ struct writeback_control wbc = {
+ .nr_to_write = LONG_MAX,
+ };
+
spin_lock(&inode_lock);
- __writeback_single_inode(inode, sync, NULL);
+ __writeback_single_inode(inode, sync, &wbc);
spin_unlock(&inode_lock);
if (sync)
wait_on_inode(inode);
return block_write_full_page(page, jfs_get_block);
}
-static int jfs_writepages(struct address_space *mapping, int *nr_to_write)
+static int jfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- return mpage_writepages(mapping, nr_to_write, jfs_get_block);
+ return mpage_writepages(mapping, wbc, jfs_get_block);
}
static int jfs_readpage(struct file *file, struct page *page)
* address space and writepage() all of them.
*
* @mapping: address space structure to write
- * @nr_to_write: subtract the number of written pages from *@nr_to_write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @get_block: the filesystem's block mapper function.
* If this is NULL then use a_ops->writepage. Otherwise, go
* direct-to-BIO.
*/
int
mpage_writepages(struct address_space *mapping,
- int *nr_to_write, get_block_t get_block)
+ struct writeback_control *wbc, get_block_t get_block)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
__set_page_dirty_nobuffers(page);
ret = 0;
}
- if (ret || (nr_to_write && --(*nr_to_write) <= 0))
+ if (ret || (--(wbc->nr_to_write) <= 0))
done = 1;
} else {
unlock_page(page);
*/
struct page;
struct address_space;
+struct writeback_control;
struct address_space_operations {
int (*writepage)(struct page *);
int (*sync_page)(struct page *);
/* Write back some dirty pages from this mapping. */
- int (*writepages)(struct address_space *, int *nr_to_write);
+ int (*writepages)(struct address_space *, struct writeback_control *);
/* Perform a writeback as a memory-freeing operation. */
- int (*vm_writeback)(struct page *, int *nr_to_write);
+ int (*vm_writeback)(struct page *, struct writeback_control *);
/* Set a page dirty */
int (*set_page_dirty)(struct page *page);
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
extern int generic_file_open(struct inode * inode, struct file * filp);
-extern int generic_vm_writeback(struct page *page, int *nr_to_write);
+extern int generic_vm_writeback(struct page *page,
+ struct writeback_control *wbc);
extern struct file_operations generic_ro_fops;
* nested includes. Get it right in the .c file).
*/
+struct writeback_control;
+
int mpage_readpages(struct address_space *mapping, struct list_head *pages,
unsigned nr_pages, get_block_t get_block);
int mpage_readpage(struct page *page, get_block_t get_block);
int mpage_writepages(struct address_space *mapping,
- int *nr_to_write, get_block_t get_block);
+ struct writeback_control *wbc, get_block_t get_block);
static inline int
-generic_writepages(struct address_space *mapping, int *nr_to_write)
+generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
- return mpage_writepages(mapping, nr_to_write, NULL);
+ return mpage_writepages(mapping, wbc, NULL);
}
WB_SYNC_HOLD = 3, /* Hold the inode on sb_dirty for sys_sync() */
};
-void writeback_unlocked_inodes(int *nr_to_write,
- enum writeback_sync_modes sync_mode,
- unsigned long *older_than_this);
+/*
+ * A control structure which tells the writeback code what to do
+ */
+struct writeback_control {
+ struct backing_dev_info *bdi; /* If !NULL, only write back this
+ queue */
+ enum writeback_sync_modes sync_mode;
+ unsigned long *older_than_this; /* If !NULL, only write back inodes
+ older than this */
+ long nr_to_write; /* Write this many pages, and decrement
+ this for each page written */
+};
+
+void writeback_inodes(struct writeback_control *wbc);
void wake_up_inode(struct inode *inode);
void __wait_on_inode(struct inode * inode);
void sync_inodes_sb(struct super_block *, int wait);
void sync_inodes(int wait);
-void writeback_backing_dev(struct backing_dev_info *bdi, int *nr_to_write,
- enum writeback_sync_modes sync_mode,
- unsigned long *older_than_this);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
void balance_dirty_pages(struct address_space *mapping);
void balance_dirty_pages_ratelimited(struct address_space *mapping);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
-int do_writepages(struct address_space *mapping, int *nr_to_write);
+int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
int filemap_fdatawrite(struct address_space *mapping)
{
int ret;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = mapping->nrpages * 2,
+ };
current->flags |= PF_SYNC;
- ret = do_writepages(mapping, NULL);
+ ret = do_writepages(mapping, &wbc);
current->flags &= ~PF_SYNC;
return ret;
}
* It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably
* large amounts of I/O are submitted.
*/
-static inline int sync_writeback_pages(void)
+static inline long sync_writeback_pages(void)
{
return ratelimit_pages + ratelimit_pages / 2;
}
bdi = mapping->backing_dev_info;
if (dirty_and_writeback > sync_thresh) {
- int nr_to_write = sync_writeback_pages();
-
- writeback_backing_dev(bdi, &nr_to_write, WB_SYNC_LAST, NULL);
+ struct writeback_control wbc = {
+ .bdi = bdi,
+ .sync_mode = WB_SYNC_LAST,
+ .older_than_this = NULL,
+ .nr_to_write = sync_writeback_pages(),
+ };
+
+ writeback_inodes(&wbc);
get_page_state(&ps);
} else if (dirty_and_writeback > async_thresh) {
- int nr_to_write = sync_writeback_pages();
-
- writeback_backing_dev(bdi, &nr_to_write, WB_SYNC_NONE, NULL);
+ struct writeback_control wbc = {
+ .bdi = bdi,
+ .sync_mode = WB_SYNC_NONE,
+ .older_than_this = NULL,
+ .nr_to_write = sync_writeback_pages(),
+ };
+
+ writeback_inodes(&wbc);
get_page_state(&ps);
}
{
long min_pages = _min_pages;
long background_thresh;
- int nr_to_write;
+ struct writeback_control wbc = {
+ .bdi = NULL,
+ .sync_mode = WB_SYNC_NONE,
+ .older_than_this = NULL,
+ .nr_to_write = 0,
+ };
CHECK_EMERGENCY_SYNC
do {
struct page_state ps;
-
get_page_state(&ps);
if (ps.nr_dirty < background_thresh && min_pages <= 0)
break;
- nr_to_write = MAX_WRITEBACK_PAGES;
- writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL);
- min_pages -= MAX_WRITEBACK_PAGES - nr_to_write;
- } while (nr_to_write <= 0);
+ wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+ writeback_inodes(&wbc);
+ min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ } while (wbc.nr_to_write <= 0);
blk_run_queues();
}
unsigned long start_jif;
unsigned long next_jif;
struct page_state ps;
- int nr_to_write;
+ struct writeback_control wbc = {
+ .bdi = NULL,
+ .sync_mode = WB_SYNC_NONE,
+ .older_than_this = &oldest_jif,
+ .nr_to_write = 0,
+ };
sync_supers();
get_page_state(&ps);
oldest_jif = jiffies - (dirty_expire_centisecs * HZ) / 100;
start_jif = jiffies;
next_jif = start_jif + (dirty_writeback_centisecs * HZ) / 100;
- nr_to_write = ps.nr_dirty;
- writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, &oldest_jif);
+ wbc.nr_to_write = ps.nr_dirty;
+ writeback_inodes(&wbc);
blk_run_queues();
yield();
* So. The proper fix is to leave the page locked-and-dirty and to pass
* it all the way down.
*/
-int generic_vm_writeback(struct page *page, int *nr_to_write)
+int generic_vm_writeback(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
unlock_page(page);
if (inode) {
- do_writepages(inode->i_mapping, nr_to_write);
+ do_writepages(inode->i_mapping, wbc);
/*
* This iput() will internally call ext2_discard_prealloc(),
}
EXPORT_SYMBOL(generic_vm_writeback);
-int do_writepages(struct address_space *mapping, int *nr_to_write)
+int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
if (mapping->a_ops->writepages)
- return mapping->a_ops->writepages(mapping, nr_to_write);
- return generic_writepages(mapping, nr_to_write);
+ return mapping->a_ops->writepages(mapping, wbc);
+ return generic_writepages(mapping, wbc);
}
/**
* Swap pages are !PageLocked and PageWriteback while under writeout so that
* memory allocators will throttle against them.
*/
-static int swap_vm_writeback(struct page *page, int *nr_to_write)
+static int swap_vm_writeback(struct page *page, struct writeback_control *wbc)
{
struct address_space *mapping = page->mapping;
unlock_page(page);
- return generic_writepages(mapping, nr_to_write);
+ return generic_writepages(mapping, wbc);
}
struct address_space_operations swap_aops = {
*/
if (PageDirty(page) && is_page_cache_freeable(page) &&
mapping && may_enter_fs) {
- int (*writeback)(struct page *, int *);
+ int (*writeback)(struct page *,
+ struct writeback_control *);
const int cluster_size = SWAP_CLUSTER_MAX;
- int nr_to_write = cluster_size;
+ struct writeback_control wbc = {
+ .nr_to_write = cluster_size,
+ };
writeback = mapping->a_ops->vm_writeback;
if (writeback == NULL)
writeback = generic_vm_writeback;
- (*writeback)(page, &nr_to_write);
- *max_scan -= (cluster_size - nr_to_write);
+ (*writeback)(page, &wbc);
+ *max_scan -= (cluster_size - wbc.nr_to_write);
goto keep;
}