From 9ae30597b1a4b2ef9b79ccf891d1a62814090962 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 2 May 2002 02:04:12 -0700 Subject: [PATCH] [PATCH] VM dirty page balancing - The balance_dirty_pages() logic is simply wrong. It goes: if (value > threshold) go_and_write(value - threshold); which is just fine for a single process writing data. But for many processes, they *all* go and bring things back into balance, and too much data gets written out. - The go_and_write(this much) logic is inoperative, because I turned off the ->writeback_mapping() function in ext2. So a call to writeback_unlocked_inodes(this_much) doesn't actually decrement and test *this_much. It will walk every inode, all the time. Silly. So quickly fixing the above things, the amount of dirty+writeback memory in the machine nicely stabilises at 500 megabytes across the run. --- fs/ext2/inode.c | 2 + fs/proc/proc_misc.c | 4 +- include/linux/page-flags.h | 85 ++++++++++++++++++++------------------ mm/filemap.c | 2 +- mm/page-writeback.c | 28 +++++++------ mm/page_alloc.c | 4 +- 6 files changed, 66 insertions(+), 59 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index a5553473b7a0..7200da15a9bf 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -591,6 +591,8 @@ struct address_space_operations ext2_aops = { commit_write: generic_commit_write, bmap: ext2_bmap, direct_IO: ext2_direct_IO, + writeback_mapping: generic_writeback_mapping, + vm_writeback: generic_vm_writeback, }; /* diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 27c283f0e27e..4127b50c5e5d 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -159,7 +159,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n" "Dirty: %8lu kB\n" - "Locked: %8lu kB\n", + "Writeback: %8lu kB\n", K(i.totalram), K(i.freeram), K(i.sharedram), @@ -175,7 +175,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(i.totalswap), K(i.freeswap), K(ps.nr_dirty), - K(ps.nr_locked) + K(ps.nr_writeback) ); return proc_calc_metrics(page, start, off, count, eof, len); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a5431a8bf42c..943e87c09c54 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -13,7 +13,7 @@ * * The PG_private bitflag is set if page->private contains a valid value. * - * During disk I/O, PG_locked_dontuse is used. This bit is set before I/O and + * During disk I/O, PG_locked is used. This bit is set before I/O and * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks * waiting for the I/O on this page to complete. * @@ -28,7 +28,7 @@ * * Note that the referenced bit, the page->lru list_head and the active, * inactive_dirty and inactive_clean lists are protected by the - * pagemap_lru_lock, and *NOT* by the usual PG_locked_dontuse bit! + * pagemap_lru_lock, and *NOT* by the usual PG_locked bit! * * PG_error is set to indicate that an I/O error occurred on this page. * @@ -47,7 +47,7 @@ * locked- and dirty-page accounting. The top eight bits of page->flags are * used for page->zone, so putting flag bits there doesn't work. */ -#define PG_locked_dontuse 0 /* Page is locked. Don't touch. */ +#define PG_locked 0 /* Page is locked. Don't touch. */ #define PG_error 1 #define PG_referenced 2 #define PG_uptodate 3 @@ -71,7 +71,7 @@ */ extern struct page_state { unsigned long nr_dirty; - unsigned long nr_locked; + unsigned long nr_writeback; unsigned long nr_pagecache; } ____cacheline_aligned_in_smp page_states[NR_CPUS]; @@ -91,37 +91,16 @@ extern void get_page_state(struct page_state *ret); /* * Manipulation of page state flags */ -#define PageLocked(page) test_bit(PG_locked_dontuse, &(page)->flags) -#define SetPageLocked(page) \ - do { \ - if (!test_and_set_bit(PG_locked_dontuse, \ - &(page)->flags)) \ - inc_page_state(nr_locked); \ - } while (0) -#define TestSetPageLocked(page) \ - ({ \ - int ret; \ - ret = test_and_set_bit(PG_locked_dontuse, \ - &(page)->flags); \ - if (!ret) \ - inc_page_state(nr_locked); \ - ret; \ - }) -#define ClearPageLocked(page) \ - do { \ - if (test_and_clear_bit(PG_locked_dontuse, \ - &(page)->flags)) \ - dec_page_state(nr_locked); \ - } while (0) -#define TestClearPageLocked(page) \ - ({ \ - int ret; \ - ret = test_and_clear_bit(PG_locked_dontuse, \ - &(page)->flags); \ - if (ret) \ - dec_page_state(nr_locked); \ - ret; \ - }) +#define PageLocked(page) \ + test_bit(PG_locked, &(page)->flags) +#define SetPageLocked(page) \ + set_bit(PG_locked, &(page)->flags) +#define TestSetPageLocked(page) \ + test_and_set_bit(PG_locked, &(page)->flags) +#define ClearPageLocked(page) \ + clear_bit(PG_locked, &(page)->flags) +#define TestClearPageLocked(page) \ + test_and_clear_bit(PG_locked, &(page)->flags) #define PageError(page) test_bit(PG_error, &(page)->flags) #define SetPageError(page) set_bit(PG_error, &(page)->flags) @@ -201,12 +180,36 @@ extern void get_page_state(struct page_state *ret); #define PagePrivate(page) test_bit(PG_private, &(page)->flags) #define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) -#define SetPageWriteback(page) set_bit(PG_writeback, &(page)->flags) -#define ClearPageWriteback(page) clear_bit(PG_writeback, &(page)->flags) -#define TestSetPageWriteback(page) \ - test_and_set_bit(PG_writeback, &(page)->flags) -#define TestClearPageWriteback(page) \ - test_and_clear_bit(PG_writeback, &(page)->flags) +#define SetPageWriteback(page) \ + do { \ + if (!test_and_set_bit(PG_writeback, \ + &(page)->flags)) \ + inc_page_state(nr_writeback); \ + } while (0) +#define TestSetPageWriteback(page) \ + ({ \ + int ret; \ + ret = test_and_set_bit(PG_writeback, \ + &(page)->flags); \ + if (!ret) \ + inc_page_state(nr_writeback); \ + ret; \ + }) +#define ClearPageWriteback(page) \ + do { \ + if (test_and_clear_bit(PG_writeback, \ + &(page)->flags)) \ + dec_page_state(nr_writeback); \ + } while (0) +#define TestClearPageWriteback(page) \ + ({ \ + int ret; \ + ret = test_and_clear_bit(PG_writeback, \ + &(page)->flags); \ + if (ret) \ + dec_page_state(nr_writeback); \ + ret; \ + }) /* * The PageSwapCache predicate doesn't use a PG_flag at this time, diff --git a/mm/filemap.c b/mm/filemap.c index c4689a9da3b3..67162e75ced7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -628,7 +628,7 @@ static void wait_on_page_bit(struct page *page, int bit_nr) */ void ___wait_on_page_locked(struct page *page) { - wait_on_page_bit(page, PG_locked_dontuse); + wait_on_page_bit(page, PG_locked); } EXPORT_SYMBOL(___wait_on_page_locked); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9f0a544d699e..802af920b9ac 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -29,12 +29,12 @@ /* * Start background writeback (via pdflush) at this level */ -static int dirty_background_ratio = 30; +static int dirty_background_ratio = 40; /* * The generator of dirty data starts async writeback at this level */ -static int dirty_async_ratio = 45; +static int dirty_async_ratio = 50; /* * The generator of dirty data performs sync writeout at this level @@ -62,25 +62,28 @@ void balance_dirty_pages(struct address_space *mapping) int async_thresh; int sync_thresh; int wake_pdflush = 0; - unsigned long dirty_and_locked; + unsigned long dirty_and_writeback; get_page_state(&ps); - dirty_and_locked = ps.nr_dirty + ps.nr_locked; + dirty_and_writeback = ps.nr_dirty + ps.nr_writeback; background_thresh = (dirty_background_ratio * tot) / 100; async_thresh = (dirty_async_ratio * tot) / 100; sync_thresh = (dirty_sync_ratio * tot) / 100; - if (dirty_and_locked > sync_thresh) { - int nr_to_write = dirty_and_locked - async_thresh; + if (dirty_and_writeback > sync_thresh) { + int nr_to_write = 1500; + printk("sync thresh\n"); writeback_unlocked_inodes(&nr_to_write, WB_SYNC_LAST, NULL); + get_page_state(&ps); + dirty_and_writeback = ps.nr_dirty + ps.nr_writeback; wake_pdflush = 1; - } else if (dirty_and_locked > async_thresh) { - int nr_to_write = dirty_and_locked - async_thresh; + } else if (dirty_and_writeback > async_thresh) { + int nr_to_write = 1500; writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL); - } else if (dirty_and_locked > background_thresh) { + } else if (dirty_and_writeback > background_thresh) { wake_pdflush = 1; } @@ -88,9 +91,8 @@ void balance_dirty_pages(struct address_space *mapping) /* * There is no flush thread against this device. Start one now. */ - get_page_state(&ps); - if (ps.nr_dirty > 0) { - pdflush_flush(ps.nr_dirty); + if (dirty_and_writeback > async_thresh) { + pdflush_flush(dirty_and_writeback - async_thresh); yield(); } } @@ -109,7 +111,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) preempt_disable(); cpu = smp_processor_id(); - if (ratelimits[cpu].count++ >= 32) { + if (ratelimits[cpu].count++ >= 1000) { ratelimits[cpu].count = 0; preempt_enable(); balance_dirty_pages(mapping); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a377932a4249..b0a264628f69 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -584,7 +584,7 @@ void get_page_state(struct page_state *ret) int pcpu; ret->nr_dirty = 0; - ret->nr_locked = 0; + ret->nr_writeback = 0; ret->nr_pagecache = 0; for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) { @@ -592,7 +592,7 @@ void get_page_state(struct page_state *ret) ps = &page_states[cpu_logical_map(pcpu)]; ret->nr_dirty += ps->nr_dirty; - ret->nr_locked += ps->nr_locked; + ret->nr_writeback += ps->nr_writeback; ret->nr_pagecache += ps->nr_pagecache; } } -- 2.39.5