}
/*
- * This is the 'heart' of the zoned buddy allocator:
+ * This is the 'heart' of the zoned buddy allocator.
+ *
+ * Herein lies the mysterious "incremental min". That's the
+ *
+ * min += z->pages_low;
+ *
+ * thing. The intent here is to provide additional protection to low zones for
+ * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
+ * request is not allowed to dip as deeply into the normal zone as a GFP_KERNEL
+ * request. This preserves additional space in those lower zones for requests
+ * which really do need memory from those zones. It means that on a decent
+ * sized machine, GFP_HIGHMEM and GFP_KERNEL requests basically leave the DMA
+ * zone untouched.
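+ *
+ * A worked example (watermark numbers invented for illustration): with a
+ * zonelist of HIGHMEM -> NORMAL -> DMA where pages_low is 32, 64 and 16
+ * respectively, a GFP_HIGHMEM request needs free_pages > 32 to allocate
+ * from HIGHMEM, > 32 + 64 = 96 to fall back to NORMAL and > 96 + 16 = 112
+ * to fall back to DMA.  A GFP_KERNEL request starts at NORMAL and needs
+ * only > 64 there, so it may dip deeper into NORMAL than GFP_HIGHMEM can.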
*/
struct page *
__alloc_pages(unsigned int gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
+ const int wait = gfp_mask & __GFP_WAIT;
unsigned long min;
struct zone **zones, *classzone;
struct page *page;
int i;
int cold;
- if (gfp_mask & __GFP_WAIT)
+ if (wait)
might_sleep();
cold = 0;
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
- /* the incremental min is allegedly to discourage fallback */
min += z->pages_low;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
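+ /* __GFP_HIGH (e.g. GFP_ATOMIC) may dip to a quarter of the pages_min reserve */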
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
min += local_min;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
}
/* Atomic allocations - we can't balance anything */
- if (!(gfp_mask & __GFP_WAIT))
+ if (!wait)
goto nopage;
inc_page_state(allocstall);
struct zone *z = zones[i];
min += z->pages_min;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
* request.
+ *
+ * We reclaim from a zone even if that zone is over pages_high. Because:
+ * a) The caller may be trying to free *extra* pages to satisfy a higher-order
+ * allocation or
+ * b) The zones may already be over pages_high, but they must be pushed
+ * still further over pages_high to satisfy the `incremental min' zone
+ * defense algorithm.
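+ * (A zone sitting exactly at pages_high can still fail the allocator's
+ * cumulative-min test for fallback requests, which also add in the
+ * pages_low of each zone visited before this one.)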
+ *
+ * Returns the number of reclaimed pages.
*/
static int
shrink_caches(struct zone *classzone, int priority, int *total_scanned,
- int gfp_mask, const int nr_pages, int order,
- struct page_state *ps)
+ int gfp_mask, const int nr_pages, struct page_state *ps)
{
struct zone *first_classzone;
struct zone *zone;
- int nr_mapped = 0;
int ret = 0;
first_classzone = classzone->zone_pgdat->node_zones;
for (zone = classzone; zone >= first_classzone; zone--) {
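+ /* reclaim in batches of at least SWAP_CLUSTER_MAX pages per zone */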
+ int to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX);
+ int nr_mapped = 0;
int max_scan;
- int to_reclaim;
-
- to_reclaim = zone->pages_high - zone->free_pages;
- if (order == 0 && to_reclaim < 0)
- continue; /* zone has enough memory */
-
- to_reclaim = min(to_reclaim, SWAP_CLUSTER_MAX);
- to_reclaim = max(to_reclaim, nr_pages);
/*
 * If we cannot reclaim `nr_pages' pages by scanning twice
 * that many pages then fall back to the next zone.
 */
max_scan = to_reclaim * 2;
ret += shrink_zone(zone, max_scan, gfp_mask,
to_reclaim, &nr_mapped, ps, priority);
- *total_scanned += max_scan;
- *total_scanned += nr_mapped;
+ *total_scanned += max_scan + nr_mapped;
if (ret >= nr_pages)
break;
}
get_page_state(&ps);
nr_reclaimed += shrink_caches(classzone, priority,
&total_scanned, gfp_mask,
- nr_pages, order, &ps);
+ nr_pages, &ps);
if (nr_reclaimed >= nr_pages)
return 1;
if (total_scanned == 0)
- return 1; /* All zones had enough free memory */
+ printk("%s: I am buggy\n", __FUNCTION__);
if (!(gfp_mask & __GFP_FS))
break; /* Let the caller handle it */
/*