}
/*
- * This is the 'heart' of the zoned buddy allocator:
+ * This is the 'heart' of the zoned buddy allocator.
+ *
+ * Herein lies the mysterious "incremental min". That's the
+ *
+ * min += z->pages_low;
+ *
+ * thing. The intent here is to provide additional protection to low zones for
+ * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
+ * request is not allowed to dip as deeply into the normal zone as a GFP_KERNEL
+ * request. This preserves additional space in those lower zones for requests
+ * which really do need memory from those zones. It means that on a decent
+ * sized machine, GFP_HIGHMEM and GFP_KERNEL requests basically leave the DMA
+ * zone untouched.
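+ *
+ * A worked example (watermark numbers invented for illustration): with a
+ * zonelist of HIGHMEM -> NORMAL -> DMA where pages_low is 32, 64 and 16
+ * respectively, a GFP_HIGHMEM request needs free_pages > 32 to allocate
+ * from HIGHMEM, > 32 + 64 = 96 to fall back to NORMAL and > 96 + 16 = 112
+ * to fall back to DMA.  A GFP_KERNEL request starts at NORMAL and needs
+ * only > 64 there, so it may dip deeper into NORMAL than GFP_HIGHMEM can.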
*/
struct page *
__alloc_pages(unsigned int gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
+ const int wait = gfp_mask & __GFP_WAIT;
unsigned long min;
struct zone **zones, *classzone;
struct page *page;
int i;
int cold;
- if (gfp_mask & __GFP_WAIT)
+ if (wait)
might_sleep();
cold = 0;
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
- /* the incremental min is allegedly to discourage fallback */
min += z->pages_low;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
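+ /* __GFP_HIGH (e.g. GFP_ATOMIC) may dip to a quarter of the pages_min reserve */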
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
min += local_min;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
}
/* Atomic allocations - we can't balance anything */
- if (!(gfp_mask & __GFP_WAIT))
+ if (!wait)
goto nopage;
inc_page_state(allocstall);
struct zone *z = zones[i];
min += z->pages_min;
- if (z->free_pages > min || z->free_pages >= z->pages_high) {
+ if (z->free_pages > min ||
+ (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
return page;
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
* request.
+ *
+ * We reclaim from a zone even if that zone is over pages_high. Because:
+ * a) The caller may be trying to free *extra* pages to satisfy a higher-order
+ * allocation or
+ * b) The zones may already be over pages_high, but they must be pushed
+ * still further over pages_high to satisfy the `incremental min' zone
+ * defense algorithm.
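+ * (A zone sitting exactly at pages_high can still fail the allocator's
+ * cumulative-min test for fallback requests, which also add in the
+ * pages_low of each zone visited before this one.)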
+ *
+ * Returns the number of reclaimed pages.
*/
static int
shrink_caches(struct zone *classzone, int priority, int *total_scanned,
- int gfp_mask, const int nr_pages, int order,
- struct page_state *ps)
+ int gfp_mask, const int nr_pages, struct page_state *ps)
{
struct zone *first_classzone;
struct zone *zone;
- int nr_mapped = 0;
int ret = 0;
first_classzone = classzone->zone_pgdat->node_zones;
for (zone = classzone; zone >= first_classzone; zone--) {
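+ /* reclaim in batches of at least SWAP_CLUSTER_MAX pages per zone */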
+ int to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX);
+ int nr_mapped = 0;
int max_scan;
- int to_reclaim;
-
- to_reclaim = zone->pages_high - zone->free_pages;
- if (order == 0 && to_reclaim < 0)
- continue; /* zone has enough memory */
-
- to_reclaim = min(to_reclaim, SWAP_CLUSTER_MAX);
- to_reclaim = max(to_reclaim, nr_pages);
/*
 * If we cannot reclaim `nr_pages' pages by scanning twice
 * that many pages then fall back to the next zone.
 */
max_scan = to_reclaim * 2;
ret += shrink_zone(zone, max_scan, gfp_mask,
to_reclaim, &nr_mapped, ps, priority);
- *total_scanned += max_scan;
- *total_scanned += nr_mapped;
+ *total_scanned += max_scan + nr_mapped;
if (ret >= nr_pages)
break;
}
get_page_state(&ps);
nr_reclaimed += shrink_caches(classzone, priority,
&total_scanned, gfp_mask,
- nr_pages, order, &ps);
+ nr_pages, &ps);
if (nr_reclaimed >= nr_pages)
return 1;
if (total_scanned == 0)
- return 1; /* All zones had enough free memory */
+ printk("%s: I am buggy\n", __FUNCTION__);
if (!(gfp_mask & __GFP_FS))
break; /* Let the caller handle it */
/*