* Frees a list of pages.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free, or 0 for all on the list.
+ *
+ * If the zone was previously in an "all pages pinned" state then look to
+ * see if this freeing clears that state.
+ *
+ * And clear the zone's pages_scanned counter, to hold off the "all pages are
+ * pinned" detection logic.
*/
static int
free_pages_bulk(struct zone *zone, int count,
base = zone->zone_mem_map;
area = zone->free_area + order;
spin_lock_irqsave(&zone->lock, flags);
+ zone->all_unreclaimable = 0;
+ zone->pages_scanned = 0;
while (!list_empty(list) && count--) {
page = list_entry(list->prev, struct page, list);
/* have to delete it as __free_pages_bulk list manipulates */
}
/* we're somewhat low on memory, failed to find what we needed */
- for (i = 0; zones[i] != NULL; i++) {
- struct zone *z = zones[i];
- if (z->free_pages <= z->pages_low &&
- waitqueue_active(&z->zone_pgdat->kswapd_wait))
- wake_up_interruptible(&z->zone_pgdat->kswapd_wait);
- }
+ for (i = 0; zones[i] != NULL; i++)
+ wakeup_kswapd(zones[i]);
/* Go through the zonelist again, taking __GFP_HIGH into account */
min = 1UL << order;
nr_taken++;
}
zone->nr_inactive -= nr_taken;
+ zone->pages_scanned += nr_taken;
spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
* satisfy the `incremental min' zone defense algorithm.
*
* Returns the number of reclaimed pages.
+ *
+ * If a zone is deemed to be full of pinned pages then just give it a light
+ * scan then give up on it.
*/
static int
shrink_caches(struct zone *classzone, int priority, int *total_scanned,
int nr_mapped = 0;
int max_scan;
+ if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ continue; /* Let kswapd poll it */
+
/*
* If we cannot reclaim `nr_pages' pages by scanning twice
* that many pages then fall back to the next zone.
* special.
*
* Returns the number of pages which were actually freed.
+ *
+ * There is special handling here for zones which are full of pinned pages.
+ * This can happen if the pages are all mlocked, or if they are all used by
+ * device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb.
+ * What we do is to detect the case where all pages in the zone have been
+ * scanned twice and there has been zero successful reclaim. Mark the zone as
+ * dead and from now on, only perform a short scan. Basically we're polling
+ * the zone for when the problem goes away.
*/
static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
{
int max_scan;
int to_reclaim;
+ if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ continue;
+
if (nr_pages && to_free > 0) { /* Software suspend */
to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
} else { /* Zone balancing */
to_free -= shrink_zone(zone, max_scan, GFP_KSWAPD,
to_reclaim, &nr_mapped, ps, priority);
shrink_slab(max_scan + nr_mapped, GFP_KSWAPD);
+ if (zone->all_unreclaimable)
+ continue;
+ if (zone->pages_scanned > zone->present_pages * 2)
+ zone->all_unreclaimable = 1;
}
if (all_zones_ok)
break;
}
}
+/*
+ * A zone is low on free memory, so wake its kswapd task to service it.
+ *
+ * This is a no-op in two cases:
+ *  - the zone is still above its pages_low watermark (nothing to do), or
+ *  - nobody is sleeping on the node's kswapd_wait queue (kswapd is
+ *    presumably already awake and scanning - NOTE(review): confirm
+ *    against the kswapd sleep/wake protocol).
+ */
+void wakeup_kswapd(struct zone *zone)
+{
+ if (zone->free_pages > zone->pages_low)
+ return;
+ if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
+ return;
+ wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
+}
+
#ifdef CONFIG_SOFTWARE_SUSPEND
/*
* Try to free `nr_pages' of memory, system-wide. Returns the number of freed
static int __init kswapd_init(void)
{
pg_data_t *pgdat;
- printk("Starting kswapd\n");
swap_setup();
for_each_pgdat(pgdat)
kernel_thread(kswapd, pgdat, CLONE_KERNEL);