As the page free path makes no distinction between cache hot and cold
pages, there is no real useful ordering of pages in the free list that
allocation requests can take advantage of. Juding from the users of
__GFP_COLD, it is likely that a number of them are the result of copying
other sites instead of actually measuring the impact. Remove the
__GFP_COLD parameter which simplifies a number of paths in the page
allocator.
This is potentially controversial but bear in mind that the size of the
per-cpu pagelists versus modern cache sizes means that the whole per-cpu
list can often fit in the L3 cache. Hence, there is only a potential
benefit for microbenchmarks that alloc/free pages in a tight loop. It's
even worse when THP is taken into account which has little or no chance
of getting a cache-hot page as the per-cpu list is bypassed and the
zeroing of multiple pages will thrash the cache anyway.
The truncate microbenchmarks are not shown as this patch affects the
allocation path and not the free path. A page fault microbenchmark was
tested but it showed no sigificant difference which is not surprising
given that the __GFP_COLD branches are a miniscule percentage of the
fault path.
Link: http://lkml.kernel.org/r/20171018075952.10627-9-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
rc = ena_alloc_rx_page(rx_ring, rx_info,
- __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+ GFP_ATOMIC | __GFP_COMP);
if (unlikely(rc < 0)) {
netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
"failed to alloc buffer for rx queue %d\n",
order = alloc_order;
/* Try to obtain pages, decreasing order if necessary */
- gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+ gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages_node(node, gfp, order);
if (pages)
buff->flags = 0U;
buff->len = AQ_CFG_RX_FRAME_MAX;
- buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD |
- __GFP_COMP, pages_order);
+ buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order);
if (!buff->page) {
err = -ENOMEM;
goto err_exit;
struct sk_buff *skb;
struct octeon_skb_page_info *skb_pg_info;
- page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC);
if (unlikely(!page))
return NULL;
if (mlx4_en_prepare_rx_desc(priv, ring,
ring->actual_size,
- GFP_KERNEL | __GFP_COLD)) {
+ GFP_KERNEL)) {
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
en_err(priv, "Failed to allocate enough rx buffers\n");
return -ENOMEM;
do {
if (mlx4_en_prepare_rx_desc(priv, ring,
ring->prod & ring->size_mask,
- GFP_ATOMIC | __GFP_COLD |
- __GFP_MEMALLOC))
+ GFP_ATOMIC | __GFP_MEMALLOC))
break;
ring->prod++;
} while (likely(--missing));
} else {
struct page *page;
- page = alloc_page(GFP_KERNEL | __GFP_COLD);
+ page = alloc_page(GFP_KERNEL);
frag = page ? page_address(page) : NULL;
}
if (!frag) {
} else {
struct page *page;
- page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC);
frag = page ? page_address(page) : NULL;
}
if (!frag) {
{
if (!rx_ring->pg_chunk.page) {
u64 map;
- rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP |
- GFP_ATOMIC,
+ rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
qdev->lbq_buf_order);
if (unlikely(!rx_ring->pg_chunk.page)) {
netif_err(qdev, drv, qdev->ndev,
do {
page = ef4_reuse_page(rx_queue);
if (page == NULL) {
- page = alloc_pages(__GFP_COLD | __GFP_COMP |
+ page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
do {
page = efx_reuse_page(rx_queue);
if (page == NULL) {
- page = alloc_pages(__GFP_COLD | __GFP_COMP |
+ page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
dma_addr_t pages_dma;
/* Try to obtain pages, decreasing order if necessary */
- gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages(gfp, order);
if (pages)
sw_data[0] = (u32)bufptr;
} else {
/* Allocate a secondary receive queue entry */
- page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC | GFP_DMA);
if (unlikely(!page)) {
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
goto fail;
int err;
bool oom;
- gfp |= __GFP_COLD;
do {
if (vi->mergeable_rx_bufs)
err = add_recvbuf_mergeable(vi, rq, gfp);
}
for (npages = 1; npages < max_pages; npages++) {
- page = page_cache_alloc_cold(inode->i_mapping);
+ page = page_cache_alloc(inode->i_mapping);
if (!page)
break;
page_pool[npages] = page;
goto backing_page_already_present;
if (!newpage) {
- newpage = __page_cache_alloc(cachefiles_gfp |
- __GFP_COLD);
+ newpage = __page_cache_alloc(cachefiles_gfp);
if (!newpage)
goto nomem_monitor;
}
goto backing_page_already_present;
if (!newpage) {
- newpage = __page_cache_alloc(cachefiles_gfp |
- __GFP_COLD);
+ newpage = __page_cache_alloc(cachefiles_gfp);
if (!newpage)
goto nomem;
}
#define ___GFP_HIGH 0x20u
#define ___GFP_IO 0x40u
#define ___GFP_FS 0x80u
-#define ___GFP_COLD 0x100u
#define ___GFP_NOWARN 0x200u
#define ___GFP_RETRY_MAYFAIL 0x400u
#define ___GFP_NOFAIL 0x800u
/*
* Action modifiers
*
- * __GFP_COLD indicates that the caller does not expect to be used in the near
- * future. Where possible, a cache-cold page will be returned.
- *
* __GFP_NOWARN suppresses allocation failure reports.
*
* __GFP_COMP address compound page metadata.
*
* __GFP_ZERO returns a zeroed page on success.
*/
-#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
return __page_cache_alloc(mapping_gfp_mask(x));
}
-static inline struct page *page_cache_alloc_cold(struct address_space *x)
-{
- return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
-}
-
static inline gfp_t readahead_gfp_mask(struct address_space *x)
{
- return mapping_gfp_mask(x) |
- __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN;
+ return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
}
typedef int filler_t(void *, struct page *);
* 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
* code in gfp_to_alloc_flags that should be enforcing this.
*/
- gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
+ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC;
return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
}
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
- * %__GFP_COLD - Request cache-cold pages instead of
- * trying to return cache-warm pages.
- *
* %__GFP_HIGH - This allocation has high priority and may use emergency pools.
*
* %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
{(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \
{(unsigned long)__GFP_IO, "__GFP_IO"}, \
{(unsigned long)__GFP_FS, "__GFP_FS"}, \
- {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \
{(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \
{(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \
{(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \
*/
static inline int get_highmem_buffer(int safe_needed)
{
- buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
+ buffer = get_image_page(GFP_ATOMIC, safe_needed);
return buffer ? 0 : -ENOMEM;
}
while (nr_pages-- > 0) {
struct page *page;
- page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+ page = alloc_image_page(GFP_ATOMIC);
if (!page)
goto err_out;
memory_bm_set_bit(copy_bm, page_to_pfn(page));
* Ok, it wasn't cached, so we need to create a new
* page..
*/
- page = page_cache_alloc_cold(mapping);
+ page = page_cache_alloc(mapping);
if (!page) {
error = -ENOMEM;
goto out;
int ret;
do {
- page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+ page = __page_cache_alloc(gfp_mask);
if (!page)
return -ENOMEM;
repeat:
page = find_get_page(mapping, index);
if (!page) {
- page = __page_cache_alloc(gfp | __GFP_COLD);
+ page = __page_cache_alloc(gfp);
if (!page)
return ERR_PTR(-ENOMEM);
err = add_to_page_cache_lru(page, mapping, index, gfp);
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype, bool cold)
+ int migratetype)
{
int i, alloced = 0;
* merge IO requests if the physical pages are ordered
* properly.
*/
- if (likely(!cold))
- list_add(&page->lru, list);
- else
- list_add_tail(&page->lru, list);
+ list_add(&page->lru, list);
list = &page->lru;
alloced++;
if (is_migrate_cma(get_pcppage_migratetype(page)))
/* Remove page from the per-cpu list, caller must protect the list */
static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
- bool cold, struct per_cpu_pages *pcp,
+ struct per_cpu_pages *pcp,
struct list_head *list)
{
struct page *page;
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
- migratetype, cold);
+ migratetype);
if (unlikely(list_empty(list)))
return NULL;
}
- if (cold)
- page = list_last_entry(list, struct page, lru);
- else
- page = list_first_entry(list, struct page, lru);
-
+ page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count--;
} while (check_new_pcp(page));
{
struct per_cpu_pages *pcp;
struct list_head *list;
- bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
unsigned long flags;
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+ page = __rmqueue_pcplist(zone, migratetype, pcp, list);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
struct page **pages, int page_start, int page_end)
{
- const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
+ const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
unsigned int cpu, tcpu;
int i;
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
{ "__GFP_ATOMIC", "_A" },
{ "__GFP_IO", "I" },
{ "__GFP_FS", "F" },
- { "__GFP_COLD", "CO" },
{ "__GFP_NOWARN", "NWR" },
{ "__GFP_RETRY_MAYFAIL", "R" },
{ "__GFP_NOFAIL", "NF" },