char *name;
#ifdef CONFIG_PPC64
#ifdef CONFIG_PM
- void (*iommu_save)(void);
void (*iommu_restore)(void);
#endif
- #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+ #ifdef CONFIG_MEMORY_HOTPLUG
unsigned long (*memory_block_size)(void);
#endif
#endif /* CONFIG_PPC64 */
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_INIT
+ select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
- select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM)
+ select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE)
select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
#include <linux/device.h>
#include <linux/errno.h>
+ #include <linux/slab.h>
#include <linux/fsi-occ.h>
+#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
#include "common.h"
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
+int folio_migrate_mapping(struct address_space *mapping,
+ struct folio *newfolio, struct folio *folio, int extra_count);
+
+ extern bool numa_demotion_enabled;
#else
static inline void putback_movable_pages(struct list_head *l) {}
void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
-void copy_huge_page(struct page *dst, struct page *src);
+void folio_copy(struct folio *dst, struct folio *src);
+ unsigned long nr_free_buffer_pages(void);
+
/*
* Compound pages have a destructor function. Provide a
* prototype for that function and accessor functions.
static int open_brace[XBC_DEPTH_MAX] __initdata;
static int brace_index __initdata;
- memblock_free_ptr(addr, size);
+#ifdef __KERNEL__ /* kernel build: bootconfig buffers are handled by memblock */
+static inline void * __init xbc_alloc_mem(size_t size)
+{
+ return memblock_alloc(size, SMP_CACHE_BYTES); /* result is returned as-is, without a check here */
+}
+
+static inline void __init xbc_free_mem(void *addr, size_t size)
+{
++ memblock_free(addr, size); /* passes both @addr and @size through to memblock_free() */
+}
+
+#else /* !__KERNEL__ */
+
+static inline void *xbc_alloc_mem(size_t size)
+{
+ return malloc(size); /* non-kernel build: plain malloc(), NULL on failure */
+}
+
+static inline void xbc_free_mem(void *addr, size_t size)
+{
+ free(addr); /* @size is unused here; kept so both variants share one signature */
+}
+#endif /* __KERNEL__ */
+/**
+ * xbc_get_info() - Get the information of loaded boot config
+ * @node_size: A pointer to store the number of nodes.
+ * @data_size: A pointer to store the size of bootconfig data.
+ *
+ * Get the number of used nodes in @node_size if it is not NULL,
+ * and the size of bootconfig data in @data_size if it is not NULL.
+ * Return 0 if the boot config is initialized, or return -ENODEV.
+ * When -ENODEV is returned, neither @node_size nor @data_size is written.
+ */
+int __init xbc_get_info(int *node_size, size_t *data_size)
+{
+ if (!xbc_data) /* "initialized" is judged solely by xbc_data being set */
+ return -ENODEV;
+
+ if (node_size)
+ *node_size = xbc_node_num;
+ if (data_size)
+ *data_size = xbc_data_size;
+ return 0;
+}
+
static int __init xbc_parse_error(const char *msg, const char *p)
{
xbc_err_msg = msg;
BUG();
smp_mb__after_atomic();
- wake_up_page(page, PG_writeback);
- acct_reclaim_writeback(page);
- put_page(page);
+ folio_wake(folio, PG_writeback);
++ acct_reclaim_writeback(folio);
+ folio_put(folio);
}
-EXPORT_SYMBOL(end_page_writeback);
+EXPORT_SYMBOL(folio_end_writeback);
/*
* After completing I/O on a page, call this routine to update the page
void page_writeback_init(void);
-void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+static inline void *folio_raw_mapping(struct folio *folio)
+{
+ unsigned long mapping = (unsigned long)folio->mapping; /* treat the pointer as an integer so bits can be masked */
+
+ return (void *)(mapping & ~PAGE_MAPPING_FLAGS); /* folio->mapping with the PAGE_MAPPING_FLAGS bits cleared */
+}
+
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+ int nr_throttled);
-static inline void acct_reclaim_writeback(struct page *page)
++static inline void acct_reclaim_writeback(struct folio *folio)
+ {
- pg_data_t *pgdat = page_pgdat(page);
++ pg_data_t *pgdat = folio_pgdat(folio);
+ int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); /* counter raised and lowered by reclaim_throttle() for VMSCAN_THROTTLE_WRITEBACK */
+
+ if (nr_throttled) /* inline fast path: do nothing unless the counter is non-zero */
- __acct_reclaim_writeback(pgdat, page, nr_throttled);
++ __acct_reclaim_writeback(pgdat, folio, nr_throttled);
+ }
+
+ static inline void wake_throttle_isolated(pg_data_t *pgdat)
+ {
+ wait_queue_head_t *wqh;
+
+ wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; /* same per-reason queue that reclaim_throttle() waits on */
+ if (waitqueue_active(wqh)) /* wake_up() is only called when waitqueue_active() reports true */
+ wake_up(wqh);
+ }
+
vm_fault_t do_swap_page(struct vm_fault *vmf);
+void folio_rotate_reclaimable(struct folio *folio);
+bool __folio_end_writeback(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
if (do_memsw_account())
page_counter_uncharge(&memcg->memsw, nr_pages);
}
- #endif
-static void commit_charge(struct page *page, struct mem_cgroup *memcg)
+static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
{
- VM_BUG_ON_PAGE(page_memcg(page), page);
+ VM_BUG_ON_FOLIO(folio_memcg(folio), folio);
/*
* Any of the following ensures page's memcg stability:
*
}
EXPORT_SYMBOL(alloc_pages);
+struct folio *folio_alloc(gfp_t gfp, unsigned order)
+{
+ struct page *page = alloc_pages(gfp | __GFP_COMP, order); /* __GFP_COMP is always added to the caller's flags */
+
+ if (page && order > 1) /* skipped when allocation failed and for order 0 and 1 */
+ prep_transhuge_page(page);
+ return (struct folio *)page; /* a NULL page yields a NULL folio; NOTE(review): the cast presumes struct folio overlays struct page - confirm */
+}
+EXPORT_SYMBOL(folio_alloc);
+
+ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
+ struct mempolicy *pol, unsigned long nr_pages,
+ struct page **page_array)
+ {
+ int nodes;
+ unsigned long nr_pages_per_node;
+ int delta;
+ int i;
+ unsigned long nr_allocated;
+ unsigned long total_allocated = 0;
+
+ nodes = nodes_weight(pol->nodes); /* one bulk request is issued per node counted here */
+ nr_pages_per_node = nr_pages / nodes; /* even share per request; NOTE(review): divides by nodes, so an empty pol->nodes would fault - confirm callers rule that out */
+ delta = nr_pages - nodes * nr_pages_per_node; /* pages left over after the even split */
+
+ for (i = 0; i < nodes; i++) {
+ if (delta) { /* the first `delta` iterations each request one extra page */
+ nr_allocated = __alloc_pages_bulk(gfp,
+ interleave_nodes(pol), NULL,
+ nr_pages_per_node + 1, NULL,
+ page_array);
+ delta--;
+ } else {
+ nr_allocated = __alloc_pages_bulk(gfp,
+ interleave_nodes(pol), NULL,
+ nr_pages_per_node, NULL, page_array);
+ }
+
+ page_array += nr_allocated; /* the next request fills the slots after the ones just populated */
+ total_allocated += nr_allocated;
+ }
+
+ return total_allocated; /* sum of the counts returned by the individual __alloc_pages_bulk() calls */
+ }
+
+ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
+ struct mempolicy *pol, unsigned long nr_pages,
+ struct page **page_array)
+ {
+ gfp_t preferred_gfp;
+ unsigned long nr_allocated = 0;
+
+ preferred_gfp = gfp | __GFP_NOWARN; /* the first attempt uses a modified copy of the caller's flags */
+ preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); /* direct reclaim and no-fail are cleared for that attempt */
+
+ nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+ nr_pages, NULL, page_array);
+
+ if (nr_allocated < nr_pages) /* shortfall: request the remainder with the original gfp, numa_node_id() and NULL instead of &pol->nodes */
+ nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
+ nr_pages - nr_allocated, NULL,
+ page_array + nr_allocated);
+ return nr_allocated; /* total reported by both attempts */
+ }
+
+ /*
+ * Bulk page allocation that also takes the memory policy into account,
+ * which some callers such as vmalloc need at the same time.
+ *
+ * It can speed up memory allocation, especially for interleaved
+ * allocations.
+ *
+ * The policy is &default_policy when called in interrupt context or with
+ * __GFP_THISNODE set; otherwise it is the current task's policy.
+ * MPOL_INTERLEAVE and MPOL_PREFERRED_MANY are handed to their dedicated
+ * helpers; every other mode goes directly to __alloc_pages_bulk() with
+ * the node and nodemask derived from the policy.
+ *
+ * Returns whatever the selected helper or __alloc_pages_bulk() returns.
+ */
+ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+ unsigned long nr_pages, struct page **page_array)
+ {
+ struct mempolicy *pol = &default_policy;
+
+ if (!in_interrupt() && !(gfp & __GFP_THISNODE)) /* otherwise pol stays &default_policy */
+ pol = get_task_policy(current);
+
+ if (pol->mode == MPOL_INTERLEAVE)
+ return alloc_pages_bulk_array_interleave(gfp, pol,
+ nr_pages, page_array);
+
+ if (pol->mode == MPOL_PREFERRED_MANY)
+ return alloc_pages_bulk_array_preferred_many(gfp,
+ numa_node_id(), pol, nr_pages, page_array);
+
+ return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
+ policy_nodemask(gfp, pol), nr_pages, NULL,
+ page_array);
+ }
+
int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
struct mempolicy *pol = mpol_dup(vma_policy(src));
unlock_page(page);
}
-void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
+ {
+ wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; /* one wait queue per throttle reason */
+ long timeout, ret;
+ DEFINE_WAIT(wait);
+
+ /*
+ * Do not throttle IO workers, kthreads other than kswapd or
+ * workqueues. They may be required for reclaim to make
+ * forward progress (e.g. journalling workqueues or kthreads).
+ */
+ if (!current_is_kswapd() &&
+ current->flags & (PF_IO_WORKER|PF_KTHREAD))
+ return;
+
+ /*
+ * These figures are pulled out of thin air.
+ * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
+ * parallel reclaimers which is a short-lived event so the timeout is
+ * short. Failing to make progress or waiting on writeback are
+ * potentially long-lived events so use a longer timeout. This is shaky
+ * logic as a failure to make progress could be due to anything from
+ * writeback to a slow device to excessively referenced pages at the
+ * tail of the inactive LRU.
+ */
+ switch(reason) {
+ case VMSCAN_THROTTLE_WRITEBACK:
+ timeout = HZ/10;
+ /* the task that takes the counter from 0 to 1 records the NR_THROTTLED_WRITTEN baseline */
+ if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+ WRITE_ONCE(pgdat->nr_reclaim_start,
+ node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+ }
+
+ break;
+ case VMSCAN_THROTTLE_NOPROGRESS:
+ timeout = HZ/2;
+ break;
+ case VMSCAN_THROTTLE_ISOLATED:
+ timeout = HZ/50;
+ break;
+ default: /* unexpected reason: warn once and fall back to a timeout of HZ */
+ WARN_ON_ONCE(1);
+ timeout = HZ;
+ break;
+ }
+
+ prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+ ret = schedule_timeout(timeout); /* presumably the unexpired part of timeout; timeout - ret is traced below */
+ finish_wait(wqh, &wait);
+
+ if (reason == VMSCAN_THROTTLE_WRITEBACK) /* undo the increment taken in the switch above */
+ atomic_dec(&pgdat->nr_writeback_throttled);
+
+ trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
+ jiffies_to_usecs(timeout - ret),
+ reason);
+ }
+
+ /*
+ * Account for pages written if tasks are throttled waiting on dirty
+ * pages to clean. If enough pages have been cleaned since throttling
+ * started then wakeup the throttled tasks.
+ *
+ * "Enough" means that NR_THROTTLED_WRITTEN has grown by more than
+ * SWAP_CLUSTER_MAX * @nr_throttled since pgdat->nr_reclaim_start was
+ * recorded; the wakeup targets the VMSCAN_THROTTLE_WRITEBACK queue.
+ */
- inc_node_page_state(page, NR_THROTTLED_WRITTEN);
++void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
+ int nr_throttled)
+ {
+ unsigned long nr_written;
+
++ node_stat_add_folio(folio, NR_THROTTLED_WRITTEN);
+
+ /*
+ * This is an inaccurate read as the per-cpu deltas may not
+ * be synchronised. However, given that the system is
+ * writeback throttled, it is not worth taking the penalty
+ * of getting an accurate count. At worst, the throttle
+ * timeout guarantees forward progress.
+ */
+ nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) -
+ READ_ONCE(pgdat->nr_reclaim_start); /* progress since the baseline stored by reclaim_throttle() */
+
+ if (nr_written > SWAP_CLUSTER_MAX * nr_throttled)
+ wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]);
+ }
+
/* possible outcome of pageout() */
typedef enum {
/* failed to write page out, page is locked */