bool (*release_folio)(struct folio *, gfp_t);
void (*free_folio)(struct folio *);
int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
- bool (*isolate_page) (struct page *, isolate_mode_t);
int (*migratepage)(struct address_space *, struct page *, struct page *);
- void (*putback_page) (struct page *);
int (*launder_folio)(struct folio *);
bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
int (*error_remove_page)(struct address_space *, struct page *);
release_folio: yes
free_folio: yes
direct_IO:
-isolate_page: yes
migratepage: yes (both)
-putback_page: yes
launder_folio: yes
is_partially_uptodate: yes
error_remove_page: yes
bool (*release_folio)(struct folio *, gfp_t);
void (*free_folio)(struct folio *);
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
- /* isolate a page for migration */
- bool (*isolate_page) (struct page *, isolate_mode_t);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
- /* put migration-failed page back to right list */
- void (*putback_page) (struct page *);
int (*launder_folio) (struct folio *);
bool (*is_partially_uptodate) (struct folio *, size_t from,
data directly between the storage and the application's address
space.
-``isolate_page``
- Called by the VM when isolating a movable non-lru page. If page
- is successfully isolated, VM marks the page as PG_isolated via
- __SetPageIsolated.
-
``migrate_page``
This is used to compact the physical memory usage. If the VM
wants to relocate a page (maybe off a memory card that is
page to this function. migrate_page should transfer any private
data across and update any references that it has to the page.
-``putback_page``
- Called by the VM when isolated page's migration fails.
-
``launder_folio``
Called before freeing a folio - it writes back the dirty folio.
To prevent redirtying the folio, it is kept locked during the
Non-LRU page migration
======================
-Although migration originally aimed for reducing the latency of memory accesses
-for NUMA, compaction also uses migration to create high-order pages.
+Although migration originally aimed for reducing the latency of memory
+accesses for NUMA, compaction also uses migration to create high-order
+pages. For compaction purposes, it is also useful to be able to move
+non-LRU pages, such as zsmalloc and virtio-balloon pages.
-Current problem of the implementation is that it is designed to migrate only
-*LRU* pages. However, there are potential non-LRU pages which can be migrated
-in drivers, for example, zsmalloc, virtio-balloon pages.
-
-For virtio-balloon pages, some parts of migration code path have been hooked
-up and added virtio-balloon specific functions to intercept migration logics.
-It's too specific to a driver so other drivers who want to make their pages
-movable would have to add their own specific hooks in the migration path.
-
-To overcome the problem, VM supports non-LRU page migration which provides
-generic functions for non-LRU movable pages without driver specific hooks
-in the migration path.
-
-If a driver wants to make its pages movable, it should define three functions
-which are function pointers of struct address_space_operations.
-
-1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
-
- What VM expects from isolate_page() function of driver is to return *true*
- if driver isolates the page successfully. On returning true, VM marks the page
- as PG_isolated so concurrent isolation in several CPUs skip the page
- for isolation. If a driver cannot isolate the page, it should return *false*.
-
- Once page is successfully isolated, VM uses page.lru fields so driver
- shouldn't expect to preserve values in those fields.
-
-2. ``int (*migratepage) (struct address_space *mapping,``
-| ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
-
- After isolation, VM calls migratepage() of driver with the isolated page.
- The function of migratepage() is to move the contents of the old page to the
- new page
- and set up fields of struct page newpage. Keep in mind that you should
- indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
- under page_lock if you migrated the oldpage successfully and returned
- MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
- can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
- because VM interprets -EAGAIN as "temporary migration failure". On returning
- any error except -EAGAIN, VM will give up the page migration without
- retrying.
-
- Driver shouldn't touch the page.lru field while in the migratepage() function.
-
-3. ``void (*putback_page)(struct page *);``
-
- If migration fails on the isolated page, VM should return the isolated page
- to the driver so VM calls the driver's putback_page() with the isolated page.
- In this function, the driver should put the isolated page back into its own data
- structure.
-
-Non-LRU movable page flags
-
- There are two page flags for supporting non-LRU movable page.
-
- * PG_movable
-
- Driver should use the function below to make page movable under page_lock::
-
- void __SetPageMovable(struct page *page, struct address_space *mapping)
-
- It needs argument of address_space for registering migration
- family functions which will be called by VM. Exactly speaking,
- PG_movable is not a real flag of struct page. Rather, VM
- reuses the page->mapping's lower bits to represent it::
-
- #define PAGE_MAPPING_MOVABLE 0x2
- page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
-
- so driver shouldn't access page->mapping directly. Instead, driver should
- use page_mapping() which masks off the low two bits of page->mapping under
- page lock so it can get the right struct address_space.
-
- For testing of non-LRU movable pages, VM supports __PageMovable() function.
- However, it doesn't guarantee to identify non-LRU movable pages because
- the page->mapping field is unified with other variables in struct page.
- If the driver releases the page after isolation by VM, page->mapping
- doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set
- (look at __ClearPageMovable). But __PageMovable() is cheap to call whether
- page is LRU or non-LRU movable once the page has been isolated because LRU
- pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also
- good for just peeking to test non-LRU movable pages before more expensive
- checking with lock_page() in pfn scanning to select a victim.
-
- For guaranteeing non-LRU movable page, VM provides PageMovable() function.
- Unlike __PageMovable(), PageMovable() validates page->mapping and
- mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents
- sudden destroying of page->mapping.
-
- Drivers using __SetPageMovable() should clear the flag via
- __ClearMovablePage() under page_lock() before the releasing the page.
-
- * PG_isolated
-
- To prevent concurrent isolation among several CPUs, VM marks isolated page
- as PG_isolated under lock_page(). So if a CPU encounters PG_isolated
- non-LRU movable page, it can skip it. Driver doesn't need to manipulate the
- flag because VM will set/clear it automatically. Keep in mind that if the
- driver sees a PG_isolated page, it means the page has been isolated by the
- VM so it shouldn't touch the page.lru field.
- The PG_isolated flag is aliased with the PG_reclaim flag so drivers
- shouldn't use PG_isolated for its own purposes.
+If a driver wants to make its pages movable, it should define a struct
+movable_operations. It then needs to call __SetPageMovable() on each
+page that it may be able to move. This uses the ``page->mapping`` field,
+so this field is not available for the driver to use for other purposes.
Monitoring Migration
=====================
Christoph Lameter, May 8, 2006.
Minchan Kim, Mar 28, 2016.
+
+.. kernel-doc:: include/linux/migrate.h
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
-#include <linux/magic.h>
#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
};
#ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-
-static int cmm_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
- .name = "ppc-cmm",
- .init_fs_context = cmm_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
struct page *newpage, struct page *page,
enum migrate_mode mode)
return MIGRATEPAGE_SUCCESS;
}
-static int cmm_balloon_compaction_init(void)
+static void cmm_balloon_compaction_init(void)
{
- int rc;
-
balloon_devinfo_init(&b_dev_info);
b_dev_info.migratepage = cmm_migratepage;
-
- balloon_mnt = kern_mount(&balloon_fs);
- if (IS_ERR(balloon_mnt)) {
- rc = PTR_ERR(balloon_mnt);
- balloon_mnt = NULL;
- return rc;
- }
-
- b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
- if (IS_ERR(b_dev_info.inode)) {
- rc = PTR_ERR(b_dev_info.inode);
- b_dev_info.inode = NULL;
- kern_unmount(balloon_mnt);
- balloon_mnt = NULL;
- return rc;
- }
-
- b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
- return 0;
-}
-static void cmm_balloon_compaction_deinit(void)
-{
- if (b_dev_info.inode)
- iput(b_dev_info.inode);
- b_dev_info.inode = NULL;
- kern_unmount(balloon_mnt);
- balloon_mnt = NULL;
}
#else /* CONFIG_BALLOON_COMPACTION */
-static int cmm_balloon_compaction_init(void)
-{
- return 0;
-}
-
-static void cmm_balloon_compaction_deinit(void)
+static void cmm_balloon_compaction_init(void)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
- rc = cmm_balloon_compaction_init();
- if (rc)
- return rc;
+ cmm_balloon_compaction_init();
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
out_balloon_compaction:
- cmm_balloon_compaction_deinit();
return rc;
}
unregister_memory_notifier(&cmm_mem_nb);
cmm_free_pages(atomic_long_read(&loaned_pages));
cmm_unregister_sysfs(&cmm_dev);
- cmm_balloon_compaction_deinit();
}
/**
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#ifdef CONFIG_BALLOON_COMPACTION
-
-static int vmballoon_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type vmballoon_fs = {
- .name = "balloon-vmware",
- .init_fs_context = vmballoon_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static struct vfsmount *vmballoon_mnt;
-
/**
* vmballoon_migratepage() - migrates a balloon page.
* @b_dev_info: balloon device information descriptor.
return ret;
}
-/**
- * vmballoon_compaction_deinit() - removes compaction related data.
- *
- * @b: pointer to the balloon.
- */
-static void vmballoon_compaction_deinit(struct vmballoon *b)
-{
- if (!IS_ERR(b->b_dev_info.inode))
- iput(b->b_dev_info.inode);
-
- b->b_dev_info.inode = NULL;
- kern_unmount(vmballoon_mnt);
- vmballoon_mnt = NULL;
-}
-
/**
* vmballoon_compaction_init() - initialized compaction for the balloon.
*
*
* Return: zero on success or error code on failure.
*/
-static __init int vmballoon_compaction_init(struct vmballoon *b)
+static __init void vmballoon_compaction_init(struct vmballoon *b)
{
- vmballoon_mnt = kern_mount(&vmballoon_fs);
- if (IS_ERR(vmballoon_mnt))
- return PTR_ERR(vmballoon_mnt);
-
b->b_dev_info.migratepage = vmballoon_migratepage;
- b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
-
- if (IS_ERR(b->b_dev_info.inode))
- return PTR_ERR(b->b_dev_info.inode);
-
- b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
- return 0;
}
#else /* CONFIG_BALLOON_COMPACTION */
-
-static void vmballoon_compaction_deinit(struct vmballoon *b)
-{
-}
-
-static int vmballoon_compaction_init(struct vmballoon *b)
+static inline void vmballoon_compaction_init(struct vmballoon *b)
{
- return 0;
}
-
#endif /* CONFIG_BALLOON_COMPACTION */
static int __init vmballoon_init(void)
* balloon_devinfo_init() .
*/
balloon_devinfo_init(&balloon.b_dev_info);
- error = vmballoon_compaction_init(&balloon);
- if (error)
- goto fail;
+ vmballoon_compaction_init(&balloon);
INIT_LIST_HEAD(&balloon.huge_pages);
spin_lock_init(&balloon.comm_lock);
return 0;
fail:
vmballoon_unregister_shrinker(&balloon);
- vmballoon_compaction_deinit(&balloon);
return error;
}
*/
vmballoon_send_start(&balloon, 0);
vmballoon_pop(&balloon);
-
- /* Only once we popped the balloon, compaction can be deinit */
- vmballoon_compaction_deinit(&balloon);
}
module_exit(vmballoon_exit);
#include <linux/oom.h>
#include <linux/wait.h>
#include <linux/mm.h>
-#include <linux/mount.h>
-#include <linux/magic.h>
-#include <linux/pseudo_fs.h>
#include <linux/page_reporting.h>
/*
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
-#ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-#endif
-
enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_INFLATE,
VIRTIO_BALLOON_VQ_DEFLATE,
return MIGRATEPAGE_SUCCESS;
}
-
-static int balloon_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
- .name = "balloon-kvm",
- .init_fs_context = balloon_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
#endif /* CONFIG_BALLOON_COMPACTION */
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
goto out_free_vb;
#ifdef CONFIG_BALLOON_COMPACTION
- balloon_mnt = kern_mount(&balloon_fs);
- if (IS_ERR(balloon_mnt)) {
- err = PTR_ERR(balloon_mnt);
- goto out_del_vqs;
- }
-
vb->vb_dev_info.migratepage = virtballoon_migratepage;
- vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
- if (IS_ERR(vb->vb_dev_info.inode)) {
- err = PTR_ERR(vb->vb_dev_info.inode);
- goto out_kern_unmount;
- }
- vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
/*
*/
if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
err = -ENOSPC;
- goto out_iput;
+ goto out_del_vqs;
}
vb->balloon_wq = alloc_workqueue("balloon-wq",
WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
if (!vb->balloon_wq) {
err = -ENOMEM;
- goto out_iput;
+ goto out_del_vqs;
}
INIT_WORK(&vb->report_free_page_work, report_free_page_func);
vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
out_del_balloon_wq:
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
destroy_workqueue(vb->balloon_wq);
-out_iput:
-#ifdef CONFIG_BALLOON_COMPACTION
- iput(vb->vb_dev_info.inode);
-out_kern_unmount:
- kern_unmount(balloon_mnt);
out_del_vqs:
-#endif
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
}
remove_common(vb);
-#ifdef CONFIG_BALLOON_COMPACTION
- if (vb->vb_dev_info.inode)
- iput(vb->vb_dev_info.inode);
-
- kern_unmount(balloon_mnt);
-#endif
kfree(vb);
}
struct list_head pages; /* Pages enqueued & handled to Host */
int (*migratepage)(struct balloon_dev_info *, struct page *newpage,
struct page *page, enum migrate_mode mode);
- struct inode *inode;
};
extern struct page *balloon_page_alloc(void);
spin_lock_init(&balloon->pages_lock);
INIT_LIST_HEAD(&balloon->pages);
balloon->migratepage = NULL;
- balloon->inode = NULL;
}
#ifdef CONFIG_BALLOON_COMPACTION
-extern const struct address_space_operations balloon_aops;
+extern const struct movable_operations balloon_mops;
/*
* balloon_page_insert - insert a page into the balloon's page list and make
struct page *page)
{
__SetPageOffline(page);
- __SetPageMovable(page, balloon->inode->i_mapping);
+ __SetPageMovable(page, &balloon_mops);
set_page_private(page, (unsigned long)balloon);
list_add(&page->lru, &balloon->pages);
}
*/
int (*migratepage) (struct address_space *,
struct page *, struct page *, enum migrate_mode);
- bool (*isolate_page)(struct page *, isolate_mode_t);
- void (*putback_page)(struct page *);
int (*launder_folio)(struct folio *);
bool (*is_partially_uptodate) (struct folio *, size_t from,
size_t count);
*/
#define MIGRATEPAGE_SUCCESS 0
+/**
+ * struct movable_operations - Driver page migration
+ * @isolate_page:
+ * The VM calls this function to prepare the page to be moved. The page
+ * is locked and the driver should not unlock it. The driver should
+ * return ``true`` if the page is movable and ``false`` if it is not
+ * currently movable. After this function returns, the VM uses the
+ * page->lru field, so the driver must preserve any information which
+ * is usually stored here.
+ *
+ * @migrate_page:
+ * After isolation, the VM calls this function with the isolated
+ * @src page. The driver should copy the contents of the
+ * @src page to the @dst page and set up the fields of @dst page.
+ * Both pages are locked.
+ * If page migration is successful, the driver should call
+ * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS.
+ * If the driver cannot migrate the page at the moment, it can return
+ * -EAGAIN. The VM interprets this as a temporary migration failure and
+ * will retry it later. Any other error value is a permanent migration
+ * failure and migration will not be retried.
+ * The driver shouldn't touch the @src->lru field while in the
+ * migrate_page() function. It may write to @dst->lru.
+ *
+ * @putback_page:
+ * If migration fails on the isolated page, the VM informs the driver
+ * that the page is no longer a candidate for migration by calling
+ * this function. The driver should put the isolated page back into
+ * its own data structure.
+ */
+struct movable_operations {
+ bool (*isolate_page)(struct page *, isolate_mode_t);
+ int (*migrate_page)(struct page *dst, struct page *src,
+ enum migrate_mode);
+ void (*putback_page)(struct page *);
+};
+
/* Defined in mm/debug.c: */
extern const char *migrate_reason_names[MR_TYPES];
#endif
#ifdef CONFIG_COMPACTION
-extern int PageMovable(struct page *page);
-extern void __SetPageMovable(struct page *page, struct address_space *mapping);
-extern void __ClearPageMovable(struct page *page);
+bool PageMovable(struct page *page);
+void __SetPageMovable(struct page *page, const struct movable_operations *ops);
+void __ClearPageMovable(struct page *page);
#else
-static inline int PageMovable(struct page *page) { return 0; }
+static inline bool PageMovable(struct page *page) { return false; }
static inline void __SetPageMovable(struct page *page,
- struct address_space *mapping)
+ const struct movable_operations *ops)
{
}
static inline void __ClearPageMovable(struct page *page)
return PageMovable(&folio->page);
}
+static inline
+const struct movable_operations *page_movable_ops(struct page *page)
+{
+ VM_BUG_ON(!__PageMovable(page));
+
+ return (const struct movable_operations *)
+ ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE);
+}
+
#ifdef CONFIG_NUMA_BALANCING
extern int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node);
* structure which KSM associates with that merged page. See ksm.h.
*
* PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable
- * page and then page->mapping points a struct address_space.
+ * page and then page->mapping points to a struct movable_operations.
*
* Please note that, confusingly, "page_mapping" refers to the inode
* address_space which maps the page from disk; whereas "page_mapped"
/* Since UDF 2.01 is ISO 13346 based... */
#define UDF_SUPER_MAGIC 0x15013346
-#define BALLOON_KVM_MAGIC 0x13661366
-#define ZSMALLOC_MAGIC 0x58295829
#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */
#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
-#define Z3FOLD_MAGIC 0x33
-#define PPC_CMM_MAGIC 0xc7571590
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
#endif /* __LINUX_MAGIC_H__ */
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
}
-
/* move_to_new_page() counterpart for a ballooned page */
-static int balloon_page_migrate(struct address_space *mapping,
- struct page *newpage, struct page *page,
+static int balloon_page_migrate(struct page *newpage, struct page *page,
enum migrate_mode mode)
{
struct balloon_dev_info *balloon = balloon_page_device(page);
return balloon->migratepage(balloon, newpage, page, mode);
}
-const struct address_space_operations balloon_aops = {
- .migratepage = balloon_page_migrate,
+const struct movable_operations balloon_mops = {
+ .migrate_page = balloon_page_migrate,
.isolate_page = balloon_page_isolate,
.putback_page = balloon_page_putback,
};
-EXPORT_SYMBOL_GPL(balloon_aops);
+EXPORT_SYMBOL_GPL(balloon_mops);
#endif /* CONFIG_BALLOON_COMPACTION */
}
#ifdef CONFIG_COMPACTION
-
-int PageMovable(struct page *page)
+bool PageMovable(struct page *page)
{
- struct address_space *mapping;
+ const struct movable_operations *mops;
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (!__PageMovable(page))
- return 0;
+ return false;
- mapping = page_mapping(page);
- if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
- return 1;
+ mops = page_movable_ops(page);
+ if (mops)
+ return true;
- return 0;
+ return false;
}
EXPORT_SYMBOL(PageMovable);
-void __SetPageMovable(struct page *page, struct address_space *mapping)
+void __SetPageMovable(struct page *page, const struct movable_operations *mops)
{
VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
- page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
+ VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page);
+ page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__SetPageMovable);
{
VM_BUG_ON_PAGE(!PageMovable(page), page);
/*
- * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
- * flag so that VM can catch up released page by driver after isolation.
- * With it, VM migration doesn't try to put it back.
+ * This page still has the type of a movable page, but it's
+ * actually not movable any more.
*/
- page->mapping = (void *)((unsigned long)page->mapping &
- PAGE_MAPPING_MOVABLE);
+ page->mapping = (void *)PAGE_MAPPING_MOVABLE;
}
EXPORT_SYMBOL(__ClearPageMovable);
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
- struct address_space *mapping;
+ const struct movable_operations *mops;
/*
* Avoid burning cycles with pages that are yet under __free_pages(),
if (!PageMovable(page) || PageIsolated(page))
goto out_no_isolated;
- mapping = page_mapping(page);
- VM_BUG_ON_PAGE(!mapping, page);
+ mops = page_movable_ops(page);
+ VM_BUG_ON_PAGE(!mops, page);
- if (!mapping->a_ops->isolate_page(page, mode))
+ if (!mops->isolate_page(page, mode))
goto out_no_isolated;
/* Driver shouldn't use PG_isolated bit of page->flags */
static void putback_movable_page(struct page *page)
{
- struct address_space *mapping;
+ const struct movable_operations *mops = page_movable_ops(page);
- mapping = page_mapping(page);
- mapping->a_ops->putback_page(page);
+ mops->putback_page(page);
ClearPageIsolated(page);
}
static int move_to_new_folio(struct folio *dst, struct folio *src,
enum migrate_mode mode)
{
- struct address_space *mapping;
int rc = -EAGAIN;
bool is_lru = !__PageMovable(&src->page);
VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);
- mapping = folio_mapping(src);
-
if (likely(is_lru)) {
+ struct address_space *mapping = folio_mapping(src);
+
if (!mapping)
rc = migrate_page(mapping, &dst->page, &src->page, mode);
else if (mapping->a_ops->migratepage)
rc = fallback_migrate_page(mapping, &dst->page,
&src->page, mode);
} else {
+ const struct movable_operations *mops;
+
/*
* In case of non-lru page, it could be released after
* isolation step. In that case, we shouldn't try migration.
goto out;
}
- rc = mapping->a_ops->migratepage(mapping, &dst->page,
- &src->page, mode);
+ mops = page_movable_ops(&src->page);
+ rc = mops->migrate_page(&dst->page, &src->page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
!folio_test_isolated(src));
}
return swap_address_space(folio_swap_entry(folio));
mapping = folio->mapping;
- if ((unsigned long)mapping & PAGE_MAPPING_ANON)
+ if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
return NULL;
- return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
+ return mapping;
}
EXPORT_SYMBOL(folio_mapping);
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
-#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
-#include <linux/magic.h>
#include <linux/kmemleak.h>
/*
* @compact_wq: workqueue for page layout background optimization
* @release_wq: workqueue for safe page release
* @work: work_struct for safe page release
- * @inode: inode for z3fold pseudo filesystem
*
* This structure is allocated at pool creation time and maintains metadata
* pertaining to a particular z3fold pool.
struct workqueue_struct *compact_wq;
struct workqueue_struct *release_wq;
struct work_struct work;
- struct inode *inode;
};
/*
}
}
-static int z3fold_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type z3fold_fs = {
- .name = "z3fold",
- .init_fs_context = z3fold_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static struct vfsmount *z3fold_mnt;
-static int __init z3fold_mount(void)
-{
- int ret = 0;
-
- z3fold_mnt = kern_mount(&z3fold_fs);
- if (IS_ERR(z3fold_mnt))
- ret = PTR_ERR(z3fold_mnt);
-
- return ret;
-}
-
-static void z3fold_unmount(void)
-{
- kern_unmount(z3fold_mnt);
-}
-
-static const struct address_space_operations z3fold_aops;
-static int z3fold_register_migration(struct z3fold_pool *pool)
-{
- pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
- if (IS_ERR(pool->inode)) {
- pool->inode = NULL;
- return 1;
- }
-
- pool->inode->i_mapping->private_data = pool;
- pool->inode->i_mapping->a_ops = &z3fold_aops;
- return 0;
-}
-
-static void z3fold_unregister_migration(struct z3fold_pool *pool)
-{
- if (pool->inode)
- iput(pool->inode);
-}
-
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
struct z3fold_pool *pool, gfp_t gfp)
pool->release_wq = create_singlethread_workqueue(pool->name);
if (!pool->release_wq)
goto out_wq;
- if (z3fold_register_migration(pool))
- goto out_rwq;
INIT_WORK(&pool->work, free_pages_work);
pool->ops = ops;
return pool;
-out_rwq:
- destroy_workqueue(pool->release_wq);
out_wq:
destroy_workqueue(pool->compact_wq);
out_unbuddied:
destroy_workqueue(pool->compact_wq);
destroy_workqueue(pool->release_wq);
- z3fold_unregister_migration(pool);
free_percpu(pool->unbuddied);
kfree(pool);
}
+static const struct movable_operations z3fold_mops;
+
/**
* z3fold_alloc() - allocates a region of a given size
* @pool: z3fold pool from which to allocate
}
if (can_sleep) {
lock_page(page);
- __SetPageMovable(page, pool->inode->i_mapping);
+ __SetPageMovable(page, &z3fold_mops);
unlock_page(page);
} else {
WARN_ON(!trylock_page(page));
- __SetPageMovable(page, pool->inode->i_mapping);
+ __SetPageMovable(page, &z3fold_mops);
unlock_page(page);
}
z3fold_page_lock(zhdr);
return false;
}
-static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
- struct page *page, enum migrate_mode mode)
+static int z3fold_page_migrate(struct page *newpage, struct page *page,
+ enum migrate_mode mode)
{
struct z3fold_header *zhdr, *new_zhdr;
struct z3fold_pool *pool;
- struct address_space *new_mapping;
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
* so we only have to reinitialize it.
*/
INIT_LIST_HEAD(&new_zhdr->buddy);
- new_mapping = page_mapping(page);
__ClearPageMovable(page);
get_page(newpage);
spin_lock(&pool->lock);
list_add(&newpage->lru, &pool->lru);
spin_unlock(&pool->lock);
- __SetPageMovable(newpage, new_mapping);
+ __SetPageMovable(newpage, &z3fold_mops);
z3fold_page_unlock(new_zhdr);
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
z3fold_page_unlock(zhdr);
}
-static const struct address_space_operations z3fold_aops = {
+static const struct movable_operations z3fold_mops = {
.isolate_page = z3fold_page_isolate,
- .migratepage = z3fold_page_migrate,
+ .migrate_page = z3fold_page_migrate,
.putback_page = z3fold_page_putback,
};
static int __init init_z3fold(void)
{
- int ret;
-
/*
* Make sure the z3fold header is not larger than the page size and
* there has remaining spaces for its buddy.
*/
BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE);
- ret = z3fold_mount();
- if (ret)
- return ret;
-
zpool_register_driver(&z3fold_zpool_driver);
return 0;
static void __exit exit_z3fold(void)
{
- z3fold_unmount();
zpool_unregister_driver(&z3fold_zpool_driver);
}
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/magic.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
#include <linux/migrate.h>
#include <linux/wait.h>
#include <linux/pagemap.h>
static struct dentry *zs_stat_root;
#endif
-#ifdef CONFIG_COMPACTION
-static struct vfsmount *zsmalloc_mnt;
-#endif
-
/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
struct dentry *stat_dentry;
#endif
#ifdef CONFIG_COMPACTION
- struct inode *inode;
struct work_struct free_work;
#endif
/* protect page/zspage migration */
unsigned int freeobj;
struct page *first_page;
struct list_head list; /* fullness list */
+ struct zs_pool *pool;
#ifdef CONFIG_COMPACTION
rwlock_t lock;
#endif
}
#ifdef CONFIG_COMPACTION
-static int zs_register_migration(struct zs_pool *pool);
-static void zs_unregister_migration(struct zs_pool *pool);
static void migrate_lock_init(struct zspage *zspage);
static void migrate_read_lock(struct zspage *zspage);
static void migrate_read_unlock(struct zspage *zspage);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
#else
-static int zsmalloc_mount(void) { return 0; }
-static void zsmalloc_unmount(void) {}
-static int zs_register_migration(struct zs_pool *pool) { return 0; }
-static void zs_unregister_migration(struct zs_pool *pool) {}
static void migrate_lock_init(struct zspage *zspage) {}
static void migrate_read_lock(struct zspage *zspage) {}
static void migrate_read_unlock(struct zspage *zspage) {}
create_page_chain(class, zspage, pages);
init_zspage(class, zspage);
+ zspage->pool = pool;
return zspage;
}
migrate_read_unlock(zspage);
}
-static int zs_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type zsmalloc_fs = {
- .name = "zsmalloc",
- .init_fs_context = zs_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static int zsmalloc_mount(void)
-{
- int ret = 0;
-
- zsmalloc_mnt = kern_mount(&zsmalloc_fs);
- if (IS_ERR(zsmalloc_mnt))
- ret = PTR_ERR(zsmalloc_mnt);
-
- return ret;
-}
-
-static void zsmalloc_unmount(void)
-{
- kern_unmount(zsmalloc_mnt);
-}
-
static void migrate_lock_init(struct zspage *zspage)
{
rwlock_init(&zspage->lock);
zspage->isolated--;
}
+static const struct movable_operations zsmalloc_mops;
+
static void replace_sub_page(struct size_class *class, struct zspage *zspage,
struct page *newpage, struct page *oldpage)
{
set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
if (unlikely(ZsHugePage(zspage)))
newpage->index = oldpage->index;
- __SetPageMovable(newpage, page_mapping(oldpage));
+ __SetPageMovable(newpage, &zsmalloc_mops);
}
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
return true;
}
-static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
- struct page *page, enum migrate_mode mode)
+static int zs_page_migrate(struct page *newpage, struct page *page,
+ enum migrate_mode mode)
{
struct zs_pool *pool;
struct size_class *class;
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
- pool = mapping->private_data;
+ /* The page is locked, so this pointer must remain valid */
+ zspage = get_zspage(page);
+ pool = zspage->pool;
/*
* The pool migrate_lock protects the race between zpage migration
* and zs_free.
*/
write_lock(&pool->migrate_lock);
- zspage = get_zspage(page);
class = zspage_class(pool, zspage);
/*
migrate_write_unlock(zspage);
}
-static const struct address_space_operations zsmalloc_aops = {
+static const struct movable_operations zsmalloc_mops = {
.isolate_page = zs_page_isolate,
- .migratepage = zs_page_migrate,
+ .migrate_page = zs_page_migrate,
.putback_page = zs_page_putback,
};
-static int zs_register_migration(struct zs_pool *pool)
-{
- pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);
- if (IS_ERR(pool->inode)) {
- pool->inode = NULL;
- return 1;
- }
-
- pool->inode->i_mapping->private_data = pool;
- pool->inode->i_mapping->a_ops = &zsmalloc_aops;
- return 0;
-}
-
-static void zs_unregister_migration(struct zs_pool *pool)
-{
- flush_work(&pool->free_work);
- iput(pool->inode);
-}
-
/*
* Caller should hold page_lock of all pages in the zspage
* In here, we cannot use zspage meta data.
schedule_work(&pool->free_work);
}
+static void zs_flush_migration(struct zs_pool *pool)
+{
+ flush_work(&pool->free_work);
+}
+
static void init_deferred_free(struct zs_pool *pool)
{
INIT_WORK(&pool->free_work, async_free_zspage);
do {
WARN_ON(!trylock_page(page));
- __SetPageMovable(page, pool->inode->i_mapping);
+ __SetPageMovable(page, &zsmalloc_mops);
unlock_page(page);
} while ((page = get_next_page(page)) != NULL);
}
+#else
+static inline void zs_flush_migration(struct zs_pool *pool) { }
#endif
/*
/* debug only, don't abort if it fails */
zs_pool_stat_create(pool, name);
- if (zs_register_migration(pool))
- goto err;
-
/*
* Not critical since shrinker is only used to trigger internal
* defragmentation of the pool which is pretty optional thing. If
int i;
zs_unregister_shrinker(pool);
- zs_unregister_migration(pool);
+ zs_flush_migration(pool);
zs_pool_stat_destroy(pool);
for (i = 0; i < ZS_SIZE_CLASSES; i++) {
{
int ret;
- ret = zsmalloc_mount();
- if (ret)
- goto out;
-
ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
zs_cpu_prepare, zs_cpu_dead);
if (ret)
- goto hp_setup_fail;
+ goto out;
#ifdef CONFIG_ZPOOL
zpool_register_driver(&zs_zpool_driver);
return 0;
-hp_setup_fail:
- zsmalloc_unmount();
out:
return ret;
}
#ifdef CONFIG_ZPOOL
zpool_unregister_driver(&zs_zpool_driver);
#endif
- zsmalloc_unmount();
cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
zs_stat_exit();