userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
author     Mike Rapoport <rppt@linux.vnet.ibm.com>
           Wed, 22 Feb 2017 23:43:25 +0000 (15:43 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 23 Feb 2017 00:41:28 +0000 (16:41 -0800)
shmem_mcopy_atomic_pte is the low level routine that implements the
userfaultfd UFFDIO_COPY command.  It is based on the existing
mcopy_atomic_pte routine with modifications for shared memory pages.

Link: http://lkml.kernel.org/r/20161216144821.5183-29-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
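
For context (not part of the commit itself): UFFDIO_COPY is the userfaultfd ioctl that ends up in shmem_mcopy_atomic_pte() when the registered range is shmem-backed. Below is a minimal, hypothetical userspace sketch of that flow, assuming a kernel with this whole series applied; error handling is trimmed and the program is illustrative rather than taken from the patch.

/* Hypothetical example, not part of this patch: pre-populate one page of a
 * MAP_SHARED (shmem-backed), userfaultfd-registered area via UFFDIO_COPY. */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        long psz = sysconf(_SC_PAGESIZE);
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        if (uffd < 0)
                return 1;

        struct uffdio_api api = { .api = UFFD_API };
        if (ioctl(uffd, UFFDIO_API, &api))
                return 1;

        /* MAP_SHARED|MAP_ANONYMOUS memory is shmem-backed: the case this
         * patch (together with the rest of the series) enables. */
        char *area = mmap(NULL, psz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (area == MAP_FAILED)
                return 1;

        struct uffdio_register reg = {
                .range = { .start = (unsigned long)area, .len = psz },
                .mode = UFFDIO_REGISTER_MODE_MISSING,
        };
        if (ioctl(uffd, UFFDIO_REGISTER, &reg))
                return 1;

        /* Source buffer holding the contents to be installed. */
        char *src = aligned_alloc(psz, psz);
        memset(src, 0xaa, psz);

        /* Normally a handler thread would read() a uffd_msg for the faulting
         * address first; here the page is simply installed up front.  On the
         * kernel side a shmem page is allocated, src is copied into it and it
         * is mapped at .dst - the work done by shmem_mcopy_atomic_pte(). */
        struct uffdio_copy copy = {
                .dst = (unsigned long)area,
                .src = (unsigned long)src,
                .len = psz,
                .mode = 0,
        };
        if (ioctl(uffd, UFFDIO_COPY, &copy) || copy.copy != psz)
                return 1;

        return area[0] == (char)0xaa ? 0 : 1;
}
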
include/linux/shmem_fs.h
mm/shmem.c

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7043b6908ce5ca96c16eaf83e4fbfbd980..fdaac9d4d46d28a4106ef7295e1ced78bfda9afb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
 }
 #endif
 
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+                                 struct vm_area_struct *dst_vma,
+                                 unsigned long dst_addr,
+                                 unsigned long src_addr,
+                                 struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+                              src_addr, pagep)        ({ BUG(); 0; })
+#endif
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 7d52cd4b504ddf019c89061003ce7cfdf8a85ccb..14de2a9e5083e6f547fe3083d2ba1e349089e837 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -70,6 +70,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
+#include <linux/rmap.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -2178,6 +2179,118 @@ bool shmem_mapping(struct address_space *mapping)
        return mapping->a_ops == &shmem_aops;
 }
 
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+                          pmd_t *dst_pmd,
+                          struct vm_area_struct *dst_vma,
+                          unsigned long dst_addr,
+                          unsigned long src_addr,
+                          struct page **pagep)
+{
+       struct inode *inode = file_inode(dst_vma->vm_file);
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       struct address_space *mapping = inode->i_mapping;
+       gfp_t gfp = mapping_gfp_mask(mapping);
+       pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+       struct mem_cgroup *memcg;
+       spinlock_t *ptl;
+       void *page_kaddr;
+       struct page *page;
+       pte_t _dst_pte, *dst_pte;
+       int ret;
+
+       if (!*pagep) {
+               ret = -ENOMEM;
+               if (shmem_acct_block(info->flags, 1))
+                       goto out;
+               if (sbinfo->max_blocks) {
+                       if (percpu_counter_compare(&sbinfo->used_blocks,
+                                                  sbinfo->max_blocks) >= 0)
+                               goto out_unacct_blocks;
+                       percpu_counter_inc(&sbinfo->used_blocks);
+               }
+
+               page = shmem_alloc_page(gfp, info, pgoff);
+               if (!page)
+                       goto out_dec_used_blocks;
+
+               page_kaddr = kmap_atomic(page);
+               ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+                                    PAGE_SIZE);
+               kunmap_atomic(page_kaddr);
+
+               /* fallback to copy_from_user outside mmap_sem */
+               if (unlikely(ret)) {
+                       *pagep = page;
+                       /* don't free the page */
+                       return -EFAULT;
+               }
+       } else {
+               page = *pagep;
+               *pagep = NULL;
+       }
+
+       __SetPageLocked(page);
+       __SetPageSwapBacked(page);
+       __SetPageUptodate(page);
+
+       ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+       if (ret)
+               goto out_release;
+
+       ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+       if (!ret) {
+               ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+               radix_tree_preload_end();
+       }
+       if (ret)
+               goto out_release_uncharge;
+
+       mem_cgroup_commit_charge(page, memcg, false, false);
+
+       _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+       if (dst_vma->vm_flags & VM_WRITE)
+               _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+       ret = -EEXIST;
+       dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+       if (!pte_none(*dst_pte))
+               goto out_release_uncharge_unlock;
+
+       lru_cache_add_anon(page);
+
+       spin_lock(&info->lock);
+       info->alloced++;
+       inode->i_blocks += BLOCKS_PER_PAGE;
+       shmem_recalc_inode(inode);
+       spin_unlock(&info->lock);
+
+       inc_mm_counter(dst_mm, mm_counter_file(page));
+       page_add_file_rmap(page, false);
+       set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(dst_vma, dst_addr, dst_pte);
+       unlock_page(page);
+       pte_unmap_unlock(dst_pte, ptl);
+       ret = 0;
+out:
+       return ret;
+out_release_uncharge_unlock:
+       pte_unmap_unlock(dst_pte, ptl);
+out_release_uncharge:
+       mem_cgroup_cancel_charge(page, memcg, false);
+out_release:
+       unlock_page(page);
+       put_page(page);
+out_dec_used_blocks:
+       if (sbinfo->max_blocks)
+               percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+       shmem_unacct_blocks(info->flags, 1);
+       goto out;
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
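
The caller side is wired up by a later patch in this series, in mm/userfaultfd.c. As a paraphrased sketch (not the literal upstream hunk), the per-PTE dispatch there is expected to look roughly like this: anonymous VMAs keep the existing helpers, while shared shmem-backed VMAs take the new routine.

/* Paraphrased sketch of the dispatch added later in this series in
 * mm/userfaultfd.c; names and exact structure may differ upstream. */
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                pmd_t *dst_pmd,
                                                struct vm_area_struct *dst_vma,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                struct page **page,
                                                bool zeropage)
{
        ssize_t err;

        if (vma_is_anonymous(dst_vma)) {
                /* private anonymous memory: pre-existing UFFDIO_COPY path */
                if (!zeropage)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                               dst_addr, src_addr, page);
                else
                        err = mfill_zeropage_pte(dst_mm, dst_pmd,
                                                 dst_vma, dst_addr);
        } else {
                /* shmem: allocate the page, copy src into it and map it,
                 * all under the caller's mmap_sem */
                err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                             dst_addr, src_addr, page);
        }

        return err;
}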