From: Linus Torvalds Date: Fri, 12 Jun 2020 19:19:13 +0000 (-0700) Subject: Merge tag 'iommu-drivers-move-v5.8' of git://git.kernel.org/pub/scm/linux/kernel... X-Git-Tag: baikal/mips/sdk5.9~13391 X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=0a699d3adb5a428f669da93f4752019134548672;p=kernel.git Merge tag 'iommu-drivers-move-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull iommu driver directory structure cleanup from Joerg Roedel: "Move the Intel and AMD IOMMU drivers into their own subdirectory. Both drivers consist of several files by now and giving them their own directory unclutters the IOMMU top-level directory a bit" * tag 'iommu-drivers-move-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: iommu/vt-d: Move Intel IOMMU driver into subdirectory iommu/amd: Move AMD IOMMU driver into subdirectory --- 0a699d3adb5a428f669da93f4752019134548672 diff --cc drivers/iommu/amd/iommu_v2.c index 0000000000000,c8a7b6b392221..e4b025c5637c4 mode 000000,100644..100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@@ -1,0 -1,981 +1,981 @@@ + // SPDX-License-Identifier: GPL-2.0-only + /* + * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + */ + + #define pr_fmt(fmt) "AMD-Vi: " fmt + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include "amd_iommu.h" + + MODULE_LICENSE("GPL v2"); + MODULE_AUTHOR("Joerg Roedel "); + + #define MAX_DEVICES 0x10000 + #define PRI_QUEUE_SIZE 512 + + struct pri_queue { + atomic_t inflight; + bool finish; + int status; + }; + + struct pasid_state { + struct list_head list; /* For global state-list */ + atomic_t count; /* Reference count */ + unsigned mmu_notifier_count; /* Counting nested mmu_notifier + calls */ + struct mm_struct *mm; /* mm_struct for the faults */ + struct mmu_notifier mn; /* mmu_notifier handle */ + struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ + struct device_state *device_state; /* Link to our device_state */ + int pasid; /* PASID index */ + bool invalid; /* Used during setup and + teardown of the pasid */ + spinlock_t lock; /* Protect pri_queues and + mmu_notifer_count */ + wait_queue_head_t wq; /* To wait for count == 0 */ + }; + + struct device_state { + struct list_head list; + u16 devid; + atomic_t count; + struct pci_dev *pdev; + struct pasid_state **states; + struct iommu_domain *domain; + int pasid_levels; + int max_pasids; + amd_iommu_invalid_ppr_cb inv_ppr_cb; + amd_iommu_invalidate_ctx inv_ctx_cb; + spinlock_t lock; + wait_queue_head_t wq; + }; + + struct fault { + struct work_struct work; + struct device_state *dev_state; + struct pasid_state *state; + struct mm_struct *mm; + u64 address; + u16 devid; + u16 pasid; + u16 tag; + u16 finish; + u16 flags; + }; + + static LIST_HEAD(state_list); + static spinlock_t state_lock; + + static struct workqueue_struct *iommu_wq; + + static void free_pasid_states(struct device_state *dev_state); + + static u16 device_id(struct pci_dev *pdev) + { + u16 devid; + + devid = pdev->bus->number; + devid = (devid << 8) | pdev->devfn; + + return devid; + } + + static struct device_state *__get_device_state(u16 devid) + { + struct device_state *dev_state; + + list_for_each_entry(dev_state, &state_list, list) { + if (dev_state->devid == devid) + return dev_state; + } + + return NULL; + } + + static struct device_state *get_device_state(u16 devid) + { + struct device_state *dev_state; + unsigned long flags; + + 
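/*
 * Look up the device state for this devid under state_lock and take a
 * reference on it, so the caller can keep using it after the lock is
 * dropped; every successful lookup is balanced by put_device_state().
 */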
spin_lock_irqsave(&state_lock, flags); + dev_state = __get_device_state(devid); + if (dev_state != NULL) + atomic_inc(&dev_state->count); + spin_unlock_irqrestore(&state_lock, flags); + + return dev_state; + } + + static void free_device_state(struct device_state *dev_state) + { + struct iommu_group *group; + + /* + * First detach device from domain - No more PRI requests will arrive + * from that device after it is unbound from the IOMMUv2 domain. + */ + group = iommu_group_get(&dev_state->pdev->dev); + if (WARN_ON(!group)) + return; + + iommu_detach_group(dev_state->domain, group); + + iommu_group_put(group); + + /* Everything is down now, free the IOMMUv2 domain */ + iommu_domain_free(dev_state->domain); + + /* Finally get rid of the device-state */ + kfree(dev_state); + } + + static void put_device_state(struct device_state *dev_state) + { + if (atomic_dec_and_test(&dev_state->count)) + wake_up(&dev_state->wq); + } + + /* Must be called under dev_state->lock */ + static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, + int pasid, bool alloc) + { + struct pasid_state **root, **ptr; + int level, index; + + level = dev_state->pasid_levels; + root = dev_state->states; + + while (true) { + + index = (pasid >> (9 * level)) & 0x1ff; + ptr = &root[index]; + + if (level == 0) + break; + + if (*ptr == NULL) { + if (!alloc) + return NULL; + + *ptr = (void *)get_zeroed_page(GFP_ATOMIC); + if (*ptr == NULL) + return NULL; + } + + root = (struct pasid_state **)*ptr; + level -= 1; + } + + return ptr; + } + + static int set_pasid_state(struct device_state *dev_state, + struct pasid_state *pasid_state, + int pasid) + { + struct pasid_state **ptr; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + ret = -ENOMEM; + if (ptr == NULL) + goto out_unlock; + + ret = -ENOMEM; + if (*ptr != NULL) + goto out_unlock; + + *ptr = pasid_state; + + ret = 0; + + out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; + } + + static void clear_pasid_state(struct device_state *dev_state, int pasid) + { + struct pasid_state **ptr; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, true); + + if (ptr == NULL) + goto out_unlock; + + *ptr = NULL; + + out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + } + + static struct pasid_state *get_pasid_state(struct device_state *dev_state, + int pasid) + { + struct pasid_state **ptr, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_state->lock, flags); + ptr = __get_pasid_state_ptr(dev_state, pasid, false); + + if (ptr == NULL) + goto out_unlock; + + ret = *ptr; + if (ret) + atomic_inc(&ret->count); + + out_unlock: + spin_unlock_irqrestore(&dev_state->lock, flags); + + return ret; + } + + static void free_pasid_state(struct pasid_state *pasid_state) + { + kfree(pasid_state); + } + + static void put_pasid_state(struct pasid_state *pasid_state) + { + if (atomic_dec_and_test(&pasid_state->count)) + wake_up(&pasid_state->wq); + } + + static void put_pasid_state_wait(struct pasid_state *pasid_state) + { + atomic_dec(&pasid_state->count); + wait_event(pasid_state->wq, !atomic_read(&pasid_state->count)); + free_pasid_state(pasid_state); + } + + static void unbind_pasid(struct pasid_state *pasid_state) + { + struct iommu_domain *domain; + + domain = pasid_state->device_state->domain; + + /* + * Mark pasid_state as invalid, no more faults will we added to the + 
* work queue after this is visible everywhere. + */ + pasid_state->invalid = true; + + /* Make sure this is visible */ + smp_wmb(); + + /* After this the device/pasid can't access the mm anymore */ + amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); + + /* Make sure no more pending faults are in the queue */ + flush_workqueue(iommu_wq); + } + + static void free_pasid_states_level1(struct pasid_state **tbl) + { + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + free_page((unsigned long)tbl[i]); + } + } + + static void free_pasid_states_level2(struct pasid_state **tbl) + { + struct pasid_state **ptr; + int i; + + for (i = 0; i < 512; ++i) { + if (tbl[i] == NULL) + continue; + + ptr = (struct pasid_state **)tbl[i]; + free_pasid_states_level1(ptr); + } + } + + static void free_pasid_states(struct device_state *dev_state) + { + struct pasid_state *pasid_state; + int i; + + for (i = 0; i < dev_state->max_pasids; ++i) { + pasid_state = get_pasid_state(dev_state, i); + if (pasid_state == NULL) + continue; + + put_pasid_state(pasid_state); + + /* + * This will call the mn_release function and + * unbind the PASID + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); + } + + if (dev_state->pasid_levels == 2) + free_pasid_states_level2(dev_state->states); + else if (dev_state->pasid_levels == 1) + free_pasid_states_level1(dev_state->states); + else + BUG_ON(dev_state->pasid_levels != 0); + + free_page((unsigned long)dev_state->states); + } + + static struct pasid_state *mn_to_state(struct mmu_notifier *mn) + { + return container_of(mn, struct pasid_state, mn); + } + + static void mn_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) + { + struct pasid_state *pasid_state; + struct device_state *dev_state; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + if ((start ^ (end - 1)) < PAGE_SIZE) + amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, + start); + else + amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); + } + + static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) + { + struct pasid_state *pasid_state; + struct device_state *dev_state; + bool run_inv_ctx_cb; + + might_sleep(); + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + run_inv_ctx_cb = !pasid_state->invalid; + + if (run_inv_ctx_cb && dev_state->inv_ctx_cb) + dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); + + unbind_pasid(pasid_state); + } + + static const struct mmu_notifier_ops iommu_mn = { + .release = mn_release, + .invalidate_range = mn_invalidate_range, + }; + + static void set_pri_tag_status(struct pasid_state *pasid_state, + u16 tag, int status) + { + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + pasid_state->pri[tag].status = status; + spin_unlock_irqrestore(&pasid_state->lock, flags); + } + + static void finish_pri_tag(struct device_state *dev_state, + struct pasid_state *pasid_state, + u16 tag) + { + unsigned long flags; + + spin_lock_irqsave(&pasid_state->lock, flags); + if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && + pasid_state->pri[tag].finish) { + amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, + pasid_state->pri[tag].status, tag); + pasid_state->pri[tag].finish = false; + 
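/*
 * The completion for this tag was sent to the device just above via
 * amd_iommu_complete_ppr(); reset the slot so the tag can be reused
 * for the next incoming PPR.
 */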
pasid_state->pri[tag].status = PPR_SUCCESS; + } + spin_unlock_irqrestore(&pasid_state->lock, flags); + } + + static void handle_fault_error(struct fault *fault) + { + int status; + + if (!fault->dev_state->inv_ppr_cb) { + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + return; + } + + status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, + fault->pasid, + fault->address, + fault->flags); + switch (status) { + case AMD_IOMMU_INV_PRI_RSP_SUCCESS: + set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); + break; + case AMD_IOMMU_INV_PRI_RSP_INVALID: + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); + break; + case AMD_IOMMU_INV_PRI_RSP_FAIL: + set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); + break; + default: + BUG(); + } + } + + static bool access_error(struct vm_area_struct *vma, struct fault *fault) + { + unsigned long requested = 0; + + if (fault->flags & PPR_FAULT_EXEC) + requested |= VM_EXEC; + + if (fault->flags & PPR_FAULT_READ) + requested |= VM_READ; + + if (fault->flags & PPR_FAULT_WRITE) + requested |= VM_WRITE; + + return (requested & ~vma->vm_flags) != 0; + } + + static void do_fault(struct work_struct *work) + { + struct fault *fault = container_of(work, struct fault, work); + struct vm_area_struct *vma; + vm_fault_t ret = VM_FAULT_ERROR; + unsigned int flags = 0; + struct mm_struct *mm; + u64 address; + + mm = fault->state->mm; + address = fault->address; + + if (fault->flags & PPR_FAULT_USER) + flags |= FAULT_FLAG_USER; + if (fault->flags & PPR_FAULT_WRITE) + flags |= FAULT_FLAG_WRITE; + flags |= FAULT_FLAG_REMOTE; + - down_read(&mm->mmap_sem); ++ mmap_read_lock(mm); + vma = find_extend_vma(mm, address); + if (!vma || address < vma->vm_start) + /* failed to get a vma in the right range */ + goto out; + + /* Check if we have the right permissions on the vma */ + if (access_error(vma, fault)) + goto out; + + ret = handle_mm_fault(vma, address, flags); + out: - up_read(&mm->mmap_sem); ++ mmap_read_unlock(mm); + + if (ret & VM_FAULT_ERROR) + /* failed to service fault */ + handle_fault_error(fault); + + finish_pri_tag(fault->dev_state, fault->state, fault->tag); + + put_pasid_state(fault->state); + + kfree(fault); + } + + static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) + { + struct amd_iommu_fault *iommu_fault; + struct pasid_state *pasid_state; + struct device_state *dev_state; + struct pci_dev *pdev = NULL; + unsigned long flags; + struct fault *fault; + bool finish; + u16 tag, devid; + int ret; + + iommu_fault = data; + tag = iommu_fault->tag & 0x1ff; + finish = (iommu_fault->tag >> 9) & 1; + + devid = iommu_fault->device_id; + pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + devid & 0xff); + if (!pdev) + return -ENODEV; + + ret = NOTIFY_DONE; + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ + if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) { + amd_iommu_complete_ppr(pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out; + } + + dev_state = get_device_state(iommu_fault->device_id); + if (dev_state == NULL) + goto out; + + pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); + if (pasid_state == NULL || pasid_state->invalid) { + /* We know the device but not the PASID -> send INVALID */ + amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out_drop_state; + } + + spin_lock_irqsave(&pasid_state->lock, flags); + atomic_inc(&pasid_state->pri[tag].inflight); + if (finish) + pasid_state->pri[tag].finish = true; + 
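/*
 * The inflight count taken above is dropped again in finish_pri_tag();
 * once it reaches zero with 'finish' set, the PPR completion is sent
 * back to the device.
 */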
spin_unlock_irqrestore(&pasid_state->lock, flags); + + fault = kzalloc(sizeof(*fault), GFP_ATOMIC); + if (fault == NULL) { + /* We are OOM - send success and let the device re-fault */ + finish_pri_tag(dev_state, pasid_state, tag); + goto out_drop_state; + } + + fault->dev_state = dev_state; + fault->address = iommu_fault->address; + fault->state = pasid_state; + fault->tag = tag; + fault->finish = finish; + fault->pasid = iommu_fault->pasid; + fault->flags = iommu_fault->flags; + INIT_WORK(&fault->work, do_fault); + + queue_work(iommu_wq, &fault->work); + + ret = NOTIFY_OK; + + out_drop_state: + + if (ret != NOTIFY_OK && pasid_state) + put_pasid_state(pasid_state); + + put_device_state(dev_state); + + out: + return ret; + } + + static struct notifier_block ppr_nb = { + .notifier_call = ppr_notifier, + }; + + int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, + struct task_struct *task) + { + struct pasid_state *pasid_state; + struct device_state *dev_state; + struct mm_struct *mm; + u16 devid; + int ret; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + + if (dev_state == NULL) + return -EINVAL; + + ret = -EINVAL; + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + ret = -ENOMEM; + pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); + if (pasid_state == NULL) + goto out; + + + atomic_set(&pasid_state->count, 1); + init_waitqueue_head(&pasid_state->wq); + spin_lock_init(&pasid_state->lock); + + mm = get_task_mm(task); + pasid_state->mm = mm; + pasid_state->device_state = dev_state; + pasid_state->pasid = pasid; + pasid_state->invalid = true; /* Mark as valid only if we are + done with setting up the pasid */ + pasid_state->mn.ops = &iommu_mn; + + if (pasid_state->mm == NULL) + goto out_free; + + mmu_notifier_register(&pasid_state->mn, mm); + + ret = set_pasid_state(dev_state, pasid_state, pasid); + if (ret) + goto out_unregister; + + ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, + __pa(pasid_state->mm->pgd)); + if (ret) + goto out_clear_state; + + /* Now we are ready to handle faults */ + pasid_state->invalid = false; + + /* + * Drop the reference to the mm_struct here. We rely on the + * mmu_notifier release call-back to inform us when the mm + * is going away. + */ + mmput(mm); + + return 0; + + out_clear_state: + clear_pasid_state(dev_state, pasid); + + out_unregister: + mmu_notifier_unregister(&pasid_state->mn, mm); + mmput(mm); + + out_free: + free_pasid_state(pasid_state); + + out: + put_device_state(dev_state); + + return ret; + } + EXPORT_SYMBOL(amd_iommu_bind_pasid); + + void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) + { + struct pasid_state *pasid_state; + struct device_state *dev_state; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + dev_state = get_device_state(devid); + if (dev_state == NULL) + return; + + if (pasid < 0 || pasid >= dev_state->max_pasids) + goto out; + + pasid_state = get_pasid_state(dev_state, pasid); + if (pasid_state == NULL) + goto out; + /* + * Drop reference taken here. We are safe because we still hold + * the reference taken in the amd_iommu_bind_pasid function. 
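 * That bind-time reference is dropped further below by
 * put_pasid_state_wait().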
+ */ + put_pasid_state(pasid_state); + + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + + /* + * Call mmu_notifier_unregister to drop our reference + * to pasid_state->mm + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ + out: + /* Drop reference taken in this function */ + put_device_state(dev_state); + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); + } + EXPORT_SYMBOL(amd_iommu_unbind_pasid); + + int amd_iommu_init_device(struct pci_dev *pdev, int pasids) + { + struct device_state *dev_state; + struct iommu_group *group; + unsigned long flags; + int ret, tmp; + u16 devid; + + might_sleep(); + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + if (pasids <= 0 || pasids > (PASID_MASK + 1)) + return -EINVAL; + + devid = device_id(pdev); + + dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); + if (dev_state == NULL) + return -ENOMEM; + + spin_lock_init(&dev_state->lock); + init_waitqueue_head(&dev_state->wq); + dev_state->pdev = pdev; + dev_state->devid = devid; + + tmp = pasids; + for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) + dev_state->pasid_levels += 1; + + atomic_set(&dev_state->count, 1); + dev_state->max_pasids = pasids; + + ret = -ENOMEM; + dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); + if (dev_state->states == NULL) + goto out_free_dev_state; + + dev_state->domain = iommu_domain_alloc(&pci_bus_type); + if (dev_state->domain == NULL) + goto out_free_states; + + amd_iommu_domain_direct_map(dev_state->domain); + + ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); + if (ret) + goto out_free_domain; + + group = iommu_group_get(&pdev->dev); + if (!group) { + ret = -EINVAL; + goto out_free_domain; + } + + ret = iommu_attach_group(dev_state->domain, group); + if (ret != 0) + goto out_drop_group; + + iommu_group_put(group); + + spin_lock_irqsave(&state_lock, flags); + + if (__get_device_state(devid) != NULL) { + spin_unlock_irqrestore(&state_lock, flags); + ret = -EBUSY; + goto out_free_domain; + } + + list_add_tail(&dev_state->list, &state_list); + + spin_unlock_irqrestore(&state_lock, flags); + + return 0; + + out_drop_group: + iommu_group_put(group); + + out_free_domain: + iommu_domain_free(dev_state->domain); + + out_free_states: + free_page((unsigned long)dev_state->states); + + out_free_dev_state: + kfree(dev_state); + + return ret; + } + EXPORT_SYMBOL(amd_iommu_init_device); + + void amd_iommu_free_device(struct pci_dev *pdev) + { + struct device_state *dev_state; + unsigned long flags; + u16 devid; + + if (!amd_iommu_v2_supported()) + return; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + dev_state = __get_device_state(devid); + if (dev_state == NULL) { + spin_unlock_irqrestore(&state_lock, flags); + return; + } + + list_del(&dev_state->list); + + spin_unlock_irqrestore(&state_lock, flags); + + /* Get rid of any remaining pasid states */ + free_pasid_states(dev_state); + + put_device_state(dev_state); + /* + * Wait until the last reference is dropped before freeing + * the device state. 
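 * The count reaches zero once every outstanding get_device_state()
 * user has done its matching put_device_state().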
+ */ + wait_event(dev_state->wq, !atomic_read(&dev_state->count)); + free_device_state(dev_state); + } + EXPORT_SYMBOL(amd_iommu_free_device); + + int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, + amd_iommu_invalid_ppr_cb cb) + { + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = __get_device_state(devid); + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ppr_cb = cb; + + ret = 0; + + out_unlock: + spin_unlock_irqrestore(&state_lock, flags); + + return ret; + } + EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); + + int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, + amd_iommu_invalidate_ctx cb) + { + struct device_state *dev_state; + unsigned long flags; + u16 devid; + int ret; + + if (!amd_iommu_v2_supported()) + return -ENODEV; + + devid = device_id(pdev); + + spin_lock_irqsave(&state_lock, flags); + + ret = -EINVAL; + dev_state = __get_device_state(devid); + if (dev_state == NULL) + goto out_unlock; + + dev_state->inv_ctx_cb = cb; + + ret = 0; + + out_unlock: + spin_unlock_irqrestore(&state_lock, flags); + + return ret; + } + EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); + + static int __init amd_iommu_v2_init(void) + { + int ret; + + pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); + + if (!amd_iommu_v2_supported()) { + pr_info("AMD IOMMUv2 functionality not available on this system\n"); + /* + * Load anyway to provide the symbols to other modules + * which may use AMD IOMMUv2 optionally. + */ + return 0; + } + + spin_lock_init(&state_lock); + + ret = -ENOMEM; + iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); + if (iommu_wq == NULL) + goto out; + + amd_iommu_register_ppr_notifier(&ppr_nb); + + return 0; + + out: + return ret; + } + + static void __exit amd_iommu_v2_exit(void) + { + struct device_state *dev_state; + int i; + + if (!amd_iommu_v2_supported()) + return; + + amd_iommu_unregister_ppr_notifier(&ppr_nb); + + flush_workqueue(iommu_wq); + + /* + * The loop below might call flush_workqueue(), so call + * destroy_workqueue() after it + */ + for (i = 0; i < MAX_DEVICES; ++i) { + dev_state = get_device_state(i); + + if (dev_state == NULL) + continue; + + WARN_ON_ONCE(1); + + put_device_state(dev_state); + amd_iommu_free_device(dev_state->pdev); + } + + destroy_workqueue(iommu_wq); + } + + module_init(amd_iommu_v2_init); + module_exit(amd_iommu_v2_exit); diff --cc drivers/iommu/intel/svm.c index 0000000000000,a035ef911fba7..6c87c807a0abb mode 000000,100644..100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@@ -1,0 -1,1002 +1,1002 @@@ + // SPDX-License-Identifier: GPL-2.0-only + /* + * Copyright © 2015 Intel Corporation. 
+ * + * Authors: David Woodhouse + */ + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include "intel-pasid.h" + + static irqreturn_t prq_event_thread(int irq, void *d); + static void intel_svm_drain_prq(struct device *dev, int pasid); + + #define PRQ_ORDER 0 + + int intel_svm_enable_prq(struct intel_iommu *iommu) + { + struct page *pages; + int irq, ret; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + iommu->prq = page_address(pages); + + irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret = -EINVAL; + err: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + return ret; + } + iommu->pr_irq = irq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); + + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + dmar_free_hwirq(irq); + iommu->pr_irq = 0; + goto err; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + + init_completion(&iommu->prq_complete); + + return 0; + } + + int intel_svm_finish_prq(struct intel_iommu *iommu) + { + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } + + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return 0; + } + + static inline bool intel_svm_capable(struct intel_iommu *iommu) + { + return iommu->flags & VTD_FLAG_SVM_CAPABLE; + } + + void intel_svm_check(struct intel_iommu *iommu) + { + if (!pasid_supported(iommu)) + return; + + if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && + !cap_fl1gp_support(iommu->cap)) { + pr_err("%s SVM disabled, incompatible 1GB page capability\n", + iommu->name); + return; + } + + if (cpu_feature_enabled(X86_FEATURE_LA57) && + !cap_5lp_support(iommu->cap)) { + pr_err("%s SVM disabled, incompatible paging mode\n", + iommu->name); + return; + } + + iommu->flags |= VTD_FLAG_SVM_CAPABLE; + } + + static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, + unsigned long address, unsigned long pages, int ih) + { + struct qi_desc desc; + + if (pages == -1) { + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(pages)); + + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = QI_EIOTLB_ADDR(address) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); + } + desc.qw2 = 0; + desc.qw3 = 0; + qi_submit_sync(svm->iommu, &desc, 1, 0); + + if (sdev->dev_iotlb) { + desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | + QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | + QI_DEIOTLB_TYPE; + if (pages == -1) { + desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | 
+ QI_DEV_EIOTLB_SIZE; + } else if (pages > 1) { + /* The least significant zero bit indicates the size. So, + * for example, an "address" value of 0x12345f000 will + * flush from 0x123440000 to 0x12347ffff (256KiB). */ + unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); + unsigned long mask = __rounddown_pow_of_two(address ^ last); + + desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | + (mask - 1)) | QI_DEV_EIOTLB_SIZE; + } else { + desc.qw1 = QI_DEV_EIOTLB_ADDR(address); + } + desc.qw2 = 0; + desc.qw3 = 0; + qi_submit_sync(svm->iommu, &desc, 1, 0); + } + } + + static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + unsigned long pages, int ih) + { + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_flush_svm_range_dev(svm, sdev, address, pages, ih); + rcu_read_unlock(); + } + + /* Pages have been freed at this point */ + static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) + { + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); + } + + static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) + { + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + struct intel_svm_dev *sdev; + + /* This might end up being called from exit_mmap(), *before* the page + * tables are cleared. And __mmu_notifier_release() will delete us from + * the list of notifiers so that our invalidate_range() callback doesn't + * get called when the page tables are cleared. So we need to protect + * against hardware accessing those page tables. + * + * We do it by clearing the entry in the PASID table and then flushing + * the IOTLB and the PASID table caches. This might upset hardware; + * perhaps we'll want to point the PASID to a dummy PGD (like the zero + * page) so that we end up taking a fault that the hardware really + * *has* to handle gracefully without affecting other processes. + */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + svm->pasid, true); + rcu_read_unlock(); + + } + + static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .invalidate_range = intel_invalidate_range, + }; + + static DEFINE_MUTEX(pasid_mutex); + static LIST_HEAD(global_svm_list); + + #define for_each_svm_dev(sdev, svm, d) \ + list_for_each_entry((sdev), &(svm)->devs, list) \ + if ((d) != (sdev)->dev) {} else + + int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, + struct iommu_gpasid_bind_data *data) + { + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct dmar_domain *dmar_domain; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; + + if (WARN_ON(!iommu) || !data) + return -EINVAL; + + if (data->version != IOMMU_GPASID_BIND_VERSION_1 || + data->format != IOMMU_PASID_FORMAT_INTEL_VTD) + return -EINVAL; + + if (!dev_is_pci(dev)) + return -ENOTSUPP; + + /* VT-d supports devices with full 20 bit PASIDs only */ + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) + return -EINVAL; + + /* + * We only check host PASID range, we have no knowledge to check + * guest PASID range. 
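 * The guest PASID (data->gpasid) is therefore stored without further
 * validation when IOMMU_SVA_GPASID_VAL is set.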
+ */ + if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) + return -EINVAL; + + dmar_domain = to_dmar_domain(domain); + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, data->hpasid, NULL); + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + if (svm) { + /* + * If we found svm for the PASID, there must be at + * least one device bond, otherwise svm should be freed. + */ + if (WARN_ON(list_empty(&svm->devs))) { + ret = -EINVAL; + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + /* + * For devices with aux domains, we should allow + * multiple bind calls with the same PASID and pdev. + */ + if (iommu_dev_feature_enabled(dev, + IOMMU_DEV_FEAT_AUX)) { + sdev->users++; + } else { + dev_warn_ratelimited(dev, + "Already bound with PASID %u\n", + svm->pasid); + ret = -EBUSY; + } + goto out; + } + } else { + /* We come here when PASID has never been bond to a device. */ + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + goto out; + } + /* REVISIT: upper layer/VFIO can track host process that bind + * the PASID. ioasid_set = mm might be sufficient for vfio to + * check pasid VMM ownership. We can drop the following line + * once VFIO and IOASID set check is in place. + */ + svm->mm = get_task_mm(current); + svm->pasid = data->hpasid; + if (data->flags & IOMMU_SVA_GPASID_VAL) { + svm->gpasid = data->gpasid; + svm->flags |= SVM_FLAG_GUEST_PASID; + } + ioasid_set_data(data->hpasid, svm); + INIT_LIST_HEAD_RCU(&svm->devs); + mmput(svm->mm); + } + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + /* Only count users if device has aux domains */ + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users = 1; + + /* Set up device context entry for PASID if not enabled already */ + ret = intel_iommu_enable_pasid(iommu, sdev->dev); + if (ret) { + dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); + kfree(sdev); + goto out; + } + + /* + * PASID table is per device for better security. Therefore, for + * each bind of a new device even with an existing PASID, we need to + * call the nested mode setup function here. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_nested(iommu, dev, + (pgd_t *)(uintptr_t)data->gpgd, + data->hpasid, &data->vtd, dmar_domain, + data->addr_width); + spin_unlock(&iommu->lock); + if (ret) { + dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", + data->hpasid, ret); + /* + * PASID entry should be in cleared state if nested mode + * set up failed. So we only need to clear IOASID tracking + * data such that free call will succeed. 
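 * The clearing happens at the out: label below, where an svm with no
 * bound devices has its ioasid data reset and is freed.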
+ */ + kfree(sdev); + goto out; + } + + svm->flags |= SVM_FLAG_GUEST_MODE; + + init_rcu_head(&sdev->rcu); + list_add_rcu(&sdev->list, &svm->devs); + out: + if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { + ioasid_set_data(data->hpasid, NULL); + kfree(svm); + } + + mutex_unlock(&pasid_mutex); + return ret; + } + + int intel_svm_unbind_gpasid(struct device *dev, int pasid) + { + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = -EINVAL; + + if (WARN_ON(!iommu)) + return -EINVAL; + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, pasid, NULL); + if (!svm) { + ret = -EINVAL; + goto out; + } + + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + /* + * We do not free the IOASID here in that + * IOMMU driver did not allocate it. + * Unlike native SVM, IOASID for guest use was + * allocated prior to the bind call. + * In any case, if the free call comes before + * the unbind, IOMMU driver will get notified + * and perform cleanup. + */ + ioasid_set_data(pasid, NULL); + kfree(svm); + } + } + break; + } + out: + mutex_unlock(&pasid_mutex); + return ret; + } + + /* Caller must hold pasid_mutex, mm reference */ + static int + intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops, + struct mm_struct *mm, struct intel_svm_dev **sd) + { + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct device_domain_info *info; + struct intel_svm_dev *sdev; + struct intel_svm *svm = NULL; + int pasid_max; + int ret; + + if (!iommu || dmar_disabled) + return -EINVAL; + + if (!intel_svm_capable(iommu)) + return -ENOTSUPP; + + if (dev_is_pci(dev)) { + pasid_max = pci_max_pasids(to_pci_dev(dev)); + if (pasid_max < 0) + return -EINVAL; + } else + pasid_max = 1 << 20; + + /* Bind supervisor PASID shuld have mm = NULL */ + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap) || mm) { + pr_err("Supervisor PASID with user provided mm.\n"); + return -EINVAL; + } + } + + if (!(flags & SVM_FLAG_PRIVATE_PASID)) { + struct intel_svm *t; + + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) + continue; + + svm = t; + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. 
Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } + + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; + } + sdev->users++; + goto success; + } + + break; + } + } + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + ret = intel_iommu_enable_pasid(iommu, dev); + if (ret) { + kfree(sdev); + goto out; + } + + info = get_domain_info(dev); + sdev->did = FLPT_DEFAULT_DID; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + + /* Finish the setup now we know we're keeping it */ + sdev->users = 1; + sdev->ops = ops; + init_rcu_head(&sdev->rcu); + + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + kfree(sdev); + goto out; + } + svm->iommu = iommu; + + if (pasid_max > intel_pasid_max_id) + pasid_max = intel_pasid_max_id; + + /* Do not use PASID 0, reserved for RID to PASID */ + svm->pasid = ioasid_alloc(NULL, PASID_MIN, + pasid_max - 1, svm); + if (svm->pasid == INVALID_IOASID) { + kfree(svm); + kfree(sdev); + ret = -ENOSPC; + goto out; + } + svm->notifier.ops = &intel_mmuops; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); + INIT_LIST_HEAD(&svm->list); + ret = -ENOMEM; + if (mm) { + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + ioasid_free(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + } + + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | + (cpu_feature_enabled(X86_FEATURE_LA57) ? + PASID_FLAG_FL5LP : 0)); + spin_unlock(&iommu->lock); + if (ret) { + if (mm) + mmu_notifier_unregister(&svm->notifier, mm); + ioasid_free(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + + list_add_tail(&svm->list, &global_svm_list); + } else { + /* + * Binding a new device with existing PASID, need to setup + * the PASID entry. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | + (cpu_feature_enabled(X86_FEATURE_LA57) ? + PASID_FLAG_FL5LP : 0)); + spin_unlock(&iommu->lock); + if (ret) { + kfree(sdev); + goto out; + } + } + list_add_rcu(&sdev->list, &svm->devs); + success: + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + if (sd) + *sd = sdev; + ret = 0; + out: + return ret; + } + + /* Caller must hold pasid_mutex */ + static int intel_svm_unbind_mm(struct device *dev, int pasid) + { + struct intel_svm_dev *sdev; + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + iommu = intel_svm_device_to_iommu(dev); + if (!iommu) + goto out; + + svm = ioasid_find(NULL, pasid, NULL); + if (!svm) + goto out; + + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. 
So it's + * hard to be as defensive as we might like. */ + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + ioasid_free(svm->pasid); + if (svm->mm) + mmu_notifier_unregister(&svm->notifier, svm->mm); + list_del(&svm->list); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); + } + } + break; + } + out: + + return ret; + } + + /* Page request queue descriptor */ + struct page_req_dsc { + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 priv_data_present:1; + u64 rsvd:6; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + u64 addr:52; + }; + u64 qw_1; + }; + u64 priv_data[2]; + }; + + #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) + + static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req) + { + unsigned long requested = 0; + + if (req->exe_req) + requested |= VM_EXEC; + + if (req->rd_req) + requested |= VM_READ; + + if (req->wr_req) + requested |= VM_WRITE; + + return (requested & ~vma->vm_flags) != 0; + } + + static bool is_canonical_address(u64 addr) + { + int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); + long saddr = (long) addr; + + return (((saddr << shift) >> shift) == saddr); + } + + /** + * intel_svm_drain_prq - Drain page requests and responses for a pasid + * @dev: target device + * @pasid: pasid for draining + * + * Drain all pending page requests and responses related to @pasid in both + * software and hardware. This is supposed to be called after the device + * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB + * and DevTLB have been invalidated. + * + * It waits until all pending page requests for @pasid in the page fault + * queue are completed by the prq handling thread. Then follow the steps + * described in VT-d spec CH7.10 to drain all page requests and page + * responses pending in the hardware. + */ + static void intel_svm_drain_prq(struct device *dev, int pasid) + { + struct device_domain_info *info; + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct qi_desc desc[3]; + struct pci_dev *pdev; + int head, tail; + u16 sid, did; + int qdep; + + info = get_domain_info(dev); + if (WARN_ON(!info || !dev_is_pci(dev))) + return; + + if (!info->pri_enabled) + return; + + iommu = info->iommu; + domain = info->domain; + pdev = to_pci_dev(dev); + sid = PCI_DEVID(info->bus, info->devfn); + did = domain->iommu_did[iommu->seq_id]; + qdep = pci_ats_queue_depth(pdev); + + /* + * Check and wait until all pending page requests in the queue are + * handled by the prq handling thread. + */ + prq_retry: + reinit_completion(&iommu->prq_complete); + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct page_req_dsc *req; + + req = &iommu->prq[head / sizeof(*req)]; + if (!req->pasid_present || req->pasid != pasid) { + head = (head + sizeof(*req)) & PRQ_RING_MASK; + continue; + } + + wait_for_completion(&iommu->prq_complete); + goto prq_retry; + } + + /* + * Perform steps described in VT-d spec CH7.10 to drain page + * requests and responses in hardware. 
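 * Three descriptors are submitted together below: an invalidation wait
 * with fence, a PASID-based IOTLB invalidation and a PASID-based
 * device-TLB invalidation; the submission is retried for as long as
 * the page request overflow (PRO) bit stays set.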
+ */ + memset(desc, 0, sizeof(desc)); + desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_FENCE | + QI_IWD_TYPE; + desc[1].qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) | + QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | + QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(info->pfsid); + qi_retry: + reinit_completion(&iommu->prq_complete); + qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + wait_for_completion(&iommu->prq_complete); + goto qi_retry; + } + } + + static irqreturn_t prq_event_thread(int irq, void *d) + { + struct intel_iommu *iommu = d; + struct intel_svm *svm = NULL; + int head, tail, handled = 0; + + /* Clear PPR bit before reading head/tail registers, to + * ensure that we get a new interrupt if needed. */ + writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); + + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct intel_svm_dev *sdev; + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int result; + vm_fault_t ret; + u64 address; + + handled = 1; + + req = &iommu->prq[head / sizeof(*req)]; + + result = QI_RESP_FAILURE; + address = (u64)req->addr << VTD_PAGE_SHIFT; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto no_pasid; + } + + if (!svm || svm->pasid != req->pasid) { + rcu_read_lock(); + svm = ioasid_find(NULL, req->pasid, NULL); + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. + * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + if (IS_ERR_OR_NULL(svm)) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto no_pasid; + } + } + + result = QI_RESP_INVALID; + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; + + /* If address is not canonical, return invalid response */ + if (!is_canonical_address(address)) + goto bad_req; + + /* If the mm is already defunct, don't handle faults. */ + if (!mmget_not_zero(svm->mm)) + goto bad_req; + - down_read(&svm->mm->mmap_sem); ++ mmap_read_lock(svm->mm); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + if (access_error(vma, req)) + goto invalid; + + ret = handle_mm_fault(vma, address, + req->wr_req ? FAULT_FLAG_WRITE : 0); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; + invalid: - up_read(&svm->mm->mmap_sem); ++ mmap_read_unlock(svm->mm); + mmput(svm->mm); + bad_req: + /* Accounting for major/minor faults? */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + if (sdev->sid == req->rid) + break; + } + /* Other devices can go away, but the drivers are not permitted + * to unbind while any page faults might be in flight. So it's + * OK to drop the 'lock' here now we have it. 
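 * If the walk above ran off the end of the list without a match, sdev
 * is reset to NULL below and only the page group response is sent.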
*/ + rcu_read_unlock(); + + if (WARN_ON(&sdev->list == &svm->devs)) + sdev = NULL; + + if (sdev && sdev->ops && sdev->ops->fault_cb) { + int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | + (req->exe_req << 1) | (req->pm_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, + req->priv_data, rwxp, result); + } + /* We get here in the error case where the PASID lookup failed, + and these can be NULL. Do not use them below this point! */ + sdev = NULL; + svm = NULL; + no_pasid: + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ + resp.qw0 = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + resp.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(iommu, &resp, 1, 0); + } + head = (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + /* + * Clear the page request overflow bit and wake up all threads that + * are waiting for the completion of this handling. + */ + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + + if (!completion_done(&iommu->prq_complete)) + complete(&iommu->prq_complete); + + return IRQ_RETVAL(handled); + } + + #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) + struct iommu_sva * + intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) + { + struct iommu_sva *sva = ERR_PTR(-EINVAL); + struct intel_svm_dev *sdev = NULL; + int flags = 0; + int ret; + + /* + * TODO: Consolidate with generic iommu-sva bind after it is merged. + * It will require shared SVM data structures, i.e. combine io_mm + * and intel_svm etc. + */ + if (drvdata) + flags = *(int *)drvdata; + mutex_lock(&pasid_mutex); + ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + if (ret) + sva = ERR_PTR(ret); + else if (sdev) + sva = &sdev->sva; + else + WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + + mutex_unlock(&pasid_mutex); + + return sva; + } + + void intel_svm_unbind(struct iommu_sva *sva) + { + struct intel_svm_dev *sdev; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + intel_svm_unbind_mm(sdev->dev, sdev->pasid); + mutex_unlock(&pasid_mutex); + } + + int intel_svm_get_pasid(struct iommu_sva *sva) + { + struct intel_svm_dev *sdev; + int pasid; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + pasid = sdev->pasid; + mutex_unlock(&pasid_mutex); + + return pasid; + }
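For reference, below is a minimal, hypothetical driver-side sketch of how the AMD IOMMUv2 API exported by drivers/iommu/amd/iommu_v2.c above is typically used: initialise the device once, bind a PASID to the current process, and tear both down again. The helper names my_driver_attach_process()/my_driver_detach_process(), the PASID limit of 32 and the reduced error handling are illustrative only and are not part of the patch.

#include <linux/amd-iommu.h>
#include <linux/pci.h>
#include <linux/sched.h>

/* Hypothetical helper: set up IOMMUv2 state for pdev and bind one PASID
 * of the calling process. */
static int my_driver_attach_process(struct pci_dev *pdev, int pasid)
{
	int ret;

	/* Allocates device_state, the IOMMUv2 domain and the PASID table. */
	ret = amd_iommu_init_device(pdev, 32 /* illustrative max PASIDs */);
	if (ret)
		return ret;

	/* Routes recoverable page faults for 'pasid' to current->mm. */
	ret = amd_iommu_bind_pasid(pdev, pasid, current);
	if (ret)
		amd_iommu_free_device(pdev);

	return ret;
}

/* Hypothetical helper: undo my_driver_attach_process(). */
static void my_driver_detach_process(struct pci_dev *pdev, int pasid)
{
	amd_iommu_unbind_pasid(pdev, pasid);
	amd_iommu_free_device(pdev);
}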