From 719dfbf9f9d17bcddfc8b867e56b0dc0ccdc96e4 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 15:18:57 +0800 Subject: [PATCH 01/45] iommu/vt-d: Manage scalalble mode PASID tables In scalable mode, pasid structure is a two level table with a pasid directory table and a pasid table. Any pasid entry can be identified by a pasid value in below way. 1 9 6 5 0 .-----------------------.-------. | PASID | | '-----------------------'-------' .-------------. | | | | | | | | | | | | | .-----------. | .-------------. | | | |----->| PASID Entry | | | | | '-------------' | | | |Plus | | | .-----------. | | | |---->| DIR Entry |-------->| | | '-----------' '-------------' .---------. |Plus | | | Context | | | | | Entry |------->| | '---------' '-----------' This changes the pasid table APIs to support scalable mode PASID directory and PASID table. It also adds a helper to get the PASID table entry according to the pasid value. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 29 ++++++++----- drivers/iommu/intel-pasid.c | 87 ++++++++++++++++++++++++++++++------- drivers/iommu/intel-pasid.h | 12 ++++- drivers/iommu/intel-svm.c | 6 +-- 4 files changed, 100 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fb63abcf6592..051d1d2bc3cf 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -471,22 +471,25 @@ static LIST_HEAD(device_domain_list); /* * Iterate over elements in device_domain_list and call the specified - * callback @fn against each element. This helper should only be used - * in the context where the device_domain_lock has already been holden. - */ + * callback @fn against each element. +*/ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data) { int ret = 0; + unsigned long flags; struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); + spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &device_domain_list, global) { ret = fn(info, data); - if (ret) - return ret; + if (ret) { + spin_unlock_irqrestore(&device_domain_lock, flags); + return ret; + } } - + spin_unlock_irqrestore(&device_domain_lock, flags); + return 0; } @@ -2560,16 +2563,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, list_add(&info->global, &device_domain_list); if (dev) dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); - if (dev && dev_is_pci(dev) && info->pasid_supported) { + /* PASID table is mandatory for a PCI device in scalable mode. 
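+	 * Unlike the old code, which merely cleared pasid_supported when
+	 * allocation failed, scalable mode cannot operate without this
+	 * table, so a failure below tears the device mapping down.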
*/ + if (dev && dev_is_pci(dev) && sm_supported(iommu)) { ret = intel_pasid_alloc_table(dev); if (ret) { - pr_warn("No pasid table for %s, pasid disabled\n", - dev_name(dev)); - info->pasid_supported = 0; + pr_err("PASID table allocation for %s failed\n", + dev_name(dev)); + dmar_remove_one_dev_info(domain, dev); + return NULL; } } - spin_unlock_irqrestore(&device_domain_lock, flags); if (dev && domain_context_mapping(domain, dev)) { pr_err("Domain context map for %s failed\n", dev_name(dev)); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 1a82ee1c6e3d..12fcc9fa2282 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -123,12 +123,13 @@ int intel_pasid_alloc_table(struct device *dev) struct pasid_table *pasid_table; struct pasid_table_opaque data; struct page *pages; - size_t size, count; + int max_pasid = 0; int ret, order; + int size; + might_sleep(); info = dev->archdata.iommu; - if (WARN_ON(!info || !dev_is_pci(dev) || - !info->pasid_supported || info->pasid_table)) + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; /* DMA alias device already has a pasid table, use it: */ @@ -138,17 +139,19 @@ int intel_pasid_alloc_table(struct device *dev) if (ret) goto attach_out; - pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); + pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) return -ENOMEM; INIT_LIST_HEAD(&pasid_table->dev); - size = sizeof(struct pasid_entry); - count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); - order = get_order(size * count); + if (info->pasid_supported) + max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)), + intel_pasid_max_id); + + size = max_pasid >> (PASID_PDE_SHIFT - 3); + order = size ? get_order(size) : 0; pages = alloc_pages_node(info->iommu->node, - GFP_ATOMIC | __GFP_ZERO, - order); + GFP_KERNEL | __GFP_ZERO, order); if (!pages) { kfree(pasid_table); return -ENOMEM; @@ -156,7 +159,7 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->table = page_address(pages); pasid_table->order = order; - pasid_table->max_pasid = count; + pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); attach_out: device_attach_pasid_table(info, pasid_table); @@ -164,14 +167,33 @@ int intel_pasid_alloc_table(struct device *dev) return 0; } +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool +pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT; +} + +/* Get PASID table from a PASID directory entry. 
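+ * A directory entry is a single u64: bit 0 is the present flag and the
+ * page-frame bits hold the physical address of one page of 64-byte
+ * PASID table entries, so a pasid selects dir[pasid >> 6] and then
+ * entry[pasid & 0x3f] within the table the PDE points to.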
*/ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; + struct pasid_entry *table; + int i, max_pde; info = dev->archdata.iommu; - if (!info || !dev_is_pci(dev) || - !info->pasid_supported || !info->pasid_table) + if (!info || !dev_is_pci(dev) || !info->pasid_table) return; pasid_table = info->pasid_table; @@ -180,6 +202,14 @@ void intel_pasid_free_table(struct device *dev) if (!list_empty(&pasid_table->dev)) return; + /* Free scalable mode PASID directory tables: */ + dir = pasid_table->table; + max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; + for (i = 0; i < max_pde; i++) { + table = get_pasid_table_from_pde(&dir[i]); + free_pgtable_page(table); + } + free_pages((unsigned long)pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -208,17 +238,37 @@ int intel_pasid_get_dev_max_id(struct device *dev) struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) { + struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; struct pasid_entry *entries; + int dir_index, index; pasid_table = intel_pasid_get_table(dev); if (WARN_ON(!pasid_table || pasid < 0 || pasid >= intel_pasid_get_dev_max_id(dev))) return NULL; - entries = pasid_table->table; + dir = pasid_table->table; + info = dev->archdata.iommu; + dir_index = pasid >> PASID_PDE_SHIFT; + index = pasid & PASID_PTE_MASK; + + spin_lock(&pasid_lock); + entries = get_pasid_table_from_pde(&dir[dir_index]); + if (!entries) { + entries = alloc_pgtable_page(info->iommu->node); + if (!entries) { + spin_unlock(&pasid_lock); + return NULL; + } + + WRITE_ONCE(dir[dir_index].val, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + } + spin_unlock(&pasid_lock); - return &entries[pasid]; + return &entries[index]; } /* @@ -226,7 +276,14 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) */ static inline void pasid_clear_entry(struct pasid_entry *pe) { - WRITE_ONCE(pe->val, 0); + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); } void intel_pasid_clear_entry(struct device *dev, int pasid) diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 1fb5e12b029a..12f480c2bb8b 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -11,12 +11,20 @@ #define __INTEL_PASID_H #define PASID_MIN 0x1 -#define PASID_MAX 0x20000 +#define PASID_MAX 0x100000 +#define PASID_PTE_MASK 0x3F +#define PASID_PTE_PRESENT 1 +#define PDE_PFN_MASK PAGE_MASK +#define PASID_PDE_SHIFT 6 -struct pasid_entry { +struct pasid_dir_entry { u64 val; }; +struct pasid_entry { + u64 val[8]; +}; + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 5d284c00dd27..1e6ca9c67995 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu) order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); if (ecap_dis(iommu->ecap)) { - /* Just making it explicit... 
*/ - BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (pages) iommu->pasid_state_table = page_address(pages); @@ -406,9 +404,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_entry_val |= PASID_ENTRY_FLPM_5LP; entry = intel_pasid_get_entry(dev, svm->pasid); - entry->val = pasid_entry_val; - - wmb(); + WRITE_ONCE(entry->val[0], pasid_entry_val); /* * Flush PASID cache when a PASID table entry becomes -- Gitee From 59928ca592279503cb1c866ec2f60d5be6b8aaec Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 15:47:56 +0800 Subject: [PATCH 02/45] iommu/vt-d: Add 256-bit invalidation descriptor support Intel vt-d spec rev3.0 requires software to use 256-bit descriptors in invalidation queue. As the spec reads in section 6.5.2: Remapping hardware supporting Scalable Mode Translations (ECAP_REG.SMTS=1) allow software to additionally program the width of the descriptors (128-bits or 256-bits) that will be written into the Queue. Software should setup the Invalidation Queue for 256-bit descriptors before progra- mming remapping hardware for scalable-mode translation as 128-bit descriptors are treated as invalid descriptors (see Table 21 in Section 6.5.2.10) in scalable-mode. This patch adds 256-bit invalidation descriptor support if the hardware presents scalable mode capability. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/dmar.c | 92 +++++++++++++++++++---------- drivers/iommu/intel-svm.c | 76 +++++++++++++++--------- drivers/iommu/intel_irq_remapping.c | 6 +- include/linux/intel-iommu.h | 9 ++- 4 files changed, 121 insertions(+), 62 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 187812e35c99..f92ac7477f02 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1173,6 +1173,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + int shift = qi_shift(iommu); if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; @@ -1185,14 +1186,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) * is cleared. */ if (fault & DMA_FSTS_IQE) { - head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> DMAR_IQ_SHIFT) == index) { - pr_err("VT-d detected invalid descriptor: " - "low=%llx, high=%llx\n", - (unsigned long long)qi->desc[index].low, - (unsigned long long)qi->desc[index].high); - memcpy(&qi->desc[index], &qi->desc[wait_index], - sizeof(struct qi_desc)); + if ((head >> shift) == index) { + struct qi_desc *desc = qi->desc + head; + + /* + * desc->qw2 and desc->qw3 are either reserved or + * used by software as private data. We won't print + * out these two qw's for security consideration. 
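+			 * Note that qw2 and qw3 carry meaning only in the
+			 * 256-bit descriptor layout of scalable mode.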
+ */ + pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n", + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); + memcpy(desc, qi->desc + (wait_index << shift), + 1 << shift); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); return -EINVAL; } @@ -1204,10 +1210,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_ITE) { head = readl(iommu->reg + DMAR_IQH_REG); - head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; head |= 1; tail = readl(iommu->reg + DMAR_IQT_REG); - tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); @@ -1235,15 +1241,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { int rc; struct q_inval *qi = iommu->qi; - struct qi_desc *hw, wait_desc; + int offset, shift, length; + struct qi_desc wait_desc; int wait_index, index; unsigned long flags; if (!qi) return 0; - hw = qi->desc; - restart: rc = 0; @@ -1256,16 +1261,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) index = qi->free_head; wait_index = (index + 1) % QI_LENGTH; + shift = qi_shift(iommu); + length = 1 << shift; qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; - hw[index] = *desc; - - wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + offset = index << shift; + memcpy(qi->desc + offset, desc, length); + wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; - wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw2 = 0; + wait_desc.qw3 = 0; - hw[wait_index] = wait_desc; + offset = wait_index << shift; + memcpy(qi->desc + offset, &wait_desc, length); qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; @@ -1274,7 +1284,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * update the HW tail register indicating the presence of * new descriptors. 
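	 * The offset written here is scaled by qi_shift(): 4 with the
	 * legacy 128-bit descriptors, 5 with the 256-bit scalable mode
	 * descriptors.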
*/ - writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -1311,8 +1321,10 @@ void qi_global_iec(struct intel_iommu *iommu) { struct qi_desc desc; - desc.low = QI_IEC_TYPE; - desc.high = 0; + desc.qw0 = QI_IEC_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; /* should never fail */ qi_submit_sync(&desc, iommu); @@ -1323,9 +1335,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, { struct qi_desc desc; - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1344,10 +1358,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, if (cap_read_drain(iommu->cap)) dr = 1; - desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1360,15 +1376,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (mask) { WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else - desc.high = QI_DEV_IOTLB_ADDR(addr); + desc.qw1 = QI_DEV_IOTLB_ADDR(addr); if (qdep >= QI_DEV_IOTLB_MAX_INVS) qdep = 0; - desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1416,16 +1434,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; + u64 val = virt_to_phys(qi->desc); qi->free_head = qi->free_tail = 0; qi->free_cnt = QI_LENGTH; + /* + * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability + * is present. + */ + if (ecap_smts(iommu->ecap)) + val |= (1 << 11) | 1; + raw_spin_lock_irqsave(&iommu->register_lock, flags); /* write zero to the tail reg */ writel(0, iommu->reg + DMAR_IQT_REG); - dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + dmar_writeq(iommu->reg + DMAR_IQA_REG, val); iommu->gcmd |= DMA_GCMD_QIE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -1461,8 +1487,12 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi = iommu->qi; - - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + /* + * Need two pages to accommodate 256 descriptors of 256 bits each + * if the remapping hardware supports scalable mode translation. + */ + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + !!ecap_smts(iommu->ecap)); if (!desc_page) { kfree(qi); iommu->qi = NULL; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 1e6ca9c67995..e1c3b5c80ffd 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -161,27 +161,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d * because that's the only option the hardware gives us. 
Despite * the fact that they are actually only accessible through one. */ if (gl) - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | + QI_EIOTLB_TYPE; else - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; - desc.high = 0; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = 0; } else { int mask = ilog2(__roundup_pow_of_two(pages)); - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; - desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | - QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = QI_EIOTLB_ADDR(address) | + QI_EIOTLB_GL(gl) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); if (sdev->dev_iotlb) { - desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | - QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | + QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | + QI_DEIOTLB_TYPE; if (pages == -1) { - desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | + QI_DEV_EIOTLB_SIZE; } else if (pages > 1) { /* The least significant zero bit indicates the size. So, * for example, an "address" value of 0x12345f000 will @@ -189,10 +202,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); unsigned long mask = __rounddown_pow_of_two(address ^ last); - desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | + (mask - 1)) | QI_DEV_EIOTLB_SIZE; } else { - desc.high = QI_DEV_EIOTLB_ADDR(address); + desc.qw1 = QI_DEV_EIOTLB_ADDR(address); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); } } @@ -237,8 +253,11 @@ static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *s { struct qi_desc desc; - desc.high = 0; - desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) | + QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); } @@ -669,24 +688,27 @@ static irqreturn_t prq_event_thread(int irq, void *d) no_pasid: if (req->lpig) { /* Page Group Response */ - resp.low = QI_PGRP_PASID(req->pasid) | + resp.qw0 = QI_PGRP_PASID(req->pasid) | QI_PGRP_DID((req->bus << 8) | req->devfn) | QI_PGRP_PASID_P(req->pasid_present) | QI_PGRP_RESP_TYPE; - resp.high = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); + resp.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | + QI_PGRP_RESP_CODE(result); } else if (req->srr) { /* Page Stream Response */ - resp.low = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; - resp.high = 
QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + resp.qw0 = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | + QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | + QI_PSTRM_RESP_TYPE; + resp.qw1 = QI_PSTRM_ADDR(address) | + QI_PSTRM_DEVFN(req->devfn) | QI_PSTRM_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); } + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(&resp, iommu); head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 967450bd421a..916391f33ca6 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) { struct qi_desc desc; - desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) | QI_IEC_SELECTIVE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; return qi_submit_sync(&desc, iommu); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b1b4411b4c6b..94e9d9a00cb1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -340,13 +340,18 @@ enum { #define QI_GRAN_NONG_PASID 2 #define QI_GRAN_PSI_PASID 3 +#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) + struct qi_desc { - u64 low, high; + u64 qw0; + u64 qw1; + u64 qw2; + u64 qw3; }; struct q_inval { raw_spinlock_t q_lock; - struct qi_desc *desc; /* invalidation queue */ + void *desc; /* invalidation queue */ int *desc_status; /* desc status */ int free_head; /* first free entry */ int free_tail; /* last free entry */ -- Gitee From 92b6a10e6d0c3418c12a0f92bd21dc54b2e3283d Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 15:52:29 +0800 Subject: [PATCH 03/45] iommu/vt-d: Move page table helpers into header So that they could also be used in other source files. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 43 ------------------------------------- include/linux/intel-iommu.h | 43 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 051d1d2bc3cf..a0cefcb19464 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -315,49 +315,6 @@ static inline void context_clear_entry(struct context_entry *context) context->hi = 0; } -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK; -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline int first_pte_in_page(struct dma_pte *pte) -{ - return !((unsigned long)pte & ~VTD_PAGE_MASK); -} - /* * This domain is a statically identity mapping domain. * 1. 
This domain creats a static 1:1 mapping to all usable memory. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 94e9d9a00cb1..a12892460862 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -507,6 +507,49 @@ static inline void __iommu_flush_cache( clflush_cache_range(addr, size); } +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); -- Gitee From 4d6b4a4a21f1e94f69c459a3b8adc987b2a38386 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 16:00:32 +0800 Subject: [PATCH 04/45] iommu/vt-d: Add second level page table interface This adds the interfaces to setup or tear down the structures for second level page table translations. This includes types of second level only translation and pass through. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/intel-pasid.c | 279 ++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 9 ++ include/linux/intel-iommu.h | 3 + 4 files changed, 292 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a0cefcb19464..db5a9d7f94ad 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1285,7 +1285,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static void iommu_flush_write_buffer(struct intel_iommu *iommu) +void iommu_flush_write_buffer(struct intel_iommu *iommu) { u32 val; unsigned long flag; diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 12fcc9fa2282..3ce006bc7ddf 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -296,3 +296,282 @@ void intel_pasid_clear_entry(struct device *dev, int pasid) pasid_clear_entry(pe); } + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. 
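+ * The DID occupies bits 15:0 of val[1], i.e. bits 79:64 of the
+ * 512-bit entry, matching what pasid_set_domain_id() programs.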
+ */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. + */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value); +} + +static void +pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, + u16 did, int pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +devtlb_invalidation_with_pasid(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct device_domain_info *info; + u16 sid, qdep, pfsid; + + info = dev->archdata.iommu; + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + pfsid = info->pfsid; + + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); +} + +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + u16 did; + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return; + + intel_pasid_clear_entry(dev, pasid); + did = pasid_get_domain_id(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); +} + +/* + * Set up the scalable mode pasid entry for second only translation type. 
+ */ +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + u64 pgd_val; + int agaw; + u16 did; + + /* + * If hardware advertises no support for second level + * translation, return directly. + */ + if (!ecap_slts(iommu->ecap)) { + pr_err("No second level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ + pgd = domain->pgd; + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; + } + } + + pgd_val = virt_to_phys(pgd); + did = domain->iommu_did[iommu->seq_id]; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_slptr(pte, pgd_val); + pasid_set_address_width(pte, agaw); + pasid_set_translation_type(pte, 2); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * Since it is a second level only translation setup, we should + * set SRE bit as well (addresses are expected to be GPAs). + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + +/* + * Set up the scalable mode pasid entry for passthrough translation type. + */ +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + u16 did = FLPT_DEFAULT_DID; + struct pasid_entry *pte; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_translation_type(pte, 4); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * We should set SRE bit as well since the addresses are expected + * to be GPAs. 
+ */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 12f480c2bb8b..8f2bd8924d01 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -44,4 +44,13 @@ int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); void intel_pasid_clear_entry(struct device *dev, int pasid); +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid); + #endif /* __INTEL_PASID_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a12892460862..e4115ee22185 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -84,6 +84,8 @@ /* * Decoding Capability Register */ +#define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_slts(e) (((e) >> 46) & 0x1) #define cap_5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -574,6 +576,7 @@ void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); +void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -- Gitee From e90e6be03556e0c7035611c6ffd798e0e4053ca2 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 16:07:29 +0800 Subject: [PATCH 05/45] iommu/vt-d: Add first level page table interface This adds an interface to setup the PASID entries for first level page table translation. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-pasid.c | 80 +++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 12 +++++- include/linux/intel-iommu.h | 1 + 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 3ce006bc7ddf..311122eecfa4 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "DMAR: " fmt +#include #include #include #include @@ -390,6 +391,26 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value); } +/* + * Setup the First Level Page table Pointer field (Bit 140~191) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_flptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); +} + +/* + * Setup the First Level Paging Mode field (Bit 130~131) of a + * scalable mode PASID entry. 
+ */ +static inline void +pasid_set_flpm(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); +} + static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, int pasid) @@ -460,6 +481,65 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, devtlb_invalidation_with_pasid(iommu, dev, pasid); } +/* + * Set up the scalable mode pasid table entry for first only + * translation type. + */ +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags) +{ + struct pasid_entry *pte; + + if (!ecap_flts(iommu->ecap)) { + pr_err("No first level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + pasid_clear_entry(pte); + + /* Setup the first level page table pointer: */ + pasid_set_flptr(pte, (u64)__pa(pgd)); + if (flags & PASID_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + pr_err("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + +#ifdef CONFIG_X86 + if (cpu_feature_enabled(X86_FEATURE_LA57)) + pasid_set_flpm(pte, 1); +#endif /* CONFIG_X86 */ + + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* Setup Present and PASID Granular Transfer Type: */ + pasid_set_translation_type(pte, 1); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + /* * Set up the scalable mode pasid entry for second only translation type. */ diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 8f2bd8924d01..01dadac6af87 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -17,6 +17,14 @@ #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 +/* + * The SUPERVISOR_MODE flag indicates a first level translation which + * can be used for access to kernel addresses. It is valid only for + * access to the kernel's static 1:1 mapping of physical memory — not + * to vmalloc or even module mappings. 
+ */ +#define PASID_FLAG_SUPERVISOR_MODE BIT(0) + struct pasid_dir_entry { u64 val; }; @@ -43,7 +51,9 @@ struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); void intel_pasid_clear_entry(struct device *dev, int pasid); - +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags); int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, int pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e4115ee22185..080e3942f583 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -85,6 +85,7 @@ * Decoding Capability Register */ #define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) #define cap_5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) -- Gitee From 8f1f1fca7d6b3802d358622929ab49b64a219ea2 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 16:18:49 +0800 Subject: [PATCH 06/45] iommu/vt-d: Enumerate the scalable mode capability The Intel vt-d spec rev3.0 introduces a new translation mode called scalable mode, which enables PASID-granular translations for first level, second level, nested and pass-through modes. At the same time, the previous Extended Context (ECS) mode is deprecated (no production ever implements ECS). This patch adds enumeration for Scalable Mode and removes the deprecated ECS enumeration. It provides a boot time option to disable scalable mode even hardware claims to support it. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- .../admin-guide/kernel-parameters.txt | 12 ++-- drivers/iommu/intel-iommu.c | 64 +++++-------------- include/linux/intel-iommu.h | 1 + 3 files changed, 24 insertions(+), 53 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fea9487a263c..9bdae309ff8e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1726,12 +1726,12 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - ecs_off [Default Off] - By default, extended context tables will be supported if - the hardware advertises that it has support both for the - extended tables themselves, and also PASID support. With - this option set, extended tables will not be used even - on hardware which claims to support them. + sm_off [Default Off] + By default, scalable mode will be supported if the + hardware advertises that it has support for the scalable + mode translation. With this option set, scalable mode + will not be used even on hardware which claims to support + it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. 
By default, tboot will force Intel IOMMU on, which diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index db5a9d7f94ad..9dccf08fd24f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -386,38 +386,16 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_ecs = 1; -static int intel_iommu_pasid28; +static int intel_iommu_sm = 1; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -/* Broadwell and Skylake have broken ECS support — normal so-called "second - * level" translation of DMA requests-without-PASID doesn't actually happen - * unless you also set the NESTE bit in an extended context-entry. Which of - * course means that SVM doesn't work because it's trying to do nested - * translation of the physical addresses it finds in the process page tables, - * through the IOVA->phys mapping found in the "second level" page tables. - * - * The VT-d specification was retroactively changed to change the definition - * of the capability bits and pretend that Broadwell/Skylake never happened... - * but unfortunately the wrong bit was changed. It's ECS which is broken, but - * for some reason it was the PASID capability bit which was redefined (from - * bit 28 on BDW/SKL to bit 40 in future). - * - * So our test for ECS needs to eschew those implementations which set the old - * PASID capabiity bit 28, since those are the ones on which ECS is broken. - * Unless we are working around the 'pasid28' limitations, that is, by putting - * the device into passthrough mode for normal DMA and thus masking the bug. - */ -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) -/* PASID support is thus enabled if ECS is enabled and *either* of the old - * or new capability bits are set. */ -#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ - (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -500,15 +478,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "ecs_off", 7)) { - printk(KERN_INFO - "Intel-IOMMU: disable extended context table support\n"); - intel_iommu_ecs = 0; - } else if (!strncmp(str, "pasid28", 7)) { - printk(KERN_INFO - "Intel-IOMMU: enable pre-production PASID support\n"); - intel_iommu_pasid28 = 1; - iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "sm_off", 6)) { + pr_info("Intel-IOMMU: disable scalable mode support\n"); + intel_iommu_sm = 0; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. 
This could expose security risk for tboot\n"); @@ -755,7 +727,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu u64 *entry; entry = &root->lo; - if (ecs_enabled(iommu)) { + if (sm_supported(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -918,7 +890,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecs_enabled(iommu)) + if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1270,8 +1242,6 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecs_enabled(iommu)) - addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1763,7 +1733,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); intel_svm_exit(iommu); @@ -2473,8 +2443,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; - if (ecs_enabled(iommu)) { - if (pasid_enabled(iommu)) { + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); if (features >= 0) info->pasid_supported = features | 1; @@ -3312,7 +3282,7 @@ static int __init init_dmars(void) * We need to ensure the system pasid table is no bigger * than the smallest supported. */ - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { u32 temp = 2 << ecap_pss(iommu->ecap); intel_pasid_max_id = min_t(u32, temp, @@ -3373,7 +3343,7 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif } @@ -3480,7 +3450,7 @@ static int __init init_dmars(void) iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { /* * Call dmar_alloc_hwirq() with dmar_global_lock held, * could cause possible lock race condition. 
@@ -4368,7 +4338,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto out; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif @@ -4385,7 +4355,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto disable_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 080e3942f583..4ae690fcd289 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -84,6 +84,7 @@ /* * Decoding Capability Register */ +#define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) -- Gitee From 0167f660aee785ce28bc71e0d77d33f0d9479d1a Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 16:31:34 +0800 Subject: [PATCH 07/45] iommu/vt-d: Shared virtual address in scalable mode This patch enables the current SVA (Shared Virtual Address) implementation to work in the scalable mode. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 39 ------------------------- drivers/iommu/intel-pasid.c | 2 +- drivers/iommu/intel-pasid.h | 1 - drivers/iommu/intel-svm.c | 57 +++++++++++-------------------------- include/linux/intel-iommu.h | 2 ++ 5 files changed, 20 insertions(+), 81 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9dccf08fd24f..e0aff7ff7e63 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5258,19 +5258,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -#define MAX_NR_PASID_BITS (20) -static inline unsigned long intel_iommu_get_pts(struct device *dev) -{ - int pts, max_pasid; - - max_pasid = intel_pasid_get_dev_max_id(dev); - pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS); - if (pts < 5) - return 0; - - return pts - 5; -} - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5302,33 +5289,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { - if (iommu->pasid_state_table) - context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(info->pasid_table->table) | - intel_iommu_get_pts(sdev->dev); - - wmb(); - /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both - * extended to permit requests-with-PASID if the PASIDE bit - * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, - * however, the PASIDE bit is ignored and requests-with-PASID - * are unconditionally blocked. Which makes less sense. - * So convert from CONTEXT_TT_PASS_THROUGH to one of the new - * "guest mode" translation types depending on whether ATS - * is available or not. Annoyingly, we can't use the new - * modes *unless* PASIDE is set. 
*/ - if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { - ctx_lo &= ~CONTEXT_TT_MASK; - if (info->ats_supported) - ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; - else - ctx_lo |= CONTEXT_TT_PT_PASID << 2; - } ctx_lo |= CONTEXT_PASIDE; - if (iommu->pasid_state_table) - ctx_lo |= CONTEXT_DINVE; - if (info->pri_supported) - ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 311122eecfa4..a53a308efa20 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -287,7 +287,7 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -void intel_pasid_clear_entry(struct device *dev, int pasid) +static void intel_pasid_clear_entry(struct device *dev, int pasid) { struct pasid_entry *pe; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 01dadac6af87..8ed62f10d979 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -50,7 +50,6 @@ void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); -void intel_pasid_clear_entry(struct device *dev, int pasid); int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, pgd_t *pgd, int pasid, u16 did, int flags); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index e1c3b5c80ffd..917b5da5f0df 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -29,10 +29,6 @@ #include "intel-pasid.h" -#define PASID_ENTRY_P BIT_ULL(0) -#define PASID_ENTRY_FLPM_5LP BIT_ULL(9) -#define PASID_ENTRY_SRE BIT_ULL(11) - static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_state_entry { @@ -248,20 +244,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); } - -static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid) -{ - struct qi_desc desc; - - desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) | - QI_PC_PASID_SEL | QI_PC_PASID(pasid); - desc.qw1 = 0; - desc.qw2 = 0; - desc.qw3 = 0; - - qi_submit_sync(&desc, svm->iommu); -} - static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); @@ -281,8 +263,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_clear_entry(sdev->dev, svm->pasid); - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); } rcu_read_unlock(); @@ -302,11 +283,9 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); - struct pasid_entry *entry; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; - u64 pasid_entry_val; int pasid_max; int ret; @@ -415,23 +394,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ kfree(sdev); goto out; } - pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; - } else - pasid_entry_val = (u64)__pa(init_mm.pgd) | - 
PASID_ENTRY_P | PASID_ENTRY_SRE; - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_entry_val |= PASID_ENTRY_FLPM_5LP; - - entry = intel_pasid_get_entry(dev, svm->pasid); - WRITE_ONCE(entry->val[0], pasid_entry_val); - - /* - * Flush PASID cache when a PASID table entry becomes - * present. - */ - if (cap_caching_mode(iommu->cap)) - intel_flush_pasid_dev(svm, sdev, svm->pasid); + } + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + spin_unlock(&iommu->lock); + if (ret) { + if (mm) + mmu_notifier_unregister(&svm->notifier, mm); + intel_pasid_free_id(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } list_add_tail(&svm->list, &global_svm_list); } list_add_rcu(&sdev->list, &svm->devs); @@ -476,10 +454,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); - intel_pasid_clear_entry(dev, svm->pasid); if (list_empty(&svm->devs)) { intel_pasid_free_id(svm->pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4ae690fcd289..a5a5e00f8339 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -36,6 +36,8 @@ #include #include +#define CONTEXT_PASIDE BIT_ULL(3) + /* * Intel IOMMU register specification per version 1.0 public spec. */ -- Gitee From 19565b43a380c07d2237cd8061af14fddd48ceb6 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 16:35:01 +0800 Subject: [PATCH 08/45] iommu/vt-d: Remove unused dmar_remove_one_dev_info() argument domain_remove_dev_info() takes a struct dmar_domain * argument, but doesn't use it. Remove it. No functional change intended. The last use of this argument was removed by 127c761598f7 ("iommu/vt-d: Pass device_domain_info to __dmar_remove_one_dev_info"). 
Signed-off-by: Bjorn Helgaas
Signed-off-by: Joerg Roedel
Signed-off-by: Xibo.Wang
---
 drivers/iommu/intel-iommu.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index e0aff7ff7e63..33dcd8d6046c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -365,8 +365,7 @@ static int g_num_of_iommus;
 static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
-static void dmar_remove_one_dev_info(struct dmar_domain *domain,
-				     struct device *dev);
+static void dmar_remove_one_dev_info(struct device *dev);
 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
 static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
@@ -2498,14 +2497,14 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		if (ret) {
 			pr_err("PASID table allocation for %s failed\n",
			       dev_name(dev));
-			dmar_remove_one_dev_info(domain, dev);
+			dmar_remove_one_dev_info(dev);
 			return NULL;
 		}
 	}

 	if (dev && domain_context_mapping(domain, dev)) {
 		pr_err("Domain context map for %s failed\n",
		       dev_name(dev));
-		dmar_remove_one_dev_info(domain, dev);
+		dmar_remove_one_dev_info(dev);
 		return NULL;
 	}

@@ -3587,7 +3586,7 @@ static int iommu_no_mapping(struct device *dev)
			 * 32 bit DMA is removed from si_domain and fall back
			 * to non-identity mapping.
			 */
-			dmar_remove_one_dev_info(si_domain, dev);
+			dmar_remove_one_dev_info(dev);
			pr_info("32bit %s uses non-identity mapping\n",
				dev_name(dev));
			return 0;
@@ -4552,7 +4551,7 @@ static int device_notifier(struct notifier_block *nb,
 	if (!domain)
 		return 0;

-	dmar_remove_one_dev_info(domain, dev);
+	dmar_remove_one_dev_info(dev);
 	if (!domain_type_is_vm_or_si(domain) &&
	    list_empty(&domain->devices))
		domain_exit(domain);
@@ -4928,8 +4927,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
 	free_devinfo_mem(info);
 }

-static void dmar_remove_one_dev_info(struct dmar_domain *domain,
-				     struct device *dev)
+static void dmar_remove_one_dev_info(struct device *dev)
 {
 	struct device_domain_info *info;
 	unsigned long flags;
@@ -5018,7 +5016,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		old_domain = find_domain(dev);
 		if (old_domain) {
 			rcu_read_lock();
-			dmar_remove_one_dev_info(old_domain, dev);
+			dmar_remove_one_dev_info(dev);
 			rcu_read_unlock();

 			if (!domain_type_is_vm_or_si(old_domain) &&
@@ -5065,7 +5063,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
 {
-	dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
+	dmar_remove_one_dev_info(dev);
 }

 static int intel_iommu_map(struct iommu_domain *domain,
--
Gitee

From 4b949305b1cd8e65b964f8ab54dd462a762afcce Mon Sep 17 00:00:00 2001
From: Sun Shouxin
Date: Sun, 24 Apr 2022 17:05:11 +0800
Subject: [PATCH 09/45] iommu: Add APIs for multiple domains per device

Sharing a physical PCI device in a finer-granularity way is becoming a consensus in the industry. IOMMU vendors are also engaging efforts to support such sharing as well as possible. Among the efforts, the capability of supporting finer-granularity DMA isolation is a common requirement due to security considerations. With finer-granularity DMA isolation, subsets of a PCI function can be isolated from each other by the IOMMU. As a result, there is a request in software to attach multiple domains to a physical PCI device.
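As a rough illustration of the calling sequence a device driver would follow with the APIs enumerated below (hypothetical driver code, not part of this patch; teardown and error unwinding trimmed):

	/* Sketch: detect, enable and use the aux-domain feature. */
	static int my_driver_enable_aux(struct device *dev)
	{
		struct iommu_domain *domain;
		int ret;

		if (!iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX))
			return -ENODEV;

		ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
		if (ret)
			return ret;

		domain = iommu_domain_alloc(dev->bus);	/* unmanaged domain */
		if (!domain)
			return -ENOMEM;

		/* One isolated address space for one subset of the device. */
		return iommu_aux_attach_device(domain, dev);
	}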
One example of such use model is the Intel Scalable IOV [1] [2]. The Intel vt-d 3.0 spec [3] introduces the scalable mode which enables PASID granularity DMA isolation. This adds the APIs to support multiple domains per device. In order to ease the discussions, we call it 'a domain in auxiliary mode' or simply 'auxiliary domain' when multiple domains are attached to a physical device. The APIs include: * iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) - Detect both IOMMU and PCI endpoint devices supporting the feature (aux-domain here) without the host driver dependency. * iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) - Check the enabling status of the feature (aux-domain here). The aux-domain interfaces are available only if this returns true. * iommu_dev_enable/disable_feature(dev, IOMMU_DEV_FEAT_AUX) - Enable/disable device specific aux-domain feature. * iommu_aux_attach_device(domain, dev) - Attaches @domain to @dev in the auxiliary mode. Multiple domains could be attached to a single device in the auxiliary mode with each domain representing an isolated address space for an assignable subset of the device. * iommu_aux_detach_device(domain, dev) - Detach @domain which has been attached to @dev in the auxiliary mode. * iommu_aux_get_pasid(domain, dev) - Return ID used for finer-granularity DMA translation. For the Intel Scalable IOV usage model, this will be a PASID. The device which supports Scalable IOV needs to write this ID to the device register so that DMA requests could be tagged with a right PASID prefix. This has been updated with the latest proposal from Joerg posted here [5]. Many people involved in discussions of this design. Kevin Tian Liu Yi L Ashok Raj Sanjay Kumar Jacob Pan Alex Williamson Jean-Philippe Brucker Joerg Roedel and some discussions can be found here [4] [5]. [1] https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification [2] https://schd.ws/hosted_files/lc32018/00/LC3-SIOV-final.pdf [3] https://software.intel.com/en-us/download/intel-virtualization-technology-for-directed-io-architecture-specification [4] https://lkml.org/lkml/2018/7/26/4 [5] https://www.spinics.net/lists/iommu/msg31874.html Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Suggested-by: Kevin Tian Suggested-by: Jean-Philippe Brucker Suggested-by: Joerg Roedel Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/iommu.c | 96 +++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 65 +++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 84a59762b037..1ac16c542c1a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2362,6 +2362,102 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); +/* + * Per device IOMMU features. 
+ */ +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_has_feat) + return ops->dev_has_feat(dev, feat); + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_has_feature); + +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_enable_feat) + return ops->dev_enable_feat(dev, feat); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); + +/* + * The device drivers should do the necessary cleanups before calling this. + * For example, before disabling the aux-domain feature, the device driver + * should detach all aux-domains. Otherwise, this will return -EBUSY. + */ +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_disable_feat) + return ops->dev_disable_feat(dev, feat); + + return -EBUSY; +} +EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); + +bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_feat_enabled) + return ops->dev_feat_enabled(dev, feat); + + return false; +} +EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); + +/* + * Aux-domain specific attach/detach. + * + * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns + * true. Also, as long as domains are attached to a device through this + * interface, any tries to call iommu_attach_device() should fail + * (iommu_detach_device() can't fail, so we fail when trying to re-attach). + * This should make us safe against a device being attached to a guest as a + * whole while there are still pasid users on it (aux and sva). + */ +int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_attach_dev) + ret = domain->ops->aux_attach_dev(domain, dev); + + if (!ret) + trace_attach_device_to_domain(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_attach_device); + +void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) +{ + if (domain->ops->aux_detach_dev) { + domain->ops->aux_detach_dev(domain, dev); + trace_detach_device_from_domain(dev); + } +} +EXPORT_SYMBOL_GPL(iommu_aux_detach_device); + +int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + int ret = -ENODEV; + + if (domain->ops->aux_get_pasid) + ret = domain->ops->aux_get_pasid(domain, dev); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); + /** * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d44f3a6762be..c2456eeba168 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -184,6 +184,11 @@ struct iommu_resv_region { enum iommu_resv_type type; }; +/* Per device IOMMU features */ +enum iommu_dev_features { + IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ +}; + /** * enum page_response_code - Return status of fault handlers, telling the IOMMU * driver how to proceed with the fault. 
@@ -361,6 +366,17 @@ struct iommu_ops { struct device *dev, struct tlb_invalidate_info *inv_info); int (*page_response)(struct device *dev, struct page_response_msg *msg); + /* Per device IOMMU features */ + bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); + bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); + int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); + int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); + + /* Aux-domain specific attach/detach entries */ + int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); + unsigned long pgsize_bitmap; #ifdef CONFIG_SMMU_BYPASS_DEV @@ -675,6 +691,14 @@ extern int iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, int *pasid, unsigned long flags, void *drvdata); extern int iommu_sva_unbind_device(struct device *dev, int pasid); +bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f); +int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); +int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); +bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); +int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); +void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); +int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -984,6 +1008,47 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } +static inline bool +iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat) +{ + return false; +} + +static inline bool +iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) +{ + return false; +} + +static inline int +iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) +{ + return -ENODEV; +} + +static inline int +iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) +{ + return -ENODEV; +} + +static inline int +iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) +{ +} + +static inline int +iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + return -ENODEV; +} + static inline struct iommu_domain *iommu_group_share_domain(struct iommu_group *group) { -- Gitee From 0f41b893f72f511d816416c8fdc1c36a422271bc Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 17:13:02 +0800 Subject: [PATCH 10/45] iommu/vt-d: Make intel_iommu_enable_pasid() more generic This moves intel_iommu_enable_pasid() out of the scope of CONFIG_INTEL_IOMMU_SVM with more and more features requiring pasid function. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 21 +++++++-------------- drivers/iommu/intel-svm.c | 19 ++++++++++++++++++- include/linux/intel-iommu.h | 2 +- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 33dcd8d6046c..7c5a3e015e35 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5255,8 +5255,7 @@ static void intel_iommu_put_resv_regions(struct device *dev, kfree(entry); } -#ifdef CONFIG_INTEL_IOMMU_SVM -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) { struct device_domain_info *info; struct context_entry *context; @@ -5265,7 +5264,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(sdev->dev); + domain = get_valid_domain_for_dev(dev); if (!domain) return -EINVAL; @@ -5273,7 +5272,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd spin_lock(&iommu->lock); ret = -EINVAL; - info = sdev->dev->archdata.iommu; + info = dev->archdata.iommu; if (!info || !info->pasid_supported) goto out; @@ -5283,14 +5282,13 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo = context[0].lo; - sdev->did = domain->iommu_did[iommu->seq_id]; - sdev->sid = PCI_DEVID(info->bus, info->devfn); - if (!(ctx_lo & CONTEXT_PASIDE)) { ctx_lo |= CONTEXT_PASIDE; context[0].lo = ctx_lo; wmb(); - iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + iommu->flush.flush_context(iommu, + domain->iommu_did[iommu->seq_id], + PCI_DEVID(info->bus, info->devfn), DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); } @@ -5299,12 +5297,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!info->pasid_enabled) iommu_enable_dev_iotlb(info); - if (info->ats_enabled) { - sdev->dev_iotlb = 1; - sdev->qdep = info->ats_qdep; - if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) - sdev->qdep = 0; - } ret = 0; out: @@ -5314,6 +5306,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) { struct intel_iommu *iommu; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 917b5da5f0df..aff6538a2eb8 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -283,6 +283,7 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; @@ -346,13 +347,29 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } sdev->dev = dev; - ret = intel_iommu_enable_pasid(iommu, sdev); + ret = intel_iommu_enable_pasid(iommu, dev); if (ret || !pasid) { /* If they don't actually want to assign a PASID, this is * just an enabling check/preparation. 
*/ kfree(sdev); goto out; } + + info = dev->archdata.iommu; + if (!info || !info->pasid_supported) { + kfree(sdev); + goto out; + } + + sdev->did = FLPT_DEFAULT_DID; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + /* Finish the setup now we know we're keeping it */ sdev->users = 1; sdev->ops = ops; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a5a5e00f8339..21f21b3cc00f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -581,6 +581,7 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); @@ -611,7 +612,6 @@ struct intel_svm { struct list_head list; }; -extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); #endif -- Gitee From d5b62e73bad3deb50a6e10109877cfa7ed2a147c Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 17:18:17 +0800 Subject: [PATCH 11/45] iommu/vt-d: Add per-device IOMMU feature ops entries This adds the iommu ops entries for aux-domain per-device feature query and enable/disable. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 159 ++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 1 + 2 files changed, 160 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7c5a3e015e35..747956383511 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2432,6 +2432,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->domain = domain; info->iommu = iommu; info->pasid_table = NULL; + info->auxd_enabled = 0; if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -5154,6 +5155,42 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static inline bool scalable_mode_support(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!sm_supported(iommu)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + return ret; +} + +static inline bool iommu_pasid_support(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool ret = true; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!pasid_supported(iommu)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + return ret; +} + static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) @@ -5328,6 +5365,124 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) } #endif /* CONFIG_INTEL_IOMMU_SVM */ +static int intel_iommu_enable_auxd(struct device *dev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + u8 bus, devfn; + int ret; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu || dmar_disabled) + return -EINVAL; + + if (!sm_supported(iommu) 
|| !pasid_supported(iommu)) + return -EINVAL; + + ret = intel_iommu_enable_pasid(iommu, dev); + if (ret) + return -ENODEV; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + info->auxd_enabled = 1; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +static int intel_iommu_disable_auxd(struct device *dev) +{ + struct device_domain_info *info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + if (!WARN_ON(!info)) + info->auxd_enabled = 0; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +/* + * A PCI express designated vendor specific extended capability is defined + * in the section 3.7 of Intel scalable I/O virtualization technical spec + * for system software and tools to detect endpoint devices supporting the + * Intel scalable IO virtualization without host driver dependency. + * + * Returns the address of the matching extended capability structure within + * the device's PCI configuration space or 0 if the device does not support + * it. + */ +static int siov_find_pci_dvsec(struct pci_dev *pdev) +{ + int pos; + u16 vendor, id; + + pos = pci_find_next_ext_capability(pdev, 0, 0x23); + while (pos) { + pci_read_config_word(pdev, pos + 4, &vendor); + pci_read_config_word(pdev, pos + 8, &id); + if (vendor == PCI_VENDOR_ID_INTEL && id == 5) + return pos; + + pos = pci_find_next_ext_capability(pdev, pos, 0x23); + } + + return 0; +} + +static bool +intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) { + int ret; + + if (!dev_is_pci(dev) || dmar_disabled || + !scalable_mode_support() || !iommu_pasid_support()) + return false; + + ret = pci_pasid_features(to_pci_dev(dev)); + if (ret < 0) + return false; + + return !!siov_find_pci_dvsec(to_pci_dev(dev)); + } + + return false; +} + +static int +intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) + return intel_iommu_enable_auxd(dev); + + return -ENODEV; +} + +static int +intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat == IOMMU_DEV_FEAT_AUX) + return intel_iommu_disable_auxd(dev); + + return -ENODEV; +} + +static bool +intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) +{ + struct device_domain_info *info = dev->archdata.iommu; + + if (feat == IOMMU_DEV_FEAT_AUX) + return scalable_mode_support() && info && info->auxd_enabled; + + return false; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5342,6 +5497,10 @@ const struct iommu_ops intel_iommu_ops = { .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = intel_iommu_put_resv_regions, .device_group = pci_device_group, + .dev_has_feat = intel_iommu_dev_has_feat, + .dev_feat_enabled = intel_iommu_dev_feat_enabled, + .dev_enable_feat = intel_iommu_dev_enable_feat, + .dev_disable_feat = intel_iommu_dev_disable_feat, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 21f21b3cc00f..fe20f9ef7863 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -499,6 +499,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* 
IOMMU used by this device */
--
Gitee

From 1d43ecba85ede7452b31bcbf818cd02bd3f9b2e4 Mon Sep 17 00:00:00 2001
From: Sun Shouxin
Date: Sun, 24 Apr 2022 17:22:07 +0800
Subject: [PATCH 12/45] iommu/vt-d: Move common code out of iommu_attach_device()

This code can be used by both the normal and the aux-domain-specific attach entries, so move it into a common function to avoid duplication.

Cc: Ashok Raj
Cc: Jacob Pan
Cc: Kevin Tian
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
Signed-off-by: Xibo.Wang
---
 drivers/iommu/intel-iommu.c | 60 ++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 747956383511..c64343a6eb22 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4997,35 +4997,14 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
 	domain_exit(to_dmar_domain(domain));
 }

-static int intel_iommu_attach_device(struct iommu_domain *domain,
-				     struct device *dev)
+static int prepare_domain_attach_device(struct iommu_domain *domain,
+					struct device *dev)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct intel_iommu *iommu;
 	int addr_width;
 	u8 bus, devfn;

-	if (device_is_rmrr_locked(dev)) {
-		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
-		return -EPERM;
-	}
-
-	/* normally dev is not mapped */
-	if (unlikely(domain_context_mapped(dev))) {
-		struct dmar_domain *old_domain;
-
-		old_domain = find_domain(dev);
-		if (old_domain) {
-			rcu_read_lock();
-			dmar_remove_one_dev_info(dev);
-			rcu_read_unlock();
-
-			if (!domain_type_is_vm_or_si(old_domain) &&
-			    list_empty(&old_domain->devices))
-				domain_exit(old_domain);
-		}
-	}
-
 	iommu = device_to_iommu(dev, &bus, &devfn);
 	if (!iommu)
 		return -ENODEV;
@@ -5058,7 +5037,40 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		dmar_domain->agaw--;
 	}

-	return domain_add_dev_info(dmar_domain, dev);
+	return 0;
+}
+
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+				     struct device *dev)
+{
+	int ret;
+
+	if (device_is_rmrr_locked(dev)) {
+		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
+		return -EPERM;
+	}
+
+	/* normally dev is not mapped */
+	if (unlikely(domain_context_mapped(dev))) {
+		struct dmar_domain *old_domain;
+
+		old_domain = find_domain(dev);
+		if (old_domain) {
+			rcu_read_lock();
+			dmar_remove_one_dev_info(dev);
+			rcu_read_unlock();
+
+			if (!domain_type_is_vm_or_si(old_domain) &&
+			    list_empty(&old_domain->devices))
+				domain_exit(old_domain);
+		}
+	}
+
+	ret = prepare_domain_attach_device(domain, dev);
+	if (ret)
+		return ret;
+
+	return domain_add_dev_info(to_dmar_domain(domain), dev);
 }

 static void intel_iommu_detach_device(struct iommu_domain *domain,
--
Gitee

From 282586e3536115a210d5976f0250686126f37668 Mon Sep 17 00:00:00 2001
From: Sun Shouxin
Date: Sun, 24 Apr 2022 17:29:35 +0800
Subject: [PATCH 13/45] iommu/vt-d: Aux-domain specific domain attach/detach

When multiple domains per device have been enabled by the device driver, the device will tag the default PASID for the domain onto all DMA traffic out of the subset of this device, and the IOMMU should translate the DMA requests at PASID granularity.

This adds the intel_iommu_aux_attach/detach_device() ops to support managing PASID granular translation structures when the device driver has enabled multiple domains per device.
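Once attached, the driver is expected to program the domain's default PASID into the device so that DMA from the subset is properly tagged. A minimal sketch, assuming a made-up device-specific helper my_hw_set_pasid() (the ID itself is exposed by the following patch via iommu_aux_get_pasid()):

	ret = iommu_aux_attach_device(domain, dev);
	if (ret)
		return ret;

	pasid = iommu_aux_get_pasid(domain, dev);
	if (pasid < 0)
		return pasid;

	/* Device specific: tag DMA of this subset with the PASID. */
	my_hw_set_pasid(to_my_device(dev), pasid);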
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 152 ++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 10 +++ 2 files changed, 162 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c64343a6eb22..07852c8d9339 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2433,6 +2433,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->iommu = iommu; info->pasid_table = NULL; info->auxd_enabled = 0; + INIT_LIST_HEAD(&info->auxiliary_domains); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -4997,6 +4998,131 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) domain_exit(to_dmar_domain(domain)); } +/* + * Check whether a @domain could be attached to the @dev through the + * aux-domain attach/detach APIs. + */ +static inline bool +is_aux_domain(struct device *dev, struct iommu_domain *domain) +{ + struct device_domain_info *info = dev->archdata.iommu; + + return info && info->auxd_enabled && + domain->type == IOMMU_DOMAIN_UNMANAGED; +} + +static void auxiliary_link_device(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info = dev->archdata.iommu; + + assert_spin_locked(&device_domain_lock); + if (WARN_ON(!info)) + return; + + domain->auxd_refcnt++; + list_add(&domain->auxd, &info->auxiliary_domains); +} + +static void auxiliary_unlink_device(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info = dev->archdata.iommu; + + assert_spin_locked(&device_domain_lock); + if (WARN_ON(!info)) + return; + + list_del(&domain->auxd); + domain->auxd_refcnt--; + + if (!domain->auxd_refcnt && domain->default_pasid > 0) + intel_pasid_free_id(domain->default_pasid); +} + +static int aux_domain_add_dev(struct dmar_domain *domain, + struct device *dev) +{ + int ret; + u8 bus, devfn; + unsigned long flags; + struct intel_iommu *iommu; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (domain->default_pasid <= 0) { + int pasid; + + pasid = intel_pasid_alloc_id(domain, PASID_MIN, + pci_max_pasids(to_pci_dev(dev)), + GFP_KERNEL); + if (pasid <= 0) { + pr_err("Can't allocate default pasid\n"); + return -ENODEV; + } + domain->default_pasid = pasid; + } + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * iommu->lock must be held to attach domain to iommu and setup the + * pasid entry for second level translation. 
+ */ + spin_lock(&iommu->lock); + ret = domain_attach_iommu(domain, iommu); + if (ret) + goto attach_failed; + + /* Setup the PASID entry for mediated devices: */ + ret = intel_pasid_setup_second_level(iommu, domain, dev, + domain->default_pasid); + if (ret) + goto table_failed; + spin_unlock(&iommu->lock); + + auxiliary_link_device(domain, dev); + + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; + +table_failed: + domain_detach_iommu(domain, iommu); +attach_failed: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + if (!domain->auxd_refcnt && domain->default_pasid > 0) + intel_pasid_free_id(domain->default_pasid); + + return ret; +} + +static void aux_domain_remove_dev(struct dmar_domain *domain, + struct device *dev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + + if (!is_aux_domain(dev, &domain->domain)) + return; + + spin_lock_irqsave(&device_domain_lock, flags); + info = dev->archdata.iommu; + iommu = info->iommu; + + auxiliary_unlink_device(domain, dev); + + spin_lock(&iommu->lock); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + domain_detach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + + spin_unlock_irqrestore(&device_domain_lock, flags); +} + static int prepare_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -5050,6 +5176,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EPERM; } + if (is_aux_domain(dev, domain)) + return -EPERM; + /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; @@ -5073,12 +5202,33 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return domain_add_dev_info(to_dmar_domain(domain), dev); } +static int intel_iommu_aux_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + int ret; + + if (!is_aux_domain(dev, domain)) + return -EPERM; + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + return aux_domain_add_dev(to_dmar_domain(domain), dev); +} + static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { dmar_remove_one_dev_info(dev); } +static void intel_iommu_aux_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + aux_domain_remove_dev(to_dmar_domain(domain), dev); +} + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot) @@ -5501,6 +5651,8 @@ const struct iommu_ops intel_iommu_ops = { .domain_free = intel_iommu_domain_free, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, + .aux_attach_dev = intel_iommu_aux_attach_device, + .aux_detach_dev = intel_iommu_aux_detach_device, .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fe20f9ef7863..ebc54a64374e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -413,9 +413,11 @@ struct dmar_domain { /* Domain ids per IOMMU. 
Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ + unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ + struct list_head auxd; /* link to device's auxiliary list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -434,6 +436,11 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ + int default_pasid; /* + * The default pasid used for non-SVM + * traffic on mediated devices. + */ + struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -490,6 +497,9 @@ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ + struct list_head auxiliary_domains; /* auxiliary domains + * attached to this device + */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ u16 pfsid; /* SRIOV physical function source ID */ -- Gitee From f1b358db0ec0b04de3ef4e7b57055804bbd4054a Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 17:34:30 +0800 Subject: [PATCH 14/45] iommu/vt-d: Return ID associated with an auxiliary domain This adds support to return the default pasid associated with an auxiliary domain. The PCI device which is bound with this domain should use this value as the pasid for all DMA requests of the subset of device which is isolated and protected with this domain. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 07852c8d9339..c1328e25754e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5645,6 +5645,15 @@ intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) return false; } +static int +intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + return dmar_domain->default_pasid > 0 ? + dmar_domain->default_pasid : -EINVAL; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5653,6 +5662,7 @@ const struct iommu_ops intel_iommu_ops = { .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, .aux_detach_dev = intel_iommu_aux_detach_device, + .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, -- Gitee From d8b633f1b84e7d9076725ef62e9bd5e4c6ab4d18 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 17:35:24 +0800 Subject: [PATCH 15/45] vfio/mdev: Add iommu related member in mdev_device A parent device might create different types of mediated devices. For example, a mediated device could be created by the parent device with full isolation and protection provided by the IOMMU. One usage case could be found on Intel platforms where a mediated device is an assignable subset of a PCI, the DMA requests on behalf of it are all tagged with a PASID. 
Since IOMMU supports PASID-granular translations (scalable mode in VT-d 3.0), this mediated device could be individually protected and isolated by an IOMMU. This patch adds a new member in the struct mdev_device to indicate that the mediated device represented by mdev could be isolated and protected by attaching a domain to a device represented by mdev->iommu_device. It also adds a helper to add or set the iommu device. * mdev_device->iommu_device - This, if set, indicates that the mediated device could be fully isolated and protected by IOMMU via attaching an iommu domain to this device. If empty, it indicates using vendor defined isolation, hence bypass IOMMU. * mdev_set/get_iommu_device(dev, iommu_device) - Set or get the iommu device which represents this mdev in IOMMU's device scope. Drivers don't need to set the iommu device if it uses vendor defined isolation. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Suggested-by: Kevin Tian Suggested-by: Alex Williamson Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/vfio/mdev/mdev_core.c | 18 ++++++++++++++++++ drivers/vfio/mdev/mdev_private.h | 1 + include/linux/mdev.h | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index e052f62fdea7..50a9c0720615 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -389,6 +389,24 @@ int mdev_device_remove(struct device *dev, bool force_remove) return 0; } +int mdev_set_iommu_device(struct device *dev, struct device *iommu_device) +{ + struct mdev_device *mdev = to_mdev_device(dev); + + mdev->iommu_device = iommu_device; + + return 0; +} +EXPORT_SYMBOL(mdev_set_iommu_device); + +struct device *mdev_get_iommu_device(struct device *dev) +{ + struct mdev_device *mdev = to_mdev_device(dev); + + return mdev->iommu_device; +} +EXPORT_SYMBOL(mdev_get_iommu_device); + static int __init mdev_init(void) { return mdev_bus_register(); diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index b5819b7d7ef7..1b09251f17d6 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -33,6 +33,7 @@ struct mdev_device { struct kref ref; struct list_head next; struct kobject *type_kobj; + struct device *iommu_device; bool active; }; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index b6e048e1045f..c3ab8a9cfcc7 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -15,6 +15,20 @@ struct mdev_device; +/* + * Called by the parent device driver to set the device which represents + * this mdev in iommu protection scope. By default, the iommu device is + * NULL, that indicates using vendor defined isolation. + * + * @dev: the mediated device that iommu will isolate. + * @iommu_device: a pci device which represents the iommu for @dev. + * + * Return 0 for success, otherwise negative error value. + */ +int mdev_set_iommu_device(struct device *dev, struct device *iommu_device); + +struct device *mdev_get_iommu_device(struct device *dev); + /** * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. 
-- Gitee From fc48216110a501614f7f5b274fb3f5304b732b08 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 18:02:38 +0800 Subject: [PATCH 16/45] vfio/type1: Add domain at(de)taching group helpers This adds helpers to attach or detach a domain to a group. This will replace iommu_attach_group() which only works for non-mdev devices. If a domain is attaching to a group which includes the mediated devices, it should attach to the iommu device (a pci device which represents the mdev in iommu scope) instead. The added helper supports attaching domain to groups for both pci and mdev devices. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Acked-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/vfio/vfio_iommu_type1.c | 99 ++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5a106963dd08..f3654f7c295c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1637,6 +1637,75 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) return ret; } +static struct device *vfio_mdev_get_iommu_device(struct device *dev) +{ + struct device *(*fn)(struct device *dev); + struct device *iommu_device; + + fn = symbol_get(mdev_get_iommu_device); + if (fn) { + iommu_device = fn(dev); + symbol_put(mdev_get_iommu_device); + + return iommu_device; + } + + return NULL; +} + +static int vfio_mdev_attach_domain(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + struct device *iommu_device; + + iommu_device = vfio_mdev_get_iommu_device(dev); + if (iommu_device) { + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) + return iommu_aux_attach_device(domain, iommu_device); + else + return iommu_attach_device(domain, iommu_device); + } + + return -EINVAL; +} + +static int vfio_mdev_detach_domain(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + struct device *iommu_device; + + iommu_device = vfio_mdev_get_iommu_device(dev); + if (iommu_device) { + if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) + iommu_aux_detach_device(domain, iommu_device); + else + iommu_detach_device(domain, iommu_device); + } + + return 0; +} + +static int vfio_iommu_attach_group(struct vfio_domain *domain, + struct vfio_group *group) +{ + if (group->mdev_group) + return iommu_group_for_each_dev(group->iommu_group, + domain->domain, + vfio_mdev_attach_domain); + else + return iommu_attach_group(domain->domain, group->iommu_group); +} + +static void vfio_iommu_detach_group(struct vfio_domain *domain, + struct vfio_group *group) +{ + if (group->mdev_group) + iommu_group_for_each_dev(group->iommu_group, domain->domain, + vfio_mdev_detach_domain); + else + iommu_detach_group(domain->domain, group->iommu_group); +} + static int vfio_iommu_try_attach_group(struct vfio_iommu *iommu, struct vfio_group *group, struct vfio_domain *cur_domain, @@ -1750,7 +1819,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_domain; } - ret = iommu_attach_group(domain->domain, iommu_group); + ret = vfio_iommu_attach_group(domain, group); if (ret) goto out_domain; @@ -1773,15 +1842,19 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, domain->prot |= IOMMU_CACHE; list_for_each_entry(d, &iommu->domain_list, next) { - ret = 
vfio_iommu_try_attach_group(iommu, group, domain, d); - if (ret < 0) { - goto out_domain; - } else if (!ret) { - list_add(&group->next, &d->group_list); - iommu_domain_free(domain->domain); - kfree(domain); - mutex_unlock(&iommu->lock); - return 0; + if (d->domain->ops == domain->domain->ops && + d->prot == domain->prot) { + vfio_iommu_detach_group(domain, group); + if (!vfio_iommu_attach_group(d, group)) { + list_add(&group->next, &d->group_list); + iommu_domain_free(domain->domain); + kfree(domain); + mutex_unlock(&iommu->lock); + return 0; + } + ret = vfio_iommu_attach_group(domain, group); + if (ret) + goto out_domain; } } @@ -1809,7 +1882,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, return 0; out_detach: - iommu_detach_group(domain->domain, iommu_group); + vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); out_free: @@ -1902,7 +1975,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (!group) continue; - iommu_detach_group(domain->domain, iommu_group); + vfio_iommu_detach_group(domain, group); if (group->sva_enabled) { iommu_group_for_each_dev(iommu_group, NULL, vfio_iommu_sva_shutdown); @@ -1976,7 +2049,7 @@ static void vfio_release_domain(struct vfio_domain *domain, bool external) list_for_each_entry_safe(group, group_tmp, &domain->group_list, next) { if (!external) - iommu_detach_group(domain->domain, group->iommu_group); + vfio_iommu_detach_group(domain, group); list_del(&group->next); kfree(group); } -- Gitee From 22cae6942037f17f1dea41d13b7a58b361791aed Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 18:07:23 +0800 Subject: [PATCH 17/45] vfio/type1: Handle different mdev isolation type This adds the support to determine the isolation type of a mediated device group by checking whether it has an iommu device. If an iommu device exists, an iommu domain will be allocated and then attached to the iommu device. Otherwise, keep the same behavior as it is. 
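For reference, a parent driver that wants this IOMMU-backed path registers the backing device at mdev creation time, roughly as below (hypothetical create callback; the parent PF stands in for the iommu device):

	static int my_parent_create(struct kobject *kobj, struct mdev_device *mdev)
	{
		/*
		 * Expose the PF as the iommu device so that vfio/type1
		 * allocates and attaches an IOMMU domain instead of
		 * falling back to vendor defined isolation.
		 */
		return mdev_set_iommu_device(mdev_dev(mdev),
					     mdev_parent_dev(mdev));
	}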
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/vfio/vfio_iommu_type1.c | 52 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index f3654f7c295c..34250dbbff7d 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -598,7 +598,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); /* Fail if notifier list is empty */ - if ((!iommu->external_domain) || (!iommu->notifier.head)) { + if (!iommu->notifier.head) { ret = -EINVAL; goto pin_done; } @@ -681,11 +681,6 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, mutex_lock(&iommu->lock); - if (!iommu->external_domain) { - mutex_unlock(&iommu->lock); - return -EINVAL; - } - do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); for (i = 0; i < npage; i++) { struct vfio_dma *dma; @@ -1706,6 +1701,33 @@ static void vfio_iommu_detach_group(struct vfio_domain *domain, iommu_detach_group(domain->domain, group->iommu_group); } +static bool vfio_bus_is_mdev(struct bus_type *bus) +{ + struct bus_type *mdev_bus; + bool ret = false; + + mdev_bus = symbol_get(mdev_bus_type); + if (mdev_bus) { + ret = (bus == mdev_bus); + symbol_put(mdev_bus_type); + } + + return ret; +} + +static int vfio_mdev_iommu_device(struct device *dev, void *data) +{ + struct device **old = data, *new; + + new = vfio_mdev_get_iommu_device(dev); + if (!new || (*old && *old != new)) + return -EINVAL; + + *old = new; + + return 0; +} + static int vfio_iommu_try_attach_group(struct vfio_iommu *iommu, struct vfio_group *group, struct vfio_domain *cur_domain, @@ -1750,7 +1772,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_group *group; struct vfio_domain *domain, *d; - struct bus_type *bus = NULL, *mdev_bus; + struct bus_type *bus = NULL; int ret; bool resv_msi, msi_remap; phys_addr_t resv_msi_base; @@ -1785,23 +1807,27 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_free; - mdev_bus = symbol_get(mdev_bus_type); + if (vfio_bus_is_mdev(bus)) { + struct device *iommu_device = NULL; + group->mdev_group = true; - if (mdev_bus) { - if ((bus == mdev_bus) && !iommu_present(bus)) { - symbol_put(mdev_bus_type); + /* Determine the isolation type */ + ret = iommu_group_for_each_dev(iommu_group, &iommu_device, + vfio_mdev_iommu_device); + if (ret || !iommu_device) { if (!iommu->external_domain) { INIT_LIST_HEAD(&domain->group_list); iommu->external_domain = domain; - } else + } else { kfree(domain); + } list_add(&group->next, &iommu->external_domain->group_list); mutex_unlock(&iommu->lock); return 0; } - symbol_put(mdev_bus_type); + bus = iommu_device->bus; } domain->domain = iommu_domain_alloc(bus); -- Gitee From d9b3db1b42a0f333e0e1c656ef6ee9d5c20d4ee1 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 18:11:02 +0800 Subject: [PATCH 18/45] iommu/vt-d: Cure VF irqdomain hickup The recent changes to store the MSI irqdomain pointer in struct device missed that Intel DMAR does not register virtual function devices. Due to that a VF device gets the plain PCI-MSI domain assigned and then issues compat MSI messages which get caught by the interrupt remapping unit. 
Cure that by inheriting the irq domain from the physical function device. Ideally the irqdomain would be associated to the bus, but DMAR can have multiple units and therefore irqdomains on a single bus. The VF 'bus' could of course inherit the domain from the PF, but that'd be yet another x86 oddity. Fixes: 85a8dfc57a0b ("iommm/vt-d: Store irq domain in struct device") Reported-by: Jason Gunthorpe Signed-off-by: Thomas Gleixner Acked-by: Lu Baolu Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Marc Zyngier Cc: David Woodhouse Link: https://lore.kernel.org/r/draft-87eekymlpz.fsf@nanos.tec.linutronix.de Signed-off-by: Xibo.Wang --- drivers/iommu/dmar.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index f92ac7477f02..bce9ce4cc48f 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -333,6 +333,11 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) dmar_iommu_notify_scope_dev(info); } +static inline void vf_inherit_msi_domain(struct pci_dev *pdev) +{ + dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev)); +} + static int dmar_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -342,8 +347,20 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions. * For VFs we actually do the lookup based on the corresponding * PF in device_to_iommu() anyway. */ - if (pdev->is_virtfn) + if (pdev->is_virtfn) { + /* + * Ensure that the VF device inherits the irq domain of the + * PF device. Ideally the device would inherit the domain + * from the bus, but DMAR can have multiple units per bus + * which makes this impossible. The VF 'bus' could inherit + * from the PF device, but that's yet another x86'sism to + * inflict on everybody else. + */ + if (action == BUS_NOTIFY_ADD_DEVICE) + vf_inherit_msi_domain(pdev); return NOTIFY_DONE; + } + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; -- Gitee From 8e12e510c92b5e4a522661760e142c3eaa111926 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 19:30:04 +0800 Subject: [PATCH 19/45] iommu: Add helpers to set/get default domain type Add a couple of functions to allow changing the default domain type from architecture code and a function for iommu drivers to request whether the default domain is passthrough. 
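A hypothetical illustration of the intended split, with arch code choosing the default and a driver merely querying it:

	/* Arch code, e.g. while handling an "iommu=pt"-style option: */
	iommu_set_default_passthrough(true);	/* true: set on the command line */

	/* An IOMMU driver, later: */
	if (iommu_default_passthrough())
		pr_info("Using passthrough as the default domain type\n");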
Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/iommu.c | 22 ++++++++++++++++++++++ include/linux/iommu.h | 16 ++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1ac16c542c1a..91375174de2b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2289,6 +2289,28 @@ int iommu_request_dm_for_dev(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_request_dm_for_dev); +void iommu_set_default_passthrough(bool cmd_line) +{ + if (cmd_line) + iommu_set_cmd_line_dma_api(); + + iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; +} + +void iommu_set_default_translated(bool cmd_line) +{ + if (cmd_line) + iommu_set_cmd_line_dma_api(); + + iommu_def_domain_type = IOMMU_DOMAIN_DMA; +} + +bool iommu_default_passthrough(void) +{ + return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; +} +EXPORT_SYMBOL_GPL(iommu_default_passthrough); + const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { const struct iommu_ops *ops = NULL; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c2456eeba168..f0e79bb5db03 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -575,6 +575,9 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern void iommu_set_default_passthrough(bool cmd_line); +extern void iommu_set_default_translated(bool cmd_line); +extern bool iommu_default_passthrough(void); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type); @@ -828,6 +831,19 @@ static inline int iommu_request_dm_for_dev(struct device *dev) return -ENODEV; } +static inline void iommu_set_default_passthrough(bool cmd_line) +{ +} + +static inline void iommu_set_default_translated(bool cmd_line) +{ +} + +static inline bool iommu_default_passthrough(void) +{ + return true; +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { -- Gitee From 51c957cee071365e71626dfcd5747483eb84fbdd Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 20:04:21 +0800 Subject: [PATCH 20/45] iommu: Remember when default domain type was set on kernel command line Introduce an extensible concept to remember when certain configuration settings for the IOMMU code have been set on the kernel command line. This will be used later to prevent overwriting these settings with other defaults. 
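A minimal sketch of the intended use inside iommu.c (illustrative only; the real callers arrive in later patches): platform or driver heuristics may only flip the default when the user has not already decided on the command line.

	/* Apply a platform preference only if the user didn't choose. */
	if (!iommu_cmd_line_dma_api())
		iommu_set_default_passthrough(false);	/* false: not from the command line */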
Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/iommu.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 91375174de2b..54917e1ca3e6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -43,6 +43,8 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; #endif static bool iommu_dma_strict __read_mostly; +static u32 iommu_cmd_line __read_mostly; + struct iommu_callback_data { const struct iommu_ops *ops; }; @@ -84,6 +86,18 @@ static const char * const iommu_group_resv_type_string[] = { [IOMMU_RESV_SW_MSI] = "msi", }; +#define IOMMU_CMD_LINE_DMA_API BIT(0) + +static void iommu_set_cmd_line_dma_api(void) +{ + iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; +} + +static bool __maybe_unused iommu_cmd_line_dma_api(void) +{ + return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API); +} + #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ __ATTR(_name, _mode, _show, _store) @@ -130,6 +144,8 @@ static int __init iommu_set_def_domain_type(char *str) if (ret) return ret; + iommu_set_cmd_line_dma_api(); + iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; return 0; } -- Gitee From d69e864cf0bab75c52073b333a03b441359d4033 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 20:26:51 +0800 Subject: [PATCH 21/45] iommu/amd: Request passthrough mode from IOMMU core Get rid of the iommu_pass_through variable and request passthrough mode via the new iommu core function. Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/amd_iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a1f417694a48..c60e294dc3cb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -448,7 +448,7 @@ static int iommu_init_device(struct device *dev) * invalid address), we ignore the capability for the device so * it'll be forced to go into translation mode. */ - if ((iommu_pass_through || !amd_iommu_force_isolation) && + if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { struct amd_iommu *iommu; @@ -2226,7 +2226,7 @@ static int amd_iommu_add_device(struct device *dev) BUG_ON(!dev_data); - if (iommu_pass_through || dev_data->iommu_v2) + if (dev_data->iommu_v2) iommu_request_dm_for_dev(dev); /* Domains are initialized for this device - have a look what we ended up with */ @@ -2813,7 +2813,7 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { - swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; + swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; iommu_detected = 1; /* -- Gitee From 33fc7950571de1b60a690a31aa580c69cde397ab Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Sun, 24 Apr 2022 20:59:40 +0800 Subject: [PATCH 22/45] iommu/vt-d: Request passthrough mode from IOMMU core Get rid of the iommu_pass_through variable and request passthrough mode via the new iommu core function. 
Signed-off-by: Joerg Roedel
Signed-off-by: Xibo.Wang
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c1328e25754e..d0676e9c449e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3361,7 +3361,7 @@ static int __init init_dmars(void)
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 	}

-	if (iommu_pass_through)
+	if (iommu_default_passthrough())
 		iommu_identity_mapping |= IDENTMAP_ALL;

 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
--
Gitee

From ad7eb4887b86c02aef0c635adfd1129bfa40c7ee Mon Sep 17 00:00:00 2001
From: Sun Shouxin
Date: Sun, 24 Apr 2022 21:00:18 +0800
Subject: [PATCH 23/45] iommu/vt-d: Add 256-bit invalidation descriptor support (add missing code)

Intel vt-d spec rev3.0 requires software to use 256-bit descriptors in the invalidation queue. As the spec reads in section 6.5.2:

Remapping hardware supporting Scalable Mode Translations (ECAP_REG.SMTS=1) allow software to additionally program the width of the descriptors (128-bits or 256-bits) that will be written into the Queue. Software should setup the Invalidation Queue for 256-bit descriptors before programming remapping hardware for scalable-mode translation as 128-bit descriptors are treated as invalid descriptors (see Table 21 in Section 6.5.2.10) in scalable-mode.

This patch adds 256-bit invalidation descriptor support if the hardware presents the scalable mode capability.

Cc: Ashok Raj
Cc: Jacob Pan
Cc: Kevin Tian
Signed-off-by: Sanjay Kumar
Signed-off-by: Liu Yi L
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
Signed-off-by: Sun Shouxin
Signed-off-by: Xibo.Wang
---
 drivers/iommu/dmar.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index bce9ce4cc48f..0ef86f70b77a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1203,6 +1203,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	 * is cleared.
 	 */
 	if (fault & DMA_FSTS_IQE) {
+		head = readl(iommu->reg + DMAR_IQH_REG);
 		if ((head >> shift) == index) {
 			struct qi_desc *desc = qi->desc + head;
--
Gitee

From d28c40c67fa4c37a131add5d1a92e9fdc7c6b332 Mon Sep 17 00:00:00 2001
From: Sun Shouxin
Date: Mon, 25 Apr 2022 09:40:05 +0800
Subject: [PATCH 24/45] iommu/vt-d: Reserve a domain id for FL and PT modes

Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid entry for first-level or pass-through translation should be programmed with a domain id different from those used for second-level or nested translation. It is recommended that software use the same domain id for all first-only and pass-through translations.

This reserves a domain id for first-level and pass-through translations.
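For context, the SVM bind path reworked earlier in this series already programs first-level PASID entries with this reserved ID:

	ret = intel_pasid_setup_first_level(iommu, dev,
					    mm ? mm->pgd : init_mm.pgd,
					    svm->pasid, FLPT_DEFAULT_DID,
					    mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);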
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 10 ++++++++++ drivers/iommu/intel-pasid.h | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d0676e9c449e..87019b1512f3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1668,6 +1668,16 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ set_bit(0, iommu->domain_ids); + /* + * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid + * entry for first-level or pass-through translation modes should + * be programmed with a domain id different from those used for + * second-level or nested translation. We reserve a domain id for + * this purpose. */ + if (sm_supported(iommu)) + set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); + return 0; } diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 8ed62f10d979..91e81280e2d7 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -25,6 +25,12 @@ */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) +/* + * Domain ID reserved for pasid entries programmed for first-level + * only and pass-through transfer modes. */ +#define FLPT_DEFAULT_DID 1 + struct pasid_dir_entry { u64 val; }; -- Gitee From 8e630cf27292c336d671db4cd1d5407dfe5923ac Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 11:34:46 +0800 Subject: [PATCH 25/45] vfio/type1: Add domain at(de)taching group helpers This adds helpers to attach a domain to, or detach it from, a group. This will replace iommu_attach_group() which only works for non-mdev devices. If a domain is being attached to a group that includes mediated devices, it should be attached to the iommu device (a pci device which represents the mdev in iommu scope) instead. The added helper supports attaching a domain to groups of both pci and mdev devices.
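The dispatch the helper performs can be modeled in a few lines: for a plain pci device the domain is attached to the device itself, while for an mdev it is attached to the backing iommu device. A stand-alone sketch with made-up types; attach_one() here is a hypothetical stand-in, not the vfio helper itself:

    #include <stdbool.h>
    #include <stdio.h>

    struct device {
        const char *name;
        bool is_mdev;
        struct device *iommu_device;  /* parent PCI function backing an mdev */
    };

    struct domain { const char *name; };

    /* Pick the device the IOMMU actually knows about, then attach to it. */
    static int attach_one(struct domain *d, struct device *dev)
    {
        struct device *target = dev->is_mdev ? dev->iommu_device : dev;

        printf("attach domain %s -> %s\n", d->name, target->name);
        return 0;
    }

    int main(void)
    {
        struct device pf   = { "0000:00:02.0", false, NULL };
        struct device mdev = { "mdev-uuid", true, &pf };
        struct domain dom  = { "vfio-domain" };

        attach_one(&dom, &pf);    /* plain pci device: attach directly */
        attach_one(&dom, &mdev);  /* mdev: attach via its iommu device */
        return 0;
    }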
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Acked-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Xibo.Wang --- drivers/vfio/vfio_iommu_type1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 34250dbbff7d..c2c05098b256 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -101,6 +101,7 @@ struct vfio_group { struct iommu_group *iommu_group; struct list_head next; bool sva_enabled; + bool mdev_group; /* An mdev group */ }; struct vfio_mm { -- Gitee From 2e6e1298345297c8546e1a1f9c3b7673812e6343 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 13:57:00 +0800 Subject: [PATCH 26/45] Fix build bug: /include/linux/intel-iommu.h:39:0: warning: "CONTEXT_PASIDE" redefined Signed-off-by: Sun Shouxin Signed-off-by: Xibo.Wang --- include/linux/dma_remapping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 21b3e7d33d68..d46babf3c1c4 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -28,7 +28,6 @@ #define CONTEXT_DINVE (1ULL << 8) #define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) struct intel_iommu; struct dmar_domain; -- Gitee From 41ae0853c85b17fbd5ed6ea409d0248791a249c1 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 16:32:28 +0800 Subject: [PATCH 27/45] Intel: iommu/vt-d: Expose ISA direct mapping region via iommu_get_resv_regions commit d850c2ee upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. To support mapping the ISA region via iommu_group_create_direct_mappings, make sure it is exposed by iommu_get_resv_regions. Signed-off-by: James Sewart Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 87019b1512f3..e6dfdd91abfa 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5447,6 +5447,19 @@ static void intel_iommu_get_resv_regions(struct device *device, } up_read(&dmar_global_lock); +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA + if (dev_is_pci(device)) { + struct pci_dev *pdev = to_pci_dev(device); + + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { + reg = iommu_alloc_resv_region(0, 1UL << 24, 0, + IOMMU_RESV_DIRECT); + if (reg) + list_add_tail(&reg->list, head); + } + } +#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ + reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 0, IOMMU_RESV_MSI); -- Gitee From 3dffc90c3d35997619ecb92626391a3a3a4d7ff0 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 16:36:30 +0800 Subject: [PATCH 28/45] Intel: iommu/vt-d: Enable DMA remapping after rmrr mapped commit d8190dc6 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. The rmrr devices require an identity map of the rmrr regions before enabling DMA remapping. Otherwise, there will be a window during which DMA from/to the rmrr regions will be blocked. In order to alleviate this, we move enabling DMA remapping after all rmrr regions get mapped.
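The hazard being closed is purely one of ordering: if translation is switched on first, any DMA to an rmrr region is blocked until its identity map lands. A toy model of the corrected sequence, with illustrative names rather than the driver's code:

    #include <stdio.h>

    struct iommu { const char *name; int translation_on; };

    static int rmrr_mapped;

    static void map_rmrr_regions(void)
    {
        rmrr_mapped = 1;
        printf("rmrr regions identity-mapped\n");
    }

    static void device_dma(struct iommu *i)
    {
        if (i->translation_on && !rmrr_mapped)
            printf("%s: DMA to rmrr region BLOCKED\n", i->name);
        else
            printf("%s: DMA ok\n", i->name);
    }

    int main(void)
    {
        struct iommu dmar0 = { "dmar0", 0 };

        /* corrected order: map first, only then enable translation */
        map_rmrr_regions();
        dmar0.translation_on = 1;
        device_dma(&dmar0);  /* no blocked window exists */
        return 0;
    }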
Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e6dfdd91abfa..52c595539866 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3476,11 +3476,6 @@ static int __init init_dmars(void) ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; - - if (!translation_pre_enabled(iommu)) - iommu_enable_translation(iommu); - - iommu_disable_protect_mem_regions(iommu); } return 0; @@ -4854,7 +4849,6 @@ int __init intel_iommu_init(void) goto out_free_reserved_range; } up_write(&dmar_global_lock); - pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) swiotlb = 0; @@ -4877,6 +4871,16 @@ int __init intel_iommu_init(void) register_memory_notifier(&intel_iommu_memory_nb); cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, intel_iommu_cpu_dead); + + /* Finally, we enable the DMA remapping hardware. */ + for_each_iommu(iommu, drhd) { + if (!translation_pre_enabled(iommu)) + iommu_enable_translation(iommu); + + iommu_disable_protect_mem_regions(iommu); + } + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + intel_iommu_enabled = 1; return 0; -- Gitee From dbf8d6c4f92c761d716bf07897f26e688006dc0a Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 16:43:54 +0800 Subject: [PATCH 29/45] Intel: iommu/vt-d: Add device_def_domain_type() helper commit f273a453 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. This helper returns the default domain type that the device requires. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 37 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 52c595539866..5fd76ac60347 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2902,23 +2902,31 @@ static bool device_is_rmrr_locked(struct device *dev) return true; } -static int iommu_should_identity_map(struct device *dev, int startup) +/* + * Return the required default domain type for a specific device. 
+ * + * @dev: the device in query + * @startup: true if this is during early boot + * + * Returns: + * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain + * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain + * - 0: both identity and dynamic domains work for this device + */ +static int device_def_domain_type(struct device *dev, int startup) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); if (device_is_rmrr_locked(dev)) - return 0; + return IOMMU_DOMAIN_DMA; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) - return 1; + return IOMMU_DOMAIN_IDENTITY; if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return 1; - - if (!(iommu_identity_mapping & IDENTMAP_ALL)) - return 0; + return IOMMU_DOMAIN_IDENTITY; /* * We want to start off with all devices in the 1:1 domain, and @@ -2939,14 +2947,14 @@ static int iommu_should_identity_map(struct device *dev, int startup) */ if (!pci_is_pcie(pdev)) { if (!pci_is_root_bus(pdev->bus)) - return 0; + return IOMMU_DOMAIN_DMA; if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return 0; + return IOMMU_DOMAIN_DMA; } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; + return IOMMU_DOMAIN_DMA; } else { if (device_has_rmrr(dev)) - return 0; + return IOMMU_DOMAIN_DMA; } /* @@ -2967,8 +2975,13 @@ static int iommu_should_identity_map(struct device *dev, int startup) return dma_mask >= dma_get_required_mask(dev); } + return (iommu_identity_mapping & IDENTMAP_ALL) ? + IOMMU_DOMAIN_IDENTITY : 0; +} - return 1; +static inline int iommu_should_identity_map(struct device *dev, int startup) +{ + return device_def_domain_type(dev, startup) == IOMMU_DOMAIN_IDENTITY; } static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) -- Gitee From e446b44188cdd95452cfd2043f94cbc1263f75f1 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 16:53:06 +0800 Subject: [PATCH 30/45] Intel: iommu/vt-d: Delegate the identity domain to upper layer commit 4de354ec upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. This allows the iommu generic layer to allocate an identity domain and attach it to a device. Hence, the identity domain is delegated to the upper layer. As a side effect, iommu_identity_mapping can't be used to check the existence of identity domains any more. 
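The shape of the resulting domain_alloc callback is worth spelling out: unmanaged domains are allocated per request, while every identity request resolves to the one static si_domain, which must therefore never be freed. A stand-alone model of that ownership rule, with simplified types rather than the driver's own:

    #include <stdio.h>
    #include <stdlib.h>

    enum { DOM_IDENTITY = 1, DOM_UNMANAGED = 2 };

    struct domain { int type; };

    static struct domain si_domain = { DOM_IDENTITY };  /* one shared instance */

    static struct domain *domain_alloc(int type)
    {
        struct domain *d;

        switch (type) {
        case DOM_UNMANAGED:
            d = malloc(sizeof(*d));
            if (d)
                d->type = DOM_UNMANAGED;
            return d;
        case DOM_IDENTITY:
            return &si_domain;  /* all identity requests share it */
        default:
            return NULL;
        }
    }

    static void domain_free(struct domain *d)
    {
        if (d && d != &si_domain)  /* the shared identity domain is never freed */
            free(d);
    }

    int main(void)
    {
        struct domain *a = domain_alloc(DOM_IDENTITY);
        struct domain *b = domain_alloc(DOM_IDENTITY);

        printf("identity domains shared: %d\n", a == b);  /* prints 1 */
        domain_free(a);
        domain_free(b);
        return 0;
    }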
Signed-off-by: James Sewart Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 92 +++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5fd76ac60347..97596e602800 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -371,6 +371,7 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); +static bool device_is_rmrr_locked(struct device *dev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -2779,7 +2780,9 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) { - int nid, ret = 0; + struct dmar_rmrr_unit *rmrr; + struct device *dev; + int i, nid, ret; si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) @@ -2790,8 +2793,6 @@ static int __init si_domain_init(int hw) return -EFAULT; } - pr_debug("Identity mapping domain allocated\n"); - if (hw) return 0; @@ -2807,6 +2808,31 @@ static int __init si_domain_init(int hw) } } + /* + * Normally we use DMA domains for devices which have RMRRs. But we + * lose this requirement for graphic and usb devices. Identity map + * the RMRRs for graphic and USB devices so that they could use the + * si_domain. + */ + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, dev) { + unsigned long long start = rmrr->base_address; + unsigned long long end = rmrr->end_address; + + if (device_is_rmrr_locked(dev)) + continue; + + if (WARN_ON(end < start || + end >> agaw_to_width(si_domain->agaw))) + continue; + + ret = iommu_domain_identity_map(si_domain, start, end); + if (ret) + return ret; + } + } + return 0; } @@ -2814,9 +2840,6 @@ static int identity_mapping(struct device *dev) { struct device_domain_info *info; - if (likely(!iommu_identity_mapping)) - return 0; - info = dev->archdata.iommu; if (info && info != DUMMY_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); @@ -3396,12 +3419,9 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - if (iommu_identity_mapping) { - ret = si_domain_init(hw_pass_through); - if (ret) - goto free_iommu; - } - + ret = si_domain_init(hw_pass_through); + if (ret) + goto free_iommu; /* * If we copied translations from a previous kernel in the kdump @@ -3594,9 +3614,6 @@ static int iommu_no_mapping(struct device *dev) if (iommu_dummy(dev)) return 1; - if (!iommu_identity_mapping) - return 0; - found = identity_mapping(dev); if (found) { if (iommu_should_identity_map(dev, 0)) @@ -4997,32 +5014,39 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) struct dmar_domain *dmar_domain; struct iommu_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); - if (!dmar_domain) { - pr_err("Can't allocate dmar_domain\n"); - return NULL; - } - if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - pr_err("Domain initialization failed\n"); - domain_exit(dmar_domain); + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); + if (!dmar_domain) { + pr_err("Can't allocate dmar_domain\n"); + return NULL; + } + if (md_domain_init(dmar_domain, 
DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + pr_err("Domain initialization failed\n"); + domain_exit(dmar_domain); + return NULL; + } + domain_update_iommu_cap(dmar_domain); + domain = &dmar_domain->domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = + __DOMAIN_MAX_ADDR(dmar_domain->gaw); + domain->geometry.force_aperture = true; + + return domain; + case IOMMU_DOMAIN_IDENTITY: + return &si_domain->domain; + default: return NULL; } - domain_update_iommu_cap(dmar_domain); - - domain = &dmar_domain->domain; - domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); - domain->geometry.force_aperture = true; - return domain; + return NULL; } static void intel_iommu_domain_free(struct iommu_domain *domain) { - domain_exit(to_dmar_domain(domain)); + if (domain != &si_domain->domain) + domain_exit(to_dmar_domain(domain)); } /* -- Gitee From 4c0dd156c17aa09f7cf77c15f637f863c96de583 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:05:07 +0800 Subject: [PATCH 31/45] Intel: iommu/vt-d: Delegate the dma domain to upper layer commit fa954e68 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. This allows the iommu generic layer to allocate a dma domain and attach it to a device through the iommu APIs. With all types of domains being delegated to the upper layer, we can remove an internal flag which was used to distinguish domains managed internally or externally. Signed-off-by: James Sewart Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Liang Zhou <zhoul110@chinatelecom.cn> Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 74 ++++++++++--------------------------- 1 file changed, 19 insertions(+), 55 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 97596e602800..570c9495c124 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -324,14 +324,8 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* - * Domain represents a virtual machine, more than one devices - * across iommus may be owned in one domain, e.g. kvm guest. - */ -#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) - /* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) +#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -564,22 +558,11 @@ static inline void free_devinfo_mem(void *vaddr) kmem_cache_free(iommu_devinfo_cache, vaddr); } -static inline int domain_type_is_vm(struct dmar_domain *domain) -{ - return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; -} - static inline int domain_type_is_si(struct dmar_domain *domain) { return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; } -static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) -{ - return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | - DOMAIN_FLAG_STATIC_IDENTITY); -} - static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -627,7 +610,9 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) int iommu_id; /* si_domain and vm domain should not get here. 
*/ - BUG_ON(domain_type_is_vm_or_si(domain)); + if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA)) + return NULL; + for_each_domain_iommu(iommu_id, domain) break; @@ -1690,7 +1675,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) if (!iommu->domains || !iommu->domain_ids) return; -again: spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &device_domain_list, global) { struct dmar_domain *domain; @@ -1704,18 +1688,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) domain = info->domain; __dmar_remove_one_dev_info(info); - - if (!domain_type_is_vm_or_si(domain)) { - /* - * The domain_exit() function can't be called under - * device_domain_lock, as it takes this lock itself. - * So release the lock here and re-run the loop - * afterwards. - */ - spin_unlock_irqrestore(&device_domain_lock, flags); - domain_exit(domain); - goto again; - } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -2302,7 +2274,7 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - int ret; + int iommu_id, ret; struct intel_iommu *iommu; /* Do the real mapping first */ @@ -2310,17 +2282,8 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (ret) return ret; - /* Notify about the new mapping */ - if (domain_type_is_vm(domain)) { - /* VM typed domains can have more than one IOMMUs */ - int iommu_id; - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); - } - } else { - /* General domains only have one IOMMU */ - iommu = domain_get_iommu(domain); + for_each_domain_iommu(iommu_id, domain) { + iommu = g_iommus[iommu_id]; __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } @@ -4589,9 +4552,6 @@ static int device_notifier(struct notifier_block *nb, return 0; dmar_remove_one_dev_info(dev); - if (!domain_type_is_vm_or_si(domain) && - list_empty(&domain->devices)) - domain_exit(domain); } else if (action == BUS_NOTIFY_ADD_DEVICE) { if (iommu_should_identity_map(dev, 1)) domain_add_dev_info(si_domain, dev); @@ -5015,8 +4975,10 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) struct iommu_domain *domain; switch (type) { + case IOMMU_DOMAIN_DMA: + /* fallthrough */ case IOMMU_DOMAIN_UNMANAGED: - dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); + dmar_domain = alloc_domain(0); if (!dmar_domain) { pr_err("Can't allocate dmar_domain\n"); return NULL; @@ -5026,6 +4988,14 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) domain_exit(dmar_domain); return NULL; } + + if (type == IOMMU_DOMAIN_DMA && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) { + pr_warn("iova flush queue initialization failed\n"); + intel_iommu_strict = 1; + } + domain_update_iommu_cap(dmar_domain); domain = &dmar_domain->domain; domain->geometry.aperture_start = 0; @@ -5235,14 +5205,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct dmar_domain *old_domain; old_domain = find_domain(dev); - if (old_domain) { - rcu_read_lock(); + if (old_domain) dmar_remove_one_dev_info(dev); - rcu_read_unlock(); - - if (!domain_type_is_vm_or_si(old_domain) && - list_empty(&old_domain->devices)) - domain_exit(old_domain); } } -- Gitee From e27a70c9a801b72c9003aad8eedac572133d1a92 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:17:59 +0800 Subject: [PATCH 32/45] 
Intel: iommu: Add API to request DMA domain for device commit 7423e017 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. Normally during iommu probing of a device, a default domain will be allocated and attached to the device. The domain type of the default domain is statically defined, which results in a situation where the allocated default domain isn't suitable for the device due to some limitations. We already have the API iommu_request_dm_for_dev() to replace a DMA domain with an identity one. This adds iommu_request_dma_domain_for_dev() to request a dma domain if an allocated identity domain isn't suitable for the device in question. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/iommu.c | 36 +++++++++++++++++++++++----------- include/linux/iommu.h | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 54917e1ca3e6..c0a7504bfe3e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2251,10 +2251,10 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, return region; } -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) +static int +request_default_domain_for_dev(struct device *dev, unsigned long type) { - struct iommu_domain *dm_domain; + struct iommu_domain *domain; struct iommu_group *group; int ret; @@ -2267,8 +2267,7 @@ int iommu_request_dm_for_dev(struct device *dev) /* Check if the default domain is already direct mapped */ ret = 0; - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY) + if (group->default_domain && group->default_domain->type == type) goto out; /* Don't change mappings of existing devices */ @@ -2278,23 +2277,26 @@ int iommu_request_dm_for_dev(struct device *dev) /* Allocate a direct mapped domain */ ret = -ENOMEM; - dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY); - if (!dm_domain) + domain = __iommu_domain_alloc(dev->bus, type); + if (!domain) goto out; /* Attach the device to the domain */ - ret = __iommu_attach_group(dm_domain, group); + ret = __iommu_attach_group(domain, group); if (ret) { - iommu_domain_free(dm_domain); + iommu_domain_free(domain); goto out; } + iommu_group_create_direct_mappings(group, dev); + /* Make the direct mapped domain the default for this group */ if (group->default_domain) iommu_domain_free(group->default_domain); - group->default_domain = dm_domain; + group->default_domain = domain; - pr_info("Using direct mapping for device %s\n", dev_name(dev)); + dev_info(dev, "Using iommu %s mapping\n", + type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); ret = 0; out: @@ -2305,6 +2307,18 @@ int iommu_request_dm_for_dev(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_request_dm_for_dev); +/* Request that a device is direct mapped by the IOMMU */ +int iommu_request_dm_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY); +} + +/* Request that a device can't be direct mapped by the IOMMU */ +int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA); +} + void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f0e79bb5db03..9739d3408bc1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -575,6 +575,7 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern int iommu_request_dma_domain_for_dev(struct device *dev); extern void iommu_set_default_passthrough(bool cmd_line); extern void iommu_set_default_translated(bool cmd_line); extern bool iommu_default_passthrough(void); @@ -831,6 +832,11 @@ static inline int iommu_request_dm_for_dev(struct device *dev) return -ENODEV; } +static inline int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return -ENODEV; +} + static inline void iommu_set_default_passthrough(bool cmd_line) { } -- Gitee From b3193d3207f48b43c6b0afa697f8a6caa638deba Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:23:27 +0800 Subject: [PATCH 33/45] Intel: iommu/vt-d: Identify default domains replaced with private commit 942067f1 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. When we put a device into an iommu group, the group's default domain will be attached to the device. There are some corner cases where the type (identity or dma) of the default domain doesn't work for the device and the request of a new default domain results in failure (e.g. multiple devices already exist in the group). In order to be compatible with the past, we used a private domain. Mark the private domains and disallow some iommu APIs (map/unmap/iova_to_phys) on them. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 63 ++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 570c9495c124..7ca5439daebd 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -327,6 +327,14 @@ static int hw_pass_through = 1; /* si_domain contains mulitple devices */ #define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) +/* + * This is a DMA domain allocated through the iommu domain allocation + * interface. But one or more devices belonging to this domain have + * been chosen to use a private domain. We should avoid using the + * map/unmap/iova_to_phys APIs on it. 
+ */ +#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) + #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -4908,6 +4916,7 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) static void __dmar_remove_one_dev_info(struct device_domain_info *info) { + struct dmar_domain *domain; struct intel_iommu *iommu; unsigned long flags; @@ -4917,6 +4926,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) return; iommu = info->iommu; + domain = info->domain; if (info->dev) { iommu_disable_dev_iotlb(info); @@ -4927,9 +4937,14 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) unlink_domain_info(info); spin_lock_irqsave(&iommu->lock, flags); - domain_detach_iommu(info->domain, iommu); + domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); + /* free the private domain */ + if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) + domain_exit(info->domain); + free_devinfo_mem(info); } @@ -5253,6 +5268,9 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return -EINVAL; + if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5294,6 +5312,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5325,6 +5345,9 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); @@ -5380,6 +5403,8 @@ static bool intel_iommu_capable(enum iommu_cap cap) static int intel_iommu_add_device(struct device *dev) { + struct dmar_domain *dmar_domain; + struct iommu_domain *domain; struct intel_iommu *iommu; struct iommu_group *group; u8 bus, devfn; @@ -5399,6 +5424,42 @@ static int intel_iommu_add_device(struct device *dev) } iommu_group_put(group); + + domain = iommu_get_domain_for_dev(dev); + dmar_domain = to_dmar_domain(domain); + if (domain->type == IOMMU_DOMAIN_DMA) { + if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_IDENTITY) { + ret = iommu_request_dm_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + domain_add_dev_info(si_domain, dev); + dev_info(dev, + "Device uses a private identity domain.\n"); + return 0; + } + + return -ENODEV; + } + } else { + if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_DMA) { + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + if (!get_valid_domain_for_dev(dev)) { + dev_warn(dev, + "Failed to get a private domain.\n"); + return -ENOMEM; + } + + dev_info(dev, + "Device uses a private dma domain.\n"); + return 0; + } + + return -ENODEV; + } + } + return 0; unlink: -- Gitee From ae7d51e17d48203f0f5237e395d5e28bb2803e30 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:28:19 +0800 Subject: [PATCH 34/45] Intel: iommu/vt-d: Handle 32bit device with identity default 
domain commit 98b2fffb upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. The iommu driver doesn't know whether the bit width of a PCI device is sufficient for access to the whole system memory. Hence, the driver checks this when it calls into the dma APIs. If a device is using an identity domain, but the bit width is less than the system requirement, we need to use a dma domain instead. This also applies after we delegated the domain life cycle management to the upper layer. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 64 +++++++++++++------------------------ 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7ca5439daebd..e2cf62f70da0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2951,24 +2951,6 @@ static int device_def_domain_type(struct device *dev, int startup) return IOMMU_DOMAIN_DMA; } - /* - * At boot time, we don't yet know if devices will be 64-bit capable. - * Assume that they will — if they turn out not to be, then we can - * take them out of the 1:1 domain later. - */ - if (!startup) { - /* - * If the device's dma_mask is less than the system's memory - * size then this is not a candidate for identity mapping. - */ - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && - dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - return dma_mask >= dma_get_required_mask(dev); - } return (iommu_identity_mapping & IDENTMAP_ALL) ? IOMMU_DOMAIN_IDENTITY : 0; } @@ -3580,39 +3562,37 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) /* Check if the dev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { - int found; + int ret; if (iommu_dummy(dev)) return 1; + ret = identity_mapping(dev); + if (ret) { + u64 dma_mask = *dev->dma_mask; + if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; - found = identity_mapping(dev); - if (found) { - if (iommu_should_identity_map(dev, 0)) + if (dma_mask >= dma_get_required_mask(dev)) return 1; - else { - /* - * 32 bit DMA is removed from si_domain and fall back - * to non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - pr_info("32bit %s uses non-identity mapping\n", - dev_name(dev)); - return 0; - } - } else { /* - * In case of a detached 64 bit DMA device from vm, the device - * is put into si_domain for identity mapping. 
*/ - if (iommu_should_identity_map(dev, 0)) { - int ret; - ret = domain_add_dev_info(si_domain, dev); - if (!ret) { - pr_info("64bit %s uses identity mapping\n", - dev_name(dev)); - return 1; + dmar_remove_one_dev_info(si_domain, dev); + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + struct iommu_domain *domain; + struct dmar_domain *dmar_domain; + + domain = iommu_get_domain_for_dev(dev); + if (domain) { + dmar_domain = to_dmar_domain(domain); + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; } + get_valid_domain_for_dev(dev); } + + dev_info(dev, "32bit DMA uses non-identity mapping\n"); } return 0; -- Gitee From c975ab05e097452c730dc918f577f58ad86a57ab Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:33:28 +0800 Subject: [PATCH 35/45] Intel: iommu/vt-d: Cleanup get_valid_domain_for_dev() commit 4ec066c7 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. Previously, get_valid_domain_for_dev() is used to retrieve the DMA domain which has been attached to the device or allocate one if no domain has been attached yet. As we have delegated the DMA domain management to the upper layer, this function is used purely to allocate a private DMA domain if the default domain doesn't work for this device. Cleanup the code for readability. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 14 +++++++------- include/linux/intel-iommu.h | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e2cf62f70da0..cedb6d7081e9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2603,7 +2603,6 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) } out: - return domain; } @@ -3512,16 +3511,17 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_private_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i, ret; + /* Device shouldn't be attached by any domains. 
*/ domain = find_domain(dev); if (domain) - goto out; + return NULL; domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) @@ -3614,7 +3614,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, if (iommu_no_mapping(dev)) return paddr; - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -3826,7 +3826,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (iommu_no_mapping(dev)) return intel_nontranslate_map_sg(dev, sglist, nelems, dir); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -5425,7 +5425,7 @@ static int intel_iommu_add_device(struct device *dev) ret = iommu_request_dma_domain_for_dev(dev); if (ret) { dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_valid_domain_for_dev(dev)) { + if (!get_private_domain_for_dev(dev)) { dev_warn(dev, "Failed to get a private domain.\n", return -ENOMEM; @@ -5532,7 +5532,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(dev); + domain = find_domain(sdev->dev); if (!domain) return -EINVAL; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ebc54a64374e..b88703a79ab3 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -585,7 +585,6 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -struct dmar_domain *get_valid_domain_for_dev(struct device *dev); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -- Gitee From 5d63049d8130440fc95fdffba435aff20b01098c Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:38:18 +0800 Subject: [PATCH 36/45] Intel: iommu/vt-d: Remove startup parameter from device_def_domain_type() commit 0e31a726 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. It isn't used anywhere. Remove it to make the code concise. 
Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cedb6d7081e9..80b748a5cef1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2906,7 +2906,7 @@ static bool device_is_rmrr_locked(struct device *dev) * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain * - 0: both identity and dynamic domains work for this device */ -static int device_def_domain_type(struct device *dev, int startup) +static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { @@ -2954,16 +2954,16 @@ static int device_def_domain_type(struct device *dev, int startup) IOMMU_DOMAIN_IDENTITY : 0; } -static inline int iommu_should_identity_map(struct device *dev, int startup) +static inline int iommu_should_identity_map(struct device *dev) { - return device_def_domain_type(dev, startup) == IOMMU_DOMAIN_IDENTITY; + return device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY; } static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) { int ret; - if (!iommu_should_identity_map(dev, 1)) + if (!iommu_should_identity_map(dev)) return 0; ret = domain_add_dev_info(si_domain, dev); @@ -3589,7 +3589,7 @@ static int iommu_no_mapping(struct device *dev) dmar_domain = to_dmar_domain(domain); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; } - get_valid_domain_for_dev(dev); + get_private_domain_for_dev(dev); } dev_info(dev, "32bit DMA uses non-identity mapping\n"); @@ -5408,7 +5408,7 @@ static int intel_iommu_add_device(struct device *dev) domain = iommu_get_domain_for_dev(dev); dmar_domain = to_dmar_domain(domain); if (domain->type == IOMMU_DOMAIN_DMA) { - if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_IDENTITY) { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { ret = iommu_request_dm_for_dev(dev); if (ret) { dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; @@ -5421,7 +5421,7 @@ static int intel_iommu_add_device(struct device *dev) return -ENODEV; } } else { - if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_DMA) { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { ret = iommu_request_dma_domain_for_dev(dev); if (ret) { dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; -- Gitee From 79fc59520cb5edb11fd67674112c6c9fca5e055e Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:42:10 +0800 Subject: [PATCH 37/45] Intel: iommu/vt-d: Remove static identity map code commit df4f3c60 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. The code to prepare the static identity map for various reserved memory ranges in intel_iommu_init() is now duplicated by the default domain mechanism. Remove it to avoid the duplication.
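What replaces the removed code is the reserved-region path: the driver reports ranges that need 1:1 mappings (rmrrs, and the ISA range added earlier in this series) through its get_resv_regions callback, and the core direct-maps them when the group's default domain is set up. A stand-alone sketch of that flow; the region values here are invented examples, not real addresses:

    #include <stdio.h>

    struct resv_region { unsigned long long start, end; };

    /* Stand-in for a get_resv_regions callback: report what must stay 1:1. */
    static int get_resv_regions(struct resv_region *out, int max)
    {
        if (max < 2)
            return 0;
        out[0] = (struct resv_region){ 0, (1ULL << 24) - 1 };    /* ISA, 0-16MiB */
        out[1] = (struct resv_region){ 0x9d000000, 0x9dffffff }; /* invented rmrr */
        return 2;
    }

    int main(void)
    {
        struct resv_region r[8];
        int n = get_resv_regions(r, 8);

        /* The core's direct-mapping step walks the list and maps each
         * range 1:1 into the group's default domain. */
        for (int i = 0; i < n; i++)
            printf("direct map [0x%llx - 0x%llx]\n", r[i].start, r[i].end);
        return 0;
    }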
Signed-off-by: James Sewart Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 135 +----------------------------------- 1 file changed, 1 insertion(+), 134 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 80b748a5cef1..cfa32570d8bc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2721,31 +2721,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address); } -#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA -static inline void iommu_prepare_isa(void) -{ - struct pci_dev *pdev; - int ret; - - pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - if (!pdev) - return; - - pr_info("Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); - - if (ret) - pr_err("Failed to create 0-16MiB identity map - floppy might not work\n"); - - pci_dev_put(pdev); -} -#else -static inline void iommu_prepare_isa(void) -{ - return; -} -#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) @@ -2959,63 +2934,6 @@ static inline int iommu_should_identity_map(struct device *dev) return device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY; } -static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) -{ - int ret; - - if (!iommu_should_identity_map(dev)) - return 0; - - ret = domain_add_dev_info(si_domain, dev); - if (!ret) - pr_info("%s identity mapping for device %s\n", - hw ? "Hardware" : "Software", dev_name(dev)); - else if (ret == -ENODEV) - /* device not associated with an iommu */ - ret = 0; - - return ret; -} - - -static int __init iommu_prepare_static_identity_mapping(int hw) -{ - struct pci_dev *pdev = NULL; - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - struct device *dev; - int i; - int ret = 0; - - for_each_pci_dev(pdev) { - ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); - if (ret) - return ret; - } - - for_each_active_iommu(iommu, drhd) - for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { - struct acpi_device_physical_node *pn; - struct acpi_device *adev; - - if (dev->bus != &acpi_bus_type) - continue; - - adev= to_acpi_device(dev); - mutex_lock(&adev->physical_node_lock); - list_for_each_entry(pn, &adev->physical_node_list, node) { - ret = dev_prepare_static_identity_mapping(pn->dev, hw); - if (ret) - break; - } - mutex_unlock(&adev->physical_node_lock); - if (ret) - return ret; - } - - return 0; -} - static void intel_iommu_init_qi(struct intel_iommu *iommu) { /* @@ -3242,7 +3160,7 @@ static int __init init_dmars(void) bool copied_tables = false; struct device *dev; struct intel_iommu *iommu; - int i, ret; + int ret; /* * for each drhd @@ -3335,7 +3253,6 @@ static int __init init_dmars(void) } else { pr_info("Copied translation tables from previous kernel for %s\n", iommu->name); - copied_tables = true; } } @@ -3375,56 +3292,6 @@ static int __init init_dmars(void) if (ret) goto free_iommu; - /* - * If we copied translations from a previous kernel in the kdump - * case, we can not assign the devices to domains now, as that - * would eliminate the old mappings. So skip this part and defer - * the assignment to device driver initialization time. 
- */ - if (copied_tables) - goto domains_done; - - /* - * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa and may fall back to static - * identity mapping if iommu_identity_mapping is set. - */ - if (iommu_identity_mapping) { - ret = iommu_prepare_static_identity_mapping(hw_pass_through); - if (ret) { - pr_crit("Failed to setup IOMMU pass-through\n"); - goto free_iommu; - } - } - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } - - iommu_prepare_isa(); - -domains_done: - /* * for each drhd * enable fault log -- Gitee From 3a996bfe3f78c1ff1b9f2d7dafd2d05a62a0d217 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:44:57 +0800 Subject: [PATCH 38/45] Intel: iommu/vt-d: Fix a variable set but not used commit e314a7c9 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. The commit "iommu/vt-d: Delegate the dma domain to upper layer" left an unused variable, drivers/iommu/intel-iommu.c: In function 'disable_dmar_iommu': drivers/iommu/intel-iommu.c:1652:23: warning: variable 'domain' set but not used [-Wunused-but-set-variable] Signed-off-by: Qian Cai Signed-off-by: Joerg Roedel Reported-by: kernel test robot Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cfa32570d8bc..afd324fedc59 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1685,16 +1685,12 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &device_domain_list, global) { - struct dmar_domain *domain; - if (info->iommu != iommu) continue; if (!info->dev || !info->domain) continue; - domain = info->domain; - __dmar_remove_one_dev_info(info); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1926,8 +1922,6 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, static void domain_exit(struct dmar_domain *domain) { - struct page *freelist = NULL; - /* Domain 0 is reserved, so dont process it */ if (!domain) return; @@ -1940,9 +1934,12 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); - freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + if (domain->pgd) { + struct page *freelist; - dma_free_pagelist(freelist); + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + dma_free_pagelist(freelist); + } free_domain_mem(domain); } -- Gitee From b464475d57fef843c6cbe7885b0087552ee25d95 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:51:09 +0800 Subject: [PATCH 39/45] iommu: set group default domain before 
creating direct mappings commit d3602115 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. iommu_group_create_direct_mappings uses group->default_domain, but right after it is called, request_default_domain_for_dev calls iommu_domain_free for the default domain, and sets the group default domain to a different domain. Move the iommu_group_create_direct_mappings call to after the group default domain is set, so the direct mappings get associated with that domain. Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: stable@vger.kernel.org Fixes: 7423e017 ("iommu: Add API to request DMA domain for device") Signed-off-by: Jerry Snitselaar Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c0a7504bfe3e..de743904dec3 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2288,13 +2288,13 @@ request_default_domain_for_dev(struct device *dev, unsigned long type) goto out; } - iommu_group_create_direct_mappings(group, dev); - /* Make the direct mapped domain the default for this group */ if (group->default_domain) iommu_domain_free(group->default_domain); group->default_domain = domain; + iommu_group_create_direct_mappings(group, dev); + dev_info(dev, "Using iommu %s mapping\n", type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); -- Gitee From b65a42d42c572b2235a01163b6dc93b06d4bc4f3 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:52:36 +0800 Subject: [PATCH 40/45] Fix the build error of intel-iommu.c Signed-off-by: Sun Shouxin Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index afd324fedc59..85270da934a3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4405,7 +4405,7 @@ static int device_notifier(struct notifier_block *nb, dmar_remove_one_dev_info(dev); } else if (action == BUS_NOTIFY_ADD_DEVICE) { - if (iommu_should_identity_map(dev, 1)) + if (iommu_should_identity_map(dev)) domain_add_dev_info(si_domain, dev); } -- Gitee From 98abc1f6fa70bf4df8958b3e7b4203fa46540d5d Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:57:39 +0800 Subject: [PATCH 41/45] iommu/vt-d: Detach domain before using a private one commit ae23bfb6 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. When the default domain of a group doesn't work for a device, the iommu driver will try to use a private domain. The domain which was previously attached to the device must be detached.
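The fix is again an ordering rule: tear down the device's link to the old domain before the private one is attached, so the device is never accounted to two domains at once. A toy model of the corrected sequence, with illustrative names only:

    #include <stdio.h>

    struct device {
        const char *name;
        const char *domain;  /* at most one domain at any time */
    };

    static void detach(struct device *d)
    {
        printf("%s: detached from %s\n", d->name, d->domain);
        d->domain = NULL;
    }

    static void attach(struct device *d, const char *dom)
    {
        d->domain = dom;
        printf("%s: attached to %s\n", d->name, dom);
    }

    int main(void)
    {
        struct device dev = { "0000:00:1f.0", "group-default" };

        detach(&dev);                      /* must happen first */
        attach(&dev, "private-identity");
        return 0;
    }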
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Alex Williamson Fixes: 942067f1 ("iommu/vt-d: Identify default domains replaced with private") Reported-by: Alex Williamson Link: https://lkml.org/lkml/2019/8/2/1379 Signed-off-by: Lu Baolu Tested-by: Alex Williamson Signed-off-by: Joerg Roedel Signed-off-by: Youquan Song Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 85270da934a3..3490f471d4c5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3442,7 +3442,7 @@ static int iommu_no_mapping(struct device *dev) * 32 bit DMA is removed from si_domain and fall back * to non-identity mapping. */ - dmar_remove_one_dev_info(si_domain, dev); + dmar_remove_one_dev_info(dev); ret = iommu_request_dma_domain_for_dev(dev); if (ret) { struct iommu_domain *domain; @@ -3453,6 +3453,7 @@ static int iommu_no_mapping(struct device *dev) dmar_domain = to_dmar_domain(domain); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; } + dmar_remove_one_dev_info(dev); get_private_domain_for_dev(dev); } @@ -4799,7 +4800,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(&device_domain_lock, flags); info = dev->archdata.iommu; - __dmar_remove_one_dev_info(info); + if (info) + __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5275,6 +5277,7 @@ static int intel_iommu_add_device(struct device *dev) if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { ret = iommu_request_dm_for_dev(dev); if (ret) { + dmar_remove_one_dev_info(dev); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; domain_add_dev_info(si_domain, dev); dev_info(dev, @@ -5288,6 +5291,7 @@ static int intel_iommu_add_device(struct device *dev) if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { ret = iommu_request_dma_domain_for_dev(dev); if (ret) { + dmar_remove_one_dev_info(dev); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; if (!get_private_domain_for_dev(dev)) { dev_warn(dev, -- Gitee From d564970a49ea4718881117968d6385535bcd37cf Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Mon, 25 Apr 2022 17:59:41 +0800 Subject: [PATCH 42/45] iommu/vt-d: Fix dmar pte read access not set error commit 942067f1 upstream Backport summary: Backport to kernel 4.19.57 to fix a kernel panic with intel_iommu=on while passthrough is disabled on ICX. If the default DMA domain of a group doesn't fit a device, it will still sit in the group but use a private identity domain. When map/unmap/iova_to_phys calls come through the iommu API, the driver should still serve them, otherwise, other devices in the same group will be impacted. Since the identity domain has been mapped with the whole available memory space and the RMRRs, we don't need to worry about the impact on it. 
Link: https://www.spinics.net/lists/iommu/msg40416.html Cc: Jerry Snitselaar Reported-by: Jerry Snitselaar Fixes: 942067f1 ("iommu/vt-d: Identify default domains replaced with private") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3490f471d4c5..03ee5418935e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5114,9 +5114,6 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return -EINVAL; - if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5158,9 +5155,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; - if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5191,9 +5185,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) - return 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); -- Gitee From 2b732a1cf15b5bfbfdf8f5ea99a438b78cbac692 Mon Sep 17 00:00:00 2001 From: Sun Shouxin Date: Tue, 26 Apr 2022 08:36:24 +0800 Subject: [PATCH 43/45] Fix build error Signed-off-by: Sun Shouxin Signed-off-by: Xibo.Wang --- drivers/iommu/intel-iommu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 03ee5418935e..1813f38f0b72 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3153,9 +3153,6 @@ static int copy_translation_tables(struct intel_iommu *iommu) static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; - bool copied_tables = false; - struct device *dev; struct intel_iommu *iommu; int ret; @@ -5068,7 +5065,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, old_domain = find_domain(dev); if (old_domain) dmar_remove_one_dev_info(dev); - } } ret = prepare_domain_attach_device(domain, dev); @@ -5391,7 +5387,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) u64 ctx_lo; int ret; - domain = find_domain(sdev->dev); + domain = find_domain(dev); if (!domain) return -EINVAL; -- Gitee From 6749185fd58256e8f4e6772f0b02b6839defa7b7 Mon Sep 17 00:00:00 2001 From: "Xibo.Wang" Date: Fri, 21 Oct 2022 15:50:09 +0800 Subject: [PATCH 44/45] iommu: Fix kABI breakage Fix kABI breakage of the following functions: iommu_get_domain_for_dev iommu_group_get iommu_iova_to_phys iommu_map iommu_unmap vfio_register_iommu_driver vfio_unregister_iommu_driver Signed-off-by: Xibo.Wang --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9739d3408bc1..3c690a40d7bd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -366,6 +366,7 @@ struct iommu_ops { struct device *dev, struct tlb_invalidate_info *inv_info); int (*page_response)(struct device *dev, struct 
From 6749185fd58256e8f4e6772f0b02b6839defa7b7 Mon Sep 17 00:00:00 2001
From: "Xibo.Wang"
Date: Fri, 21 Oct 2022 15:50:09 +0800
Subject: [PATCH 44/45] iommu: Fix kABI breakage

Fix kABI breakage of the following functions:
iommu_get_domain_for_dev
iommu_group_get
iommu_iova_to_phys
iommu_map
iommu_unmap
vfio_register_iommu_driver
vfio_unregister_iommu_driver

Signed-off-by: Xibo.Wang
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9739d3408bc1..3c690a40d7bd 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -366,6 +366,7 @@ struct iommu_ops {
 			    struct device *dev, struct tlb_invalidate_info *inv_info);
 	int (*page_response)(struct device *dev, struct page_response_msg *msg);
+#ifndef __GENKSYMS__
 	/* Per device IOMMU features */
 	bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f);
 	bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
@@ -376,6 +377,7 @@ struct iommu_ops {
 	int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
+#endif

 	unsigned long pgsize_bitmap;
--
Gitee
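The #ifndef __GENKSYMS__ guard above is the usual kABI trick: genksyms computes symbol CRCs from sources preprocessed with __GENKSYMS__ defined, so members inside the guard are invisible to the checksum while still being compiled into the real kernel. A self-contained illustration of the mechanism follows; the struct and member names are made up for the example, and note that a guarded member still shifts the layout of anything placed after it in normal builds.

#include <stdio.h>

/*
 * Toy version of the iommu_ops hunk. When genksyms runs, __GENKSYMS__
 * is defined, the guarded member disappears, and the computed CRC
 * matches the old ABI. In a normal build the member exists.
 */
struct example_ops {
	int (*map)(void *dev);		/* pre-existing, CRC-visible */
#ifndef __GENKSYMS__
	int (*aux_attach)(void *dev);	/* new member, hidden from the CRC */
#endif
	unsigned long pgsize_bitmap;	/* offset shifts in real builds */
};

int main(void)
{
	printf("sizeof(struct example_ops) = %zu\n",
	       sizeof(struct example_ops));
	return 0;
}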
From 423cea207ce1e238b75c8aa5b60cbd0d0bfc30c0 Mon Sep 17 00:00:00 2001
From: "Xibo.Wang"
Date: Fri, 21 Oct 2022 15:54:24 +0800
Subject: [PATCH 45/45] mdev: fix kABI breakage

Fix kABI breakage of the following functions:
mdev_dev
mdev_from_dev
mdev_get_drvdata
mdev_parent_dev
mdev_register_device
mdev_set_drvdata

Signed-off-by: Xibo.Wang
---
 drivers/vfio/mdev/mdev_core.c    | 15 ++++++++++-----
 drivers/vfio/mdev/mdev_private.h |  6 +++++-
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 50a9c0720615..26de2a690cf1 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -266,13 +266,14 @@ EXPORT_SYMBOL(mdev_unregister_device);
 static void mdev_device_release(struct device *dev)
 {
 	struct mdev_device *mdev = to_mdev_device(dev);
+	struct mdev_device_wrapper *mdw = container_of(mdev, struct mdev_device_wrapper, mdev);

 	mutex_lock(&mdev_list_lock);
 	list_del(&mdev->next);
 	mutex_unlock(&mdev_list_lock);

 	dev_dbg(&mdev->dev, "MDEV: destroying\n");
-	kfree(mdev);
+	kfree(mdw);
 }

 int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
@@ -281,6 +282,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 	struct mdev_device *mdev, *tmp;
 	struct mdev_parent *parent;
 	struct mdev_type *type = to_mdev_type(kobj);
+	struct mdev_device_wrapper *mdw;

 	parent = mdev_get_parent(type->parent);
 	if (!parent)
@@ -297,12 +299,13 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 		}
 	}

-	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
-	if (!mdev) {
+	mdw = kzalloc(sizeof(*mdw), GFP_KERNEL);
+	if (!mdw) {
 		mutex_unlock(&mdev_list_lock);
 		ret = -ENOMEM;
 		goto mdev_fail;
 	}
+	mdev = &mdw->mdev;

 	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
 	list_add(&mdev->next, &mdev_list);
@@ -392,8 +395,9 @@ int mdev_device_remove(struct device *dev, bool force_remove)
 int mdev_set_iommu_device(struct device *dev, struct device *iommu_device)
 {
 	struct mdev_device *mdev = to_mdev_device(dev);
+	struct mdev_device_wrapper *mdw = container_of(mdev, struct mdev_device_wrapper, mdev);

-	mdev->iommu_device = iommu_device;
+	mdw->iommu_device = iommu_device;

 	return 0;
 }
@@ -402,8 +406,9 @@ EXPORT_SYMBOL(mdev_set_iommu_device);
 struct device *mdev_get_iommu_device(struct device *dev)
 {
 	struct mdev_device *mdev = to_mdev_device(dev);
+	struct mdev_device_wrapper *mdw = container_of(mdev, struct mdev_device_wrapper, mdev);

-	return mdev->iommu_device;
+	return mdw->iommu_device;
 }
 EXPORT_SYMBOL(mdev_get_iommu_device);

diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 1b09251f17d6..cf2c25ed9800 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -33,10 +33,14 @@ struct mdev_device {
 	struct kref ref;
 	struct list_head next;
 	struct kobject *type_kobj;
-	struct device *iommu_device;
 	bool active;
 };

+struct mdev_device_wrapper {
+	struct mdev_device mdev;
+	struct device *iommu_device;
+};
+
 #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)

 #define dev_is_mdev(d)		((d)->bus == &mdev_bus_type)
--
Gitee
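Patch 45 relocates the iommu_device pointer out of struct mdev_device so the public struct keeps its kABI size and layout; code holding only a struct mdev_device * recovers the new field through container_of() on the embedding wrapper. A self-contained user-space model of that round trip follows; the wrapper and field names mirror the patch, while the reduced struct contents and the container_of() definition are stand-ins for the kernel's.

#include <stdio.h>
#include <stddef.h>

/* container_of as used in the kernel, reduced to standard C. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* The public struct keeps its kABI layout (contents reduced here)... */
struct mdev_device {
	int id;
};

/* ...and the relocated field lives in a wrapper allocated around it. */
struct mdev_device_wrapper {
	struct mdev_device mdev;
	void *iommu_device;
};

int main(void)
{
	struct mdev_device_wrapper w = { { 42 }, NULL };
	struct mdev_device *mdev = &w.mdev;

	/* Code holding only *mdev can still reach the wrapper's fields. */
	struct mdev_device_wrapper *back =
		container_of(mdev, struct mdev_device_wrapper, mdev);

	printf("id=%d, wrapper=%p\n", back->mdev.id, (void *)back);
	return 0;
}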