From a3aa255ea91306e0d79a0d44dad74e442f825cc4 Mon Sep 17 00:00:00 2001
From: Song Gao
Date: Tue, 7 May 2024 20:01:40 +0800
Subject: [PATCH 01/44] LoongArch: KVM: Add PMU support

Upstream: no

On LoongArch, the host and the guest each have their own set of PMU
CSRs, and they share the PMU hardware resources. A set of PMU CSRs
consists of a CTRL register and a CNTR register. Which PMU CSRs the
guest may use is selected through bits [24:26] of the GCFG register.

On the KVM side:
- We save the host PMU CSRs into the kvm_context structure.
- If the host supports the PMU feature:
  - When entering guest mode, we save the host PMU CSRs and restore
    the guest PMU CSRs.
  - When exiting guest mode, we save the guest PMU CSRs and restore
    the host PMU CSRs.

Signed-off-by: Song Gao
Reviewed-by: Bibo Mao
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_csr.h  |   7 +
 arch/loongarch/include/asm/kvm_host.h |  21 ++-
 arch/loongarch/kvm/exit.c             |   5 +-
 arch/loongarch/kvm/vcpu.c             | 230 +++++++++++++++++---------
 4 files changed, 182 insertions(+), 81 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h
index 476c9f620dd5..4f8cfd6ebec0 100644
--- a/arch/loongarch/include/asm/kvm_csr.h
+++ b/arch/loongarch/include/asm/kvm_csr.h
@@ -30,6 +30,7 @@
 		: [val] "+r" (__v)	\
 		: [reg] "i" (csr)	\
 		: "memory");		\
+	__v;			\
 })

 #define gcsr_xchg(v, m, csr)	\
@@ -180,6 +181,7 @@ __BUILD_GCSR_OP(tlbidx)

 #define kvm_save_hw_gcsr(csr, gid)	(csr->csrs[gid] = gcsr_read(gid))
 #define kvm_restore_hw_gcsr(csr, gid)	(gcsr_write(csr->csrs[gid], gid))
+#define kvm_read_clear_hw_gcsr(csr, gid)	(csr->csrs[gid] = gcsr_write(0, gid))

 int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu);
@@ -213,4 +215,9 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr,
 			 CSR_PERFCTRL_PLV2 |	\
 			 CSR_PERFCTRL_PLV3)

+#define KVM_PMU_EVENT_ENABLED	(CSR_PERFCTRL_PLV0 |	\
+				 CSR_PERFCTRL_PLV1 |	\
+				 CSR_PERFCTRL_PLV2 |	\
+				 CSR_PERFCTRL_PLV3)
+
 #endif /* __ASM_LOONGARCH_KVM_CSR_H__ */
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index c146d2ebdb90..561348e67614 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -55,9 +55,14 @@ struct kvm_arch_memory_slot {
 	unsigned long flags;
 };

+#define KVM_REQ_PMU		KVM_ARCH_REQ(0)
+#define HOST_MAX_PMNUM		16
 struct kvm_context {
 	unsigned long vpid_cache;
 	struct kvm_vcpu *last_vcpu;
+	/* Save host pmu csr */
+	u64 perf_ctrl[HOST_MAX_PMNUM];
+	u64 perf_cntr[HOST_MAX_PMNUM];
 };

 struct kvm_world_switch {
@@ -129,7 +134,8 @@ enum emulation_result {
 #define KVM_LARCH_LASX		(0x1 << 2)
 #define KVM_LARCH_SWCSR_LATEST	(0x1 << 3)
 #define KVM_LARCH_HWCSR_USABLE	(0x1 << 4)
-#define KVM_LARCH_PERF		(0x1 << 5)
+#define KVM_GUEST_PMU_ENABLE	(0x1 << 5)
+#define KVM_GUEST_PMU_ACTIVE	(0x1 << 6)

 struct kvm_vcpu_arch {
 	/*
@@ -167,6 +173,9 @@ struct kvm_vcpu_arch {
 	/* CSR state */
 	struct loongarch_csrs *csr;

+	/* Guest max PMU CSR id */
+	int max_pmu_csrid;
+
 	/* GPR used as IO source/target */
 	u32 io_gpr;

@@ -245,6 +254,16 @@ static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch)
 	return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT;
 }

+static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch)
+{
+	return arch->cpucfg[LOONGARCH_CPUCFG6] & CPUCFG6_PMP;
+}
+
+static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch)
+{
+	return (arch->cpucfg[LOONGARCH_CPUCFG6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT;
+}
+
 /* Debug: dump vcpu state */
 int
kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 8affc6d4a66e..9607de0b3673 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -83,9 +83,10 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) rj = inst.reg2csr_format.rj; csrid = inst.reg2csr_format.csr; - if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= LOONGARCH_CSR_PERFCNTR3) { - if (!kvm_own_pmu(vcpu)) { + if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) { + if (kvm_guest_has_pmu(&vcpu->arch)) { vcpu->arch.pc -= 4; + kvm_make_request(KVM_REQ_PMU, vcpu); return EMULATE_DONE; } } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 685f2826d022..c3087dc2f21f 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -140,6 +140,131 @@ static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + context->perf_ctrl[0] = write_csr_perfctrl0(0); + context->perf_ctrl[1] = write_csr_perfctrl1(0); + context->perf_ctrl[2] = write_csr_perfctrl2(0); + context->perf_ctrl[3] = write_csr_perfctrl3(0); + context->perf_cntr[0] = read_csr_perfcntr0(); + context->perf_cntr[1] = read_csr_perfcntr1(); + context->perf_cntr[2] = read_csr_perfcntr2(); + context->perf_cntr[3] = read_csr_perfcntr3(); +} + +static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + write_csr_perfcntr0(context->perf_cntr[0]); + write_csr_perfcntr1(context->perf_cntr[1]); + write_csr_perfcntr2(context->perf_cntr[2]); + write_csr_perfcntr3(context->perf_cntr[3]); + write_csr_perfctrl0(context->perf_ctrl[0]); + write_csr_perfctrl1(context->perf_ctrl[1]); + write_csr_perfctrl2(context->perf_ctrl[2]); + write_csr_perfctrl3(context->perf_ctrl[3]); +} + + +static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); +} + +static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); +} + +static void kvm_lose_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ENABLE)) + return; + if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ACTIVE)) + return; + + kvm_save_guest_pmu(vcpu); + /* Disable pmu access from guest */ + write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); + + /* + * Clear KVM_GUEST_PMU_ENABLE if the guest is not using PMU CSRs + * 
when exiting the guest, so that the next time we trap into
	 * the guest we do not need to handle the PMU CSR context again.
	 */
	val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
	val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
	val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
	val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
	if (!(val & KVM_PMU_EVENT_ENABLED))
		vcpu->arch.aux_inuse &= ~KVM_GUEST_PMU_ENABLE;
	kvm_restore_host_pmu(vcpu);

	/* KVM_GUEST_PMU_ACTIVE needs to be cleared when exiting the guest */
	vcpu->arch.aux_inuse &= ~KVM_GUEST_PMU_ACTIVE;
+}
+
+static void kvm_own_pmu(struct kvm_vcpu *vcpu)
+{
+	unsigned long val;
+
+	kvm_save_host_pmu(vcpu);
+	/* Set PM0-PM(num) to guest */
+	val = read_csr_gcfg() & ~CSR_GCFG_GPERF;
+	val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT;
+	write_csr_gcfg(val);
+	kvm_restore_guest_pmu(vcpu);
+}
+
+static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ENABLE))
+		return;
+
+	kvm_make_request(KVM_REQ_PMU, vcpu);
+}
+
+static void kvm_check_pmu(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_check_request(KVM_REQ_PMU, vcpu))
+		return;
+
+	kvm_own_pmu(vcpu);
+
+	/*
+	 * Set KVM_GUEST_PMU_ENABLE and KVM_GUEST_PMU_ACTIVE
+	 * when the guest has a pending KVM_REQ_PMU request.
+	 */
+	vcpu->arch.aux_inuse |= KVM_GUEST_PMU_ENABLE;
+	vcpu->arch.aux_inuse |= KVM_GUEST_PMU_ACTIVE;
+}
+
 /*
  * kvm_check_requests - check and handle pending vCPU requests
  *
@@ -213,6 +338,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
 		/* Make sure the vcpu mode has been written */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 		kvm_check_vpid(vcpu);
+		kvm_check_pmu(vcpu);
 		vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
 		/* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */
 		vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
@@ -243,6 +369,8 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	/* Set a default exit reason */
 	run->exit_reason = KVM_EXIT_UNKNOWN;

+	kvm_lose_pmu(vcpu);
+
 	guest_timing_exit_irqoff();
 	guest_state_exit_irqoff();
 	local_irq_enable();
@@ -506,6 +634,21 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)

 	kvm_write_sw_gcsr(csr, id, val);

+	/*
+	 * After modifying the vcpu's PMU CSR value, set KVM_REQ_PMU if
+	 * the PMU CSRs are in use.
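+	 * (KVM_PMU_EVENT_ENABLED is the union of the CSR_PERFCTRL_PLV0..PLV3
+	 * enable bits, so the request is raised whenever any counter is
+	 * enabled for any privilege level.)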
+ */ + if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) { + unsigned long val; + + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + if (val & KVM_PMU_EVENT_ENABLED) + kvm_make_request(KVM_REQ_PMU, vcpu); + } + return ret; } @@ -596,7 +739,7 @@ static int kvm_check_cpucfg(int id, u64 val) return 0; case LOONGARCH_CPUCFG6: if (val & CPUCFG6_PMP) { - host = read_cpucfg(6); + host = read_cpucfg(LOONGARCH_CPUCFG6); if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) /* Guest pmbits must be the same with host */ return -EINVAL; @@ -691,6 +834,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, if (ret) break; vcpu->arch.cpucfg[id] = (u32)v; + if (id == LOONGARCH_CPUCFG6) { + vcpu->arch.max_pmu_csrid = LOONGARCH_CSR_PERFCTRL0 + + 2 * kvm_get_pmu_num(&vcpu->arch) + 1; + } break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { @@ -784,8 +931,8 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { switch (attr->attr) { - case 2: - case 6: + case LOONGARCH_CPUCFG2: + case LOONGARCH_CPUCFG6: return 0; default: return -ENXIO; @@ -1086,77 +1233,6 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } -int kvm_own_pmu(struct kvm_vcpu *vcpu) -{ - unsigned long val; - - if (!kvm_guest_has_pmu(&vcpu->arch)) - return -EINVAL; - - preempt_disable(); - val = read_csr_gcfg() & ~CSR_GCFG_GPERF; - val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; - write_csr_gcfg(val); - - vcpu->arch.aux_inuse |= KVM_LARCH_PERF; - preempt_enable(); - return 0; -} - -static void kvm_lose_pmu(struct kvm_vcpu *vcpu) -{ - struct loongarch_csrs *csr = vcpu->arch.csr; - - if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) - return; - - /* save guest pmu csr */ - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL0, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL1, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL2, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL3, 0); - /* Disable pmu access from guest */ - write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); - - if (((kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3)) - & KVM_PMU_PLV_ENABLE) == 0) - vcpu->arch.aux_inuse &= ~KVM_LARCH_PERF; -} - -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - unsigned long val; - struct loongarch_csrs *csr = vcpu->arch.csr; - - if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) - return; - - /* Set PM0-PM(num) to Guest */ - val = read_csr_gcfg() & ~CSR_GCFG_GPERF; - val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; - write_csr_gcfg(val); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - kvm_restore_hw_gcsr(csr, 
LOONGARCH_CSR_PERFCNTR2); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); -} - - int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { int intr = (int)irq->irq; @@ -1295,12 +1371,11 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - /* Restore hardware perf csr */ + /* Restore hardware PMU CSRs */ kvm_restore_pmu(vcpu); - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; @@ -1384,7 +1459,6 @@ static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) struct loongarch_csrs *csr = vcpu->arch.csr; kvm_lose_fpu(vcpu); - kvm_lose_pmu(vcpu); /* * Update CSR state from hardware if software CSR state is stale, -- Gitee From 2319dc505d44daee354ddb71624d87106a1ccb76 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 3 Jun 2024 11:49:50 +0800 Subject: [PATCH 02/44] LoongArch: KVM: Add iocsr and mmio bus simulation in kernel Upstream: no Add iocsr and mmio memory read and write simulation to the kernel. When the VM accesses the device address space through iocsr instructions or mmio, it does not need to return to the qemu user mode but directly completes the access in the kernel mode. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/kvm/exit.c | 70 ++++++++++++++++++++++++++++----------- include/linux/kvm_host.h | 1 + 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 9607de0b3673..74028ad40b24 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -115,7 +115,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret; - unsigned long val; + unsigned long *val; u32 addr, rd, rj, opcode; /* @@ -128,6 +128,7 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; + val = &vcpu->arch.gprs[rd]; /* LoongArch is Little endian */ switch (opcode) { @@ -161,15 +162,21 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: ret = EMULATE_FAIL; - break; + return ret; } - if (ret == EMULATE_DO_IOCSR) { - if (run->iocsr_io.is_write) { - val = vcpu->arch.gprs[rd]; - memcpy(run->iocsr_io.data, &val, run->iocsr_io.len); - } - vcpu->arch.io_gpr = rd; + if (run->iocsr_io.is_write) { + if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save data and let user space to write it */ + memcpy(run->iocsr_io.data, val, run->iocsr_io.len); + } else { + if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save register id for iocsr read completion */ + vcpu->arch.io_gpr = rd; } return ret; @@ -457,17 +464,31 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + /* + * if mmio device such as pch pic is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. 
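+		 * (kvm_io_bus_read() returns 0 only when a registered
+		 * in-kernel device claims this access, so a zero return
+		 * below means the read completed without a userspace
+		 * round trip.)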
+		 */
+		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv,
+				run->mmio.len, &vcpu->arch.gprs[rd]);
+		if (!ret) {
+			update_pc(&vcpu->arch);
+			vcpu->mmio_needed = 0;
+			return EMULATE_DONE;
+		}
+
+		/* Set for kvm_complete_mmio_read() use */
 		vcpu->arch.io_gpr = rd;
 		run->mmio.is_write = 0;
 		vcpu->mmio_is_write = 0;
-	} else {
-		kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
-				inst.word, vcpu->arch.pc, vcpu->arch.badv);
-		kvm_arch_vcpu_dump_regs(vcpu);
-		vcpu->mmio_needed = 0;
+		return EMULATE_DO_MMIO;
 	}

+	kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+			inst.word, vcpu->arch.pc, vcpu->arch.badv);
+	kvm_arch_vcpu_dump_regs(vcpu);
+	vcpu->mmio_needed = 0;
+
 	return ret;
 }

@@ -605,17 +626,28 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst)
 	}

 	if (ret == EMULATE_DO_MMIO) {
+		/*
+		 * If an mmio device such as the pch-pic is emulated in
+		 * KVM, there is no need to return to user space to handle
+		 * the mmio exception.
+		 */
+		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv,
+				run->mmio.len, data);
+		if (!ret)
+			return EMULATE_DONE;
+
 		run->mmio.is_write = 1;
 		vcpu->mmio_needed = 1;
 		vcpu->mmio_is_write = 1;
-	} else {
-		vcpu->arch.pc = curr_pc;
-		kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
-				inst.word, vcpu->arch.pc, vcpu->arch.badv);
-		kvm_arch_vcpu_dump_regs(vcpu);
-		/* Rollback PC if emulation was unsuccessful */
+		return EMULATE_DO_MMIO;
 	}

+	/* Rollback PC if emulation was unsuccessful */
+	vcpu->arch.pc = curr_pc;
+	kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+			inst.word, vcpu->arch.pc, vcpu->arch.badv);
+	kvm_arch_vcpu_dump_regs(vcpu);
+
 	return ret;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a66645eb5103..a077b9e4989c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -216,6 +216,7 @@ enum kvm_bus {
 	KVM_PIO_BUS,
 	KVM_VIRTIO_CCW_NOTIFY_BUS,
 	KVM_FAST_MMIO_BUS,
+	KVM_IOCSR_BUS,
 	KVM_NR_BUSES
 };
--
Gitee

From ec9365931f15a6477ee92f97cda3d6f43ceb9586 Mon Sep 17 00:00:00 2001
From: Xianglai Li
Date: Mon, 3 Jun 2024 14:12:26 +0800
Subject: [PATCH 03/44] LoongArch: KVM: Add IPI device support

Upstream: no

Add a device model for the IPI interrupt controller, implement the
basic create/destroy interfaces, and register the device model in the
KVM device table.
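For reference, userspace is expected to instantiate the device through
the generic KVM device API. A minimal sketch (assumptions: vm_fd is an
open VM descriptor obtained via KVM_CREATE_VM; error handling omitted):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Create the in-kernel IPI device and return its file descriptor. */
    static int create_loongarch_ipi(int vm_fd)
    {
            struct kvm_create_device cd = {
                    .type = KVM_DEV_TYPE_LA_IPI,
            };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;      /* errno describes the failure */
            return cd.fd;           /* fd is filled in by the kernel */
    }
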
Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 4 + arch/loongarch/include/asm/kvm_ipi.h | 36 ++++++ arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/ipi.c | 155 ++++++++++++++++++++++++++ arch/loongarch/kvm/main.c | 7 +- arch/loongarch/kvm/vcpu.c | 3 + include/uapi/linux/kvm.h | 4 + 7 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/kvm_ipi.h create mode 100644 arch/loongarch/kvm/intc/ipi.c diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 561348e67614..5270294cd385 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -19,6 +19,7 @@ #include #include #include +#include /* Loongarch KVM register ids */ #define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT) @@ -109,6 +110,7 @@ struct kvm_arch { s64 time_offset; struct kvm_context __percpu *vmcs; + struct loongarch_ipi *ipi; }; #define CSR_MAX_NUMS 0x800 @@ -206,6 +208,8 @@ struct kvm_vcpu_arch { int last_sched_cpu; /* mp state */ struct kvm_mp_state mp_state; + /* ipi state */ + struct ipi_state ipi_state; /* cpucfg */ u32 cpucfg[KVM_MAX_CPUCFG_REGS]; /* paravirt steal time */ diff --git a/arch/loongarch/include/asm/kvm_ipi.h b/arch/loongarch/include/asm/kvm_ipi.h new file mode 100644 index 000000000000..875a93008802 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_ipi.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef __LS3A_KVM_IPI_H +#define __LS3A_KVM_IPI_H + +#include + +#define LARCH_INT_IPI 12 + +struct loongarch_ipi { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + struct kvm_io_device mail_dev; +}; + +struct ipi_state { + spinlock_t lock; + uint32_t status; + uint32_t en; + uint32_t set; + uint32_t clear; + uint64_t buf[4]; +}; + +#define SMP_MAILBOX 0x1000 +#define KVM_IOCSR_IPI_ADDR_SIZE 0x48 + +#define MAIL_SEND_ADDR (SMP_MAILBOX + IOCSR_MAIL_SEND) +#define KVM_IOCSR_MAIL_ADDR_SIZE 0x118 + +int kvm_loongarch_register_ipi_device(void); +#endif diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 244467d7792a..69a074ee0d0f 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -18,5 +18,6 @@ kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o +kvm-y += intc/ipi.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c new file mode 100644 index 000000000000..a9dc0aaec502 --- /dev/null +++ b/arch/loongarch/kvm/intc/ipi.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_loongarch_ipi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static int kvm_loongarch_ipi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static int kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_loongarch_ipi_ops = { + .read = kvm_loongarch_ipi_read, + .write = kvm_loongarch_ipi_write, +}; + +static const struct kvm_io_device_ops kvm_loongarch_mail_ops = { + .write = kvm_loongarch_mail_write, +}; + +static int 
kvm_loongarch_ipi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_loongarch_ipi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static void kvm_loongarch_ipi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_ipi *ipi; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + ipi = kvm->arch.ipi; + if (!ipi) + return; + + device = &ipi->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + + device = &ipi->mail_dev; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + + kfree(ipi); +} + +static int kvm_loongarch_ipi_create(struct kvm_device *dev, u32 type) +{ + struct kvm *kvm; + struct loongarch_ipi *s; + unsigned long addr; + struct kvm_io_device *device; + int ret; + + kvm_info("begin create loongarch ipi in kvm ...\n"); + if (!dev) { + kvm_err("%s: kvm_device ptr is invalid!\n", __func__); + return -EINVAL; + } + + kvm = dev->kvm; + if (kvm->arch.ipi) { + kvm_err("%s: loongarch ipi has been created!\n", __func__); + return -EINVAL; + } + + s = kzalloc(sizeof(struct loongarch_ipi), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_loongarch_ipi_ops); + addr = SMP_MAILBOX; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, addr, + KVM_IOCSR_IPI_ADDR_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm_err("%s: initialize IOCSR dev failed, ret = %d\n", __func__, ret); + goto err; + } + + device = &s->mail_dev; + kvm_iodevice_init(device, &kvm_loongarch_mail_ops); + addr = MAIL_SEND_ADDR; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, addr, + KVM_IOCSR_MAIL_ADDR_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + device = &s->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + kvm_err("%s: initialize mail box dev failed, ret = %d\n", __func__, ret); + goto err; + } + + kvm->arch.ipi = s; + kvm_info("create loongarch ipi in kvm done!\n"); + + return 0; + +err: + kfree(s); + return -EFAULT; +} + +static struct kvm_device_ops kvm_loongarch_ipi_dev_ops = { + .name = "kvm-loongarch-ipi", + .create = kvm_loongarch_ipi_create, + .destroy = kvm_loongarch_ipi_destroy, + .set_attr = kvm_loongarch_ipi_set_attr, + .get_attr = kvm_loongarch_ipi_get_attr, +}; + +int kvm_loongarch_register_ipi_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_ipi_dev_ops, + KVM_DEV_TYPE_LA_IPI); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 86a2f2d0cb27..36efc7b38f83 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -312,7 +312,7 @@ void kvm_arch_hardware_disable(void) static int kvm_loongarch_env_init(void) { - int cpu, order; + int cpu, order, ret; void *addr; struct kvm_context *context; @@ -367,6 +367,11 @@ static int kvm_loongarch_env_init(void) kvm_init_gcsr_flag(); + /* Register loongarch ipi interrupt controller interface. 
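+	 * Registration only adds the ops to KVM's device table; userspace
+	 * instantiates the device later via KVM_CREATE_DEVICE with type
+	 * KVM_DEV_TYPE_LA_IPI.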
*/ + ret = kvm_loongarch_register_ipi_device(); + if (ret) + return ret; + return 0; } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index c3087dc2f21f..91f3f13c693d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1301,6 +1301,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Init */ vcpu->arch.last_sched_cpu = -1; + /* Init ipi_state lock */ + spin_lock_init(&vcpu->arch.ipi_state.lock); + /* * Initialize guest register state to valid architectural reset state. */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ae3a863bc8f6..0cde56ea20b7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1475,7 +1475,11 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LA_IPI, +#define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { -- Gitee From 3b3a64fa1cc15277b1cbe925bfd2a73d7357f6df Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:19:35 +0800 Subject: [PATCH 04/44] LoongArch: KVM: Add IPI read and write function Upstream: no Implementation of IPI interrupt controller address space read and write function simulation. Signed-off-by: Min Zhou Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/asm/kvm_ipi.h | 16 ++ arch/loongarch/kvm/intc/ipi.c | 287 +++++++++++++++++++++++++- 3 files changed, 303 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 5270294cd385..d448a7b967a1 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -39,6 +39,8 @@ struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; u64 hugepages; + u64 ipi_read_exits; + u64 ipi_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/include/asm/kvm_ipi.h b/arch/loongarch/include/asm/kvm_ipi.h index 875a93008802..729dfc1e3f40 100644 --- a/arch/loongarch/include/asm/kvm_ipi.h +++ b/arch/loongarch/include/asm/kvm_ipi.h @@ -29,8 +29,24 @@ struct ipi_state { #define SMP_MAILBOX 0x1000 #define KVM_IOCSR_IPI_ADDR_SIZE 0x48 +#define CORE_STATUS_OFF 0x000 +#define CORE_EN_OFF 0x004 +#define CORE_SET_OFF 0x008 +#define CORE_CLEAR_OFF 0x00c +#define CORE_BUF_20 0x020 +#define CORE_BUF_28 0x028 +#define CORE_BUF_30 0x030 +#define CORE_BUF_38 0x038 +#define IOCSR_IPI_SEND 0x040 + +#define IOCSR_MAIL_SEND 0x048 +#define IOCSR_ANY_SEND 0x158 + #define MAIL_SEND_ADDR (SMP_MAILBOX + IOCSR_MAIL_SEND) #define KVM_IOCSR_MAIL_ADDR_SIZE 0x118 +#define MAIL_SEND_OFFSET 0 +#define ANY_SEND_OFFSET (IOCSR_ANY_SEND - IOCSR_MAIL_SEND) + int kvm_loongarch_register_ipi_device(void); #endif diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index a9dc0aaec502..815858671005 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -7,24 +7,307 @@ #include #include +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + struct kvm_interrupt irq; + int cpu, action, status; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + action = 1 << (data & 0x1f); + + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + 
vcpu->arch.ipi_state.status |= action; + if (status == 0) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) +{ + struct kvm_interrupt irq; + + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.status &= ~data; + if (!vcpu->arch.ipi_state.status) { + irq.irq = -LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static uint64_t read_mailbox(struct kvm_vcpu *vcpu, int offset, int len) +{ + void *pbuf; + uint64_t ret = 0; + + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + if (len == 1) + ret = *(unsigned char *)pbuf; + else if (len == 2) + ret = *(unsigned short *)pbuf; + else if (len == 4) + ret = *(unsigned int *)pbuf; + else if (len == 8) + ret = *(unsigned long *)pbuf; + else + kvm_err("%s: unknown data len: %d\n", __func__, len); + spin_unlock(&vcpu->arch.ipi_state.lock); + + return ret; +} + +static void write_mailbox(struct kvm_vcpu *vcpu, int offset, + uint64_t data, int len) +{ + void *pbuf; + + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + if (len == 1) + *(unsigned char *)pbuf = (unsigned char)data; + else if (len == 2) + *(unsigned short *)pbuf = (unsigned short)data; + else if (len == 4) + *(unsigned int *)pbuf = (unsigned int)data; + else if (len == 8) + *(unsigned long *)pbuf = (unsigned long)data; + else + kvm_err("%s: unknown data len: %d\n", __func__, len); + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, + int len, const void *val) +{ + uint64_t data; + uint32_t offset; + int ret = 0; + + data = *(uint64_t *)val; + + offset = (uint32_t)(addr & 0xff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case CORE_STATUS_OFF: + kvm_err("CORE_SET_OFF Can't be write\n"); + ret = -EINVAL; + break; + case CORE_EN_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.en = data; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SEND: + ipi_send(vcpu->kvm, data); + break; + case CORE_SET_OFF: + kvm_info("CORE_SET_OFF simulation is required\n"); + ret = -EINVAL; + break; + case CORE_CLEAR_OFF: + /* Just clear the status of the current vcpu */ + ipi_clear(vcpu, data); + break; + case CORE_BUF_20 ... CORE_BUF_38 + 7: + if (offset + len > CORE_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + write_mailbox(vcpu, offset, data, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_ipi_readl(struct kvm_vcpu *vcpu, gpa_t addr, + int len, void *val) +{ + uint32_t offset; + uint64_t res = 0; + int ret = 0; + + offset = (uint32_t)(addr & 0xff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case CORE_STATUS_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case CORE_EN_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.en; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case CORE_SET_OFF: + res = 0; + break; + case CORE_CLEAR_OFF: + res = 0; + break; + case CORE_BUF_20 ... 
CORE_BUF_38 + 7: + if (offset + len > CORE_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + res = read_mailbox(vcpu, offset, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + *(uint64_t *)val = res; + + return ret; +} + static int kvm_loongarch_ipi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + struct loongarch_ipi *ipi; + int ret; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + ipi->kvm->stat.ipi_write_exits++; + ret = loongarch_ipi_writel(vcpu, addr, len, val); + + return ret; } static int kvm_loongarch_ipi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + struct loongarch_ipi *ipi; + int ret; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + ipi->kvm->stat.ipi_read_exits++; + ret = loongarch_ipi_readl(vcpu, addr, len, val); + + return ret; +} + +static void send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) +{ + int i; + uint32_t val = 0, mask = 0; + /* + * Bit 27-30 is mask for byte writing. + * If the mask is 0, we need not to do anything. + */ + if ((data >> 27) & 0xf) { + /* Read the old val */ + kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + + /* Construct the mask by scanning the bit 27-30 */ + for (i = 0; i < 4; i++) { + if (data & (0x1 << (27 + i))) + mask |= (0xff << (i * 8)); + } + /* Save the old part of val */ + val &= mask; + } + + val |= ((uint32_t)(data >> 32) & ~mask); + kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); +} + +static void mail_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + int cpu, mailbox; + int offset; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = SMP_MAILBOX + CORE_BUF_20 + mailbox * 4; + send_ipi_data(vcpu, offset, data); +} + +static void any_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + int cpu, offset; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + offset = data & 0xffff; + send_ipi_data(vcpu, offset, data); } static int kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { + struct loongarch_ipi *ipi; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr &= 0xfff; + addr -= IOCSR_MAIL_SEND; + + switch (addr) { + case MAIL_SEND_OFFSET: + mail_send(vcpu->kvm, *(uint64_t *)val); + break; + case ANY_SEND_OFFSET: + any_send(vcpu->kvm, *(uint64_t *)val); + break; + default: + break; + } + return 0; } -- Gitee From 735f157aedfa3d639a7470a905218be8b6426a9c Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:22:57 +0800 Subject: [PATCH 05/44] LoongArch: KVM: Add IPI user mode read and write function Upstream: no Implements the communication interface between the user mode program and the kernel in IPI interrupt control simulation, which is used to obtain or send 
the simulation data of the interrupt controller in the user mode process, and is used in VM migration or VM saving and restoration. Signed-off-by: Min Zhou Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 2 + arch/loongarch/kvm/intc/ipi.c | 91 ++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 9891ed93816a..4776a4de22e4 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -114,4 +114,6 @@ struct kvm_iocsr_entry { #define KVM_IRQCHIP_NUM_PINS 64 #define KVM_MAX_CORES 256 +#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 815858671005..fbf6f7e462cf 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -320,16 +320,103 @@ static const struct kvm_io_device_ops kvm_loongarch_mail_ops = { .write = kvm_loongarch_mail_write, }; +static int kvm_loongarch_ipi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + uint64_t val; + int cpu, addr; + void *p = NULL; + int len = 4; + struct kvm_vcpu *vcpu; + + cpu = (attr->attr >> 16) & 0x3ff; + addr = attr->attr & 0xff; + + vcpu = kvm_get_vcpu(dev->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + switch (addr) { + case CORE_STATUS_OFF: + p = &vcpu->arch.ipi_state.status; + break; + case CORE_EN_OFF: + p = &vcpu->arch.ipi_state.en; + break; + case CORE_SET_OFF: + p = &vcpu->arch.ipi_state.set; + break; + case CORE_CLEAR_OFF: + p = &vcpu->arch.ipi_state.clear; + break; + case CORE_BUF_20: + p = &vcpu->arch.ipi_state.buf[0]; + len = 8; + break; + case CORE_BUF_28: + p = &vcpu->arch.ipi_state.buf[1]; + len = 8; + break; + case CORE_BUF_30: + p = &vcpu->arch.ipi_state.buf[2]; + len = 8; + break; + case CORE_BUF_38: + p = &vcpu->arch.ipi_state.buf[3]; + len = 8; + break; + default: + kvm_err("%s: unknown ipi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + if (is_write) { + if (len == 4) { + if (get_user(val, (uint32_t __user *)attr->addr)) + return -EFAULT; + *(uint32_t *)p = (uint32_t)val; + } else if (len == 8) { + if (get_user(val, (uint64_t __user *)attr->addr)) + return -EFAULT; + *(uint64_t *)p = val; + } + } else { + if (len == 4) { + val = *(uint32_t *)p; + return put_user(val, (uint32_t __user *)attr->addr); + } else if (len == 8) { + val = *(uint64_t *)p; + return put_user(val, (uint64_t __user *)attr->addr); + } + } + + return 0; +} + static int kvm_loongarch_ipi_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_loongarch_ipi_regs_access(dev, attr, false); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } } static int kvm_loongarch_ipi_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_loongarch_ipi_regs_access(dev, attr, true); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } } static void kvm_loongarch_ipi_destroy(struct kvm_device *dev) -- Gitee From 59537b43c2f7c05d8fa8ed6c3279ce71c2bcd296 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 3 Jun 2024 14:30:58 
+0800 Subject: [PATCH 06/44] LoongArch: KVM: Add EXTIOI device support Upstream: no Added device model for EXTIOI interrupt controller, implemented basic create destroy interface, and registered device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_extioi.h | 78 +++++++++++++++++ arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/extioi.c | 111 ++++++++++++++++++++++++ arch/loongarch/kvm/main.c | 6 +- include/uapi/linux/kvm.h | 2 + 6 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/kvm_extioi.h create mode 100644 arch/loongarch/kvm/intc/extioi.c diff --git a/arch/loongarch/include/asm/kvm_extioi.h b/arch/loongarch/include/asm/kvm_extioi.h new file mode 100644 index 000000000000..48a117b2be5d --- /dev/null +++ b/arch/loongarch/include/asm/kvm_extioi.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_EXTIOI_H +#define LOONGARCH_EXTIOI_H + +#include + +#define EXTIOI_IRQS 256 +#define EXTIOI_ROUTE_MAX_VCPUS 256 +#define EXTIOI_IRQS_U8_NUMS (EXTIOI_IRQS / 8) +#define EXTIOI_IRQS_U32_NUMS (EXTIOI_IRQS_U8_NUMS / 4) +#define EXTIOI_IRQS_U64_NUMS (EXTIOI_IRQS_U32_NUMS / 2) +/* map to ipnum per 32 irqs */ +#define EXTIOI_IRQS_NODETYPE_COUNT 16 + +#define EXTIOI_BASE 0x1400 +#define EXTIOI_SIZE 0x900 + +#define LS3A_INTC_IP 8 + +struct loongarch_extioi { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + /* hardware state */ + union nodetype { + u64 reg_u64[EXTIOI_IRQS_NODETYPE_COUNT / 4]; + u32 reg_u32[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint16_t reg_u16[EXTIOI_IRQS_NODETYPE_COUNT]; + u8 reg_u8[EXTIOI_IRQS_NODETYPE_COUNT * 2]; + } nodetype; + + /* one bit shows the state of one irq */ + union bounce { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS]; + } bounce; + + union isr { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS]; + } isr; + union coreisr { + u64 reg_u64[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U8_NUMS]; + } coreisr; + union enable { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS]; + } enable; + + /* use one byte to config ipmap for 32 irqs at once */ + union ipmap { + u64 reg_u64; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS / 4]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS / 4]; + } ipmap; + /* use one byte to config coremap for one irq */ + union coremap { + u64 reg_u64[EXTIOI_IRQS / 8]; + u32 reg_u32[EXTIOI_IRQS / 4]; + u8 reg_u8[EXTIOI_IRQS]; + } coremap; + + DECLARE_BITMAP(sw_coreisr[EXTIOI_ROUTE_MAX_VCPUS][LS3A_INTC_IP], EXTIOI_IRQS); + uint8_t sw_coremap[EXTIOI_IRQS]; +}; + +void extioi_set_irq(struct loongarch_extioi *s, int irq, int level); +int kvm_loongarch_register_extioi_device(void); +#endif /* LOONGARCH_EXTIOI_H */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index d448a7b967a1..c84ed897d46c 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -20,6 +20,7 @@ #include #include #include +#include /* Loongarch KVM register ids */ #define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT) @@ -113,6 +114,7 @@ struct 
kvm_arch { s64 time_offset; struct kvm_context __percpu *vmcs; struct loongarch_ipi *ipi; + struct loongarch_extioi *extioi; }; #define CSR_MAX_NUMS 0x800 diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 69a074ee0d0f..2ab3ab019bee 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -19,5 +19,6 @@ kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o kvm-y += intc/ipi.o +kvm-y += intc/extioi.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/extioi.c b/arch/loongarch/kvm/intc/extioi.c new file mode 100644 index 000000000000..2f1b93e95f97 --- /dev/null +++ b/arch/loongarch/kvm/intc/extioi.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_loongarch_extioi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static int kvm_loongarch_extioi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_loongarch_extioi_ops = { + .read = kvm_loongarch_extioi_read, + .write = kvm_loongarch_extioi_write, +}; + +static int kvm_loongarch_extioi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_loongarch_extioi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static void kvm_loongarch_extioi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_extioi *extioi; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + extioi = kvm->arch.extioi; + if (!extioi) + return; + + device = &extioi->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + kfree(extioi); +} + +static int kvm_loongarch_extioi_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct loongarch_extioi *s; + struct kvm_io_device *device; + struct kvm *kvm = dev->kvm; + + /* extioi has been created */ + if (kvm->arch.extioi) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_extioi), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_loongarch_extioi_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, EXTIOI_BASE, EXTIOI_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kfree(s); + return -EFAULT; + } + + kvm->arch.extioi = s; + + kvm_info("create extioi device successfully\n"); + return 0; +} + +static struct kvm_device_ops kvm_loongarch_extioi_dev_ops = { + .name = "kvm-loongarch-extioi", + .create = kvm_loongarch_extioi_create, + .destroy = kvm_loongarch_extioi_destroy, + .set_attr = kvm_loongarch_extioi_set_attr, + .get_attr = kvm_loongarch_extioi_get_attr, +}; + +int kvm_loongarch_register_extioi_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_extioi_dev_ops, + KVM_DEV_TYPE_LA_EXTIOI); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 36efc7b38f83..b5da4341006a 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace.h" unsigned long vpid_mask; @@ -372,7 +373,10 @@ static int kvm_loongarch_env_init(void) if (ret) return ret; - return 0; + /* Register loongarch extioi interrupt 
controller interface. */ + ret = kvm_loongarch_register_extioi_device(); + + return ret; } static void kvm_loongarch_env_exit(void) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0cde56ea20b7..6d580366f380 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1477,6 +1477,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_LA_IPI, #define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI + KVM_DEV_TYPE_LA_EXTIOI, +#define KVM_DEV_TYPE_LA_EXTIOI KVM_DEV_TYPE_LA_EXTIOI KVM_DEV_TYPE_MAX, -- Gitee From 2dc138292b38d4d4e229c297ec9aae4ac15620c8 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:41:22 +0800 Subject: [PATCH 07/44] LoongArch: KVM: Add EXTIOI read and write functions Upstream: no Implementation of EXTIOI interrupt controller address space read and write function simulation. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_extioi.h | 17 + arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/kvm/intc/extioi.c | 577 +++++++++++++++++++++++- 3 files changed, 594 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_extioi.h b/arch/loongarch/include/asm/kvm_extioi.h index 48a117b2be5d..d2af039a7d6f 100644 --- a/arch/loongarch/include/asm/kvm_extioi.h +++ b/arch/loongarch/include/asm/kvm_extioi.h @@ -19,8 +19,25 @@ #define EXTIOI_BASE 0x1400 #define EXTIOI_SIZE 0x900 +#define EXTIOI_NODETYPE_START 0xa0 +#define EXTIOI_NODETYPE_END 0xbf +#define EXTIOI_IPMAP_START 0xc0 +#define EXTIOI_IPMAP_END 0xc7 +#define EXTIOI_ENABLE_START 0x200 +#define EXTIOI_ENABLE_END 0x21f +#define EXTIOI_BOUNCE_START 0x280 +#define EXTIOI_BOUNCE_END 0x29f +#define EXTIOI_ISR_START 0x300 +#define EXTIOI_ISR_END 0x31f +#define EXTIOI_COREISR_START 0x400 +#define EXTIOI_COREISR_END 0x71f +#define EXTIOI_COREMAP_START 0x800 +#define EXTIOI_COREMAP_END 0x8ff + #define LS3A_INTC_IP 8 +#define EXTIOI_SW_COREMAP_FLAG (1 << 0) + struct loongarch_extioi { spinlock_t lock; struct kvm *kvm; diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index c84ed897d46c..77dc82708d0d 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -42,6 +42,8 @@ struct kvm_vm_stat { u64 hugepages; u64 ipi_read_exits; u64 ipi_write_exits; + u64 extioi_read_exits; + u64 extioi_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/kvm/intc/extioi.c b/arch/loongarch/kvm/intc/extioi.c index 2f1b93e95f97..dd18b7a7599a 100644 --- a/arch/loongarch/kvm/intc/extioi.c +++ b/arch/loongarch/kvm/intc/extioi.c @@ -7,18 +7,591 @@ #include #include +#define loongarch_ext_irq_lock(s, flags) spin_lock_irqsave(&s->lock, flags) +#define loongarch_ext_irq_unlock(s, flags) spin_unlock_irqrestore(&s->lock, flags) + +static void extioi_update_irq(struct loongarch_extioi *s, int irq, int level) +{ + int ipnum, cpu, found, irq_index, irq_mask; + struct kvm_interrupt vcpu_irq; + struct kvm_vcpu *vcpu; + + ipnum = s->ipmap.reg_u8[irq / 32]; + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? 
ipnum : 0; + + cpu = s->sw_coremap[irq]; + vcpu = kvm_get_vcpu(s->kvm, cpu); + irq_index = irq / 32; + /* length of accessing core isr is 4 bytes */ + irq_mask = 1 << (irq & 0x1f); + + if (level) { + /* if not enable return false */ + if (((s->enable.reg_u32[irq_index]) & irq_mask) == 0) + return; + s->coreisr.reg_u32[cpu][irq_index] |= irq_mask; + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EXTIOI_IRQS); + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + } else { + s->coreisr.reg_u32[cpu][irq_index] &= ~irq_mask; + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EXTIOI_IRQS); + } + + if (found < EXTIOI_IRQS) + /* other irq is handling, need not update parent irq level */ + return; + + vcpu_irq.irq = level ? INT_HWI0 + ipnum : -(INT_HWI0 + ipnum); + kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq); +} + +void extioi_set_irq(struct loongarch_extioi *s, int irq, int level) +{ + unsigned long *isr = (unsigned long *)s->isr.reg_u8; + unsigned long flags; + + level ? set_bit(irq, isr) : clear_bit(irq, isr); + if (!level) + return; + loongarch_ext_irq_lock(s, flags); + extioi_update_irq(s, irq, level); + loongarch_ext_irq_unlock(s, flags); +} + +static inline void extioi_enable_irq(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + int index, u8 mask, int level) +{ + u8 val; + int irq; + + val = mask & s->isr.reg_u8[index]; + irq = ffs(val); + while (irq != 0) { + /* + * enable bit change from 0 to 1, + * need to update irq by pending bits + */ + extioi_update_irq(s, irq - 1 + index * 8, level); + val &= ~(1 << (irq - 1)); + irq = ffs(val); + } +} + +static int loongarch_extioi_writeb(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int index, irq, ret = 0; + u8 data, old_data, cpu; + u8 coreisr, old_coreisr; + gpa_t offset; + + data = *(u8 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START); + s->nodetype.reg_u8[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START); + s->ipmap.reg_u8[index] = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START); + old_data = s->enable.reg_u8[index]; + s->enable.reg_u8[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u8[index] & ~old_data & s->isr.reg_u8[index]; + extioi_enable_irq(vcpu, s, index, data, 1); + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u8[index] & old_data & s->isr.reg_u8[index]; + extioi_enable_irq(vcpu, s, index, data, 0); + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = offset - EXTIOI_BOUNCE_START; + s->bounce.reg_u8[index] = data; + break; + case EXTIOI_COREISR_START ... 
EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START); + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u8[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u8[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + irq = ffs(coreisr); + while (irq != 0) { + extioi_update_irq(s, irq - 1 + index * 8, 0); + coreisr &= ~(1 << (irq - 1)); + irq = ffs(coreisr); + } + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq; + s->coremap.reg_u8[index] = data; + + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 0 : cpu; + + if (s->sw_coremap[irq] == cpu) + break; + + if (test_bit(irq, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq, 0); + s->sw_coremap[irq] = cpu; + extioi_update_irq(s, irq, 1); + } else + s->sw_coremap[irq] = cpu; + + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int loongarch_extioi_writew(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, ret = 0; + u8 cpu; + u32 data, old_data; + u32 coreisr, old_coreisr; + gpa_t offset; + + data = *(u32 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 2; + s->nodetype.reg_u32[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START) >> 2; + s->ipmap.reg_u32[index] = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 2; + old_data = s->enable.reg_u32[index]; + s->enable.reg_u32[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; + index = index << 2; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EXTIOI_BOUNCE_START) >> 2; + s->bounce.reg_u32[index] = data; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u32[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u32[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + irq = ffs(coreisr); + while (irq != 0) { + extioi_update_irq(s, irq - 1 + index * 32, 0); + coreisr &= ~(1 << (irq - 1)); + irq = ffs(coreisr); + } + break; + case EXTIOI_COREMAP_START ... 
EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq >> 2; + + s->coremap.reg_u32[index] = data; + + for (i = 0; i < sizeof(data); i++) { + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 0 : cpu; + data = data >> 8; + + if (s->sw_coremap[irq + i] == cpu) + continue; + + if (test_bit(irq, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq + i, 0); + s->sw_coremap[irq + i] = cpu; + extioi_update_irq(s, irq + i, 1); + } else + s->sw_coremap[irq + i] = cpu; + } + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int loongarch_extioi_writel(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u64 data, old_data; + u64 coreisr, old_coreisr; + gpa_t offset; + + data = *(u64 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 3; + s->nodetype.reg_u64[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START) >> 3; + s->ipmap.reg_u64 = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 3; + old_data = s->enable.reg_u64[index]; + s->enable.reg_u64[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; + index = index << 3; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EXTIOI_BOUNCE_START) >> 3; + s->bounce.reg_u64[index] = data; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 3; + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u64[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u64[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + + bits = sizeof(u64) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + extioi_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq >> 3; + + s->coremap.reg_u64[index] = data; + + for (i = 0; i < sizeof(data); i++) { + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 
0 : cpu; + data = data >> 8; + + if (s->sw_coremap[irq + i] == cpu) + continue; + + if (test_bit(irq + i, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq + i, 0); + s->sw_coremap[irq + i] = cpu; + extioi_update_irq(s, irq + i, 1); + } else + s->sw_coremap[irq + i] = cpu; + } + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + static int kvm_loongarch_extioi_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret; + struct loongarch_extioi *extioi = vcpu->kvm->arch.extioi; + unsigned long flags; + + if (!extioi) { + kvm_err("%s: extioi irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.extioi_write_exits++; + loongarch_ext_irq_lock(extioi, flags); + + switch (len) { + case 1: + ret = loongarch_extioi_writeb(vcpu, extioi, addr, len, val); + break; + case 4: + ret = loongarch_extioi_writew(vcpu, extioi, addr, len, val); + break; + case 8: + ret = loongarch_extioi_writel(vcpu, extioi, addr, len, val); + break; + default: + ret = -EINVAL; + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx,size %d\n", + __func__, addr, len); + } + + loongarch_ext_irq_unlock(extioi, flags); + + return ret; +} + +static int loongarch_extioi_readb(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = offset - EXTIOI_NODETYPE_START; + data = s->nodetype.reg_u8[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = offset - EXTIOI_IPMAP_START; + data = s->ipmap.reg_u8[index]; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = offset - EXTIOI_ENABLE_START; + data = s->enable.reg_u8[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = offset - EXTIOI_BOUNCE_START; + data = s->bounce.reg_u8[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = offset - EXTIOI_COREISR_START; + data = s->coreisr.reg_u8[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + index = offset - EXTIOI_COREMAP_START; + data = s->coremap.reg_u8[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u8 *)val = data; + + return ret; +} + +static int loongarch_extioi_readw(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 2; + data = s->nodetype.reg_u32[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = (offset - EXTIOI_IPMAP_START) >> 2; + data = s->ipmap.reg_u32[index]; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 2; + data = s->enable.reg_u32[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = (offset - EXTIOI_BOUNCE_START) >> 2; + data = s->bounce.reg_u32[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 2; + data = s->coreisr.reg_u32[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... 
EXTIOI_COREMAP_END: + index = (offset - EXTIOI_COREMAP_START) >> 2; + data = s->coremap.reg_u32[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u32 *)val = data; + + return ret; +} + +static int loongarch_extioi_readl(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 3; + data = s->nodetype.reg_u64[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = (offset - EXTIOI_IPMAP_START) >> 3; + data = s->ipmap.reg_u64; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 3; + data = s->enable.reg_u64[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = (offset - EXTIOI_BOUNCE_START) >> 3; + data = s->bounce.reg_u64[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 3; + data = s->coreisr.reg_u64[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + index = (offset - EXTIOI_COREMAP_START) >> 3; + data = s->coremap.reg_u64[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u64 *)val = data; + + return ret; } static int kvm_loongarch_extioi_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + int ret; + struct loongarch_extioi *extioi = vcpu->kvm->arch.extioi; + unsigned long flags; + + if (!extioi) { + kvm_err("%s: extioi irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.extioi_read_exits++; + loongarch_ext_irq_lock(extioi, flags); + + switch (len) { + case 1: + ret = loongarch_extioi_readb(vcpu, extioi, addr, len, val); + break; + case 4: + ret = loongarch_extioi_readw(vcpu, extioi, addr, len, val); + break; + case 8: + ret = loongarch_extioi_readl(vcpu, extioi, addr, len, val); + break; + default: + ret = -EINVAL; + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx,size %d\n", + __func__, addr, len); + } + + loongarch_ext_irq_unlock(extioi, flags); + + return ret; } static const struct kvm_io_device_ops kvm_loongarch_extioi_ops = { -- Gitee From 571e6f70cce180b4c3bdd98dc44e5725f516754b Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:44:38 +0800 Subject: [PATCH 08/44] LoongArch: KVM: Add EXTIOI user mode read and write functions Upstream: no Implement the communication interface between user mode programs and the kernel for the EXTIOI interrupt controller simulation. It is used to obtain or set the simulated state of the interrupt controller from a user mode process, for VM migration or VM save and restore.
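As an illustration of how a VMM would drive this interface, the following userspace sketch is hypothetical and not part of this patch: state is transferred with the generic KVM_GET_DEVICE_ATTR/KVM_SET_DEVICE_ATTR ioctls, using KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS as the group and a register block start offset as the attribute. The device fd, the block offset value and the buffer size are assumptions that must match the VMM's setup and the kernel's register layout:

    /* hypothetical userspace sketch, not kernel code */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int extioi_xfer_block(int dev_fd, __u64 reg_start, void *buf, int set)
    {
            struct kvm_device_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS;
            attr.attr = reg_start;                  /* which register block to access */
            attr.addr = (__u64)(unsigned long)buf;  /* buffer sized for that block */

            return ioctl(dev_fd, set ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR, &attr);
    }

On restore, writing the coreisr block also resynchronizes the software coreisr bitmaps, as the kernel side below does after a user write.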
Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 2 + arch/loongarch/kvm/intc/extioi.c | 103 +++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 4776a4de22e4..ca2ef0d9605f 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -116,4 +116,6 @@ struct kvm_iocsr_entry { #define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 +#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/extioi.c b/arch/loongarch/kvm/intc/extioi.c index dd18b7a7599a..48141823aaa3 100644 --- a/arch/loongarch/kvm/intc/extioi.c +++ b/arch/loongarch/kvm/intc/extioi.c @@ -47,6 +47,26 @@ static void extioi_update_irq(struct loongarch_extioi *s, int irq, int level) kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq); } +static void extioi_set_sw_coreisr(struct loongarch_extioi *s) +{ + int ipnum, cpu, irq_index, irq_mask, irq; + + for (irq = 0; irq < EXTIOI_IRQS; irq++) { + ipnum = s->ipmap.reg_u8[irq / 32]; + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0; + irq_index = irq / 32; + /* length of accessing core isr is 4 bytes */ + irq_mask = 1 << (irq & 0x1f); + + cpu = s->coremap.reg_u8[irq]; + if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + else + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + } +} + void extioi_set_irq(struct loongarch_extioi *s, int irq, int level) { unsigned long *isr = (unsigned long *)s->isr.reg_u8; @@ -599,16 +619,95 @@ static const struct kvm_io_device_ops kvm_loongarch_extioi_ops = { .write = kvm_loongarch_extioi_write, }; +static int kvm_loongarch_extioi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int len, addr; + void __user *data; + void *p = NULL; + struct loongarch_extioi *s; + unsigned long flags; + + s = dev->kvm->arch.extioi; + addr = attr->attr; + data = (void __user *)attr->addr; + + loongarch_ext_irq_lock(s, flags); + switch (addr) { + case EXTIOI_NODETYPE_START: + p = s->nodetype.reg_u8; + len = sizeof(s->nodetype); + break; + case EXTIOI_IPMAP_START: + p = s->ipmap.reg_u8; + len = sizeof(s->ipmap); + break; + case EXTIOI_ENABLE_START: + p = s->enable.reg_u8; + len = sizeof(s->enable); + break; + case EXTIOI_BOUNCE_START: + p = s->bounce.reg_u8; + len = sizeof(s->bounce); + break; + case EXTIOI_ISR_START: + p = s->isr.reg_u8; + len = sizeof(s->isr); + break; + case EXTIOI_COREISR_START: + p = s->coreisr.reg_u8; + len = sizeof(s->coreisr); + break; + case EXTIOI_COREMAP_START: + p = s->coremap.reg_u8; + len = sizeof(s->coremap); + break; + case EXTIOI_SW_COREMAP_FLAG: + p = s->sw_coremap; + len = sizeof(s->sw_coremap); + break; + default: + loongarch_ext_irq_unlock(s, flags); + kvm_err("%s: unknown extioi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + loongarch_ext_irq_unlock(s, flags); + + if (is_write) { + if (copy_from_user(p, data, len)) + return -EFAULT; + } else { + if (copy_to_user(data, p, len)) + return -EFAULT; + } + + if ((addr == EXTIOI_COREISR_START) && is_write) { + loongarch_ext_irq_lock(s, flags); + extioi_set_sw_coreisr(s); + loongarch_ext_irq_unlock(s, flags); + } + + return 0; +} + static int kvm_loongarch_extioi_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + if (attr->group == KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS) + 
return kvm_loongarch_extioi_regs_access(dev, attr, false); + + return -EINVAL; } static int kvm_loongarch_extioi_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + if (attr->group == KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS) + return kvm_loongarch_extioi_regs_access(dev, attr, true); + + return -EINVAL; } static void kvm_loongarch_extioi_destroy(struct kvm_device *dev) -- Gitee From c4f21bdb9791548c5a12bc22686cb6a7fd53d1e9 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 3 Jun 2024 14:51:29 +0800 Subject: [PATCH 09/44] LoongArch: KVM: Add PCHPIC device support Upstream: no Added device model for PCHPIC interrupt controller, implemented basic create destroy interface, and registered device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 2 + arch/loongarch/include/asm/kvm_pch_pic.h | 30 +++++++ arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/pch_pic.c | 100 +++++++++++++++++++++++ arch/loongarch/kvm/main.c | 6 ++ include/uapi/linux/kvm.h | 2 + 6 files changed, 141 insertions(+) create mode 100644 arch/loongarch/include/asm/kvm_pch_pic.h create mode 100644 arch/loongarch/kvm/intc/pch_pic.c diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 77dc82708d0d..93d78942936a 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include #include #include +#include /* Loongarch KVM register ids */ #define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT) @@ -117,6 +118,7 @@ struct kvm_arch { struct kvm_context __percpu *vmcs; struct loongarch_ipi *ipi; struct loongarch_extioi *extioi; + struct loongarch_pch_pic *pch_pic; }; #define CSR_MAX_NUMS 0x800 diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h new file mode 100644 index 000000000000..5aef0e4e3863 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_PCH_PIC_H +#define LOONGARCH_PCH_PIC_H + +#include + +struct loongarch_pch_pic { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + uint64_t mask; /* 1:disable irq, 0:enable irq */ + uint64_t htmsi_en; /* 1:msi */ + uint64_t edge; /* 1:edge triggered, 0:level triggered */ + uint64_t auto_ctrl0; /* only use default value 00b */ + uint64_t auto_ctrl1; /* only use default value 00b */ + uint64_t last_intirr; /* edge detection */ + uint64_t irr; /* interrupt request register */ + uint64_t isr; /* interrupt service register */ + uint64_t polarity; /* 0: high level trigger, 1: low level trigger */ + uint8_t route_entry[64]; /* default value 0, route to int0: extioi */ + uint8_t htmsi_vector[64]; /* irq route table for routing to extioi */ + uint64_t pch_pic_base; +}; + +int kvm_loongarch_register_pch_pic_device(void); +#endif /* LOONGARCH_PCH_PIC_H */ diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 2ab3ab019bee..5661f2bc04f6 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -20,5 +20,6 @@ kvm-y += vcpu.o kvm-y += vm.o kvm-y += intc/ipi.o kvm-y += intc/extioi.o +kvm-y += intc/pch_pic.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c new file mode 100644 index 000000000000..4097c00e8294 --- /dev/null +++ 
b/arch/loongarch/kvm/intc/pch_pic.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +static int kvm_loongarch_pch_pic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + return 0; +} + +static int kvm_loongarch_pch_pic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + return 0; +} + +static const struct kvm_io_device_ops kvm_loongarch_pch_pic_ops = { + .read = kvm_loongarch_pch_pic_read, + .write = kvm_loongarch_pch_pic_write, +}; + +static int kvm_loongarch_pch_pic_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static int kvm_loongarch_pch_pic_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + return 0; +} + +static void kvm_loongarch_pch_pic_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_pch_pic *s; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + s = kvm->arch.pch_pic; + if (!s) + return; + + device = &s->device; + /* unregister pch pic device and free its memory */ + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, device); + kfree(s); +} + +static int kvm_loongarch_pch_pic_create(struct kvm_device *dev, u32 type) +{ + struct loongarch_pch_pic *s; + struct kvm *kvm = dev->kvm; + + /* pch pic should not have been created */ + if (kvm->arch.pch_pic) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + kvm->arch.pch_pic = s; + + kvm_info("create pch pic device successfully\n"); + return 0; +} + +static struct kvm_device_ops kvm_loongarch_pch_pic_dev_ops = { + .name = "kvm-loongarch-pch-pic", + .create = kvm_loongarch_pch_pic_create, + .destroy = kvm_loongarch_pch_pic_destroy, + .set_attr = kvm_loongarch_pch_pic_set_attr, + .get_attr = kvm_loongarch_pch_pic_get_attr, +}; + +int kvm_loongarch_register_pch_pic_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_pch_pic_dev_ops, + KVM_DEV_TYPE_LA_IOAPIC); +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index b5da4341006a..285bd4126e54 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace.h" unsigned long vpid_mask; @@ -375,6 +376,11 @@ static int kvm_loongarch_env_init(void) /* Register loongarch extioi interrupt controller interface. */ ret = kvm_loongarch_register_extioi_device(); + if (ret) + return ret; + + /* Register loongarch pch pic interrupt controller interface. 
*/ + ret = kvm_loongarch_register_pch_pic_device(); return ret; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6d580366f380..4a8a497325e0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1475,6 +1475,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LA_IOAPIC = 0x100, +#define KVM_DEV_TYPE_LA_IOAPIC KVM_DEV_TYPE_LA_IOAPIC KVM_DEV_TYPE_LA_IPI, #define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_EXTIOI, -- Gitee From d50ffb84888e82e044bfc62042a21d6c8de5e7cf Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:56:27 +0800 Subject: [PATCH 10/44] LoongArch: KVM: Add PCHPIC read and write functions Upstream: no Implement simulation of the PCHPIC interrupt controller address space read and write functions. Implement the interrupt injection interface under LoongArch. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 18 ++ arch/loongarch/include/asm/kvm_pch_pic.h | 31 +++ arch/loongarch/include/uapi/asm/kvm.h | 1 + arch/loongarch/kvm/intc/ipi.c | 45 ++-- arch/loongarch/kvm/intc/pch_pic.c | 292 ++++++++++++++++++++++- arch/loongarch/kvm/vm.c | 34 +++ 6 files changed, 403 insertions(+), 18 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 93d78942936a..20a53ff6de3b 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -35,6 +35,22 @@ #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) +/* KVM_IRQ_LINE irq field index values */ +#define KVM_LOONGARCH_IRQ_TYPE_SHIFT 24 +#define KVM_LOONGARCH_IRQ_TYPE_MASK 0xff +#define KVM_LOONGARCH_IRQ_VCPU_SHIFT 16 +#define KVM_LOONGARCH_IRQ_VCPU_MASK 0xff +#define KVM_LOONGARCH_IRQ_NUM_SHIFT 0 +#define KVM_LOONGARCH_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_LOONGARCH_IRQ_TYPE_CPU_IP 0 +#define KVM_LOONGARCH_IRQ_TYPE_CPU_IO 1 +#define KVM_LOONGARCH_IRQ_TYPE_HT 2 +#define KVM_LOONGARCH_IRQ_TYPE_MSI 3 +#define KVM_LOONGARCH_IRQ_TYPE_IOAPIC 4 +#define KVM_LOONGARCH_IRQ_TYPE_ROUTE 5 + #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) struct kvm_vm_stat { @@ -45,6 +61,8 @@ struct kvm_vm_stat { u64 ipi_write_exits; u64 extioi_read_exits; u64 extioi_write_exits; + u64 pch_pic_read_exits; + u64 pch_pic_write_exits; }; struct kvm_vcpu_stat { diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h index 5aef0e4e3863..91bd5a5ec575 100644 --- a/arch/loongarch/include/asm/kvm_pch_pic.h +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -8,6 +8,35 @@ #include +#define PCH_PIC_SIZE 0x3e8 + +#define PCH_PIC_INT_ID_START 0x0 +#define PCH_PIC_INT_ID_END 0x7 +#define PCH_PIC_MASK_START 0x20 +#define PCH_PIC_MASK_END 0x27 +#define PCH_PIC_HTMSI_EN_START 0x40 +#define PCH_PIC_HTMSI_EN_END 0x47 +#define PCH_PIC_EDGE_START 0x60 +#define PCH_PIC_EDGE_END 0x67 +#define PCH_PIC_CLEAR_START 0x80 +#define PCH_PIC_CLEAR_END 0x87 +#define PCH_PIC_AUTO_CTRL0_START 0xc0 +#define PCH_PIC_AUTO_CTRL0_END 0xc7 +#define PCH_PIC_AUTO_CTRL1_START 0xe0 +#define PCH_PIC_AUTO_CTRL1_END 0xe7 +#define PCH_PIC_ROUTE_ENTRY_START 0x100 +#define PCH_PIC_ROUTE_ENTRY_END 0x13f +#define PCH_PIC_HTMSI_VEC_START 0x200 +#define PCH_PIC_HTMSI_VEC_END 0x23f +#define PCH_PIC_INT_IRR_START 0x380 +#define PCH_PIC_INT_IRR_END 0x38f
+#define PCH_PIC_INT_ISR_START 0x3a0 +#define PCH_PIC_INT_ISR_END 0x3af +#define PCH_PIC_POLARITY_START 0x3e0 +#define PCH_PIC_POLARITY_END 0x3e7 +#define PCH_PIC_INT_ID_VAL 0x7000000UL +#define PCH_PIC_INT_ID_VER 0x1UL + struct loongarch_pch_pic { spinlock_t lock; struct kvm *kvm; @@ -26,5 +55,7 @@ struct loongarch_pch_pic { uint64_t pch_pic_base; }; +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level); +void pch_msi_set_irq(struct kvm *kvm, int irq, int level); int kvm_loongarch_register_pch_pic_device(void); #endif /* LOONGARCH_PCH_PIC_H */ diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index ca2ef0d9605f..2b716f352fae 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -19,6 +19,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 +#define __KVM_HAVE_IRQ_LINE #define KVM_GUESTDBG_USE_SW_BP 0x00010000 /* diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index fbf6f7e462cf..12024d9fdd0b 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -223,9 +223,9 @@ static int kvm_loongarch_ipi_read(struct kvm_vcpu *vcpu, return ret; } -static void send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) +static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { - int i; + int i, ret; uint32_t val = 0, mask = 0; /* * Bit 27-30 is mask for byte writing. */ if ((data >> 27) & 0xf) { /* Read the old val */ - kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); - + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) { + kvm_err("%s: read data from addr %llx failed\n", __func__, addr); + return ret; + } /* Construct the mask by scanning the bit 27-30 */ for (i = 0; i < 4; i++) { if (data & (0x1 << (27 + i))) @@ -245,41 +248,48 @@ } val |= ((uint32_t)(data >> 32) & ~mask); - kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) + kvm_err("%s: write data to addr %llx failed\n", __func__, addr); + + return ret; } -static void mail_send(struct kvm *kvm, uint64_t data) +static int mail_send(struct kvm *kvm, uint64_t data) { struct kvm_vcpu *vcpu; int cpu, mailbox; - int offset; + int offset, ret; cpu = ((data & 0xffffffff) >> 16) & 0x3ff; vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); if (unlikely(vcpu == NULL)) { kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); - return; + return -EINVAL; } mailbox = ((data & 0xffffffff) >> 2) & 0x7; offset = SMP_MAILBOX + CORE_BUF_20 + mailbox * 4; - send_ipi_data(vcpu, offset, data); + ret = send_ipi_data(vcpu, offset, data); + + return ret; } -static void any_send(struct kvm *kvm, uint64_t data) +static int any_send(struct kvm *kvm, uint64_t data) { struct kvm_vcpu *vcpu; - int cpu, offset; + int cpu, offset, ret; cpu = ((data & 0xffffffff) >> 16) & 0x3ff; vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); if (unlikely(vcpu == NULL)) { kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); - return; + return -EINVAL; } offset = data & 0xffff; - send_ipi_data(vcpu, offset, data); + ret = send_ipi_data(vcpu, offset, data); + return ret; } static int kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, @@ -287,6 +297,7 @@ static int 
kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *val) { struct loongarch_ipi *ipi; + int ret; ipi = vcpu->kvm->arch.ipi; if (!ipi) { @@ -299,16 +310,18 @@ static int kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, switch (addr) { case MAIL_SEND_OFFSET: - mail_send(vcpu->kvm, *(uint64_t *)val); + ret = mail_send(vcpu->kvm, *(uint64_t *)val); break; case ANY_SEND_OFFSET: - any_send(vcpu->kvm, *(uint64_t *)val); + ret = any_send(vcpu->kvm, *(uint64_t *)val); break; default: + kvm_err("%s: invalid addr %llx!\n", __func__, addr); + ret = -EINVAL; break; } - return 0; + return ret; } static const struct kvm_io_device_ops kvm_loongarch_ipi_ops = { diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 4097c00e8294..b1d3d301e02f 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -8,18 +8,306 @@ #include #include +/* update the isr according to irq level and route irq to extioi */ +static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = (1ULL << irq); + + /* + * set isr and route irq to extioi and + * the route table is in htmsi_vector[] + */ + if (level) { + if (mask & s->irr & ~s->mask) { + s->isr |= mask; + irq = s->htmsi_vector[irq]; + extioi_set_irq(s->kvm->arch.extioi, irq, level); + } + } else { + if (mask & s->isr & ~s->irr) { + s->isr &= ~mask; + irq = s->htmsi_vector[irq]; + extioi_set_irq(s->kvm->arch.extioi, irq, level); + } + } +} + +/* msi irq handler */ +void pch_msi_set_irq(struct kvm *kvm, int irq, int level) +{ + extioi_set_irq(kvm->arch.extioi, irq, level); +} + +/* called when an irq is triggered in pch pic */ +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = (1ULL << irq); + + spin_lock(&s->lock); + if (level) + /* set irr */ + s->irr |= mask; + else { + /* 0 level signal in edge triggered irq does not mean to clear irq + * The irr register variable is cleared when the cpu writes to the + * PCH_PIC_CLEAR_START address area + */ + if (s->edge & mask) { + spin_unlock(&s->lock); + return; + } + s->irr &= ~mask; + } + pch_pic_update_irq(s, irq, level); + spin_unlock(&s->lock); +} + +/* update batch irqs, the irq_mask is a bitmap of irqs */ +static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) +{ + int irq, bits; + + /* find each irq by irqs bitmap and update each irq */ + bits = sizeof(irq_mask) * 8; + irq = find_first_bit((void *)&irq_mask, bits); + while (irq < bits) { + pch_pic_update_irq(s, irq, level); + bitmap_clear((void *)&irq_mask, irq, 1); + irq = find_first_bit((void *)&irq_mask, bits); + } +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to read. + */ +static u32 pch_pic_read_reg(u64 *s, int high) +{ + u64 val = *s; + + /* read the high 32 bits when the high is 1 */ + return high ? (u32)(val >> 32) : (u32)val; +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to write. 
+ */ +static u32 pch_pic_write_reg(u64 *s, int high, u32 v) +{ + u64 val = *s, data = v; + + if (high) { + /* + * Clear val high 32 bits + * write the high 32 bits when the high is 1 + */ + *s = (val << 32 >> 32) | (data << 32); + val >>= 32; + } else + /* + * Clear val low 32 bits + * write the low 32 bits when the high is 0 + */ + *s = (val >> 32 << 32) | v; + + return (u32)val; +} + +static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, + int len, const void *val) +{ + u32 old, data, offset, index; + u64 irq; + int ret; + + ret = 0; + data = *(u32 *)val; + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + /* get whether high or low 32 bits we want to write */ + index = offset >> 2; + old = pch_pic_write_reg(&s->mask, index, data); + + /* enable irq when mask value change to 0 */ + irq = (u64)(old & ~data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 1); + + /* disable irq when mask value change to 1 */ + irq = (u64)(~old & data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + pch_pic_write_reg(&s->htmsi_en, index, data); + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* 1: edge triggered, 0: level triggered */ + pch_pic_write_reg(&s->edge, index, data); + break; + case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END: + offset -= PCH_PIC_CLEAR_START; + index = offset >> 2; + /* write 1 to clear edge irq */ + old = pch_pic_read_reg(&s->irr, index); + /* + * get the irq bitmap which is edge triggered and + * already set and to be cleared + */ + irq = old & pch_pic_read_reg(&s->edge, index) & data; + /* write irr to the new state where irqs have been cleared */ + pch_pic_write_reg(&s->irr, index, old & ~irq); + /* update cleared irqs */ + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + offset -= PCH_PIC_AUTO_CTRL0_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl0, index, 0); + break; + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + offset -= PCH_PIC_AUTO_CTRL1_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl1, index, 0); + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset -= PCH_PIC_ROUTE_ENTRY_START; + /* only route to int0: extioi */ + s->route_entry[offset] = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + /* route table to extioi */ + offset -= PCH_PIC_HTMSI_VEC_START; + s->htmsi_vector[offset] = (u8)data; + break; + case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: + offset -= PCH_PIC_POLARITY_START; + index = offset >> 2; + + /* we only use default value 0: high level triggered */ + pch_pic_write_reg(&s->polarity, index, 0); + break; + default: + ret = -EINVAL; + break; + } + + spin_unlock(&s->lock); + return ret; +} + static int kvm_loongarch_pch_pic_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { - return 0; + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic writing */ + vcpu->kvm->stat.pch_pic_write_exits++; + ret = loongarch_pch_pic_write(s, addr, len, val); + + return ret; +} + +static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) +{ + int offset, index, ret = 0; + u32 data = 0; + u64 int_id = 0; + + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: + /* int id version */ + int_id |= (u64)PCH_PIC_INT_ID_VER << 32; + /* irq number */ + int_id |= (u64)31 << (32 + 16); + /* int id value */ + int_id |= PCH_PIC_INT_ID_VAL; + *(u64 *)val = int_id; + break; + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + index = offset >> 2; + /* read mask reg */ + data = pch_pic_read_reg(&s->mask, index); + *(u32 *)val = data; + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + /* read htmsi enable reg */ + data = pch_pic_read_reg(&s->htmsi_en, index); + *(u32 *)val = data; + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* read edge enable reg */ + data = pch_pic_read_reg(&s->edge, index); + *(u32 *)val = data; + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + /* we only use default mode: fixed interrupt distribution mode */ + *(u32 *)val = 0; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + /* only route to int0: extioi */ + *(u8 *)val = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + offset -= PCH_PIC_HTMSI_VEC_START; + /* read htmsi vector */ + data = s->htmsi_vector[offset]; + *(u8 *)val = data; + break; + case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: + /* we only use default value 0: high level triggered */ + *(u32 *)val = 0; + break; + default: + ret = -EINVAL; + } + spin_unlock(&s->lock); + return ret; } static int kvm_loongarch_pch_pic_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { - return 0; + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic reading */ + vcpu->kvm->stat.pch_pic_read_exits++; + ret = loongarch_pch_pic_read(s, addr, len, val); + return ret; } static const struct kvm_io_device_ops kvm_loongarch_pch_pic_ops = { diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 06fd746b03b6..0ce636ddc2df 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -5,6 +5,8 @@ #include #include +#include +#include const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -104,3 +106,35 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -ENOIOCTLCMD; } + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *data, + bool line_status) +{ + bool level; + struct loongarch_pch_pic *s; + int type, vcpu, irq, vcpus, val, ret = 0; + + level = data->level; + val = data->irq; + s = kvm->arch.pch_pic; + vcpus = atomic_read(&kvm->online_vcpus); + + type = (val >> KVM_LOONGARCH_IRQ_TYPE_SHIFT) & KVM_LOONGARCH_IRQ_TYPE_MASK; + vcpu = (val >> KVM_LOONGARCH_IRQ_VCPU_SHIFT) & KVM_LOONGARCH_IRQ_VCPU_MASK; + irq = (val >> KVM_LOONGARCH_IRQ_NUM_SHIFT) & KVM_LOONGARCH_IRQ_NUM_MASK; + + switch (type) { + case KVM_LOONGARCH_IRQ_TYPE_IOAPIC: + if (irq < KVM_IRQCHIP_NUM_PINS) + pch_pic_set_irq(s, irq, level); + else if (irq < 256) + pch_msi_set_irq(kvm, irq, level); + else + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + return ret; +} -- Gitee From 49c918d5544dbb696c1587f640ea3296f5fbdd49 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Sat, 15 Jun 2024 18:59:35 +0800 Subject: [PATCH 11/44] LoongArch: KVM: Add PCHPIC user mode read and write functions Upstream: no Implement the communication interface between user mode programs and the kernel for the PCHPIC interrupt controller simulation. It is used to obtain or set the simulated state of the interrupt controller from a user mode process, for VM migration or VM save and restore.
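To make the new control group concrete, a VMM first places the controller with the INIT attribute and can then save or restore individual registers through the register group. The following userspace sketch is illustrative only and not part of this patch; dev_fd is assumed to come from KVM_CREATE_DEVICE with type KVM_DEV_TYPE_LA_IOAPIC, and 0x20 mirrors the kernel's PCH_PIC_MASK_START offset:

    /* hypothetical userspace sketch, not kernel code */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int pch_pic_init_and_save_mask(int dev_fd, __u64 base, __u64 *mask)
    {
            struct kvm_device_attr attr;

            /* place the pch pic at a guest physical base address */
            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL;
            attr.attr = KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT;
            attr.addr = (__u64)(unsigned long)&base;        /* kernel copies the base in */
            if (ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &attr))
                    return -1;

            /* read back one 8-byte register, here the mask register */
            memset(&attr, 0, sizeof(attr));
            attr.group = KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS;
            attr.attr = 0x20;                               /* PCH_PIC_MASK_START */
            attr.addr = (__u64)(unsigned long)mask;
            return ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &attr);
    }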
Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 5 + arch/loongarch/kvm/intc/pch_pic.c | 128 +++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 2b716f352fae..1204c28eaaaf 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -119,4 +119,9 @@ struct kvm_iocsr_entry { #define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 +#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS 0x40000005 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index b1d3d301e02f..556dce6644a2 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -315,16 +315,140 @@ static const struct kvm_io_device_ops kvm_loongarch_pch_pic_ops = { .write = kvm_loongarch_pch_pic_write, }; +static int kvm_loongarch_pch_pic_init(struct kvm_device *dev, u64 addr) +{ + int ret; + struct loongarch_pch_pic *s = dev->kvm->arch.pch_pic; + struct kvm_io_device *device; + struct kvm *kvm = dev->kvm; + + s->pch_pic_base = addr; + device = &s->device; + /* init device by pch pic writing and reading ops */ + kvm_iodevice_init(device, &kvm_loongarch_pch_pic_ops); + mutex_lock(&kvm->slots_lock); + /* register pch pic device */ + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, PCH_PIC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) + return -EFAULT; + + return 0; +} + +/* used by user space to get or set pch pic registers */ +static int kvm_loongarch_pch_pic_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, len = 8, ret = 0; + void __user *data; + void *p = NULL; + struct loongarch_pch_pic *s; + + s = dev->kvm->arch.pch_pic; + addr = attr->attr; + data = (void __user *)attr->addr; + + spin_lock(&s->lock); + /* get pointer to pch pic register by addr */ + switch (addr) { + case PCH_PIC_MASK_START: + p = &s->mask; + break; + case PCH_PIC_HTMSI_EN_START: + p = &s->htmsi_en; + break; + case PCH_PIC_EDGE_START: + p = &s->edge; + break; + case PCH_PIC_AUTO_CTRL0_START: + p = &s->auto_ctrl0; + break; + case PCH_PIC_AUTO_CTRL1_START: + p = &s->auto_ctrl1; + break; + case PCH_PIC_ROUTE_ENTRY_START: + p = s->route_entry; + len = 64; + break; + case PCH_PIC_HTMSI_VEC_START: + p = s->htmsi_vector; + len = 64; + break; + case PCH_PIC_INT_IRR_START: + p = &s->irr; + break; + case PCH_PIC_INT_ISR_START: + p = &s->isr; + break; + case PCH_PIC_POLARITY_START: + p = &s->polarity; + break; + default: + ret = -EINVAL; + } + + /* write or read value according to is_write */ + if (is_write) { + if (copy_from_user(p, data, len)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, len)) + ret = -EFAULT; + } + + spin_unlock(&s->lock); + return ret; +} + static int kvm_loongarch_pch_pic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + /* only support pch pic group registers */ + if (attr->group == KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS) + return kvm_loongarch_pch_pic_regs_access(dev, attr, false); + + return -EINVAL; } static int kvm_loongarch_pch_pic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return 0; + int ret = -EINVAL; + u64 addr; + void __user *uaddr = (void __user *)(long)attr->addr; + + switch (attr->group) { + case 
KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT: + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + if (!dev->kvm->arch.pch_pic) { + kvm_err("%s: please create pch_pic irqchip first!\n", __func__); + ret = -EFAULT; + break; + } + + ret = kvm_loongarch_pch_pic_init(dev, addr); + break; + default: + kvm_err("%s: unknown group (%d) attr (%lld)\n", __func__, attr->group, + attr->attr); + ret = -EINVAL; + break; + } + break; + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + ret = kvm_loongarch_pch_pic_regs_access(dev, attr, true); + break; + default: + break; + } + + return ret; } static void kvm_loongarch_pch_pic_destroy(struct kvm_device *dev) -- Gitee From e66c797131f51dfa22948801f9769e1b42ca98f7 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 3 Jun 2024 15:09:26 +0800 Subject: [PATCH 12/44] LoongArch: KVM: Add irqfd support Upstream: no Enable the KVM_IRQ_ROUTING KVM_IRQCHIP KVM_MSI configuration item, increase the KVM_CAP_IRQCHIP capability, and implement the query interface of the kernel irqchip. Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 2 + arch/loongarch/kvm/Kconfig | 4 ++ arch/loongarch/kvm/Makefile | 1 + arch/loongarch/kvm/intc/pch_pic.c | 28 +++++++++ arch/loongarch/kvm/irqfd.c | 87 +++++++++++++++++++++++++++ arch/loongarch/kvm/vm.c | 20 +++++- 6 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/kvm/irqfd.c diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 1204c28eaaaf..834a7003ca97 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -115,6 +115,8 @@ struct kvm_iocsr_entry { #define KVM_IRQCHIP_NUM_PINS 64 #define KVM_MAX_CORES 256 +#define KVM_LOONGARCH_VM_HAVE_IRQCHIP 0x40000001 + #define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 #define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index fda425babfb2..2f44176a45b5 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -24,6 +24,10 @@ config KVM select HAVE_KVM_DIRTY_RING_ACQ_REL select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_MSI select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 5661f2bc04f6..f363e4b6fcf3 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -21,5 +21,6 @@ kvm-y += vm.o kvm-y += intc/ipi.o kvm-y += intc/extioi.o kvm-y += intc/pch_pic.o +kvm-y += irqfd.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 556dce6644a2..7d053dbcd5c0 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -451,6 +451,29 @@ static int kvm_loongarch_pch_pic_set_attr(struct kvm_device *dev, return ret; } +static int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + + u32 nr = KVM_IRQCHIP_NUM_PINS; + int i, ret; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, 
nr, 0); + kfree(entries); + + return ret; +} + static void kvm_loongarch_pch_pic_destroy(struct kvm_device *dev) { struct kvm *kvm; @@ -476,6 +499,7 @@ static void kvm_loongarch_pch_pic_destroy(struct kvm_device *dev) static int kvm_loongarch_pch_pic_create(struct kvm_device *dev, u32 type) { + int ret; struct loongarch_pch_pic *s; struct kvm *kvm = dev->kvm; @@ -483,6 +507,10 @@ static int kvm_loongarch_pch_pic_create(struct kvm_device *dev, u32 type) if (kvm->arch.pch_pic) return -EINVAL; + ret = kvm_setup_default_irq_routing(kvm); + if (ret) + return ret; + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); if (!s) return -ENOMEM; diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c new file mode 100644 index 000000000000..bf67f329ebc9 --- /dev/null +++ b/arch/loongarch/kvm/irqfd.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + /* ioapic pin (0 ~ 64) <---> gsi(0 ~ 64) */ + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + + return 0; +} + +/* + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = kvm_set_ioapic_irq; + + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + if (e->type == KVM_IRQ_ROUTING_MSI) { + pch_msi_set_irq(kvm, e->msi.data, 1); + return 0; + } + + return -EWOULDBLOCK; +} + +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. 
+ */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + if (!level) + return -1; + + pch_msi_set_irq(kvm, e->msi.data, level); + return 0; +} diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 0ce636ddc2df..e3da437f1b12 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -72,6 +72,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { + case KVM_CAP_IRQCHIP: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: @@ -80,6 +81,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VM_ATTRIBUTES: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -104,7 +106,18 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -ENOIOCTLCMD; + int r; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + r = 1; + break; + } + default: + return -EINVAL; + } + + return r; } int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *data, @@ -138,3 +151,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *data, return ret; } + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return (bool)((!!kvm->arch.extioi) && (!!kvm->arch.pch_pic)); +} -- Gitee From d51404d4c4cf5853c6361dd02a8e2f9a694b54eb Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 6 May 2024 22:00:47 +0800 Subject: [PATCH 13/44] LoongArch: KVM: Add PV IPI support on host side commit e33bda7ee50c ("LoongArch: KVM: Add PV IPI support on host side") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. On LoongArch systems, IPI hw uses iocsr registers. There is one iocsr register access on IPI sending, and two iocsr accesses on IPI receiving for the IPI interrupt handler. In VM mode every iocsr access causes the VM to trap into the hypervisor, so one IPI hw notification costs three traps. In this patch PV IPI is added for the VM: a hypercall instruction is used by the IPI sender, and the hypervisor injects an SWI to the destination vcpu. In the SWI interrupt handler only the CSR.ESTAT register is written to clear the irq. CSR.ESTAT register access does not trap into the hypervisor, so with PV IPI supported there is one trap for the IPI sender and no trap for the IPI receiver: only one trap per IPI notification. This patch also adds IPI multicast support, with a method similar to x86. With IPI multicast support, an IPI notification can be sent to at most 128 vcpus at one time, which greatly reduces the number of traps into the hypervisor.
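To make the hypercall ABI above concrete, a guest-side sender might look roughly like the sketch below. This is illustrative only; the actual guest implementation is added by the follow-up guest-side patch, and the kvm_hypercall3() wrapper and KVM_HCALL_FUNC_IPI constant are the ones introduced in this series:

    /* illustrative guest-side sketch: a0 = function id, a1/a2 = cpu bitmap
     * relative to 'min', a3 = smallest target cpu id (up to 128 cpus/call)
     */
    static void pv_send_ipi_mask_sketch(const struct cpumask *mask)
    {
            unsigned long bitmap[2] = { 0, 0 };
            int cpu, min = -1;

            for_each_cpu(cpu, mask) {
                    if (min < 0)
                            min = cpu;      /* cpumask iteration is ascending */
                    __set_bit(cpu - min, bitmap);   /* assumes cpu - min < 128 */
            }

            if (min >= 0)
                    kvm_hypercall3(KVM_HCALL_FUNC_IPI, bitmap[0], bitmap[1], min);
    }

On the host side this lands in kvm_send_pv_ipi() above, which walks the two bitmap words and queues SWI0 on each destination vcpu.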
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_para.h | 62 +++++++++++++------------ arch/loongarch/include/asm/kvm_vcpu.h | 10 ++++ arch/loongarch/kvm/exit.c | 67 +++++++++++++-------------- 3 files changed, 75 insertions(+), 64 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 032101b941d9..335fb86778e2 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -7,23 +7,27 @@ */ #define HYPERVISOR_KVM 1 #define HYPERVISOR_VENDOR_SHIFT 8 -#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) -#define KVM_HCALL_CODE_PV_SERVICE 0 +#define HYPERCALL_ENCODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) + +#define KVM_HCALL_CODE_SERVICE 0 #define KVM_HCALL_CODE_SWDBG 1 -#define KVM_HCALL_PV_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE) -#define KVM_HCALL_FUNC_PV_IPI 1 + +#define KVM_HCALL_SERVICE HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE) +#define KVM_HCALL_FUNC_IPI 1 #define KVM_HCALL_FUNC_NOTIFY 2 -#define KVM_HCALL_SWDBG HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) + +#define KVM_HCALL_SWDBG HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) /* * LoongArch hypercall return code */ -#define KVM_HCALL_STATUS_SUCCESS 0 +#define KVM_HCALL_SUCCESS 0 #define KVM_HCALL_INVALID_CODE -1UL #define KVM_HCALL_INVALID_PARAMETER -2UL #define KVM_STEAL_PHYS_VALID BIT_ULL(0) #define KVM_STEAL_PHYS_MASK GENMASK_ULL(63, 6) + struct kvm_steal_time { __u64 steal; __u32 version; @@ -36,16 +40,16 @@ struct kvm_steal_time { * * a0: function identifier * a1-a6: args - * Return value will be placed in v0. + * Return value will be placed in a0. * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. 
*/ -static __always_inline long kvm_hypercall(u64 fid) +static __always_inline long kvm_hypercall0(u64 fid) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun) : "memory" @@ -56,12 +60,12 @@ static __always_inline long kvm_hypercall(u64 fid) static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun), "r" (a1) : "memory" @@ -73,17 +77,17 @@ static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) static __always_inline long kvm_hypercall2(u64 fid, unsigned long arg0, unsigned long arg1) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) - : "=r" (ret) - : "r" (fun), "r" (a1), "r" (a2) - : "memory" - ); + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2) + : "memory" + ); return ret; } @@ -91,14 +95,14 @@ static __always_inline long kvm_hypercall2(u64 fid, static __always_inline long kvm_hypercall3(u64 fid, unsigned long arg0, unsigned long arg1, unsigned long arg2) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; register unsigned long a3 asm("a3") = arg2; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun), "r" (a1), "r" (a2), "r" (a3) : "memory" @@ -108,10 +112,10 @@ static __always_inline long kvm_hypercall3(u64 fid, } static __always_inline long kvm_hypercall4(u64 fid, - unsigned long arg0, unsigned long arg1, unsigned long arg2, - unsigned long arg3) + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; @@ -119,7 +123,7 @@ static __always_inline long kvm_hypercall4(u64 fid, register unsigned long a4 asm("a4") = arg3; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4) : "memory" @@ -129,10 +133,10 @@ static __always_inline long kvm_hypercall4(u64 fid, } static __always_inline long kvm_hypercall5(u64 fid, - unsigned long arg0, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4) + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, unsigned long arg4) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; @@ -141,7 +145,7 @@ static __always_inline long kvm_hypercall5(u64 fid, register unsigned long a5 asm("a5") = arg4; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl 
"__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5) : "memory" @@ -150,7 +154,6 @@ static __always_inline long kvm_hypercall5(u64 fid, return ret; } - static inline unsigned int kvm_arch_para_features(void) { return 0; @@ -165,4 +168,5 @@ static inline bool kvm_check_and_clear_guest_paused(void) { return false; } + #endif /* _ASM_LOONGARCH_KVM_PARA_H */ diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 1da24994b838..d83b0515840a 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -112,4 +112,14 @@ static inline int kvm_queue_exception(struct kvm_vcpu *vcpu, return -1; } +static inline unsigned long kvm_read_reg(struct kvm_vcpu *vcpu, int num) +{ + return vcpu->arch.gprs[num]; +} + +static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val) +{ + vcpu->arch.gprs[num] = val; +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 74028ad40b24..e903753e44de 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -160,6 +160,9 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) run->iocsr_io.len = 8; run->iocsr_io.is_write = 1; break; + case CPUCFG_KVM_FEATURE: + vcpu->arch.gprs[rd] = KVM_FEATURE_IPI; + break; default: ret = EMULATE_FAIL; return ret; @@ -761,29 +764,26 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } -static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu) +static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) { - unsigned long ipi_bitmap; unsigned int min, cpu, i; + unsigned long ipi_bitmap; struct kvm_vcpu *dest; - min = vcpu->arch.gprs[LOONGARCH_GPR_A3]; + min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3); for (i = 0; i < 2; i++, min += BITS_PER_LONG) { - ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i]; + ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i); if (!ipi_bitmap) continue; cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG); while (cpu < BITS_PER_LONG) { dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min); - cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, - cpu + 1); + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1); if (!dest) continue; - /* - * Send SWI0 to dest vcpu to emulate IPI interrupt - */ + /* Send SWI0 to dest vcpu to emulate IPI interrupt */ kvm_queue_irq(dest, INT_SWI0); kvm_vcpu_kick(dest); } @@ -792,6 +792,27 @@ static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu) return 0; } +/* + * Hypercall emulation always return to guest, Caller should check retval. + */ +static void kvm_handle_service(struct kvm_vcpu *vcpu) +{ + unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); + long ret; + + switch (func) { + case KVM_HCALL_FUNC_IPI: + kvm_send_pv_ipi(vcpu); + ret = KVM_HCALL_SUCCESS; + break; + default: + ret = KVM_HCALL_INVALID_CODE; + break; + }; + + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); +} + static int kvm_save_notify(struct kvm_vcpu *vcpu) { unsigned long id, data; @@ -811,30 +832,6 @@ static int kvm_save_notify(struct kvm_vcpu *vcpu) return 0; }; -/* - * hypercall emulation always return to guest, Caller should check retval. 
- */ -static void kvm_handle_pv_service(struct kvm_vcpu *vcpu) -{ - unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0]; - long ret; - - switch (func) { - case KVM_HCALL_FUNC_PV_IPI: - kvm_pv_send_ipi(vcpu); - ret = KVM_HCALL_STATUS_SUCCESS; - break; - case KVM_HCALL_FUNC_NOTIFY: - ret = kvm_save_notify(vcpu); - break; - default: - ret = KVM_HCALL_INVALID_CODE; - break; - }; - - vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret; -} - static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) { larch_inst inst; @@ -846,9 +843,9 @@ static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) ret = RESUME_GUEST; switch (code) { - case KVM_HCALL_PV_SERVICE: + case KVM_HCALL_SERVICE: vcpu->stat.hypercall_exits++; - kvm_handle_pv_service(vcpu); + kvm_handle_service(vcpu); break; case KVM_HCALL_SWDBG: /* KVM_HC_SWDBG only in effective when SW_BP is enabled */ -- Gitee From 2feeae72392273df134253e8e51b8a69e7f38890 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 6 May 2024 22:00:47 +0800 Subject: [PATCH 14/44] LoongArch: KVM: Add PV IPI support on guest side commit 74c16b2e2b0c ("LoongArch: KVM: Add PV IPI support on guest side") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. The PARAVIRT config option and PV IPI are added for the guest side; function pv_ipi_init() is used to add the IPI sending and IPI receiving hooks. This function first checks whether the system runs in VM mode, and if the kernel runs in VM mode, it calls kvm_para_available() to detect the current hypervisor type (now only KVM type detection is supported). The paravirt functions can work only if the current hypervisor type is KVM, since only KVM is supported on LoongArch now. PV IPI uses virtual IPI sender and virtual IPI receiver functions. With the virtual IPI sender, the IPI message is stored in memory rather than in emulated HW. IPI multicast is also supported, and 128 vcpus can receive IPIs at the same time, like the x86 KVM method. The hypercall method is used for IPI sending. With the virtual IPI receiver, HW SWI0 is used rather than real IPI HW. Since the VCPU has separate HW SWI0 like the HW timer, there is no trap in IPI interrupt acknowledge. Since the IPI message is stored in memory, there is no trap in getting the IPI message. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index a3b39567442f..b29a0943cbd6 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -619,6 +619,15 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. +config PARAVIRT + bool "Enable paravirtualization code" + depends on AS_HAS_LVZ_EXTENSION + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + endmenu config ARCH_SELECT_MEMORY_MODEL -- Gitee From 3843c19c13473c3bd153ca45230731f740fa4016 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 6 May 2024 22:00:47 +0800 Subject: [PATCH 15/44] LoongArch: KVM: Add software breakpoint support commit 163e9fc6957f ("LoongArch: KVM: Add software breakpoint support") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. 
Checkpatch: no, to be consistent with upstream commit. When VM runs in kvm mode, system will not exit to host mode when executing a general software breakpoint instruction such as INSN_BREAK, trap exception happens in guest mode rather than host mode. In order to debug guest kernel on host side, one mechanism should be used to let VM exit to host mode. Here a hypercall instruction with a special code is used for software breakpoint usage. VM exits to host mode and kvm hypervisor identifies the special hypercall code and sets exit_reason with KVM_EXIT_DEBUG. And then let qemu handle it. Idea comes from ppc kvm, one api KVM_REG_LOONGARCH_DEBUG_INST is added to get the hypercall code. VMM needs get sw breakpoint instruction with this api and set the corresponding sw break point for guest kernel. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 5 +++++ arch/loongarch/kvm/exit.c | 12 ++++++------ arch/loongarch/kvm/vcpu.c | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 20a53ff6de3b..6a150214c15e 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -35,6 +35,11 @@ #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) +#define KVM_GUESTDBG_SW_BP_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) +#define KVM_GUESTDBG_VALID_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) + /* KVM_IRQ_LINE irq field index values */ #define KVM_LOONGARCH_IRQ_TYPE_SHIFT 24 #define KVM_LOONGARCH_IRQ_TYPE_MASK 0xff diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index e903753e44de..7a132ce416b4 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -834,9 +834,9 @@ static int kvm_save_notify(struct kvm_vcpu *vcpu) static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) { + int ret; larch_inst inst; unsigned int code; - int ret; inst.word = vcpu->arch.badi; code = inst.reg0i15_format.immediate; @@ -848,13 +848,13 @@ static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) kvm_handle_service(vcpu); break; case KVM_HCALL_SWDBG: - /* KVM_HC_SWDBG only in effective when SW_BP is enabled */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + /* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */ + if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) { vcpu->run->exit_reason = KVM_EXIT_DEBUG; ret = RESUME_HOST; - } else - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; - break; + break; + } + fallthrough; default: /* Treat it as noop intruction, only set return value */ vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 91f3f13c693d..4d8fdd350f91 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -782,7 +782,7 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, *v = drdtime() + vcpu->kvm->arch.time_offset; break; case KVM_REG_LOONGARCH_DEBUG_INST: - *v = INSN_HVCL + KVM_HCALL_SWDBG; + *v = INSN_HVCL | KVM_HCALL_SWDBG; break; default: ret = -EINVAL; -- Gitee From 8f9e3139c13a26dc96fe5c02daebdb8761e6eca5 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 6 May 2024 22:00:47 +0800 Subject: [PATCH 16/44] LoongArch: KVM: Add mmio trace events support commit 7b7e584f90bf ("LoongArch: KVM: Add mmio trace events support") Conflict: none Backport-reason: Synchronize upstream linux loongarch 
kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Add mmio trace events support, currently generic mmio events KVM_TRACE_MMIO_WRITE/xxx_READ/xx_READ_UNSATISFIED are added here. Also vcpu id field is added for all kvm trace events, since perf KVM tool parses vcpu id information for kvm entry event. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/exit.c | 8 ++++++++ arch/loongarch/kvm/trace.h | 20 ++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 7a132ce416b4..48a59a906b4b 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -484,6 +485,8 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, run->mmio.len, + run->mmio.phys_addr, NULL); return EMULATE_DO_MMIO; } @@ -531,6 +534,9 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) break; } + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, + run->mmio.phys_addr, run->mmio.data); + return er; } @@ -642,6 +648,8 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, + run->mmio.phys_addr, data); return EMULATE_DO_MMIO; } diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index c2484ad4cffa..1783397b1bc8 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -19,14 +19,16 @@ DECLARE_EVENT_CLASS(kvm_transition, TP_PROTO(struct kvm_vcpu *vcpu), TP_ARGS(vcpu), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; ), - TP_printk("PC: 0x%08lx", __entry->pc) + TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc) ); DEFINE_EVENT(kvm_transition, kvm_enter, @@ -54,19 +56,22 @@ DECLARE_EVENT_CLASS(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) __field(unsigned int, reason) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), - TP_printk("[%s]PC: 0x%08lx", - __print_symbolic(__entry->reason, - kvm_trace_symbol_exit_types), - __entry->pc) + TP_printk("vcpu %u [%s] PC: 0x%08lx", + __entry->vcpu_id, + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) ); DEFINE_EVENT(kvm_exit, kvm_exit_idle, @@ -85,14 +90,17 @@ TRACE_EVENT(kvm_exit_gspr, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word), TP_ARGS(vcpu, inst_word), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned int, inst_word) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->inst_word = inst_word; ), - TP_printk("Inst word: 0x%08x", __entry->inst_word) + TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id, + __entry->inst_word) ); #define KVM_TRACE_AUX_SAVE 0 -- Gitee From 81ff3da8319c38917e2210dce10e6c2044133375 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:50 +0800 Subject: [PATCH 17/44] LoongArch: KVM: Sync pending interrupt when getting ESTAT from user mode commit e306e514906c ("LoongArch: KVM: Sync 
pending interrupt when getting ESTAT from user mode") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Currently interrupts are posted and cleared with the asynchronous mode, meanwhile they are saved in SW state vcpu::arch::irq_pending and vcpu::arch::irq_clear. When vcpu is ready to run, pending interrupt is written back to CSR.ESTAT register from SW state vcpu::arch::irq_pending at the guest entrance. During VM migration stage, vcpu is put into stopped state, however pending interrupts are not synced to CSR.ESTAT register. So interrupts will be lost when the VCPU is migrated to another host machine. Here in this patch when ESTAT CSR register is read from VMM user mode, pending interrupts are synchronized to ESTAT also, so that VMM can get correct pending interrupts. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/vcpu.c | 11 +++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 4d8fdd350f91..ab5fc86d8a1a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -508,6 +508,17 @@ static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) return -EINVAL; if (id == LOONGARCH_CSR_ESTAT) { + preempt_disable(); + vcpu_load(vcpu); + /* + * Sync pending interrupts into ESTAT so that interrupt + * remains during VM migration stage + */ + kvm_deliver_intr(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; + vcpu_put(vcpu); + preempt_enable(); + /* ESTAT IP0~IP7 get from GINTC */ gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); -- Gitee From c51efd285529de2761abba49ce7f55c610077d88 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:50 +0800 Subject: [PATCH 18/44] LoongArch: KVM: Delay secondary mmu tlb flush until guest entry commit b5d4e2325db2 ("LoongArch: KVM: Delay secondary mmu tlb flush until guest entry") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. With hardware assisted virtualization, there are two level HW mmu, one is GVA to GPA mapping, the other is GPA to HPA mapping which is called secondary mmu in generic. If there is page fault for secondary mmu, there needs tlb flush operation indexed with fault GPA address and VMID. VMID is stored at register CSR.GSTAT and will be reload or recalculated before guest entry. Currently CSR.GSTAT is not saved and restored during VCPU context switch, instead it is recalculated during guest entry. So CSR.GSTAT is effective only when a VCPU runs in guest mode, however it may not be effective if the VCPU exits to host mode. Since register CSR.GSTAT may be stale, it may record the VMID of the last schedule-out VCPU, rather than the current VCPU. Function kvm_flush_tlb_gpa() should be called with its real VMID, so here move it to the guest entrance. Also an arch-specific request id KVM_REQ_TLB_FLUSH_GPA is added to flush tlb for secondary mmu, and it can be optimized if VMID is updated, since all guest tlb entries will be invalid if VMID is updated.
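The ESTAT synchronization in PATCH 17 above is consumed through the one-reg interface; a VMM reading CSRs during migration would look roughly like the sketch below. The reg-id encoding here is an assumption for illustration only -- the authoritative KVM_REG_LOONGARCH_CSR macros and CSR numbering live in the LoongArch uapi headers.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assumed for illustration; take the real values from asm/kvm.h */
#define KVM_REG_LOONGARCH_CSR_SKETCH	(KVM_REG_LOONGARCH | KVM_REG_SIZE_U64 | 0x10000)
#define LOONGARCH_CSR_ESTAT_NUM		0x5

static int vmm_read_estat(int vcpu_fd, uint64_t *estat)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_LOONGARCH_CSR_SKETCH | LOONGARCH_CSR_ESTAT_NUM,
		.addr = (uintptr_t)estat,
	};

	/*
	 * With the patch above, the kernel folds pending interrupts into
	 * ESTAT before returning it, so no interrupt is lost on migration.
	 */
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}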
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 2 ++ arch/loongarch/kvm/main.c | 1 + arch/loongarch/kvm/mmu.c | 3 ++- arch/loongarch/kvm/tlb.c | 5 +---- arch/loongarch/kvm/vcpu.c | 18 ++++++++++++++++++ 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 6a150214c15e..2794c11dd86f 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -33,6 +33,7 @@ #define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) #define KVM_GUESTDBG_SW_BP_MASK \ @@ -230,6 +231,7 @@ struct kvm_vcpu_arch { /* vcpu's vpid */ u64 vpid; + gpa_t flush_gpa; /* Frequency of stable timer in Hz */ u64 timer_mhz; diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 285bd4126e54..1f50f6723739 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -244,6 +244,7 @@ void kvm_check_vpid(struct kvm_vcpu *vcpu) kvm_update_vpid(vcpu, cpu); trace_kvm_vpid_change(vcpu, vcpu->arch.vpid); vcpu->cpu = cpu; + kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); } /* Restore GSTAT(0x50).vpid */ diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 915f17527893..03379ebd8f6a 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -940,7 +940,8 @@ int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) return ret; /* Invalidate this entry in the TLB */ - kvm_flush_tlb_gpa(vcpu, gpa); + vcpu->arch.flush_gpa = gpa; + kvm_make_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); return 0; } diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c index 02535df6b51f..ebdbe9264e9c 100644 --- a/arch/loongarch/kvm/tlb.c +++ b/arch/loongarch/kvm/tlb.c @@ -23,10 +23,7 @@ void kvm_flush_tlb_all(void) void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa) { - unsigned long flags; - - local_irq_save(flags); + lockdep_assert_irqs_disabled(); gpa &= (PAGE_MASK << 1); invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa); - local_irq_restore(flags); } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index ab5fc86d8a1a..41315d9d0c8b 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -288,6 +288,16 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static void kvm_late_check_requests(struct kvm_vcpu *vcpu) +{ + lockdep_assert_irqs_disabled(); + if (kvm_check_request(KVM_REQ_TLB_FLUSH_GPA, vcpu)) + if (vcpu->arch.flush_gpa != INVALID_GPA) { + kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa); + vcpu->arch.flush_gpa = INVALID_GPA; + } +} + /* * Check and handle pending signal and vCPU requests etc * Run with irq enabled and preempt enabled @@ -339,6 +349,13 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) smp_store_mb(vcpu->mode, IN_GUEST_MODE); kvm_check_vpid(vcpu); kvm_check_pmu(vcpu); + + /* + * Called after function kvm_check_vpid() + * Since it updates CSR.GSTAT used by kvm_flush_tlb_gpa(), + * and it may also clear KVM_REQ_TLB_FLUSH_GPA pending bit + */ + kvm_late_check_requests(vcpu); vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY); /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; @@ -1293,6 +1310,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct loongarch_csrs *csr; 
vcpu->arch.vpid = 0; + vcpu->arch.flush_gpa = INVALID_GPA; hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); vcpu->arch.swtimer.function = kvm_swtimer_wakeup; -- Gitee From dde72983e179d0510df9ae6e394ce2012279daa6 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 19/44] LoongArch: KVM: Select huge page only if secondary mmu supports it commit 2f56f9ea4dc3 ("LoongArch: KVM: Select huge page only if secondary mmu supports it") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Currently page level selection about secondary mmu depends on memory slot and page level about host mmu. There will be problems if page level of secondary mmu is zero already. Huge page cannot be selected if there is normal page mapped in secondary mmu already, since it is not supported to merge normal pages into huge pages now. So page level selection should depend on the following three conditions. 1. Memslot is aligned for huge page and vm is not migrating. 2. Page level of host mmu is also huge page. 3. Page level of secondary mmu is suitable for huge page. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/mmu.c | 16 +++++++++++++--- arch/loongarch/kvm/vcpu.c | 1 - 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 03379ebd8f6a..67d92b38b0a9 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -890,10 +890,20 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, write)) { - level = 0; - } else { + if (fault_supports_huge_mapping(memslot, hva, write)) { + /* Check page level about host mmu*/ level = host_pfn_mapping_level(kvm, gfn, memslot); + if (level == 1) { + /* + * Check page level about secondary mmu + * Disable hugepage if it is normal page on + * secondary mmu already + */ + ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); + if (ptep && !kvm_pte_huge(*ptep)) + level = 0; + } + if (level == 1) { gfn = gfn & ~(PTRS_PER_PTE - 1); pfn = pfn & ~(PTRS_PER_PTE - 1); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 41315d9d0c8b..781640239d16 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1403,7 +1403,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); /* Restore hardware PMU CSRs */ kvm_restore_pmu(vcpu); -- Gitee From 31ea5df6ddb174a63fc415d8fa6bc7dab82ec9c4 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 20/44] LoongArch: KVM: Discard dirty page tracking on readonly memslot commit b072cbf0233b ("LoongArch: KVM: Discard dirty page tracking on readonly memslot") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. For readonly memslot such as UEFI BIOS or UEFI var space, guest cannot write this memory space directly. So it is not necessary to track dirty pages for readonly memslot. Here we make such optimization in function kvm_arch_commit_memory_region().
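To make the readonly case concrete, this is roughly how a VMM registers a BIOS slot with the generic KVM memslot ioctl; a hedged sketch (slot number, addresses and names are illustrative). A guest write to such a slot exits with KVM_EXIT_MMIO rather than dirtying a page, which is why dirty tracking on it is pointless.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int map_bios_readonly(int vm_fd, void *host_bios, uint64_t gpa, uint64_t size)
{
	struct kvm_userspace_memory_region region = {
		.slot            = 1,			/* illustrative slot id */
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = (uintptr_t)host_bios,
	};

	/* Registers UEFI BIOS / var space as read-only guest memory */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}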
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/mmu.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 67d92b38b0a9..0a1a8f49cc81 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -444,6 +444,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { int needs_flush; + u32 old_flags = old ? old->flags : 0; + u32 new_flags = new ? new->flags : 0; + bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES; + + /* Only track memslot flags changed */ + if (change != KVM_MR_FLAGS_ONLY) + return; + + /* Discard dirty page tracking on readonly memslot */ + if ((old_flags & new_flags) & KVM_MEM_READONLY) + return; /* * If dirty page logging is enabled, write protect all pages in the slot @@ -454,9 +465,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * MOVE/DELETE: The old mappings will already have been cleaned up by * kvm_arch_flush_shadow_memslot() */ - if (change == KVM_MR_FLAGS_ONLY && - (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) { spin_lock(&kvm->mmu_lock); /* Write protect GPA page table entries */ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, -- Gitee From d4a91b84976299f48c0d712eb4937589a23333ed Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 21/44] LoongArch: KVM: Add memory barrier before update pmd entry commit 32d4b999dade ("LoongArch: KVM: Add memory barrier before update pmd entry") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. When updating pmd entry such as allocating new pmd page or splitting huge page into normal page, it is necessary to firstly update all pte entries, and then update pmd entry. It is weak order with LoongArch system, there will be problem if other VCPUs see pmd update firstly while ptes are not updated. Here smp_wmb() is added to assure this. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 0a1a8f49cc81..a3c7c1059f70 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -163,6 +163,7 @@ static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm, child = kvm_mmu_memory_cache_alloc(cache); _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]); + smp_wmb(); /* Make pte visible before pmd */ kvm_set_pte(entry, __pa(child)); } else if (kvm_pte_huge(*entry)) { return entry; @@ -778,6 +779,7 @@ static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t g val += PAGE_SIZE; } + smp_wmb(); /* Make pte visible before pmd */ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ kvm_set_pte(ptep, __pa(child)); -- Gitee From d7330a20b9c339dba8ed0066a65b373879fbe356 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 22/44] LoongArch: KVM: Add dirty bitmap initially all set support commit 8c3470425270 ("LoongArch: KVM: Add dirty bitmap initially all set support") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. 
Add KVM_DIRTY_LOG_INITIALLY_SET support on LoongArch system, this feature comes from other architectures like x86 and arm64. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 4 ++++ arch/loongarch/kvm/mmu.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 2794c11dd86f..00b3d7fe62d1 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -59,6 +59,10 @@ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) + +#define KVM_DIRTY_LOG_MANUAL_CAPS \ + (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET) + struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a3c7c1059f70..fa2306c2df4b 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -467,6 +467,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * kvm_arch_flush_shadow_memslot() */ if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) { + /* + * Initially-all-set does not require write protecting any page + * because they're all assumed to be dirty. + */ + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + spin_lock(&kvm->mmu_lock); /* Write protect GPA page table entries */ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, -- Gitee From 9de6a601bbac820c94e7382d0adde0d917f5bb58 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 23/44] LoongArch: KVM: Mark page accessed and dirty with page ref added commit ebf00272da5c ("LoongArch: KVM: Mark page accessed and dirty with page ref added") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Function kvm_map_page_fast() is fast path of secondary mmu page fault flow, pfn is parsed from secondary mmu page table walker. However the corresponding page reference is not added, it is dangerous to access page out of mmu_lock. Here page ref is added inside mmu_lock, function kvm_set_pfn_accessed() and kvm_set_pfn_dirty() is called with page ref added, so that the page will not be freed by others. Also kvm_set_pfn_accessed() is removed here since it is called in the following function kvm_release_pfn_clean(). Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/mmu.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index fa2306c2df4b..539f03e7cf42 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -589,6 +589,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; struct kvm_memory_slot *slot; + struct page *page; spin_lock(&kvm->mmu_lock); @@ -631,19 +632,22 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ if (changed) { kvm_set_pte(ptep, new); pfn = kvm_pte_pfn(new); + page = kvm_pfn_to_refcounted_page(pfn); + if (page) + get_page(page); } spin_unlock(&kvm->mmu_lock); - /* - * Fixme: pfn may be freed after mmu_lock - * kvm_try_get_pfn(pfn)/kvm_release_pfn pair to prevent this?
- */ - if (kvm_pte_young(changed)) - kvm_set_pfn_accessed(pfn); + if (changed) { + if (kvm_pte_young(changed)) + kvm_set_pfn_accessed(pfn); - if (kvm_pte_dirty(changed)) { - mark_page_dirty(kvm, gfn); - kvm_set_pfn_dirty(pfn); + if (kvm_pte_dirty(changed)) { + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + if (page) + put_page(page); } return ret; out: @@ -952,7 +956,6 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) kvm_set_pfn_dirty(pfn); } - kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); out: srcu_read_unlock(&kvm->srcu, srcu_idx); -- Gitee From ff28a7b56807438e9db2d0407e9d918e73960cf9 Mon Sep 17 00:00:00 2001 From: Jia Qingtong Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 24/44] LoongArch: KVM: always make pte young in page map's fast path commit d7ad41a31d91 ("LoongArch: KVM: always make pte young in page map's fast path") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. It seems redundant to check if pte is young before the call to kvm_pte_mkyoung() in kvm_map_page_fast(). Just remove the check. Reviewed-by: Bibo Mao Signed-off-by: Jia Qingtong Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/mmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 539f03e7cf42..d312921f3ab9 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -601,10 +601,8 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ } /* Track access to pages marked old */ - new = *ptep; - if (!kvm_pte_young(new)) - new = kvm_pte_mkyoung(new); - /* call kvm_set_pfn_accessed() after unlock */ + new = kvm_pte_mkyoung(*ptep); + /* call kvm_set_pfn_accessed() after unlock */ if (write && !kvm_pte_dirty(new)) { if (!kvm_pte_write(new)) { -- Gitee From b6fbeefb066d9bb19f40ff3c50ac0d59856ef552 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 25/44] LoongArch: KVM: Add PV steal time support in host side commit b4ba157044ea ("LoongArch: KVM: Add PV steal time support in host side") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Add ParaVirt steal time feature in host side, VM can search supported features provided by KVM hypervisor, a feature KVM_FEATURE_STEAL_TIME is added here. Like x86, steal time structure is saved in guest memory, one hypercall function KVM_HCALL_FUNC_NOTIFY is added to notify KVM to enable this feature. One CPU attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to save and restore the base address of steal time structure when a VM is migrated. 
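Those two pieces -- the notify hypercall and the PVTIME attribute -- give a VMM everything needed to migrate the steal-time base address. A hedged sketch of the userspace side, using the generic vCPU device-attribute ioctls (error handling elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Save the steal-time GPA on the source host before migration */
static int pvtime_get_gpa(int vcpu_fd, uint64_t *gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uintptr_t)gpa,
	};
	return ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
}

/* Restore it on the destination host so steal-time accounting resumes */
static int pvtime_set_gpa(int vcpu_fd, uint64_t gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uintptr_t)&gpa,
	};
	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}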
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 2 +- arch/loongarch/include/asm/kvm_para.h | 1 + arch/loongarch/include/asm/kvm_vcpu.h | 5 ++ arch/loongarch/kvm/Kconfig | 1 + arch/loongarch/kvm/exit.c | 55 ++++++++-------- arch/loongarch/kvm/vcpu.c | 95 +++++++++++++++++++++------ 6 files changed, 113 insertions(+), 46 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 00b3d7fe62d1..0f456b53f368 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) -#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1) #define KVM_GUESTDBG_SW_BP_MASK \ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 335fb86778e2..a0ed1aacb8b4 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -35,6 +35,7 @@ struct kvm_steal_time { __u32 pad[12]; }; + /* * Hypercall interface for KVM hypervisor * diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index d83b0515840a..ad6a31f01c56 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -122,4 +122,9 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v vcpu->arch.gprs[num] = val; } +static inline bool kvm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 2f44176a45b5..461a465e49fd 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -32,6 +32,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select KVM_XFER_TO_GUEST_WORK + select SCHED_INFO select MMU_NOTIFIER select PREEMPT_NOTIFIERS help diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 48a59a906b4b..8e415503e8e8 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -259,12 +259,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) vcpu->arch.gprs[rd] = 0; break; case CPUCFG_KVM_FEATURE: - ret = 0; - if ((plv & CSR_CRMD_PLV) == PLV_KERN) { - ret = KVM_FEATURE_PV_IPI; - if (sched_info_on()) - ret |= KVM_FEATURE_STEAL_TIME; - } + ret = KVM_FEATURE_IPI; + if (kvm_pvtime_supported()) + ret |= KVM_FEATURE_STEAL_TIME; vcpu->arch.gprs[rd] = ret; break; default: @@ -742,6 +739,31 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static long kvm_save_notify(struct kvm_vcpu *vcpu) +{ + unsigned long id, data; + + id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1); + data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2); + switch (id) { + case BIT(KVM_FEATURE_STEAL_TIME): + if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return KVM_HCALL_INVALID_PARAMETER; + + vcpu->arch.st.guest_addr = data; + if (!(data & KVM_STEAL_PHYS_VALID)) + return 0; + + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + return 0; + default: + return KVM_HCALL_INVALID_CODE; + }; + + return KVM_HCALL_INVALID_CODE; +}; + /* * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. * @vcpu: Virtual CPU context. 
@@ -821,25 +843,6 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu) kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } -static int kvm_save_notify(struct kvm_vcpu *vcpu) -{ - unsigned long id, data; - - id = vcpu->arch.gprs[LOONGARCH_GPR_A1]; - data = vcpu->arch.gprs[LOONGARCH_GPR_A2]; - switch (id) { - case KVM_FEATURE_STEAL_TIME: - vcpu->arch.st.guest_addr = data; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - break; - default: - break; - }; - - return 0; -}; - static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) { int ret; @@ -865,7 +868,7 @@ static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) fallthrough; default: /* Treat it as noop intruction, only set return value */ - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE); break; } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 781640239d16..bb60f063e3e2 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -33,12 +33,12 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { + u32 version; + u64 steal; + gpa_t gpa; + struct kvm_memslots *slots; struct kvm_steal_time __user *st; struct gfn_to_hva_cache *ghc; - struct kvm_memslots *slots; - gpa_t gpa; - u64 steal; - u32 version; ghc = &vcpu->arch.st.cache; gpa = vcpu->arch.st.guest_addr; @@ -48,8 +48,7 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) gpa &= KVM_STEAL_PHYS_MASK; slots = kvm_memslots(vcpu->kvm); if (slots->generation != ghc->generation || gpa != ghc->gpa) { - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, - sizeof(*st))) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { ghc->gpa = INVALID_GPA; return; } @@ -58,19 +57,17 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) st = (struct kvm_steal_time __user *)ghc->hva; unsafe_get_user(version, &st->version, out); if (version & 1) - version += 1; + version += 1; /* first time write, random junk */ + version += 1; unsafe_put_user(version, &st->version, out); - /* Make sure st->version is written first */ smp_wmb(); unsafe_get_user(steal, &st->steal, out); - steal += current->sched_info.run_delay - - vcpu->arch.st.last_steal; + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; unsafe_put_user(steal, &st->steal, out); - /* Make sure st->steal is written first */ smp_wmb(); version += 1; unsafe_put_user(version, &st->version, out); @@ -78,11 +75,6 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } -static bool kvm_pvtime_supported(void) -{ - return !!sched_info_on(); -} - static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -282,7 +274,7 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return RESUME_HOST; - if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) kvm_update_stolen_time(vcpu); return RESUME_GUEST; @@ -969,6 +961,16 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + return 0; +} + static int 
kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -988,7 +990,7 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } -static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, +static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int ret = 0; @@ -1004,6 +1006,23 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, return ret; } +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 gpa; + u64 __user *user = (u64 __user *)attr->addr; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + gpa = vcpu->arch.st.guest_addr; + if (put_user(gpa, user)) + return -EFAULT; + + return 0; +} + static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -1011,7 +1030,7 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_LOONGARCH_VCPU_CPUCFG: - ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + ret = kvm_loongarch_cpucfg_get_attr(vcpu, attr); break; case KVM_LOONGARCH_VCPU_PVTIME_CTRL: ret = kvm_loongarch_pvtime_get_attr(vcpu, attr); @@ -1029,6 +1048,43 @@ static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int idx, ret = 0; + u64 gpa, __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + if (!kvm_pvtime_supported() || + attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + if (get_user(gpa, user)) + return -EFAULT; + + if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return -EINVAL; + + if (!(gpa & KVM_STEAL_PHYS_VALID)) { + vcpu->arch.st.guest_addr = gpa; + return 0; + } + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) { + vcpu->arch.st.guest_addr = gpa; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + } + + return ret; +} + static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -1403,6 +1459,7 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); /* Restore hardware PMU CSRs */ kvm_restore_pmu(vcpu); -- Gitee From c555314c6840cb0351667d467da714fdc9080ecc Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 9 Jul 2024 16:25:51 +0800 Subject: [PATCH 26/44] LoongArch: KVM: Add PV steal time support in guest side commit 03779999ac30 ("LoongArch: KVM: Add PV steal time support in guest side") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Per-cpu struct kvm_steal_time is added here, its size is 64 bytes and also defined as 64 bytes, so that the whole structure is in one physical page. When a VCPU is online, function pv_enable_steal_time() is called. This function will pass guest physical address of struct kvm_steal_time and tells hypervisor to enable steal time. When a vcpu is offline, physical address is set as 0 and tells hypervisor to disable steal time. 
Here is an output of vmstat on guest when there is workload on both host and guest. It shows steal time stat information.

procs -----------memory---------- -----io---- -system-- ------cpu-----
 r  b swpd    free  inact active bi bo   in   cs us sy id wa st
15  1    0 7583616 184112 72208 20  0  162   52 31  6 43  0 20
17  0    0 7583616 184704 72192  0  0 6318 6885  5 60  8  5 22
16  0    0 7583616 185392 72144  0  0 1766 1081  0 49  0  1 50
16  0    0 7583616 184816 72304  0  0 6300 6166  4 62 12  2 20
18  0    0 7583632 184480 72240  0  0 2814 1754  2 58  4  1 35

Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- .../admin-guide/kernel-parameters.txt | 7 +- arch/loongarch/Kconfig | 11 ++ arch/loongarch/kernel/paravirt.c | 144 +++++++++++++++- 3 files changed, 158 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5102b9fd8da4..805636deeaaf 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3948,9 +3948,10 @@ vulnerability. System may allow data leaks with this option. - no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized - steal time accounting. steal time is computed, but - won't influence scheduler behaviour + no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY] + Disable paravirtualized steal time accounting. steal time + is computed, but won't influence scheduler behaviour + nosync [HW,M68K] Disables sync negotiation for all devices. diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b29a0943cbd6..8c1854ef9561 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -628,6 +628,17 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + endmenu config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 56182c64ab38..f55b7e027d46 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -8,10 +8,10 @@ #include #include +static int has_steal_clock; struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); -static int has_steal_clock; static u64 native_steal_clock(int cpu) { @@ -71,6 +71,34 @@ static int pv_register_steal_time(void) return 0; } +static bool steal_acc = true; + +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 paravt_steal_clock(int cpu) +{ + int version; + u64 steal; + struct kvm_steal_time *src; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + virt_rmb(); /* Make sure that the version is read before the steal */ + steal = src->steal; + virt_rmb(); /* Make sure that the steal is read before the next version */ + + } while ((version & 1) || (version != src->version)); + + return steal; +} + #ifdef CONFIG_SMP static void pv_send_ipi_single(int cpu, unsigned int action) { @@ -279,3 +335,117 @@ int __init pv_time_init(void) pr_info("Using stolen time PV\n"); return 0; } + +static int pv_enable_steal_time(void) +{ + int cpu = smp_processor_id(); + unsigned long addr; + struct kvm_steal_time *st; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be in one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + + return 0; +} + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); +} + +#ifdef CONFIG_SMP +static int pv_time_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_enable_steal_time(); + local_irq_restore(flags); + + return 0; +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + + return 0; +} +#endif + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int r, feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + r = pv_enable_steal_time(); + if (r < 0) { + has_steal_clock = 0; + return 0; + } + register_reboot_notifier(&pv_reboot_nb); + +#ifdef CONFIG_SMP + r = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "loongarch/pv_time:online", + pv_time_cpu_online, pv_time_cpu_down_prepare); + if (r < 0) { + has_steal_clock = 0; + pr_err("Failed to install cpu hotplug callbacks\n"); + return r; + } +#endif + + static_call_update(pv_steal_clock, paravt_steal_clock); + + static_key_slow_inc(&paravirt_steal_enabled); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (steal_acc) + static_key_slow_inc(&paravirt_steal_rq_enabled); +#endif + + pr_info("Using paravirt steal-time\n"); + + return 0; +} -- Gitee From f4a7ec461375e0a7ad7a6061b3133b1f26c0546a Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 10 Jul 2024 16:50:27 +0800 Subject: [PATCH 27/44] perf kvm: Add kvm-stat for loongarch64 commit 492ac37fa38f ("perf kvm: Add kvm-stat for loongarch64") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit. Add support for 'perf kvm stat' on loongarch64 platform, now only kvm exit event is supported. Here is example output about "perf kvm --host stat report" command:

Event name           Samples  Sample%    Time (ns)   Time%  Mean Time (ns)
Mem Store              83969   51.00%    625697070   8.00%            7451
Mem Read               37641   22.00%    112485730   1.00%            2988
Interrupt              15542    9.00%     20620190   0.00%            1326
IOCSR                  15207    9.00%     94296190   1.00%            6200
Hypercall               4873    2.00%     12265280   0.00%            2516
Idle                    3713    2.00%   6322055860  87.00%         1702681
FPU                     1819    1.00%      2750300   0.00%            1511
Inst Fetch               502    0.00%      1341740   0.00%            2672
Mem Modify               324    0.00%       602240   0.00%            1858
CPUCFG                    55    0.00%        77610   0.00%            1411
CSR                       12    0.00%        19690   0.00%            1640
LASX                       3    0.00%         4870   0.00%            1623
LSX                        2    0.00%         2100   0.00%            1050

Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- tools/perf/arch/loongarch/Makefile | 1 + tools/perf/arch/loongarch/util/Build | 2 + tools/perf/arch/loongarch/util/header.c | 96 +++++++++++++ tools/perf/arch/loongarch/util/kvm-stat.c | 139 ++++++++++++++++++++++ 4 files changed, 238 insertions(+) create mode 100644 tools/perf/arch/loongarch/util/header.c create mode 100644 tools/perf/arch/loongarch/util/kvm-stat.c diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index c392e7af4743..c8be64c5cdb4 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index d776125a2d06..b12d374d7096 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,5 +1,7 @@ +perf-y += header.o perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c new file mode 100644 index 000000000000..d962dff55512 --- /dev/null +++ b/tools/perf/arch/loongarch/util/header.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid().
+ * + * Author: Nikita Shubin + * Bibo Mao + * Huacai Chen + */ + +#include +#include +#include +#include +#include "util/debug.h" +#include "util/header.h" + +/* + * Output example from /proc/cpuinfo + * CPU Family : Loongson-64bit + * Model Name : Loongson-3C5000 + * CPU Revision : 0x10 + * FPU Revision : 0x01 + */ +#define CPUINFO_MODEL "Model Name" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + unsigned long line_sz; + char *line, *model, *cpuid; + FILE *file; + + file = fopen(CPUINFO, "r"); + if (file == NULL) + return NULL; + + line = model = cpuid = NULL; + while (getline(&line, &line_sz, file) != -1) { + if (strncmp(line, CPUINFO_MODEL, strlen(CPUINFO_MODEL))) + continue; + + model = _get_field(line); + if (!model) + goto out_free; + break; + } + + if (model && (asprintf(&cpuid, "%s", model) < 0)) + cpuid = NULL; + +out_free: + fclose(file); + free(model); + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + int ret = 0; + char *cpuid = _get_cpuid(); + + if (!cpuid) + return EINVAL; + + if (sz < strlen(cpuid)) { + ret = ENOBUFS; + goto out_free; + } + + scnprintf(buffer, sz, "%s", cpuid); + +out_free: + free(cpuid); + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} diff --git a/tools/perf/arch/loongarch/util/kvm-stat.c b/tools/perf/arch/loongarch/util/kvm-stat.c new file mode 100644 index 000000000000..a7859a3a9a51 --- /dev/null +++ b/tools/perf/arch/loongarch/util/kvm-stat.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/pmus.h" + +#define LOONGARCH_EXCEPTION_INT 0 +#define LOONGARCH_EXCEPTION_PIL 1 +#define LOONGARCH_EXCEPTION_PIS 2 +#define LOONGARCH_EXCEPTION_PIF 3 +#define LOONGARCH_EXCEPTION_PME 4 +#define LOONGARCH_EXCEPTION_FPD 15 +#define LOONGARCH_EXCEPTION_SXD 16 +#define LOONGARCH_EXCEPTION_ASXD 17 +#define LOONGARCH_EXCEPTION_GSPR 22 +#define LOONGARCH_EXCEPTION_CPUCFG 100 +#define LOONGARCH_EXCEPTION_CSR 101 +#define LOONGARCH_EXCEPTION_IOCSR 102 +#define LOONGARCH_EXCEPTION_IDLE 103 +#define LOONGARCH_EXCEPTION_OTHERS 104 +#define LOONGARCH_EXCEPTION_HVC 23 + +#define loongarch_exception_type \ + {LOONGARCH_EXCEPTION_INT, "Interrupt" }, \ + {LOONGARCH_EXCEPTION_PIL, "Mem Read" }, \ + {LOONGARCH_EXCEPTION_PIS, "Mem Store" }, \ + {LOONGARCH_EXCEPTION_PIF, "Inst Fetch" }, \ + {LOONGARCH_EXCEPTION_PME, "Mem Modify" }, \ + {LOONGARCH_EXCEPTION_FPD, "FPU" }, \ + {LOONGARCH_EXCEPTION_SXD, "LSX" }, \ + {LOONGARCH_EXCEPTION_ASXD, "LASX" }, \ + {LOONGARCH_EXCEPTION_GSPR, "Privilege Error" }, \ + {LOONGARCH_EXCEPTION_HVC, "Hypercall" }, \ + {LOONGARCH_EXCEPTION_CPUCFG, "CPUCFG" }, \ + {LOONGARCH_EXCEPTION_CSR, "CSR" }, \ + {LOONGARCH_EXCEPTION_IOCSR, "IOCSR" }, \ + {LOONGARCH_EXCEPTION_IDLE, "Idle" }, \ + {LOONGARCH_EXCEPTION_OTHERS, "Others" } + +define_exit_reasons_table(loongarch_exit_reasons, loongarch_exception_type); + +const char *vcpu_id_str = "vcpu_id"; +const char *kvm_exit_reason = "reason"; +const char *kvm_entry_trace = "kvm:kvm_enter"; +const char *kvm_reenter_trace = "kvm:kvm_reenter"; +const char *kvm_exit_trace = "kvm:kvm_exit"; +const char *kvm_events_tp[] = 
{ + "kvm:kvm_enter", + "kvm:kvm_reenter", + "kvm:kvm_exit", + "kvm:kvm_exit_gspr", + NULL, +}; + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + return exit_event_begin(evsel, sample, key); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + /* + * LoongArch kvm is different with other architectures + * + * There is kvm:kvm_reenter or kvm:kvm_enter event adjacent with + * kvm:kvm_exit event. + * kvm:kvm_enter means returning to vmm and then to guest + * kvm:kvm_reenter means returning to guest immediately + */ + return evsel__name_is(evsel, kvm_entry_trace) || evsel__name_is(evsel, kvm_reenter_trace); +} + +static void event_gspr_get_key(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + unsigned int insn; + + key->key = LOONGARCH_EXCEPTION_OTHERS; + insn = evsel__intval(evsel, sample, "inst_word"); + + switch (insn >> 24) { + case 0: + /* CPUCFG inst trap */ + if ((insn >> 10) == 0x1b) + key->key = LOONGARCH_EXCEPTION_CPUCFG; + break; + case 4: + /* CSR inst trap */ + key->key = LOONGARCH_EXCEPTION_CSR; + break; + case 6: + /* IOCSR inst trap */ + if ((insn >> 15) == 0xc90) + key->key = LOONGARCH_EXCEPTION_IOCSR; + else if ((insn >> 15) == 0xc91) + /* Idle inst trap */ + key->key = LOONGARCH_EXCEPTION_IDLE; + break; + default: + key->key = LOONGARCH_EXCEPTION_OTHERS; + break; + } +} + +static struct child_event_ops child_events[] = { + { .name = "kvm:kvm_exit_gspr", .get_key = event_gspr_get_key }, + { NULL, NULL }, +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .child_ops = child_events, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events, }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "loongarch64"; + kvm->exit_reasons = loongarch_exit_reasons; + return 0; +} -- Gitee From 134cd49c08f0884c1387da206d799d22d969dcfa Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 13 Jun 2024 20:28:03 +0800 Subject: [PATCH 28/44] KVM: Discard zero mask with function kvm_dirty_ring_reset commit 676f819c3e98 ("KVM: Discard zero mask with function kvm_dirty_ring_reset") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Function kvm_reset_dirty_gfn may be called with parameters cur_slot / cur_offset / mask are all zero, it does not represent real dirty page. It is not necessary to clear dirty page in this condition. Also return value of macro __fls() is undefined if mask is zero which is called in funciton kvm_reset_dirty_gfn(). Here just return. Signed-off-by: Bibo Mao Message-ID: <20240613122803.1031511-1-maobibo@loongson.cn> [Move the conditional inside kvm_reset_dirty_gfn; suggested by Sean Christopherson. 
- Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Xianglai Li --- virt/kvm/dirty_ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index c1cd7dfe4a90..27e50190d419 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) struct kvm_memory_slot *memslot; int as_id, id; + if (!mask) + return; + as_id = slot >> 16; id = (u16)slot; -- Gitee From 2205e133b66d1c1b4dac5bc70c9ae6ff062710c8 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 26 Aug 2024 23:11:32 +0800 Subject: [PATCH 29/44] LoongArch: KVM: Invalidate guest steal time address on vCPU reset commit 4956e07f05e2 ("LoongArch: KVM: Invalidate guest steal time address on vCPU reset") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. If ParaVirt steal time feature is enabled, there is a percpu gpa address passed from guest vCPU and host modifies guest memory space with this gpa address. When vCPU is reset normally, it will notify host and invalidate gpa address. However if VM is crashed and VMM reboots VM forcely, the vCPU reboot notification callback will not be called in VM. Host needs invalidate the gpa address, else host will modify guest memory during VM reboots. Here it is invalidated from the vCPU KVM_REG_LOONGARCH_VCPU_RESET ioctl interface. Also funciton kvm_reset_timer() is removed at vCPU reset stage, since SW emulated timer is only used in vCPU block state. When a vCPU is removed from the block waiting queue, kvm_restore_timer() is called and SW timer is cancelled. And the timer register is also cleared at VMM when a vCPU is reset. 
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_vcpu.h | 1 - arch/loongarch/kvm/timer.c | 7 ------- arch/loongarch/kvm/vcpu.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index ad6a31f01c56..3f9c7dc6cfdb 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -78,7 +78,6 @@ static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } int kvm_own_pmu(struct kvm_vcpu *vcpu); void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); -void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index bcc6b6d063d9..74a4b5c272d6 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -188,10 +188,3 @@ void kvm_save_timer(struct kvm_vcpu *vcpu) kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); preempt_enable(); } - -void kvm_reset_timer(struct kvm_vcpu *vcpu) -{ - write_gcsr_timercfg(0); - kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0); - hrtimer_cancel(&vcpu->arch.swtimer); -} diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index bb60f063e3e2..c2789e28461b 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -870,7 +870,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->kvm->arch.time_offset = (signed long)(v - drdtime()); break; case KVM_REG_LOONGARCH_VCPU_RESET: - kvm_reset_timer(vcpu); + vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); break; -- Gitee From b62160a3c583c94aef7edc75502cbd70ca30a7a1 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 11 Sep 2024 23:26:32 +0800 Subject: [PATCH 30/44] LoongArch: Revert qspinlock to test-and-set simple lock on VM commit e5ba90abb2eb ("LoongArch: Revert qspinlock to test-and-set simple lock on VM") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Similar to x86, when a VM is detected, revert to a simple test-and-set lock to avoid the horrors of queue preemption. Tested on a 3C5000 dual-way machine with 32 cores and 2 numa nodes; the test case is kcbench on kernel mainline 6.10, and the detailed command is "kcbench --src /root/src/linux".

Performance on host machine:
                kernel compile time    performance impact
  Original      150.29 seconds
  With patch    150.19 seconds         almost no impact

Performance on virtual machine:

1. 1 VM with 32 vCPUs and 2 numa node, numa node pinned
                kernel compile time    performance impact
  Original      170.87 seconds
  With patch    171.73 seconds         almost no impact
2. 2 VMs, each VM with 32 vCPUs and 2 numa node, numa node pinned
                kernel compile time    performance impact
  Original      2362.04 seconds
  With patch    354.73 seconds         +565%

Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/Kbuild | 1 - arch/loongarch/include/asm/paravirt.h | 7 +++++ arch/loongarch/include/asm/qspinlock.h | 41 ++++++++++++++++++++++++++ arch/loongarch/kernel/paravirt.c | 11 +++++++ arch/loongarch/kernel/setup.c | 2 ++ arch/loongarch/kernel/smp.c | 4 ++- 6 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/include/asm/qspinlock.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 27f66930ab6a..22991a6f0e2b 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index fe27fb5e82b8..dabc5aec179c 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -18,6 +18,7 @@ static inline u64 paravirt_steal_clock(int cpu) int pv_ipi_init(void); int __init pv_time_init(void); +int __init pv_spinlock_init(void); #else static inline int pv_ipi_init(void) { @@ -28,5 +29,11 @@ static inline int pv_time_init(void) { return 0; } + +static inline int pv_spinlock_init(void) +{ + return 0; +} + #endif // CONFIG_PARAVIRT #endif diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h new file mode 100644 index 000000000000..e76d3aa1e1eb --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_QSPINLOCK_H +#define _ASM_LOONGARCH_QSPINLOCK_H + +#include + +#ifdef CONFIG_PARAVIRT + +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +#define virt_spin_lock virt_spin_lock + +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + int val; + + if (!static_branch_unlikely(&virt_spin_lock_key)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues.
+ */ + +__retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } + + return true; +} + +#endif /* CONFIG_PARAVIRT */ + +#include + +#endif // _ASM_LOONGARCH_QSPINLOCK_H diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index f55b7e027d46..658333b889b8 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -12,6 +12,7 @@ static int has_steal_clock; struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); static u64 native_steal_clock(int cpu) { @@ -563,3 +564,13 @@ int __init pv_time_init(void) return 0; } + +int __init pv_spinlock_init(void) +{ + if (!cpu_has_hypervisor) + return 0; + + static_branch_enable(&virt_spin_lock_key); + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index fe7b6e055829..6ccffb2ab7c8 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -665,6 +665,8 @@ void __init setup_arch(char **cmdline_p) arch_mem_init(cmdline_p); resource_init(); + jump_label_init(); /* Initialise the static keys for paravirtualization */ + #ifdef CONFIG_SMP plat_smp_setup(); prefill_possible_map(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index bd0bd3decd32..8af0398dd786 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -477,7 +477,7 @@ core_initcall(ipi_pm_init); #endif /* Preload SMP state for boot cpu */ -void smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { unsigned int cpu, node, rr_node; @@ -510,6 +510,8 @@ void smp_prepare_boot_cpu(void) rr_node = next_node_in(rr_node, node_online_map); } } + + pv_spinlock_init(); } /* called from main before smp_init() */ -- Gitee From cb9d43cc467454bad3e7e69cca1082fd355b04b3 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 11 Sep 2024 23:26:32 +0800 Subject: [PATCH 31/44] LoongArch: KVM: Add VM feature detection function commit a53f48b6327c ("LoongArch: KVM: Add VM feature detection function") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Loongson SIMD Extension (LSX), Loongson Advanced SIMD Extension (LASX) and Loongson Binary Translation (LBT) features are defined in register CPUCFG2. Two kinds of LSX/LASX/LBT feature detection are added here: one is the vCPU feature, and the other is the VM feature. vCPU feature detection can only work within the vCPU thread itself, and requires that the vCPU thread is already created. So LSX/LASX/LBT feature detection for the VM is also added; it can be done even if the VM is not created yet, and by any thread besides the vCPU threads. Here the ioctl command KVM_HAS_DEVICE_ATTR is added for the VM fd, and macro KVM_LOONGARCH_VM_FEAT_CTRL is added to check supported features; a userspace probe sketch follows below.
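A hedged userspace sketch of that probe (an assumption, not code from this series): vm_fd is a hypothetical VM file descriptor obtained from KVM_CREATE_VM, and the KVM_LOONGARCH_VM_FEAT_* constants come from the uapi hunk below.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int vm_has_feature(int vm_fd, unsigned long feat)
    {
        struct kvm_device_attr attr = {
            .group = KVM_LOONGARCH_VM_FEAT_CTRL,
            .attr  = feat,
        };

        /* Returns 0 if supported, -1 with errno == ENXIO otherwise */
        return ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr);
    }

For example, vm_has_feature(vm_fd, KVM_LOONGARCH_VM_FEAT_LSX) == 0 means the host can virtualize LSX; no vCPU needs to exist yet, which is the point of the VM-level interface.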
Five sub-features related to LSX/LASX/LBT are added, as follows: KVM_LOONGARCH_VM_FEAT_LSX KVM_LOONGARCH_VM_FEAT_LASX KVM_LOONGARCH_VM_FEAT_X86BT KVM_LOONGARCH_VM_FEAT_ARMBT KVM_LOONGARCH_VM_FEAT_MIPSBT Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 8 +++++ arch/loongarch/kvm/vcpu.c | 6 ++++ arch/loongarch/kvm/vm.c | 49 +++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 834a7003ca97..a9e165fac034 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -85,6 +85,14 @@ struct kvm_fpu { #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) +/* Device Control API on vm fd */ +#define KVM_LOONGARCH_VM_FEAT_CTRL 0 +#define KVM_LOONGARCH_VM_FEAT_LSX 0 +#define KVM_LOONGARCH_VM_FEAT_LASX 1 +#define KVM_LOONGARCH_VM_FEAT_X86BT 2 +#define KVM_LOONGARCH_VM_FEAT_ARMBT 3 +#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 + /* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 #define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index c2789e28461b..27bdd0d24bf3 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -698,6 +698,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) *v |= CPUCFG2_LSX; if (cpu_has_lasx) *v |= CPUCFG2_LASX; + if (cpu_has_lbt_x86) + *v |= CPUCFG2_X86BT; + if (cpu_has_lbt_arm) + *v |= CPUCFG2_ARMBT; + if (cpu_has_lbt_mips) + *v |= CPUCFG2_MIPSBT; return 0; case LOONGARCH_CPUCFG3: diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index e3da437f1b12..486b48510414 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -104,15 +104,64 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_LOONGARCH_VM_FEAT_LSX: + if (cpu_has_lsx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_LASX: + if (cpu_has_lasx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_X86BT: + if (cpu_has_lbt_x86) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_ARMBT: + if (cpu_has_lbt_arm) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_MIPSBT: + if (cpu_has_lbt_mips) + return 0; + return -ENXIO; + default: + return -ENXIO; + } +} + +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_LOONGARCH_VM_FEAT_CTRL: + return kvm_vm_feature_has_attr(kvm, attr); + case KVM_LOONGARCH_VM_HAVE_IRQCHIP: + return 0; + default: + return -ENXIO; + } +} + int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { int r; + void __user *argp = (void __user *)arg; + struct kvm *kvm = filp->private_data; + struct kvm_device_attr attr; switch (ioctl) { case KVM_CREATE_IRQCHIP: { r = 1; break; } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + } default: return -EINVAL; } -- Gitee From 8cf2a52eba50de4547ca28458150352874466790 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 11 Sep 2024 23:26:32 +0800 Subject: [PATCH 32/44] LoongArch: KVM: Add Binary Translation extension support commit b67ee19a907d ("LoongArch: KVM: Add Binary Translation extension support")
Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Loongson Binary Translation (LBT) is used to accelerate binary translation, which contains 4 scratch registers (scr0 to scr3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop). Like FPU extension, here a lazy enabling method is used for LBT. the LBT context is saved/restored on the vcpu context switch path. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 13 ++++-- arch/loongarch/include/asm/kvm_vcpu.h | 6 +++ arch/loongarch/kvm/exit.c | 9 ++++ arch/loongarch/kvm/vcpu.c | 61 ++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 0f456b53f368..e665907ab64d 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -170,10 +170,9 @@ enum emulation_result { #define KVM_LARCH_FPU (0x1 << 0) #define KVM_LARCH_LSX (0x1 << 1) #define KVM_LARCH_LASX (0x1 << 2) -#define KVM_LARCH_SWCSR_LATEST (0x1 << 3) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 4) -#define KVM_GUEST_PMU_ENABLE (0x1 << 5) -#define KVM_GUEST_PMU_ACTIVE (0x1 << 6) +#define KVM_LARCH_LBT (0x1 << 3) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 4) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 5) struct kvm_vcpu_arch { /* @@ -207,6 +206,7 @@ struct kvm_vcpu_arch { /* FPU state */ struct loongarch_fpu fpu FPU_ALIGN; + struct loongarch_lbt lbt; /* CSR state */ struct loongarch_csrs *csr; @@ -305,6 +305,11 @@ static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) return (arch->cpucfg[LOONGARCH_CPUCFG6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; } +static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT); +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 3f9c7dc6cfdb..fb1200d6dd66 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -77,6 +77,12 @@ static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } int kvm_own_pmu(struct kvm_vcpu *vcpu); +#ifdef CONFIG_CPU_HAS_LBT +int kvm_own_lbt(struct kvm_vcpu *vcpu); +#else +static inline int kvm_own_lbt(struct kvm_vcpu *vcpu) { return -EINVAL; } +#endif + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 8e415503e8e8..47ab50253f82 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -794,6 +794,14 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lbt(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) { unsigned int min, cpu, i; @@ -908,6 +916,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, + [EXCCODE_BTDIS] = kvm_handle_lbt_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, [EXCCODE_HVC] = 
kvm_handle_hypercall, }; diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 27bdd0d24bf3..c8711ac6576d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1207,12 +1208,66 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +#ifdef CONFIG_CPU_HAS_LBT +int kvm_own_lbt(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + preempt_enable(); + + return 0; +} + +static void kvm_lose_lbt(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) { + _save_lbt(&vcpu->arch.lbt); + clear_csr_euen(CSR_EUEN_LBTEN); + vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT; + } + preempt_enable(); +} + +static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) +{ + /* + * If TM is enabled, top register save/restore will + * cause lbt exception, here enable lbt in advance + */ + if (fcsr & FPU_CSR_TM) + kvm_own_lbt(vcpu); +} + +static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) + return; + kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0)); + } +} +#else +static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { } +static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { } +static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { } +#endif + /* Enable FPU and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - /* Enable FPU */ + /* + * Enable FPU for guest + * Set FR and FRE according to guest context + */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN); kvm_restore_fpu(&vcpu->arch.fpu); @@ -1232,6 +1287,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu) preempt_disable(); /* Enable LSX for guest */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN); switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { case KVM_LARCH_FPU: @@ -1266,6 +1322,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu) preempt_disable(); + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) { case KVM_LARCH_LSX: @@ -1297,6 +1354,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); + kvm_check_fcsr_alive(vcpu); if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) { kvm_save_lasx(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX); @@ -1319,6 +1377,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) /* Disable FPU */ clear_csr_euen(CSR_EUEN_FPEN); } + kvm_lose_lbt(vcpu); preempt_enable(); } -- Gitee From c82fe2a57d909467c1f716b7052193cdcbb35163 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 11 Sep 2024 23:26:32 +0800 Subject: [PATCH 33/44] LoongArch: KVM: Add vm migration support for LBT registers commit acc7f20d54a3 ("LoongArch: KVM: Add vm migration support for LBT registers") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Every vcpu has separate LBT registers. And there are four scr registers, one flags and ftop register for LBT extension. When VM migrates, VMM needs to get LBT registers for every vcpu. 
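As an illustration of that migration flow (a sketch under assumptions, not code from this series), the VMM can fetch each LBT register through KVM_GET_ONE_REG using the register ids introduced below; vcpu_fd is hypothetical.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int get_lbt_scr0(int vcpu_fd, uint64_t *out)
    {
        struct kvm_one_reg reg = {
            .id   = KVM_REG_LOONGARCH_LBT_SCR0,
            .addr = (uint64_t)(uintptr_t)out,
        };

        /* Fails with ENXIO when the guest CPUCFG2 has no LBT bits set */
        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }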
Here macro KVM_REG_LOONGARCH_LBT is added for the new vCPU LBT register type, and the following macros are added to get/put LBT registers: KVM_REG_LOONGARCH_LBT_SCR0 KVM_REG_LOONGARCH_LBT_SCR1 KVM_REG_LOONGARCH_LBT_SCR2 KVM_REG_LOONGARCH_LBT_SCR3 KVM_REG_LOONGARCH_LBT_EFLAGS KVM_REG_LOONGARCH_LBT_FTOP Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/uapi/asm/kvm.h | 9 +++++ arch/loongarch/kvm/vcpu.c | 56 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index a9e165fac034..b4c88fe8ad21 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -67,6 +67,7 @@ struct kvm_fpu { #define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) #define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL) #define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) +#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x50000ULL) #define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) #define KVM_CSR_IDX_MASK 0x7fff #define KVM_CPUCFG_IDX_MASK 0x7fff @@ -80,6 +81,14 @@ struct kvm_fpu { /* Debugging: Special instruction for software breakpoint */ #define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) +/* LBT registers */ +#define KVM_REG_LOONGARCH_LBT_SCR0 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1) +#define KVM_REG_LOONGARCH_LBT_SCR1 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2) +#define KVM_REG_LOONGARCH_LBT_SCR2 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3) +#define KVM_REG_LOONGARCH_LBT_SCR3 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4) +#define KVM_REG_LOONGARCH_LBT_EFLAGS (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5) +#define KVM_REG_LOONGARCH_LBT_FTOP (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6) + #define LOONGARCH_REG_SHIFT 3 #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index c8711ac6576d..ffe531483ab4 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -803,6 +803,34 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, else ret = -EINVAL; break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case KVM_REG_LOONGARCH_LBT_SCR0: + *v = vcpu->arch.lbt.scr0; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + *v = vcpu->arch.lbt.scr1; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + *v = vcpu->arch.lbt.scr2; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + *v = vcpu->arch.lbt.scr3; + break; + case KVM_REG_LOONGARCH_LBT_EFLAGS: + *v = vcpu->arch.lbt.eflags; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + *v = vcpu->arch.fpu.ftop; + break; + default: + ret = -EINVAL; + break; + } + break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { case KVM_REG_LOONGARCH_COUNTER: @@ -866,6 +894,34 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, 2 * kvm_get_pmu_num(&vcpu->arch) + 1; } break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case KVM_REG_LOONGARCH_LBT_SCR0: + vcpu->arch.lbt.scr0 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + vcpu->arch.lbt.scr1 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + vcpu->arch.lbt.scr2 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + vcpu->arch.lbt.scr3 = v; + break; + case KVM_REG_LOONGARCH_LBT_EFLAGS: +
vcpu->arch.lbt.eflags = v; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + vcpu->arch.fpu.ftop = v; + break; + default: + ret = -EINVAL; + break; + } + break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { case KVM_REG_LOONGARCH_COUNTER: -- Gitee From 5e9fc6f050bc5be7fe54a630235654e27cddd34e Mon Sep 17 00:00:00 2001 From: Song Gao Date: Thu, 13 Jun 2024 20:05:39 +0800 Subject: [PATCH 34/44] LoongArch: KVM: Add PMU support for guest commit f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. On LoongArch, the host and guest have their own PMU CSRs registers and they share PMU hardware resources. A set of PMU CSRs consists of a CTRL register and a CNTR register. We can set which PMU CSRs are used by the guest by writing to the GCFG register [24:26] bits. On KVM side: - Save the host PMU CSRs into structure kvm_context. - If the host supports the PMU feature. - When entering guest mode, save the host PMU CSRs and restore the guest PMU CSRs. - When exiting guest mode, save the guest PMU CSRs and restore the host PMU CSRs. Reviewed-by: Bibo Mao Signed-off-by: Song Gao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_csr.h | 7 +- arch/loongarch/include/asm/kvm_host.h | 21 +-- arch/loongarch/include/uapi/asm/kvm.h | 1 + arch/loongarch/kvm/vcpu.c | 243 ++++++++++---------------- arch/loongarch/kvm/vm.c | 4 + 5 files changed, 109 insertions(+), 167 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h index 4f8cfd6ebec0..c3d3a6359ab8 100644 --- a/arch/loongarch/include/asm/kvm_csr.h +++ b/arch/loongarch/include/asm/kvm_csr.h @@ -181,6 +181,7 @@ __BUILD_GCSR_OP(tlbidx) #define kvm_save_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_read(gid)) #define kvm_restore_hw_gcsr(csr, gid) (gcsr_write(csr->csrs[gid], gid)) + #define kvm_read_clear_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_write(0, gid)) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -215,9 +216,7 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, CSR_PERFCTRL_PLV2 | \ CSR_PERFCTRL_PLV3) -#define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | \ - CSR_PERFCTRL_PLV1 | \ - CSR_PERFCTRL_PLV2 | \ - CSR_PERFCTRL_PLV3) +#define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3) #endif /* __ASM_LOONGARCH_KVM_CSR_H__ */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index e665907ab64d..03cdc6312af1 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1) +#define KVM_REQ_PMU KVM_ARCH_REQ(2) #define KVM_GUESTDBG_SW_BP_MASK \ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) @@ -90,12 +91,11 @@ struct kvm_arch_memory_slot { unsigned long flags; }; -#define KVM_REQ_PMU KVM_ARCH_REQ(0) #define HOST_MAX_PMNUM 16 struct kvm_context { unsigned long vpid_cache; struct kvm_vcpu *last_vcpu; - /* Save host pmu csr */ + /* Host PMU CSR */ u64 perf_ctrl[HOST_MAX_PMNUM]; u64 perf_cntr[HOST_MAX_PMNUM]; }; @@ -171,8 +171,9 @@ enum emulation_result { #define KVM_LARCH_LSX (0x1 << 1) #define KVM_LARCH_LASX (0x1 << 2) #define KVM_LARCH_LBT (0x1 << 3) -#define KVM_LARCH_SWCSR_LATEST 
(0x1 << 4) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 5) +#define KVM_LARCH_PMU (0x1 << 4) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 5) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 6) struct kvm_vcpu_arch { /* @@ -295,19 +296,19 @@ static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; } -static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) +static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch) { - return arch->cpucfg[LOONGARCH_CPUCFG6] & CPUCFG6_PMP; + return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT); } -static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) +static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) { - return (arch->cpucfg[LOONGARCH_CPUCFG6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; + return arch->cpucfg[6] & CPUCFG6_PMP; } -static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch) +static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) { - return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT); + return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; } /* Debug: dump vcpu state */ diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index b4c88fe8ad21..359fad4cfa83 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -101,6 +101,7 @@ struct kvm_fpu { #define KVM_LOONGARCH_VM_FEAT_X86BT 2 #define KVM_LOONGARCH_VM_FEAT_ARMBT 3 #define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 +#define KVM_LOONGARCH_VM_FEAT_PMU 5 /* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index ffe531483ab4..6124eefdc8a9 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -32,120 +32,19 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; -static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) -{ - u32 version; - u64 steal; - gpa_t gpa; - struct kvm_memslots *slots; - struct kvm_steal_time __user *st; - struct gfn_to_hva_cache *ghc; - - ghc = &vcpu->arch.st.cache; - gpa = vcpu->arch.st.guest_addr; - if (!(gpa & KVM_STEAL_PHYS_VALID)) - return; - - gpa &= KVM_STEAL_PHYS_MASK; - slots = kvm_memslots(vcpu->kvm); - if (slots->generation != ghc->generation || gpa != ghc->gpa) { - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { - ghc->gpa = INVALID_GPA; - return; - } - } - - st = (struct kvm_steal_time __user *)ghc->hva; - unsafe_get_user(version, &st->version, out); - if (version & 1) - version += 1; /* first time write, random junk */ - - version += 1; - unsafe_put_user(version, &st->version, out); - smp_wmb(); - - unsafe_get_user(steal, &st->steal, out); - steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - unsafe_put_user(steal, &st->steal, out); - - smp_wmb(); - version += 1; - unsafe_put_user(version, &st->version, out); -out: - mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); -} - -static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - u64 __user *user = (u64 __user *)attr->addr; - struct kvm *kvm = vcpu->kvm; - u64 gpa; - int ret = 0; - int idx; - - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) - return -ENXIO; - - if (get_user(gpa, user)) - return -EFAULT; - - /* Check the address is in a valid memslot */ - idx = 
srcu_read_lock(&kvm->srcu); - if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) - ret = -EINVAL; - srcu_read_unlock(&kvm->srcu, idx); - - if (!ret) - vcpu->arch.st.guest_addr = gpa; - - return ret; -} - -static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - u64 __user *user = (u64 __user *)attr->addr; - u64 gpa; - - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) - return -ENXIO; - - gpa = vcpu->arch.st.guest_addr; - if (put_user(gpa, user)) - return -EFAULT; - - return 0; -} - -static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - switch (attr->attr) { - case KVM_LOONGARCH_VCPU_PVTIME_GPA: - if (kvm_pvtime_supported()) - return 0; - } - - return -ENXIO; -} - static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu) { struct kvm_context *context; context = this_cpu_ptr(vcpu->kvm->arch.vmcs); - context->perf_ctrl[0] = write_csr_perfctrl0(0); - context->perf_ctrl[1] = write_csr_perfctrl1(0); - context->perf_ctrl[2] = write_csr_perfctrl2(0); - context->perf_ctrl[3] = write_csr_perfctrl3(0); context->perf_cntr[0] = read_csr_perfcntr0(); context->perf_cntr[1] = read_csr_perfcntr1(); context->perf_cntr[2] = read_csr_perfcntr2(); context->perf_cntr[3] = read_csr_perfcntr3(); + context->perf_ctrl[0] = write_csr_perfctrl0(0); + context->perf_ctrl[1] = write_csr_perfctrl1(0); + context->perf_ctrl[2] = write_csr_perfctrl2(0); + context->perf_ctrl[3] = write_csr_perfctrl3(0); } static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu) @@ -168,14 +67,14 @@ static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu) { struct loongarch_csrs *csr = vcpu->arch.csr; - kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); } static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu) @@ -192,70 +91,109 @@ static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu) kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); } +static int kvm_own_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + + if (!kvm_guest_has_pmu(&vcpu->arch)) + return -EINVAL; + + kvm_save_host_pmu(vcpu); + + /* Set PM0-PM(num) to guest */ + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + + kvm_restore_guest_pmu(vcpu); + + return 0; +} + static void kvm_lose_pmu(struct kvm_vcpu *vcpu) { unsigned long val; struct loongarch_csrs *csr = vcpu->arch.csr; - if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ENABLE)) - return; - if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ACTIVE)) + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU)) return; kvm_save_guest_pmu(vcpu); + /* Disable pmu access from guest */ write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); /* - * Clear KVM_GUEST_PMU_ENABLE if the guest is not using PMU CSRs - * when exiting the guest, so that the next time trap into the guest. - * we don't need to deal with PMU CSRs contexts. 
+ * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when + * exiting the guest, so that the next time trap into the guest. + * We don't need to deal with PMU CSRs contexts. */ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); if (!(val & KVM_PMU_EVENT_ENABLED)) - vcpu->arch.aux_inuse &= ~KVM_GUEST_PMU_ENABLE; - kvm_restore_host_pmu(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; - /* KVM_GUEST_PMU_ACTIVE needs to be cleared when exiting the guest */ - vcpu->arch.aux_inuse &= ~KVM_GUEST_PMU_ACTIVE; + kvm_restore_host_pmu(vcpu); } -static void kvm_own_pmu(struct kvm_vcpu *vcpu) +static void kvm_restore_pmu(struct kvm_vcpu *vcpu) { - unsigned long val; - - kvm_save_host_pmu(vcpu); - /* Set PM0-PM(num) to guest */ - val = read_csr_gcfg() & ~CSR_GCFG_GPERF; - val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; - write_csr_gcfg(val); - kvm_restore_guest_pmu(vcpu); + if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) + kvm_make_request(KVM_REQ_PMU, vcpu); } -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) +static void kvm_check_pmu(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.aux_inuse & KVM_GUEST_PMU_ENABLE)) - return; - - kvm_make_request(KVM_REQ_PMU, vcpu); + if (kvm_check_request(KVM_REQ_PMU, vcpu)) { + kvm_own_pmu(vcpu); + vcpu->arch.aux_inuse |= KVM_LARCH_PMU; + } } -static void kvm_check_pmu(struct kvm_vcpu *vcpu) +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { - if (!kvm_check_request(KVM_REQ_PMU, vcpu)) + u32 version; + u64 steal; + gpa_t gpa; + struct kvm_memslots *slots; + struct kvm_steal_time __user *st; + struct gfn_to_hva_cache *ghc; + + ghc = &vcpu->arch.st.cache; + gpa = vcpu->arch.st.guest_addr; + if (!(gpa & KVM_STEAL_PHYS_VALID)) return; - kvm_own_pmu(vcpu); + gpa &= KVM_STEAL_PHYS_MASK; + slots = kvm_memslots(vcpu->kvm); + if (slots->generation != ghc->generation || gpa != ghc->gpa) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { + ghc->gpa = INVALID_GPA; + return; + } + } - /* - * Set KVM_GUEST PMU_ENABLE and GUEST_PMU_ACTIVE - * when guest has KVM_REQ_PMU request. 
- */ - vcpu->arch.aux_inuse |= KVM_GUEST_PMU_ENABLE; - vcpu->arch.aux_inuse |= KVM_GUEST_PMU_ACTIVE; + st = (struct kvm_steal_time __user *)ghc->hva; + unsafe_get_user(version, &st->version, out); + if (version & 1) + version += 1; /* first time write, random junk */ + + version += 1; + unsafe_put_user(version, &st->version, out); + smp_wmb(); + + unsafe_get_user(steal, &st->steal, out); + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + unsafe_put_user(steal, &st->steal, out); + + smp_wmb(); + version += 1; + unsafe_put_user(version, &st->version, out); +out: + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } /* @@ -662,10 +600,11 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) { unsigned long val; - val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + if (val & KVM_PMU_EVENT_ENABLED) kvm_make_request(KVM_REQ_PMU, vcpu); } @@ -738,7 +677,7 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) static int kvm_check_cpucfg(int id, u64 val) { - int ret, host; + int ret; u64 mask = 0; ret = _kvm_get_cpucfg_mask(id, &mask); @@ -766,9 +705,8 @@ static int kvm_check_cpucfg(int id, u64 val) return 0; case LOONGARCH_CPUCFG6: if (val & CPUCFG6_PMP) { - host = read_cpucfg(LOONGARCH_CPUCFG6); + u32 host = read_cpucfg(LOONGARCH_CPUCFG6); if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) - /* Guest pmbits must be the same with host */ return -EINVAL; if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM)) return -EINVAL; @@ -889,10 +827,9 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, if (ret) break; vcpu->arch.cpucfg[id] = (u32)v; - if (id == LOONGARCH_CPUCFG6) { - vcpu->arch.max_pmu_csrid = LOONGARCH_CSR_PERFCTRL0 + - 2 * kvm_get_pmu_num(&vcpu->arch) + 1; - } + if (id == LOONGARCH_CPUCFG6) + vcpu->arch.max_pmu_csrid = + LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1; break; case KVM_REG_LOONGARCH_LBT: if (!kvm_guest_has_lbt(&vcpu->arch)) diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 486b48510414..7a5a9c2bec80 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -127,6 +127,10 @@ static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr if (cpu_has_lbt_mips) return 0; return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PMU: + if (cpu_has_pmp) + return 0; + return -ENXIO; default: return -ENXIO; } -- Gitee From ff13a1c6508764d32725e0061471dd4d4c1ab9cc Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 12 Sep 2024 20:53:40 +0800 Subject: [PATCH 35/44] LoongArch: KVM: Enable paravirt feature control from VMM commit cdc118f80241 ("LoongArch: KVM: Enable paravirt feature control from VMM") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Export kernel paravirt features to user space, so that VMM can control each single paravirt feature. By default paravirt features will be the same with kvm supported features if VMM does not set it. 
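As a rough illustration (an assumption, not part of the patch): with this interface a VMM can pin down the guest's PV feature set through the vCPU device-attr group, using the CPUCFG_KVM_FEATURE attribute defined in the uapi hunks below; vcpu_fd is hypothetical.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>
    #include <asm/kvm_para.h>   /* CPUCFG_KVM_FEATURE, KVM_FEATURE_* */

    static int set_pv_features(int vcpu_fd, uint64_t feats)
    {
        struct kvm_device_attr attr = {
            .group = KVM_LOONGARCH_VCPU_CPUCFG,
            .attr  = CPUCFG_KVM_FEATURE,
            .addr  = (uint64_t)(uintptr_t)&feats,
        };

        /* e.g. feats = (1UL << KVM_FEATURE_IPI) | (1UL << KVM_FEATURE_STEAL_TIME) */
        return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }

All vCPUs of a VM must be given the same value, as enforced by kvm_loongarch_cpucfg_set_attr() below.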
Also a new feature KVM_FEATURE_VIRT_EXTIOI is added which can be set from user space. This feature indicates that the virt EIOINTC can route interrupts to 256 vCPUs, rather than 4 vCPUs like with real HW. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_host.h | 7 +++ arch/loongarch/include/asm/kvm_para.h | 2 + arch/loongarch/include/asm/kvm_vcpu.h | 5 +++ arch/loongarch/include/asm/loongarch.h | 9 +--- arch/loongarch/include/uapi/asm/kvm.h | 16 ++++--- arch/loongarch/include/uapi/asm/kvm_para.h | 21 +++++++++ arch/loongarch/kernel/paravirt.c | 8 ++-- arch/loongarch/kvm/exit.c | 19 ++++---- arch/loongarch/kvm/vcpu.c | 52 +++++++++++++++++----- arch/loongarch/kvm/vm.c | 13 ++++++ 10 files changed, 115 insertions(+), 37 deletions(-) create mode 100644 arch/loongarch/include/uapi/asm/kvm_para.h diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 03cdc6312af1..0affac2d8d07 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -141,6 +141,8 @@ struct kvm_arch { unsigned int root_level; spinlock_t phyid_map_lock; struct kvm_phyid_map *phyid_map; + /* Enabled PV features */ + unsigned long pv_features; s64 time_offset; struct kvm_context __percpu *vmcs; @@ -175,6 +177,11 @@ enum emulation_result { #define KVM_LARCH_SWCSR_LATEST (0x1 << 5) #define KVM_LARCH_HWCSR_USABLE (0x1 << 6) +#define LOONGARCH_PV_FEAT_UPDATED BIT_ULL(63) +#define LOONGARCH_PV_FEAT_MASK (BIT(KVM_FEATURE_IPI) | \ + BIT(KVM_FEATURE_STEAL_TIME) | \ + BIT(KVM_FEATURE_VIRT_EXTIOI)) + struct kvm_vcpu_arch { /* * Switch pointer-to-function type to unsigned long diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index a0ed1aacb8b4..4866924b92f3 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -2,6 +2,8 @@ #ifndef _ASM_LOONGARCH_KVM_PARA_H #define _ASM_LOONGARCH_KVM_PARA_H +#include + /* * Hypercall code field */ diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index fb1200d6dd66..4a6a778bbe11 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -132,4 +132,9 @@ static inline bool kvm_pvtime_supported(void) return !!sched_info_on(); } +static inline bool kvm_guest_has_pv_feature(struct kvm_vcpu *vcpu, unsigned int feature) +{ + return vcpu->kvm->arch.pv_features & BIT(feature); +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index d62af76f4bb7..ab5ec51293ad 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -161,15 +161,8 @@ /* * cpucfg index area: 0x40000000 -- 0x400000ff - * SW emulation for KVM hypervirsor + * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#define CPUCFG_KVM_BASE 0x40000000UL -#define CPUCFG_KVM_SIZE 0x100 -#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE -#define KVM_SIGNATURE "KVM\0" -#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) -#define KVM_FEATURE_PV_IPI BIT(1) -#define KVM_FEATURE_STEAL_TIME BIT(2) #ifndef __ASSEMBLY__ diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 359fad4cfa83..13c1280662ae 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -95,13 +95,15 @@ struct kvm_fpu { #define KVM_IOC_CPUCFG(REG) 
LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) /* Device Control API on vm fd */ -#define KVM_LOONGARCH_VM_FEAT_CTRL 0 -#define KVM_LOONGARCH_VM_FEAT_LSX 0 -#define KVM_LOONGARCH_VM_FEAT_LASX 1 -#define KVM_LOONGARCH_VM_FEAT_X86BT 2 -#define KVM_LOONGARCH_VM_FEAT_ARMBT 3 -#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 -#define KVM_LOONGARCH_VM_FEAT_PMU 5 +#define KVM_LOONGARCH_VM_FEAT_CTRL 0 +#define KVM_LOONGARCH_VM_FEAT_LSX 0 +#define KVM_LOONGARCH_VM_FEAT_LASX 1 +#define KVM_LOONGARCH_VM_FEAT_X86BT 2 +#define KVM_LOONGARCH_VM_FEAT_ARMBT 3 +#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 +#define KVM_LOONGARCH_VM_FEAT_PMU 5 +#define KVM_LOONGARCH_VM_FEAT_PV_IPI 6 +#define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7 /* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 diff --git a/arch/loongarch/include/uapi/asm/kvm_para.h b/arch/loongarch/include/uapi/asm/kvm_para.h new file mode 100644 index 000000000000..b0604aa9b4bb --- /dev/null +++ b/arch/loongarch/include/uapi/asm/kvm_para.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_KVM_PARA_H +#define _UAPI_ASM_KVM_PARA_H + +#include + +/* + * CPUCFG index area: 0x40000000 -- 0x400000ff + * SW emulation for KVM hypervirsor + */ +#define CPUCFG_KVM_BASE 0x40000000 +#define CPUCFG_KVM_SIZE 0x100 +#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0) +#define KVM_SIGNATURE "KVM\0" +#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) +#define KVM_FEATURE_IPI 1 +#define KVM_FEATURE_STEAL_TIME 2 +/* BIT 24 - 31 are features configurable by user space vmm */ +#define KVM_FEATURE_VIRT_EXTIOI 24 + +#endif /* _UAPI_ASM_KVM_PARA_H */ diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 658333b889b8..6d2dbc0b8feb 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -269,7 +269,7 @@ int __init pv_ipi_init(void) if (!cpu_has_hypervisor) return 0; - if (!kvm_para_available()) + if (!(feature & BIT(KVM_FEATURE_IPI))) return 0; /* @@ -356,7 +356,7 @@ static int pv_enable_steal_time(void) } addr |= KVM_STEAL_PHYS_VALID; - kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), addr); return 0; } @@ -364,7 +364,7 @@ static int pv_enable_steal_time(void) static void pv_disable_steal_time(void) { if (has_steal_clock) - kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), 0); } #ifdef CONFIG_SMP @@ -416,7 +416,7 @@ int __init pv_time_init(void) return 0; feature = read_cpucfg(CPUCFG_KVM_FEATURE); - if (!(feature & KVM_FEATURE_STEAL_TIME)) + if (!(feature & BIT(KVM_FEATURE_STEAL_TIME))) return 0; has_steal_clock = 1; diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 47ab50253f82..27b55f5b015d 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -259,9 +259,7 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) vcpu->arch.gprs[rd] = 0; break; case CPUCFG_KVM_FEATURE: - ret = KVM_FEATURE_IPI; - if (kvm_pvtime_supported()) - ret |= KVM_FEATURE_STEAL_TIME; + ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; vcpu->arch.gprs[rd] = ret; break; default: @@ -835,18 +833,23 @@ static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) */ static void kvm_handle_service(struct kvm_vcpu *vcpu) { + long ret = KVM_HCALL_INVALID_CODE; unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); - long ret; switch (func) { case KVM_HCALL_FUNC_IPI: - 
kvm_send_pv_ipi(vcpu); - ret = KVM_HCALL_SUCCESS; + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) { + kvm_send_pv_ipi(vcpu); + ret = KVM_HCALL_SUCCESS; + } + break; + case KVM_HCALL_FUNC_NOTIFY: + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)) + ret = kvm_save_notify(vcpu); break; default: - ret = KVM_HCALL_INVALID_CODE; break; - }; + } kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 6124eefdc8a9..b8d0bc516167 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -954,6 +954,8 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, case LOONGARCH_CPUCFG2: case LOONGARCH_CPUCFG6: return 0; + case CPUCFG_KVM_FEATURE: + return 0; default: return -ENXIO; } @@ -964,8 +966,8 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) return -ENXIO; return 0; @@ -997,9 +999,18 @@ static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu, uint64_t val; uint64_t __user *uaddr = (uint64_t __user *)attr->addr; - ret = _kvm_get_cpucfg_mask(attr->attr, &val); - if (ret) - return ret; + switch (attr->attr) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + ret = _kvm_get_cpucfg_mask(attr->attr, &val); + if (ret) + return ret; + break; + case CPUCFG_KVM_FEATURE: + val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + break; + default: + return -ENXIO; + } put_user(val, uaddr); @@ -1012,8 +1023,8 @@ static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, u64 gpa; u64 __user *user = (u64 __user *)attr->addr; - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) return -ENXIO; gpa = vcpu->arch.st.guest_addr; @@ -1045,7 +1056,28 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - return -ENXIO; + u64 val, valid; + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + switch (attr->attr) { + case CPUCFG_KVM_FEATURE: + if (get_user(val, user)) + return -EFAULT; + + valid = LOONGARCH_PV_FEAT_MASK; + if (val & ~valid) + return -EINVAL; + + /* All vCPUs need set the same PV features */ + if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED) + && ((kvm->arch.pv_features & valid) != val)) + return -EINVAL; + kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED; + return 0; + default: + return -ENXIO; + } } static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, @@ -1055,8 +1087,8 @@ static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, u64 gpa, __user *user = (u64 __user *)attr->addr; struct kvm *kvm = vcpu->kvm; - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) return -ENXIO; if (get_user(gpa, user)) diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 7a5a9c2bec80..6995a36bd36e 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -41,6 +42,12 @@ int 
kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } kvm_init_vmcs(kvm); + + /* Enable all PV features by default */ + kvm->arch.pv_features = BIT(KVM_FEATURE_IPI); + if (kvm_pvtime_supported()) + kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); + kvm->arch.gpa_size = BIT(cpu_vabits - 1); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; @@ -131,6 +138,12 @@ static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr if (cpu_has_pmp) return 0; return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PV_IPI: + return 0; + case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME: + if (kvm_pvtime_supported()) + return 0; + return -ENXIO; default: return -ENXIO; } -- Gitee From 8a90632410f42a042f5272b0ff8b4a4a2fe0914d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 12 Sep 2024 22:56:14 +0800 Subject: [PATCH 36/44] LoongArch: KVM: Implement function kvm_para_has_feature() commit 3abb708ec0be ("LoongArch: KVM: Implement function kvm_para_has_feature()") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Implement function kvm_para_has_feature() to detect supported paravirt features. It can be used by device driver to detect and enable paravirt features, such as the EIOINTC irqchip driver is able to detect feature KVM_FEATURE_VIRT_EXTIOI and do some optimization. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_para.h | 10 ++++++ arch/loongarch/kernel/paravirt.c | 46 +++++++++++++-------------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 4866924b92f3..dbea66d7eaa6 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -157,10 +157,20 @@ static __always_inline long kvm_hypercall5(u64 fid, return ret; } +#ifdef CONFIG_PARAVIRT +bool kvm_para_available(void); +unsigned int kvm_arch_para_features(void); +#else +static inline bool kvm_para_available(void) +{ + return false; +} + static inline unsigned int kvm_arch_para_features(void) { return 0; } +#endif static inline unsigned int kvm_arch_para_hints(void) { diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 6d2dbc0b8feb..92e37d0e6b22 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -249,11 +249,14 @@ static int pv_cpu_down_prepare(unsigned int cpu) } #endif -static bool kvm_para_available(void) +bool kvm_para_available(void) { static int hypervisor_type; int config; + if (!cpu_has_hypervisor) + return false; + if (!hypervisor_type) { config = read_cpucfg(CPUCFG_KVM_SIG); if (!memcmp(&config, KVM_SIGNATURE, 4)) @@ -263,28 +266,31 @@ static bool kvm_para_available(void) return hypervisor_type == HYPERVISOR_KVM; } -int __init pv_ipi_init(void) +unsigned int kvm_arch_para_features(void) { - int feature; + static unsigned int feature; - if (!cpu_has_hypervisor) + if (!kvm_para_available()) return 0; - if (!(feature & BIT(KVM_FEATURE_IPI))) + + if (!feature) + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + + return feature; +} + +int __init pv_ipi_init(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_IPI)) return 0; - /* - * check whether KVM hypervisor supports pv_ipi or not - */ - feature = read_cpucfg(CPUCFG_KVM_FEATURE); #ifdef CONFIG_SMP - if (feature & KVM_FEATURE_PV_IPI) { - smp_ops.init_ipi = pv_init_ipi; - 
smp_ops.send_ipi_single = pv_send_ipi_single; - smp_ops.send_ipi_mask = pv_send_ipi_mask; - } + smp_ops.init_ipi = pv_init_ipi; + smp_ops.send_ipi_single = pv_send_ipi_single; + smp_ops.send_ipi_mask = pv_send_ipi_mask; #endif - return 1; + return 0; } static void pv_cpu_reboot(void *unused) @@ -408,15 +414,9 @@ static struct notifier_block pv_reboot_nb = { int __init pv_time_init(void) { - int r, feature; + int r; - if (!cpu_has_hypervisor) - return 0; - if (!kvm_para_available()) - return 0; - - feature = read_cpucfg(CPUCFG_KVM_FEATURE); - if (!(feature & BIT(KVM_FEATURE_STEAL_TIME))) + if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) return 0; has_steal_clock = 1; -- Gitee From bffc45fd591a98f878e337ca66a06e6c247657fb Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 28 Aug 2024 12:59:50 +0800 Subject: [PATCH 37/44] LoongArch: KVM: Add KVM hypercalls documentation for LoongArch commit e5ba90abb2eb ("LoongArch: KVM: Add KVM hypercalls documentation for LoongArch") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Add documentation topic for using pv_virt when running as a guest on KVM hypervisor. Signed-off-by: Bibo Mao Signed-off-by: Xianglai Li Co-developed-by: Mingcong Bai Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/all/5c338084b1bcccc1d57dce9ddb1e7081@aosc.io/ Signed-off-by: Dandan Zhang [jc: fixed htmldocs build error] Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/4769C036576F8816+20240828045950.3484113-1-zhangdandan@uniontech.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0c7b717f34aa..4851b22f8672 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11551,6 +11551,7 @@ L: kvm@vger.kernel.org L: loongarch@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: Documentation/virt/kvm/loongarch/ F: arch/loongarch/include/asm/kvm* F: arch/loongarch/include/uapi/asm/kvm* F: arch/loongarch/kvm/ -- Gitee From cac819dfa16e6c73e2f40e381aae493ee7481720 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 6 May 2024 22:00:47 +0800 Subject: [PATCH 38/44] LoongArch: KVM: Add cpucfg area for kvm hypervisor commit 9753d3037964 ("LoongArch: KVM: Add cpucfg area for kvm hypervisor") Conflict: none Backport-reason: Synchronize upstream linux loongarch kvm patch to support loongarch virtualization. Checkpatch: no, to be consistent with upstream commit. Instruction cpucfg can be used to get processor features. And there is a trap exception when it is executed in VM mode, and also it can be used to provide cpu features to VM. On real hardware cpucfg area 0 - 20 is used by now. Here one specified area 0x40000000 -- 0x400000ff is used for KVM hypervisor to provide PV features, and the area can be extended for other hypervisors in future. This area will never be used for real HW, it is only used by software. 
Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Xianglai Li --- arch/loongarch/kvm/exit.c | 90 ++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 48 deletions(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 27b55f5b015d..19cb22da35de 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -21,6 +21,47 @@ #include #include "trace.h" +static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) +{ + int rd, rj; + unsigned int index, ret; + + if (inst.reg2_format.opcode != cpucfg_op) + return EMULATE_FAIL; + + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + + /* + * By LoongArch Reference Manual 2.2.10.5 + * Return value is 0 for undefined CPUCFG index + * + * Disable preemption since hw gcsr is accessed + */ + preempt_disable(); + switch (index) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; + break; + case CPUCFG_KVM_SIG: + /* CPUCFG emulation between 0x40000000 -- 0x400000ff */ + vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; + break; + case CPUCFG_KVM_FEATURE: + ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + vcpu->arch.gprs[rd] = ret; + break; + default: + vcpu->arch.gprs[rd] = 0; + break; + } + preempt_enable(); + + return EMULATE_DONE; +} + static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid) { unsigned long val = 0; @@ -225,52 +266,6 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) -{ - int rd, rj; - unsigned int index, ret; - unsigned long plv; - - rd = inst.reg2_format.rd; - rj = inst.reg2_format.rj; - ++vcpu->stat.cpucfg_exits; - index = vcpu->arch.gprs[rj]; - - /* - * By LoongArch Reference Manual 2.2.10.5 - * Return value is 0 for undefined cpucfg index - * - * Disable preemption since hw gcsr is accessed - */ - preempt_disable(); - plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT; - switch (index) { - case 0 ... 
(KVM_MAX_CPUCFG_REGS - 1):
-		vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
-		break;
-	case CPUCFG_KVM_SIG:
-		/*
-		 * Cpucfg emulation between 0x40000000 -- 0x400000ff
-		 * Return value with 0 if executed in user mode
-		 */
-		if ((plv & CSR_CRMD_PLV) == PLV_KERN)
-			vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
-		else
-			vcpu->arch.gprs[rd] = 0;
-		break;
-	case CPUCFG_KVM_FEATURE:
-		ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
-		vcpu->arch.gprs[rd] = ret;
-		break;
-	default:
-		vcpu->arch.gprs[rd] = 0;
-		break;
-	}
-
-	preempt_enable();
-	return EMULATE_DONE;
-}
-
 static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
 {
 	unsigned long curr_pc;
@@ -287,8 +282,7 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
 	er = EMULATE_FAIL;
 	switch (((inst.word >> 24) & 0xff)) {
 	case 0x0: /* CPUCFG GSPR */
-		if (inst.reg2_format.opcode == cpucfg_op)
-			er = kvm_emu_cpucfg(vcpu, inst);
+		er = kvm_emu_cpucfg(vcpu, inst);
 		break;
 	case 0x4: /* CSR{RD,WR,XCHG} GSPR */
 		er = kvm_handle_csr(vcpu, inst);
-- 
Gitee

From 8f1ab9b3efdc61e5d52423d6950bf1678130dfdb Mon Sep 17 00:00:00 2001
From: Yuli Wang
Date: Wed, 7 Aug 2024 17:37:14 +0800
Subject: [PATCH 39/44] LoongArch: KVM: Remove unnecessary definition of
 KVM_PRIVATE_MEM_SLOTS

commit 296b03ce389b ("LoongArch: KVM: Remove unnecessary definition of
KVM_PRIVATE_MEM_SLOTS")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

1. "KVM_PRIVATE_MEM_SLOTS" is renamed as "KVM_INTERNAL_MEM_SLOTS".

2. "KVM_INTERNAL_MEM_SLOTS" defaults to zero, so it is not necessary to
define it in LoongArch's asm/kvm_host.h.

Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=bdd1c37a315bc50ab14066c4852bc8dcf070451e
Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=b075450868dbc0950f0942617f222eeb989cad10
Reviewed-by: Bibo Mao
Signed-off-by: Wentao Guan
Signed-off-by: Yuli Wang
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 0affac2d8d07..fc0382f498b3 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -29,8 +29,6 @@
 
 #define KVM_MAX_VCPUS		256
 #define KVM_MAX_CPUCFG_REGS	21
-/* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS	0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
 #define KVM_REQ_TLB_FLUSH_GPA		KVM_ARCH_REQ(0)
-- 
Gitee

From 4d103268f6ba938604f4dd1e4b11d41ad745f355 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Mon, 6 May 2024 22:00:47 +0800
Subject: [PATCH 40/44] LoongArch: KVM: Add vcpu mapping from physical cpuid

commit 73516e9da512 ("LoongArch: KVM: Add vcpu mapping from physical cpuid")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

Physical CPUID is used for interrupt routing by irqchips such as the
ipi, msgint and eiointc interrupt controllers. The physical CPUID is
stored in the CSR register LOONGARCH_CSR_CPUID; it cannot be changed
once the vcpu is created, and the physical CPUIDs of two vcpus cannot
be the same.
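As an illustration (not part of the commit), an irqchip that needs to
route an interrupt to a physical CPUID can then resolve the target vcpu
through the map introduced below; kvm_deliver_ipi() here is only a
hypothetical consumer:

	vcpu = kvm_get_vcpu_by_cpuid(kvm, physid);	/* helper added below */
	if (vcpu)
		kvm_deliver_ipi(vcpu, action);		/* hypothetical consumer */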
Different irqchips declare different physical CPUID sizes: the max
CPUID value for the CSR LOONGARCH_CSR_CPUID on Loongson-3A5000 is 512,
the max CPUID supported by the IPI hardware is 1024, while for the
eiointc irqchip it is 256, and for the msgint irqchip it is 65536. The
smallest value among all interrupt controllers is selected for now, so
the max CPUID size is defined as 256 by KVM, which comes from the
eiointc irqchip.

Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_host.h |  11 +-
 arch/loongarch/kvm/vcpu.c             | 162 +++++++++++++-------------
 arch/loongarch/kvm/vm.c               |   7 +-
 3 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index fc0382f498b3..eaf588a05734 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -107,12 +107,13 @@ struct kvm_world_switch {
 #define MAX_PGTABLE_LEVELS	4
 
 /*
- * Physical cpu id is used for interrupt routing, there are different
+ * Physical CPUID is used for interrupt routing, there are different
  * definitions about physical cpuid on different hardwares.
- *  For LOONGARCH_CSR_CPUID register, max cpuid size if 512
- *  For IPI HW, max dest CPUID size 1024
- *  For extioi interrupt controller, max dest CPUID size is 256
- *  For MSI interrupt controller, max supported CPUID size is 65536
+ *
+ *  For LOONGARCH_CSR_CPUID register, max CPUID size is 512
+ *  For IPI hardware, max destination CPUID size 1024
+ *  For extioi interrupt controller, max destination CPUID size is 256
+ *  For msgint interrupt controller, max supported CPUID size is 65536
 *
 * Currently max CPUID is defined as 256 for KVM hypervisor, in future
 * it will be expanded to 4096, including 16 packages at most. And every
And every diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index b8d0bc516167..e9b397543fdf 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -447,128 +447,125 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return 0; } -static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) -{ - unsigned long gintc; - struct loongarch_csrs *csr = vcpu->arch.csr; - - if (get_gcsr_flag(id) & INVALID_GCSR) - return -EINVAL; - - if (id == LOONGARCH_CSR_ESTAT) { - preempt_disable(); - vcpu_load(vcpu); - /* - * Sync pending interrupts into ESTAT so that interrupt - * remains during VM migration stage - */ - kvm_deliver_intr(vcpu); - vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; - vcpu_put(vcpu); - preempt_enable(); - - /* ESTAT IP0~IP7 get from GINTC */ - gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; - *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); - return 0; - } - - /* - * Get software CSR state since software state is consistent - * with hardware for synchronous ioctl - */ - *val = kvm_read_sw_gcsr(csr, id); - - return 0; -} - static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val) { int cpuid; + struct kvm_phyid_map *map; struct loongarch_csrs *csr = vcpu->arch.csr; - struct kvm_phyid_map *map; if (val >= KVM_MAX_PHYID) return -EINVAL; - cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + spin_lock(&vcpu->kvm->arch.phyid_map_lock); - if (map->phys_map[cpuid].enabled) { - /* - * Cpuid is already set before - * Forbid changing different cpuid at runtime - */ - if (cpuid != val) { - /* - * Cpuid 0 is initial value for vcpu, maybe invalid - * unset value for vcpu - */ - if (cpuid) { - spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return -EINVAL; - } - } else { - /* Discard duplicated cpuid set */ + if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) { + /* Discard duplicated CPUID set operation */ + if (cpuid == val) { spin_unlock(&vcpu->kvm->arch.phyid_map_lock); return 0; } - } - if (map->phys_map[val].enabled) { /* - * New cpuid is already set with other vcpu - * Forbid sharing the same cpuid between different vcpus + * CPUID is already set before + * Forbid changing to a different CPUID at runtime */ - if (map->phys_map[val].vcpu != vcpu) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + if (map->phys_map[val].enabled) { + /* Discard duplicated CPUID set operation */ + if (vcpu == map->phys_map[val].vcpu) { spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return -EINVAL; + return 0; } - /* Discard duplicated cpuid set operation*/ + /* + * New CPUID is already set with other vcpu + * Forbid sharing the same CPUID between different vcpus + */ spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return 0; + return -EINVAL; } kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val); map->phys_map[val].enabled = true; map->phys_map[val].vcpu = vcpu; - if (map->max_phyid < val) - map->max_phyid = val; spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; } +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + if (cpuid >= KVM_MAX_PHYID) + return; + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if (map->phys_map[cpuid].enabled) { + map->phys_map[cpuid].vcpu = NULL; + 
map->phys_map[cpuid].enabled = false; + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); + } + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); +} + struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) { - struct kvm_phyid_map *map; + struct kvm_phyid_map *map; if (cpuid >= KVM_MAX_PHYID) return NULL; map = kvm->arch.phyid_map; - if (map->phys_map[cpuid].enabled) - return map->phys_map[cpuid].vcpu; + if (!map->phys_map[cpuid].enabled) + return NULL; - return NULL; + return map->phys_map[cpuid].vcpu; } -static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) { - int cpuid; + unsigned long gintc; struct loongarch_csrs *csr = vcpu->arch.csr; - struct kvm_phyid_map *map; - map = vcpu->kvm->arch.phyid_map; - cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); - if (cpuid >= KVM_MAX_PHYID) - return; + if (get_gcsr_flag(id) & INVALID_GCSR) + return -EINVAL; - if (map->phys_map[cpuid].enabled) { - map->phys_map[cpuid].vcpu = NULL; - map->phys_map[cpuid].enabled = false; - kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0); + if (id == LOONGARCH_CSR_ESTAT) { + preempt_disable(); + vcpu_load(vcpu); + /* + * Sync pending interrupts into ESTAT so that interrupt + * remains during VM migration stage + */ + kvm_deliver_intr(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; + vcpu_put(vcpu); + preempt_enable(); + + /* ESTAT IP0~IP7 get from GINTC */ + gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; + *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); + return 0; } + + /* + * Get software CSR state since software state is consistent + * with hardware for synchronous ioctl + */ + *val = kvm_read_sw_gcsr(csr, id); + + return 0; } static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) @@ -579,6 +576,9 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) if (get_gcsr_flag(id) & INVALID_GCSR) return -EINVAL; + if (id == LOONGARCH_CSR_CPUID) + return kvm_set_cpuid(vcpu, val); + if (id == LOONGARCH_CSR_ESTAT) { /* ESTAT IP0~IP7 inject through GINTC */ gintc = (val >> 2) & 0xff; @@ -588,8 +588,7 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc); return ret; - } else if (id == LOONGARCH_CSR_CPUID) - return kvm_set_cpuid(vcpu, val); + } kvm_write_sw_gcsr(csr, id, val); @@ -1491,6 +1490,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Set cpuid */ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id); + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); /* Start with no pending virtual guest interrupts */ csr->csrs[LOONGARCH_CSR_GINTC] = 0; @@ -1509,8 +1509,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.swtimer); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); - kfree(vcpu->arch.csr); kvm_drop_cpuid(vcpu); + kfree(vcpu->arch.csr); /* * If the vCPU is freed and reused as another vCPU, we don't want the diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 6995a36bd36e..5f65610aa9fc 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -33,13 +33,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.pgd) return -ENOMEM; - kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), - GFP_KERNEL_ACCOUNT); + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT); if (!kvm->arch.phyid_map) { free_page((unsigned long)kvm->arch.pgd); 
kvm->arch.pgd = NULL;
 		return -ENOMEM;
 	}
+	spin_lock_init(&kvm->arch.phyid_map_lock);
 
 	kvm_init_vmcs(kvm);
@@ -61,7 +61,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	for (i = 0; i <= kvm->arch.root_level; i++)
 		kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
 
-	spin_lock_init(&kvm->arch.phyid_map_lock);
 	return 0;
 }
 
@@ -69,8 +68,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_destroy_vcpus(kvm);
 	free_page((unsigned long)kvm->arch.pgd);
-	kvfree(kvm->arch.phyid_map);
 	kvm->arch.pgd = NULL;
+	kvfree(kvm->arch.phyid_map);
 	kvm->arch.phyid_map = NULL;
 }
-- 
Gitee

From f51f26150c6a9ab76f79644e8df2379f14710f06 Mon Sep 17 00:00:00 2001
From: Dandan Zhang
Date: Wed, 7 Aug 2024 17:37:14 +0800
Subject: [PATCH 41/44] LoongArch: KVM: Remove undefined a6 argument comment
 for kvm_hypercall()

commit 494b0792d962 ("LoongArch: KVM: Remove undefined a6 argument comment
for kvm_hypercall()")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

The kvm_hypercall() set for LoongArch is limited to a1-a5, so the
mention of a6 in the comment is wrong and needs to be rectified.

Reviewed-by: Bibo Mao
Signed-off-by: Wentao Guan
Signed-off-by: Dandan Zhang
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/include/asm/kvm_para.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index dbea66d7eaa6..710ca8c4b61d 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -42,9 +42,9 @@ struct kvm_steal_time {
  * Hypercall interface for KVM hypervisor
  *
  * a0: function identifier
- * a1-a6: args
+ * a1-a5: args
  * Return value will be placed in a0.
- * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ * Up to 5 arguments are passed in a1, a2, a3, a4, a5.
  */
 static __always_inline long kvm_hypercall0(u64 fid)
 {
-- 
Gitee

From 85d529bdda9f1adb892ef9badd2a7e1bccd7c001 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 6 Mar 2024 09:12:13 +0800
Subject: [PATCH 42/44] LoongArch: KVM: Remove unnecessary CSR register saving
 during enter guest

commit b99f783106ea ("LoongArch: KVM: Remove unnecessary CSR register saving
during enter guest")

Conflict: none
Backport-reason: Synchronize upstream linux loongarch kvm patch to
support loongarch virtualization.
Checkpatch: no, to be consistent with upstream commit.

Some CSR registers such as CRMD/PRMD are currently saved when entering
VM mode. However, they are never restored for actual use afterwards, so
the code that saves these CSR registers can be removed.
Reviewed-by: Tianrui Zhao
Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
Signed-off-by: Xianglai Li
---
 arch/loongarch/kvm/switch.S | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index ba976509bfe8..3634431db18a 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -213,12 +213,6 @@ SYM_FUNC_START(kvm_enter_guest)
 	/* Save host GPRs */
 	kvm_save_host_gpr a2
 
-	/* Save host CRMD, PRMD to stack */
-	csrrd	a3, LOONGARCH_CSR_CRMD
-	st.d	a3, a2, PT_CRMD
-	csrrd	a3, LOONGARCH_CSR_PRMD
-	st.d	a3, a2, PT_PRMD
-
 	addi.d	a2, a1, KVM_VCPU_ARCH
 	st.d	sp, a2, KVM_ARCH_HSP
 	st.d	tp, a2, KVM_ARCH_HTP
-- 
Gitee

From 013ea94d74be87d2d7a542313be7313c649041fd Mon Sep 17 00:00:00 2001
From: Xianglai Li
Date: Mon, 28 Oct 2024 09:20:47 +0800
Subject: [PATCH 43/44] LoongArch: Fix cpu hotplug issue

Upstream: no

On LoongArch systems there are two places where the cpu NUMA node is
set: one is the arch-specific function smp_prepare_boot_cpu(), the
other is the generic function early_numa_node_init(), and the latter
overwrites the NUMA node information. For a cpu hot-added without NUMA
information, cpu_logical_map() initially fails to find its physical
cpuid, since that cpu is not enabled in the ACPI MADT table. As a
result early_cpu_to_node() also fails to get the NUMA node of the
hot-added cpu, and the generic function early_numa_node_init()
overwrites it with an incorrect NUMA node.

APIs topo_get_cpu() and topo_add_cpu() are added here; as on other
architectures, a logical cpu is allocated while the MADT table is
parsed. When the SRAT table is parsed or a cpu is hot-added, its
logical cpu is acquired by searching all allocated logical cpus for a
matching physical id (sketched below).

This solves problems such as:
1. The boot cpu is not the first entry in the MADT table, in which case
   the first entry would otherwise be overwritten by the boot cpu later.
2. A physical cpu id not present in the MADT table is invalid; such an
   invalid physical cpu is now detected during later SRAT/hot-add cpu
   parsing.
3. For a hot-added cpu, its logical cpu is allocated during MADT table
   parsing, so early_cpu_to_node() can be used for the hot-added cpu
   and cpu_to_node() returns the correct node for it.
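For illustration, the lookup described above amounts to something like
the following sketch (the helper name comes from the description above,
but the body is only an assumption, not the actual implementation):

	/*
	 * Hedged sketch: find the logical cpu already allocated for a
	 * physical cpu id; on a miss the caller would fall back to
	 * allocating one via topo_add_cpu().
	 */
	static int topo_get_cpu(int physid)
	{
		int cpu;

		for_each_possible_cpu(cpu)
			if (cpu_logical_map(cpu) == physid)
				return cpu;

		return -ENOENT;
	}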
Signed-off-by: Bibo Mao Signed-off-by: Xianglai Li --- arch/loongarch/include/asm/kvm_csr.h | 5 ----- arch/loongarch/include/asm/kvm_host.h | 10 ---------- arch/loongarch/include/asm/kvm_vcpu.h | 2 -- 3 files changed, 17 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h index c3d3a6359ab8..4a76ce796f1f 100644 --- a/arch/loongarch/include/asm/kvm_csr.h +++ b/arch/loongarch/include/asm/kvm_csr.h @@ -211,11 +211,6 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, csr->csrs[gid] |= val & _mask; } -#define KVM_PMU_PLV_ENABLE (CSR_PERFCTRL_PLV0 | \ - CSR_PERFCTRL_PLV1 | \ - CSR_PERFCTRL_PLV2 | \ - CSR_PERFCTRL_PLV3) - #define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | \ CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index eaf588a05734..f2f4813cc8c3 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -292,16 +292,6 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) return arch->cpucfg[2] & CPUCFG2_LASX; } -static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) -{ - return arch->cpucfg[6] & CPUCFG6_PMP; -} - -static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch) -{ - return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT; -} - static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch) { return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 4a6a778bbe11..d7e8f7d50ee0 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -75,8 +75,6 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif -int kvm_own_pmu(struct kvm_vcpu *vcpu); - #ifdef CONFIG_CPU_HAS_LBT int kvm_own_lbt(struct kvm_vcpu *vcpu); #else -- Gitee From 364b05f0da91602007ef0fb32b2d6c6addea8767 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Thu, 8 Aug 2024 17:13:07 +0800 Subject: [PATCH 44/44] LoongArch: add iommu support Upstream: no Added iommu support for loongarch Signed-off-by: Xianglai Li --- arch/loongarch/configs/loongson3_defconfig | 3 + arch/loongarch/include/asm/device.h | 36 + arch/loongarch/kvm/Kconfig | 1 + drivers/iommu/Kconfig | 15 +- drivers/iommu/Makefile | 1 + drivers/iommu/dma-iommu.c | 4 +- drivers/iommu/loongarch_iommu.c | 1728 ++++++++++++++++++++ drivers/iommu/loongarch_iommu.h | 184 +++ drivers/pci/quirks.c | 4 + drivers/vfio/Kconfig | 2 +- 10 files changed, 1974 insertions(+), 4 deletions(-) create mode 100644 arch/loongarch/include/asm/device.h create mode 100644 drivers/iommu/loongarch_iommu.c create mode 100644 drivers/iommu/loongarch_iommu.h diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index c65573d4a180..8b5976ee228f 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -2200,3 +2200,6 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_STRICT_DEVMEM is not set # CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_LOONGARCH_IOMMU=m +CONFIG_CMDLINE_EXTEND=y +CONFIG_CMDLINE="vfio_iommu_type1.allow_unsafe_interrupts=1 nokaslr" diff --git a/arch/loongarch/include/asm/device.h b/arch/loongarch/include/asm/device.h new file mode 100644 index 000000000000..30cc6b610335 --- 
/dev/null
+++ b/arch/loongarch/include/asm/device.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LOONGARCH_DEVICE_H
+#define _ASM_LOONGARCH_DEVICE_H
+
+struct dev_archdata {
+	/* hook for IOMMU specific extension */
+	void *iommu;
+	struct bus_dma_region *dma_range_map;
+	/*
+	 * On some old 7A chipsets, the dma address differs from the
+	 * physical address, mainly in the node id: for a dma address
+	 * the node id starts at bit 36, for a physical address at bit
+	 * 44. The remaining address bits below the node id are the same.
+	 */
+	unsigned long dma_node_mask;
+	unsigned int dma_node_off;
+};
+
+struct pdev_archdata {
+};
+
+struct dma_domain {
+	struct list_head node;
+	const struct dma_map_ops *dma_ops;
+	int domain_nr;
+};
+void add_dma_domain(struct dma_domain *domain);
+void del_dma_domain(struct dma_domain *domain);
+
+#endif /* _ASM_LOONGARCH_DEVICE_H*/
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index 461a465e49fd..e899d96f4da6 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -35,6 +35,7 @@ config KVM
 	select SCHED_INFO
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
+	select KVM_VFIO
 	help
 	  Support hosting virtualized guest machines using
 	  hardware virtualization extensions. You will need
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b49f20ab20d1..b1492ecfff0b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -150,7 +150,7 @@ config OF_IOMMU
 
 # IOMMU-agnostic DMA-mapping layer
 config IOMMU_DMA
-	def_bool ARM64 || IA64 || X86
+	def_bool ARM64 || IA64 || X86 || LOONGARCH
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
@@ -498,4 +498,17 @@ config SPRD_IOMMU
 	  Say Y here if you want to use the multimedia devices listed above.
 
+# LOONGARCH IOMMU support
+config LOONGARCH_IOMMU
+	tristate "LOONGARCH IOMMU support"
+	select IOMMU_API
+	select IOMMU_DEFAULT_PASSTHROUGH
+	depends on LOONGARCH
+	help
+	  With this option you can enable support for LOONGARCH IOMMU hardware
+	  in your system. An IOMMU is a hardware component which provides
+	  remapping of DMA memory accesses from devices. With a LOONGARCH
+	  IOMMU you can isolate the DMA memory of different devices and
+	  protect the system from misbehaving device drivers or hardware.
+ endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 769e43d780ce..3f4592bbd7f9 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o +obj-$(CONFIG_LOONGARCH_IOMMU) += loongarch_iommu.o diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index b3a70eb8a349..70349fb5e890 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1743,7 +1743,7 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dma_ops = { +static const struct dma_map_ops iommu_dmafops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1786,7 +1786,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; - dev->dma_ops = &iommu_dma_ops; + dev->dma_ops = &iommu_dmafops; } return; diff --git a/drivers/iommu/loongarch_iommu.c b/drivers/iommu/loongarch_iommu.c new file mode 100644 index 000000000000..7dfc6459045b --- /dev/null +++ b/drivers/iommu/loongarch_iommu.c @@ -0,0 +1,1728 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2024 Loongson Technology Ltd. + * Author: Lv Chen + * Wang Yang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "loongarch_iommu.h" + +MODULE_LICENSE("GPL"); + +#define LOOP_TIMEOUT 100000 + +#define IVRS_HEADER_LENGTH 48 +#define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_ACPI_HID 0xf0 + +#define IVHD_HEAD_TYPE10 0x10 +#define IVHD_HEAD_TYPE11 0x11 +#define IVHD_HEAD_TYPE40 0x40 + +#define MAX_BDF_NUM 0xffff + +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 efr_attr; + + /* Following only valid on IVHD type 11h and 40h */ + u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ + u64 res; +} __packed; + +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. 
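+ * The entry layout and the IVHD_DEV_* type codes defined above follow
+ * the IVHD device-entry format of the IVRS ACPI table (originally an
+ * AMD IOMMU table), which this driver reuses to describe its topology.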
+ */ +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; + u32 hidh; + u64 cid; + u8 uidf; + u8 uidl; + u8 uid; +} __packed; + +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + +LIST_HEAD(la_rlookup_iommu_list); +LIST_HEAD(la_iommu_list); /* list of all loongarch + * IOMMUs in the system + */ + +static u32 rlookup_table_size; /* size if the rlookup table */ +static int la_iommu_target_ivhd_type; +u16 la_iommu_last_bdf; /* largest PCI device id + * we have to handle + */ + +int loongarch_iommu_disable; + +#define iommu_write_regl(iommu, off, val) \ + writel(val, iommu->confbase + off) +#define iommu_read_regl(iommu, off) readl(iommu->confbase + off) + +static void iommu_translate_disable(struct loongarch_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Disable */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val &= ~(1 << 31); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static void iommu_translate_enable(struct loongarch_iommu *iommu) +{ + u32 val = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Enable use mem */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (1 << 29); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Enable */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (1 << 31); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static bool la_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + return false; + } +} + +static struct dom_info *to_dom_info(struct iommu_domain *dom) +{ + return container_of(dom, struct dom_info, domain); +} + +static int update_dev_table(struct la_iommu_dev_data *dev_data, int flag) +{ + u32 val = 0; + int index; + unsigned short bdf; + struct loongarch_iommu *iommu; + u16 domain_id; + + if (dev_data == NULL) { + pr_err("%s dev_data is NULL", __func__); + return 0; + } + + if (dev_data->iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + if (dev_data->iommu_entry == NULL) { + pr_err("%s iommu_entry is NULL", __func__); + return 0; + } + + iommu = dev_data->iommu; + domain_id = dev_data->iommu_entry->id; + bdf = dev_data->bdf; + + /* Set device table */ + if (flag) { + index = find_first_zero_bit(iommu->devtable_bitmap, + MAX_ATTACHED_DEV_ID); + if (index < MAX_ATTACHED_DEV_ID) { + __set_bit(index, iommu->devtable_bitmap); + dev_data->index = index; + } else { + pr_err("%s get id from dev table failed\n", __func__); + return 0; + } + + pr_info("%s bdf %x domain_id %d iommu devid %x iommu segment %d flag %x\n", + __func__, bdf, domain_id, iommu->devid, + iommu->segment, flag); + + val = bdf & 0xffff; + val |= ((domain_id & 0xf) << 16); /* domain id */ + val |= ((index & 0xf) << 24); /* index */ + val |= (0x1 << 20); /* valid */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = (0x1 << 31) | (0xf << 0); + val |= (0x1 << 29); /* 1: use main memory */ + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); + } else { + /* Flush device table */ + index = 
dev_data->index; + pr_info("%s bdf %x domain_id %d iommu devid %x iommu segment %d flag %x\n", + __func__, bdf, domain_id, iommu->devid, + iommu->segment, flag); + + val = iommu_read_regl(iommu, LA_IOMMU_EIVDB); + val &= ~(0xffffffff); + val |= ((index & 0xf) << 24); /* index */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (0x1 << 29); /* 1: use main memory */ + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + if (index < MAX_ATTACHED_DEV_ID) + __clear_bit(index, iommu->devtable_bitmap); + } + return 0; +} + +static void flush_iotlb(struct loongarch_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Flush all tlb */ + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + val &= ~0x1f; + val |= 0x5; + iommu_write_regl(iommu, LA_IOMMU_VBTC, val); +} + +static int flush_pgtable_is_busy(struct loongarch_iommu *iommu) +{ + u32 val; + + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + return val & IOMMU_PGTABLE_BUSY; +} + +static int iommu_flush_iotlb(struct loongarch_iommu *iommu) +{ + u32 retry = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + flush_iotlb(iommu); + while (flush_pgtable_is_busy(iommu)) { + if (retry == LOOP_TIMEOUT) { + pr_err("LA-IOMMU: iotlb flush busy\n"); + return -EIO; + } + retry++; + udelay(1); + } + iommu_translate_enable(iommu); + return 0; +} + +static void la_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct dom_info *priv = to_dom_info(domain); + struct iommu_info *info; + + spin_lock(&priv->lock); + list_for_each_entry(info, &priv->iommu_devlist, list) + iommu_flush_iotlb(info->iommu); + spin_unlock(&priv->lock); +} + +static void do_attach(struct iommu_info *info, struct la_iommu_dev_data *dev_data) +{ + if (dev_data->count) + return; + + dev_data->count++; + dev_data->iommu_entry = info; + + spin_lock(&info->devlock); + list_add(&dev_data->list, &info->dev_list); + info->dev_cnt += 1; + spin_unlock(&info->devlock); + + update_dev_table(dev_data, 1); + if (info->dev_cnt > 0) + iommu_flush_iotlb(dev_data->iommu); +} + +static void do_detach(struct la_iommu_dev_data *dev_data) +{ + struct iommu_info *info; + + if (!dev_data || !dev_data->iommu_entry || (dev_data->count == 0)) { + pr_err("%s dev_data or iommu_entry is NULL", __func__); + return; + } + dev_data->count--; + info = dev_data->iommu_entry; + list_del(&dev_data->list); + info->dev_cnt -= 1; + update_dev_table(dev_data, 0); + dev_data->iommu_entry = NULL; +} + +static void detach_all_dev_by_domain(struct iommu_info *info) +{ + struct la_iommu_dev_data *dev_data = NULL; + + spin_lock(&info->devlock); + while (!list_empty(&info->dev_list)) { + dev_data = list_first_entry(&info->dev_list, + struct la_iommu_dev_data, list); + do_detach(dev_data); + } + spin_unlock(&info->devlock); +} + +static int domain_id_alloc(struct loongarch_iommu *iommu) +{ + int id = -1; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return id; + } + spin_lock(&iommu->domain_bitmap_lock); + id = find_first_zero_bit(iommu->domain_bitmap, MAX_DOMAIN_ID); + if (id < MAX_DOMAIN_ID) + __set_bit(id, iommu->domain_bitmap); + spin_unlock(&iommu->domain_bitmap_lock); + if (id >= MAX_DOMAIN_ID) + pr_err("LA-IOMMU: Alloc domain id over max domain id\n"); + return id; +} + +static void domain_id_free(struct loongarch_iommu *iommu, int id) +{ + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + if ((id >= 0) && (id < 
MAX_DOMAIN_ID)) { + spin_lock(&iommu->domain_bitmap_lock); + __clear_bit(id, iommu->domain_bitmap); + spin_unlock(&iommu->domain_bitmap_lock); + } +} + +/* + * Check whether the system has a priv. + * If yes, it returns 1 and if not, it returns 0 + */ +static int has_dom(struct loongarch_iommu *iommu) +{ + int ret = 0; + + spin_lock(&iommu->dom_info_lock); + while (!list_empty(&iommu->dom_list)) { + ret = 1; + break; + } + spin_unlock(&iommu->dom_info_lock); + return ret; +} + +/* + * This function adds a private domain to the global domain list + */ +static struct dom_entry *find_domain_in_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry, *found = NULL; + + if (priv == NULL) + return found; + spin_lock(&iommu->dom_info_lock); + list_for_each_entry(entry, &iommu->dom_list, list) { + if (entry->domain_info == priv) { + found = entry; + break; + } + } + spin_unlock(&iommu->dom_info_lock); + return found; +} + +static void add_domain_to_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry; + + if (priv == NULL) + return; + entry = find_domain_in_list(iommu, priv); + if (entry != NULL) + return; + entry = kzalloc(sizeof(struct dom_entry), GFP_KERNEL); + entry->domain_info = priv; + spin_lock(&iommu->dom_info_lock); + list_add(&entry->list, &iommu->dom_list); + spin_unlock(&iommu->dom_info_lock); +} + +static void del_domain_from_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry; + + entry = find_domain_in_list(iommu, priv); + if (entry == NULL) + return; + spin_lock(&iommu->dom_info_lock); + list_del(&entry->list); + spin_unlock(&iommu->dom_info_lock); + kfree(entry); +} + +static void free_pagetable(void *pt_base, int level) +{ + int i; + unsigned long *ptep, *pgtable; + + ptep = pt_base; + if (level == IOMMU_PT_LEVEL1) { + kfree(pt_base); + return; + } + for (i = 0; i < IOMMU_PTRS_PER_LEVEL; i++, ptep++) { + if (!iommu_pte_present(ptep)) + continue; + + if (((level - 1) == IOMMU_PT_LEVEL1) && iommu_pte_huge(ptep)) { + *ptep = 0; + continue; + } + + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + free_pagetable(pgtable, level - 1); + } + kfree(pt_base); +} + +static void iommu_free_pagetable(struct dom_info *info) +{ + free_pagetable(info->pgd, IOMMU_LEVEL_MAX); + info->pgd = NULL; +} + +static struct dom_info *alloc_dom_info(void) +{ + struct dom_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return NULL; + + info->pgd = kzalloc(IOMMU_PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (info->pgd == NULL) { + kfree(info); + return NULL; + } + INIT_LIST_HEAD(&info->iommu_devlist); + spin_lock_init(&info->lock); + mutex_init(&info->ptl_lock); + info->domain.geometry.aperture_start = 0; + info->domain.geometry.aperture_end = ~0ULL; + info->domain.geometry.force_aperture = true; + + return info; +} + +static void dom_info_free(struct dom_info *info) +{ + if (info->pgd != NULL) { + kfree(info->pgd); + info->pgd = NULL; + } + kfree(info); +} + +static struct iommu_domain *la_iommu_domain_alloc(unsigned int type) +{ + struct dom_info *info; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + info = alloc_dom_info(); + if (info == NULL) + return NULL; + break; + default: + return NULL; + } + return &info->domain; +} + +void domain_deattach_iommu(struct dom_info *priv, struct iommu_info *info) +{ + if ((priv == NULL) || (info == NULL) || + (info->dev_cnt != 0) || (info->iommu == NULL)) { + pr_err("%s invalid parameter", __func__); + return; + } + 
del_domain_from_list(info->iommu, priv); + domain_id_free(info->iommu, info->id); + spin_lock(&priv->lock); + list_del(&info->list); + spin_unlock(&priv->lock); + kfree(info); +} + +static void la_iommu_domain_free(struct iommu_domain *domain) +{ + struct dom_info *priv; + struct loongarch_iommu *iommu = NULL; + struct iommu_info *info, *tmp; + + priv = to_dom_info(domain); + spin_lock(&priv->lock); + list_for_each_entry_safe(info, tmp, &priv->iommu_devlist, list) { + if (info->dev_cnt > 0) + detach_all_dev_by_domain(info); + iommu = info->iommu; + spin_unlock(&priv->lock); + domain_deattach_iommu(priv, info); + spin_lock(&priv->lock); + iommu_flush_iotlb(iommu); + if (!has_dom(iommu)) + iommu_translate_disable(iommu); + } + spin_unlock(&priv->lock); + mutex_lock(&priv->ptl_lock); + iommu_free_pagetable(priv); + mutex_unlock(&priv->ptl_lock); + dom_info_free(priv); +} + +struct iommu_rlookup_entry *lookup_rlooptable(int pcisegment) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + list_for_each_entry(rlookupentry, &la_rlookup_iommu_list, list) { + if (rlookupentry->pcisegment == pcisegment) + return rlookupentry; + } + return NULL; +} + +struct loongarch_iommu *find_iommu_by_dev(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + struct iommu_rlookup_entry *rlookupentry = NULL; + struct loongarch_iommu *iommu = NULL; + struct pci_bus *bus = pdev->bus; + + devid = PCI_DEVID(bus->number, pdev->devfn); + pcisegment = pci_domain_nr(bus); + rlookupentry = lookup_rlooptable(pcisegment); + if (rlookupentry == NULL) { + pr_info("%s find segment %d rlookupentry failed\n", __func__, + pcisegment); + return iommu; + } + iommu = rlookupentry->rlookup_table[devid]; + if (iommu && (!iommu->confbase)) + iommu = NULL; + return iommu; +} + +struct iommu_device *iommu_init_device(struct device *dev) +{ + struct la_iommu_dev_data *dev_data; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned short devid; + struct loongarch_iommu *iommu = NULL; + struct iommu_device *iommu_dev = ERR_PTR(-ENODEV); + + if (!dev_is_pci(dev)) + return iommu_dev; + + if (dev->archdata.iommu != NULL || bus == NULL) { + pr_info("LA-IOMMU: bdf:0x%x has added\n", pdev->devfn); + return iommu_dev; + } + iommu = find_iommu_by_dev(pdev); + if (iommu == NULL) { + pci_info(pdev, "%s find iommu failed by dev\n", __func__); + return iommu_dev; + } + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return iommu_dev; + devid = PCI_DEVID(bus->number, pdev->devfn); + dev_data->bdf = devid; + + pci_info(pdev, "%s bdf %#x iommu dev id %#x\n", __func__, dev_data->bdf, iommu->devid); + /* The initial state is 0, and 1 is added only when attach dev */ + dev_data->count = 0; + dev_data->iommu = iommu; + dev_data->dev = dev; + dev->archdata.iommu = dev_data; + iommu_dev = &iommu->iommu_dev; + return iommu_dev; +} + +struct iommu_device *la_iommu_probe_device(struct device *dev) +{ + return iommu_init_device(dev); +} + +static struct iommu_group *la_iommu_device_group(struct device *dev) +{ + struct iommu_group *group; + + /* + * We don't support devices sharing stream IDs other than PCI RID + * aliases, since the necessary ID-to-device lookup becomes rather + * impractical given a potential sparse 32-bit stream ID space. 
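+	 * Hence PCI devices are grouped by their RID alias through
+	 * pci_device_group() below, and everything else falls back to a
+	 * generic per-device group.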
+ */ + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + return group; +} + +static void la_iommu_remove_device(struct device *dev) +{ + struct la_iommu_dev_data *dev_data; + + iommu_group_remove_device(dev); + dev_data = dev->archdata.iommu; + dev->archdata.iommu = NULL; + kfree(dev_data); +} + +struct iommu_info *get_iommu_info_from_dom(struct dom_info *priv, struct loongarch_iommu *iommu) +{ + struct iommu_info *info; + + spin_lock(&priv->lock); + list_for_each_entry(info, &priv->iommu_devlist, list) { + if (info->iommu == iommu) { + spin_unlock(&priv->lock); + return info; + } + } + spin_unlock(&priv->lock); + return NULL; +} + +struct iommu_info *domain_attach_iommu(struct dom_info *priv, struct loongarch_iommu *iommu) +{ + u32 dir_ctrl; + struct iommu_info *info; + unsigned long phys; + + info = get_iommu_info_from_dom(priv, iommu); + if (info) + return info; + + info = kzalloc(sizeof(struct iommu_info), GFP_KERNEL_ACCOUNT); + if (!info) + return NULL; + + INIT_LIST_HEAD(&info->dev_list); + info->iommu = iommu; + info->id = domain_id_alloc(iommu); + if (info->id == -1) { + pr_info("%s alloc id for domain failed\n", __func__); + kfree(info); + return NULL; + } + + phys = virt_to_phys(priv->pgd); + dir_ctrl = (IOMMU_LEVEL_STRIDE << 26) | (IOMMU_LEVEL_SHIFT(2) << 20); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 16) | (IOMMU_LEVEL_SHIFT(1) << 10); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 6) | IOMMU_LEVEL_SHIFT(0); + iommu_write_regl(iommu, LA_IOMMU_DIR_CTRL(info->id), dir_ctrl); + iommu_write_regl(iommu, LA_IOMMU_PGD_HI(info->id), phys >> 32); + iommu_write_regl(iommu, LA_IOMMU_PGD_LO(info->id), phys & UINT_MAX); + + spin_lock(&priv->lock); + list_add(&info->list, &priv->iommu_devlist); + spin_unlock(&priv->lock); + add_domain_to_list(iommu, priv); + return info; +} + +static struct la_iommu_dev_data *get_devdata_from_iommu_info(struct dom_info *info, + struct loongarch_iommu *iommu, unsigned long bdf) +{ + struct iommu_info *entry; + struct la_iommu_dev_data *dev_data, *found = NULL; + + entry = get_iommu_info_from_dom(info, iommu); + if (!entry) + return found; + spin_lock(&entry->devlock); + list_for_each_entry(dev_data, &entry->dev_list, list) { + if (dev_data->bdf == bdf) { + found = dev_data; + break; + } + } + spin_unlock(&entry->devlock); + return found; +} +static void la_iommu_detach_dev(struct device *dev); + +static int la_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct dom_info *priv = to_dom_info(domain); + struct pci_dev *pdev = to_pci_dev(dev); + unsigned char busnum = pdev->bus->number; + struct la_iommu_dev_data *dev_data; + struct loongarch_iommu *iommu; + struct iommu_info *info; + unsigned short bdf; + + la_iommu_detach_dev(dev); + + if (domain == NULL) + return 0; + + bdf = PCI_DEVID(busnum, pdev->devfn); + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return 0; + } + + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return 0; + } + + pci_info(pdev, "%s bdf %#x priv %lx iommu devid %#x\n", __func__, + bdf, (unsigned long)priv, iommu->devid); + dev_data = get_devdata_from_iommu_info(priv, iommu, bdf); + if (dev_data) { + pci_info(pdev, "LA-IOMMU: bdf 0x%x devfn %x has attached, count:0x%x\n", + bdf, pdev->devfn, dev_data->count); + return 0; + } + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + + info = domain_attach_iommu(priv, 
iommu); + if (!info) { + pci_info(pdev, "domain attach iommu failed\n"); + return 0; + } + dev_data->domain = domain; + do_attach(info, dev_data); + return 0; +} + +static void la_iommu_detach_dev(struct device *dev) +{ + struct iommu_domain *domain; + struct dom_info *priv; + struct pci_dev *pdev = to_pci_dev(dev); + unsigned char busnum = pdev->bus->number; + struct la_iommu_dev_data *dev_data; + struct loongarch_iommu *iommu; + struct iommu_info *iommu_entry = NULL; + unsigned short bdf; + + bdf = PCI_DEVID(busnum, pdev->devfn); + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return; + } + + domain = dev_data->domain; + if (domain == NULL) + return; + + priv = to_dom_info(domain); + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return; + } + dev_data = get_devdata_from_iommu_info(priv, iommu, bdf); + if (dev_data == NULL) { + pci_info(pdev, "%s bdf %#x hasn't attached\n", + __func__, bdf); + return; + } + + iommu = dev_data->iommu; + dev_data->dev = NULL; + iommu_entry = get_iommu_info_from_dom(priv, iommu); + if (iommu_entry == NULL) { + pci_info(pdev, "%s get iommu_entry failed\n", __func__); + return; + } + + spin_lock(&iommu_entry->devlock); + do_detach(dev_data); + spin_unlock(&iommu_entry->devlock); + + pci_info(pdev, "%s iommu devid %x sigment %x\n", __func__, + iommu->devid, iommu->segment); +} + +static unsigned long *iommu_get_pte(void *pt_base, unsigned long vaddr, int level) +{ + int i; + unsigned long *ptep, *pgtable; + + if (level > (IOMMU_LEVEL_MAX - 1)) + return NULL; + pgtable = pt_base; + for (i = IOMMU_LEVEL_MAX - 1; i >= level; i--) { + ptep = iommu_pte_offset(pgtable, vaddr, i); + if (!iommu_pte_present(ptep)) + break; + if (iommu_pte_huge(ptep)) + break; + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + } + return ptep; +} + +static int iommu_get_page_table(unsigned long *ptep) +{ + void *addr; + unsigned long pte; + + if (!iommu_pte_present(ptep)) { + addr = kzalloc(IOMMU_PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!addr) + return -ENOMEM; + pte = virt_to_phys(addr) & IOMMU_PAGE_MASK; + pte |= IOMMU_PTE_RW; + *ptep = pte; + } + return 0; +} + +static size_t iommu_page_map(void *pt_base, + unsigned long start, unsigned long end, + phys_addr_t paddr, int level) +{ + unsigned long next, old, step; + unsigned long pte, *ptep, *pgtable; + int ret, huge; + + old = start; + ptep = iommu_pte_offset(pt_base, start, level); + if (level == IOMMU_PT_LEVEL0) { + paddr = paddr & IOMMU_PAGE_MASK; + do { + pte = paddr | IOMMU_PTE_RW; + *ptep = pte; + ptep++; + start += IOMMU_PAGE_SIZE; + paddr += IOMMU_PAGE_SIZE; + } while (start < end); + + return start - old; + } + + do { + next = iommu_ptable_end(start, end, level); + step = next - start; + huge = 0; + if ((level == IOMMU_PT_LEVEL1) && (step == IOMMU_HPAGE_SIZE)) + if (!iommu_pte_present(ptep) || iommu_pte_huge(ptep)) + huge = 1; + + if (huge) { + pte = (paddr & IOMMU_HPAGE_MASK) | + IOMMU_PTE_RW | IOMMU_PTE_HP; + *ptep = pte; + } else { + ret = iommu_get_page_table(ptep); + if (ret != 0) + break; + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + iommu_page_map(pgtable, start, next, paddr, level - 1); + } + + ptep++; + paddr += step; + start = next; + } while (start < end); + return start - old; +} + +static int domain_map_page(struct dom_info *priv, unsigned long start, + phys_addr_t paddr, size_t size) +{ + int ret = 0; + phys_addr_t end; + size_t map_size; + + end = start 
+ size; + mutex_lock(&priv->ptl_lock); + map_size = iommu_page_map(priv->pgd, start, + end, paddr, IOMMU_LEVEL_MAX - 1); + if (map_size != size) + ret = -EFAULT; + mutex_unlock(&priv->ptl_lock); + la_iommu_flush_iotlb_all(&priv->domain); + return ret; +} + +static size_t iommu_page_unmap(void *pt_base, + unsigned long start, unsigned long end, int level) +{ + unsigned long next, old; + unsigned long *ptep, *pgtable; + + old = start; + ptep = iommu_pte_offset(pt_base, start, level); + if (level == IOMMU_PT_LEVEL0) { + do { + *ptep++ = 0; + start += IOMMU_PAGE_SIZE; + } while (start < end); + } else { + do { + next = iommu_ptable_end(start, end, level); + if (!iommu_pte_present(ptep)) + continue; + + if (iommu_pte_huge(ptep)) { + if ((next - start) != IOMMU_HPAGE_SIZE) + pr_err( + "Map pte on hugepage not supported now\n"); + *ptep = 0; + } else { + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + iommu_page_unmap(pgtable, start, + next, level - 1); + } + } while (ptep++, start = next, start < end); + } + return start - old; +} + +static size_t domain_unmap_page(struct dom_info *priv, + unsigned long start, size_t size) +{ + size_t unmap_len; + unsigned long end; + + end = start + size; + mutex_lock(&priv->ptl_lock); + unmap_len = iommu_page_unmap(priv->pgd, start, + end, (IOMMU_LEVEL_MAX - 1)); + mutex_unlock(&priv->ptl_lock); + la_iommu_flush_iotlb_all(&priv->domain); + return unmap_len; +} + +static int la_iommu_map(struct iommu_domain *domain, unsigned long vaddr, + phys_addr_t paddr, size_t len, int prot, gfp_t gfp) +{ + int ret; + struct dom_info *priv = to_dom_info(domain); + + ret = domain_map_page(priv, vaddr, paddr, len); + return ret; +} + +static size_t la_iommu_unmap(struct iommu_domain *domain, unsigned long vaddr, + size_t len, struct iommu_iotlb_gather *iotlb_gather) +{ + struct dom_info *priv = to_dom_info(domain); + + return domain_unmap_page(priv, vaddr, len); +} + +static phys_addr_t _iommu_iova_to_phys(struct dom_info *info, dma_addr_t vaddr) +{ + unsigned long *ptep; + unsigned long page_size, page_mask; + phys_addr_t paddr; + + mutex_lock(&info->ptl_lock); + ptep = iommu_get_pte(info->pgd, vaddr, IOMMU_PT_LEVEL0); + mutex_unlock(&info->ptl_lock); + + if (!ptep || !iommu_pte_present(ptep)) { + pr_warn_once( + "LA-IOMMU: shadow pte is null or not present with vaddr %llx\n", + vaddr); + paddr = 0; + return paddr; + } + + if (iommu_pte_huge(ptep)) { + page_size = IOMMU_HPAGE_SIZE; + page_mask = IOMMU_HPAGE_MASK; + } else { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + } + paddr = *ptep & page_mask; + paddr |= vaddr & (page_size - 1); + return paddr; +} + +static phys_addr_t la_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t vaddr) +{ + struct dom_info *priv = to_dom_info(domain); + phys_addr_t phys; + + spin_lock(&priv->lock); + phys = _iommu_iova_to_phys(priv, vaddr); + spin_unlock(&priv->lock); + return phys; +} + +static void la_domain_set_plaform_dma_ops(struct device *dev) +{ + /* + * loongarch doesn't setup default domains because we can't hook into the + * normal probe path + */ +} + +const struct iommu_ops la_iommu_ops = { + .capable = la_iommu_capable, + .domain_alloc = la_iommu_domain_alloc, + .probe_device = la_iommu_probe_device, + .release_device = la_iommu_remove_device, + .device_group = la_iommu_device_group, + .pgsize_bitmap = LA_IOMMU_PGSIZE, + .owner = THIS_MODULE, + .set_platform_dma_ops = la_domain_set_plaform_dma_ops, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = la_iommu_attach_dev, + .map 
= la_iommu_map, + .unmap = la_iommu_unmap, + .iova_to_phys = la_iommu_iova_to_phys, + .flush_iotlb_all = la_iommu_flush_iotlb_all, + .free = la_iommu_domain_free, + } +}; + + +struct loongarch_iommu *loongarch_get_iommu_by_devid(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + struct loongarch_iommu *iommu = NULL; + struct pci_bus *bus = pdev->bus; + + devid = PCI_DEVID(bus->number, pdev->devfn); + pcisegment = pci_domain_nr(pdev->bus); + list_for_each_entry(iommu, &la_iommu_list, list) { + if ((iommu->segment == pcisegment) && + (iommu->devid == devid)) { + return iommu; + } + } + return NULL; +} + +bool check_device_compat(struct pci_dev *pdev) +{ + bool compat = true; + + if ((pdev->revision == 0) && (pdev->device == 0x7a1f)) + compat = false; + return compat; +} + +static int loongarch_iommu_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret = 1; + int bitmap_sz = 0; + int tmp; + bool compat = false; + struct loongarch_iommu *iommu = NULL; + resource_size_t base, size; + + iommu = loongarch_get_iommu_by_devid(pdev); + if (iommu == NULL) { + pci_info(pdev, "%s can't find iommu\n", __func__); + return -ENODEV; + } + + compat = check_device_compat(pdev); + if (!compat) { + pci_info(pdev, + "%s The iommu driver is not compatible with this device\n", + __func__); + return -ENODEV; + } + + iommu->pdev = pdev; + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + if (!request_mem_region(base, size, "loongarch_iommu")) { + pci_err(pdev, + "%d can't reserve mmio registers base %llx size %llx\n", + __LINE__, base, size); + return -ENOMEM; + } + iommu->confbase_phy = base; + iommu->conf_size = size; + iommu->confbase = ioremap(base, size); + if (iommu->confbase == NULL) { + pci_info(pdev, "%s iommu pci dev bar0 is NULL\n", __func__); + return ret; + } + + pr_info("iommu confbase %llx pgtsize %llx\n", + (u64)iommu->confbase, size); + tmp = MAX_DOMAIN_ID / 8; + bitmap_sz = (MAX_DOMAIN_ID % 8) ? (tmp + 1) : tmp; + iommu->domain_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->domain_bitmap == NULL) { + pr_err("LA-IOMMU: domain bitmap alloc err bitmap_sz:%d\n", + bitmap_sz); + goto out_err; + } + + tmp = MAX_ATTACHED_DEV_ID / 8; + bitmap_sz = (MAX_ATTACHED_DEV_ID % 8) ? 
(tmp + 1) : tmp; + iommu->devtable_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->devtable_bitmap == NULL) { + pr_err("LA-IOMMU: devtable bitmap alloc err bitmap_sz:%d\n", + bitmap_sz); + goto out_err_1; + } + + ret = iommu_device_sysfs_add(&iommu->iommu_dev, &pdev->dev, + NULL, "ivhd-%#x", iommu->devid); + iommu_device_register(&iommu->iommu_dev, &la_iommu_ops, NULL); + return 0; + +out_err_1: + iommu->pdev = NULL; + iounmap(iommu->confbase); + iommu->confbase = NULL; + release_mem_region(iommu->confbase_phy, iommu->conf_size); + iommu->confbase_phy = 0; + iommu->conf_size = 0; + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; +out_err: + return ret; +} + +static void loongarch_iommu_remove(struct pci_dev *pdev) +{ + struct loongarch_iommu *iommu = NULL; + + iommu = loongarch_get_iommu_by_devid(pdev); + if (iommu == NULL) + return; + if (iommu->domain_bitmap != NULL) { + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; + } + if (iommu->devtable_bitmap != NULL) { + kfree(iommu->devtable_bitmap); + iommu->devtable_bitmap = NULL; + } + if (iommu->confbase != NULL) { + iounmap(iommu->confbase); + iommu->confbase = NULL; + } + if (iommu->confbase_phy != 0) { + release_mem_region(iommu->confbase_phy, iommu->conf_size); + iommu->confbase_phy = 0; + iommu->conf_size = 0; + } +} + +static int __init check_ivrs_checksum(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table; + + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) { + /* ACPI table corrupt */ + pr_err("IVRS invalid checksum\n"); + return -ENODEV; + } + return 0; +} + +struct iommu_rlookup_entry *create_rlookup_entry(int pcisegment) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = kzalloc(sizeof(struct iommu_rlookup_entry), + GFP_KERNEL); + if (rlookupentry == NULL) + return rlookupentry; + + rlookupentry->pcisegment = pcisegment; + /* IOMMU rlookup table - find the IOMMU for a specific device */ + rlookupentry->rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (rlookupentry->rlookup_table == NULL) { + kfree(rlookupentry); + rlookupentry = NULL; + } else { + list_add(&rlookupentry->list, &la_rlookup_iommu_list); + } + return rlookupentry; +} + +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct loongarch_iommu *iommu, + u16 devid) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry == NULL) + rlookupentry = create_rlookup_entry(iommu->segment); + if (rlookupentry != NULL) + rlookupentry->rlookup_table[devid] = iommu; +} + +static inline u32 get_ivhd_header_size(struct ivhd_header *h) +{ + u32 size = 0; + + switch (h->type) { + case IVHD_HEAD_TYPE10: + size = 24; + break; + case IVHD_HEAD_TYPE11: + case IVHD_HEAD_TYPE40: + size = 40; + break; + } + return size; +} + +static inline void update_last_devid(u16 devid) +{ + if (devid > la_iommu_last_bdf) + la_iommu_last_bdf = devid; +} + +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + u32 type = ((struct ivhd_entry *)ivhd)->type; + + if (type < 0x80) { + return 0x04 << (*ivhd >> 6); + } else if (type == IVHD_DEV_ACPI_HID) { + /* For ACPI_HID, offset 21 is uid len */ + return *((u8 *)ivhd + 21) + 22; + } + return 0; +} + +/* + * After reading the highest device id from the IOMMU PCI capability header + * this 
function looks if there is a higher device id defined in the ACPI table + */ +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + u32 ivhd_size = get_ivhd_header_size(h); + + if (!ivhd_size) { + pr_err("la-iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + p += ivhd_size; + end += h->length; + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(MAX_BDF_NUM); + break; + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); + break; + default: + break; + } + p += ivhd_entry_length(p); + } + + WARN_ON(p != end); + + return 0; +} + +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + if (h->type == la_iommu_target_ivhd_type) { + int ret = find_last_devid_from_ivhd(h); + + if (ret) + return ret; + } + + if (h->length == 0) + break; + + p += h->length; + } + + if (p != end) + return -EINVAL; + return 0; +} + +/* + * Takes a pointer to an loongarch IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ +static int __init init_iommu_from_acpi(struct loongarch_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p; + u16 devid = 0, devid_start = 0; + u32 dev_i; + struct ivhd_entry *e; + u32 ivhd_size; + + /* + * Done. Now parse the device entries + */ + ivhd_size = get_ivhd_header_size(h); + if (!ivhd_size) { + pr_err("loongarch iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + if (h->length == 0) + return -EINVAL; + + p += ivhd_size; + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = 0; dev_i <= la_iommu_last_bdf; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + case IVHD_DEV_SELECT: + + pr_info(" DEV_SELECT\t\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + set_iommu_for_device(iommu, devid); + break; + case IVHD_DEV_SELECT_RANGE_START: + + pr_info(" DEV_SELECT_RANGE_START\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid_start = e->devid; + break; + case IVHD_DEV_RANGE_END: + + pr_info(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + default: + break; + } + + p += ivhd_entry_length(p); + } + + return 0; +} + +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. 
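+ * In this driver, translation is actually switched on when the first
+ * device is attached: iommu_flush_iotlb() finishes by calling
+ * iommu_translate_enable().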
+/*
+ * This function glues together the initialization for one IOMMU and links
+ * it into the driver's internal data structures. It does NOT enable the
+ * IOMMU; that is done afterwards.
+ */
+static int __init init_iommu_one(struct loongarch_iommu *iommu,
+		struct ivhd_header *h)
+{
+	int ret;
+	struct iommu_rlookup_entry *rlookupentry = NULL;
+
+	spin_lock_init(&iommu->domain_bitmap_lock);
+	spin_lock_init(&iommu->dom_info_lock);
+
+	/* Add IOMMU to internal data structures */
+	INIT_LIST_HEAD(&iommu->dom_list);
+
+	list_add_tail(&iommu->list, &la_iommu_list);
+
+	/*
+	 * Copy data from ACPI table entry to the iommu struct
+	 */
+	iommu->devid = h->devid;
+	iommu->segment = h->pci_seg;
+	ret = init_iommu_from_acpi(iommu, h);
+	if (ret) {
+		pr_err("%s: init iommu from acpi failed\n", __func__);
+		return ret;
+	}
+	rlookupentry = lookup_rlooptable(iommu->segment);
+	if (rlookupentry != NULL) {
+		/*
+		 * Make sure IOMMU is not considered to translate itself.
+		 * The IVRS table tells us so, but this is a lie!
+		 */
+		rlookupentry->rlookup_table[iommu->devid] = NULL;
+	}
+	return 0;
+}
+
+/*
+ * Iterates over all IOMMU entries in the ACPI table, allocates the
+ * IOMMU structure and initializes it with init_iommu_one()
+ */
+static int __init init_iommu_all(struct acpi_table_header *table)
+{
+	u8 *p = (u8 *)table, *end = (u8 *)table;
+	struct ivhd_header *h;
+	struct loongarch_iommu *iommu;
+	int ret;
+
+	end += table->length;
+	p += IVRS_HEADER_LENGTH;
+
+	while (p < end) {
+		h = (struct ivhd_header *)p;
+
+		if (h->length == 0)
+			break;
+
+		if (h->type == la_iommu_target_ivhd_type) {
+			pr_info("device: %02x:%02x.%01x seg: %d\n",
+				PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
+				PCI_FUNC(h->devid), h->pci_seg);
+
+			iommu = kzalloc(sizeof(struct loongarch_iommu),
+					GFP_KERNEL);
+			if (iommu == NULL)
+				return -ENOMEM;
+
+			ret = init_iommu_one(iommu, h);
+			if (ret) {
+				kfree(iommu);
+				pr_err("%s: init iommu failed\n", __func__);
+				return ret;
+			}
+		}
+		p += h->length;
+	}
+	if (p != end)
+		return -EINVAL;
+	return 0;
+}
+
+/**
+ * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
+ * @ivrs: Pointer to the IVRS header
+ *
+ * This function searches through all IVHD blocks and returns the highest
+ * supported IVHD type.
+ */
+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+	u8 *base = (u8 *)ivrs;
+	struct ivhd_header *ivhd = (struct ivhd_header *)
+		(base + IVRS_HEADER_LENGTH);
+	u8 last_type = ivhd->type;
+	u16 devid = ivhd->devid;
+
+	while (((u8 *)ivhd - base < ivrs->length) &&
+			(ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED) &&
+			(ivhd->length > 0)) {
+		u8 *p = (u8 *)ivhd;
+
+		if (ivhd->devid == devid)
+			last_type = ivhd->type;
+		ivhd = (struct ivhd_header *)(p + ivhd->length);
+	}
+	return last_type;
+}
+
+static inline unsigned long tbl_size(int entry_size)
+{
+	unsigned int shift = PAGE_SHIFT +
+		get_order(((int)la_iommu_last_bdf + 1) * entry_size);
+
+	return 1UL << shift;
+}
+
+static int __init loongarch_iommu_ivrs_init(void)
+{
+	struct acpi_table_header *ivrs_base;
+	acpi_status status;
+	int ret = 0;
+
+	status = acpi_get_table("IVRS", 0, &ivrs_base);
+	if (status == AE_NOT_FOUND) {
+		pr_info("%s: IVRS table not present\n", __func__);
+		return -ENODEV;
+	} else if (ACPI_FAILURE(status)) {
+		pr_err("%s: IVRS table error: %s\n", __func__,
+		       acpi_format_exception(status));
+		return -EINVAL;
+	}
+
+	/*
+	 * Validate checksum here so we don't need to do it when
+	 * we actually parse the table
+	 */
+	ret = check_ivrs_checksum(ivrs_base);
+	if (ret)
+		goto out;
+
+	la_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
+	pr_info("Using IVHD type %#x\n", la_iommu_target_ivhd_type);
+
+	/*
+	 * First parse the ACPI tables to find the largest Bus/Dev/Func we
+	 * need to handle. Based on this information, the shared data
+	 * structures for the IOMMUs in the system are allocated.
+	 */
+	ret = find_last_devid_acpi(ivrs_base);
+	if (ret) {
+		pr_err("%s: find last devid failed\n", __func__);
+		goto out;
+	}
+
+	rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
+
+	/*
+	 * Now that the data structures are allocated and basically
+	 * initialized, start the real ACPI table scan.
+	 */
+	ret = init_iommu_all(ivrs_base);
+out:
+	/* Don't leak any ACPI memory */
+	acpi_put_table(ivrs_base);
+	ivrs_base = NULL;
+	return ret;
+}
+
+static void free_iommu_rlookup_entry(void)
+{
+	struct loongarch_iommu *iommu = NULL;
+	struct iommu_rlookup_entry *rlookupentry = NULL;
+
+	while (!list_empty(&la_iommu_list)) {
+		iommu = list_first_entry(&la_iommu_list, struct loongarch_iommu, list);
+		list_del(&iommu->list);
+		kfree(iommu);
+	}
+
+	while (!list_empty(&la_rlookup_iommu_list)) {
+		rlookupentry = list_first_entry(&la_rlookup_iommu_list,
+				struct iommu_rlookup_entry, list);
+
+		list_del(&rlookupentry->list);
+		if (rlookupentry->rlookup_table != NULL) {
+			free_pages((unsigned long)rlookupentry->rlookup_table,
+				   get_order(rlookup_table_size));
+			rlookupentry->rlookup_table = NULL;
+		}
+		kfree(rlookupentry);
+	}
+}
+
+static int __init la_iommu_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	while (*str) {
+		if (!strncmp(str, "on", 2)) {
+			loongarch_iommu_disable = 0;
+			pr_info("IOMMU enabled\n");
+		} else if (!strncmp(str, "off", 3)) {
+			loongarch_iommu_disable = 1;
+			pr_info("IOMMU disabled\n");
+		}
+		str += strcspn(str, ",");
+		while (*str == ',')
+			str++;
+	}
+	/* Returning 1 tells the boot code this option has been handled */
+	return 1;
+}
+__setup("loongarch_iommu=", la_iommu_setup);
+
+static const struct pci_device_id loongson_iommu_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x3c0f) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a1f) },
+	{ 0, }
+};
+
+static struct pci_driver loongarch_iommu_driver = {
+	.name = "loongarch-iommu",
+	.id_table = loongson_iommu_pci_tbl,
+	.probe = loongarch_iommu_probe,
+	.remove = loongarch_iommu_remove,
+};
+
+static int __init loongarch_iommu_driver_init(void)
+{
+	int ret = 0;
+
+	if (loongarch_iommu_disable == 0) {
+		ret = loongarch_iommu_ivrs_init();
+		if (ret != 0) {
+			free_iommu_rlookup_entry();
+			pr_err("Failed to init iommu by ivrs\n");
+			return ret;
+		}
+
+		ret = pci_register_driver(&loongarch_iommu_driver);
+		if (ret != 0) {
+			pr_err("Failed to register IOMMU driver\n");
+			free_iommu_rlookup_entry();
+			return ret;
+		}
+	}
+	return ret;
+}
+
+static void __exit loongarch_iommu_driver_exit(void)
+{
+	struct loongarch_iommu *iommu = NULL;
+
+	if (loongarch_iommu_disable == 0) {
+		list_for_each_entry(iommu, &la_iommu_list, list) {
+			iommu_device_sysfs_remove(&iommu->iommu_dev);
+			iommu_device_unregister(&iommu->iommu_dev);
+			loongarch_iommu_remove(iommu->pdev);
+		}
+		free_iommu_rlookup_entry();
+		pci_unregister_driver(&loongarch_iommu_driver);
+	}
+}
+
+module_init(loongarch_iommu_driver_init);
+module_exit(loongarch_iommu_driver_exit);
diff --git a/drivers/iommu/loongarch_iommu.h b/drivers/iommu/loongarch_iommu.h
new file mode 100644
index 000000000000..cf5640d95900
--- /dev/null
+++ b/drivers/iommu/loongarch_iommu.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Loongson IOMMU Driver
+ *
+ * Copyright (C) 2020-2021 Loongson Technology Ltd.
+ * Author:	Lv Chen
+ *		Wang Yang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LOONGARCH_IOMMU_H
+#define LOONGARCH_IOMMU_H
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define IOVA_WIDTH		47
+
+/* Bit value definition for I/O PTE fields */
+#define IOMMU_PTE_PR	(1ULL << 0)	/* Present */
+#define IOMMU_PTE_HP	(1ULL << 1)	/* HugePage */
+#define IOMMU_PTE_IR	(1ULL << 2)	/* Readable */
+#define IOMMU_PTE_IW	(1ULL << 3)	/* Writeable */
+#define IOMMU_PTE_RW	(IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
+
+#define iommu_pte_present(ptep)	(*(ptep) != 0)
+#define iommu_pte_huge(ptep)	(*(ptep) & IOMMU_PTE_HP)
+
+#define LA_IOMMU_PGSIZE		(SZ_16K | SZ_32M)
+
+#define IOMMU_PT_LEVEL0		0x00
+#define IOMMU_PT_LEVEL1		0x01
+
+/* IOMMU page table */
+#define IOMMU_PAGE_SHIFT	PAGE_SHIFT
+#define IOMMU_PAGE_SIZE		(_AC(1, UL) << IOMMU_PAGE_SHIFT)
+#define IOMMU_LEVEL_STRIDE	(IOMMU_PAGE_SHIFT - 3)
+#define IOMMU_PTRS_PER_LEVEL	(IOMMU_PAGE_SIZE >> 3)
+#define IOMMU_LEVEL_SHIFT(n)	(((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)
+#define IOMMU_LEVEL_SIZE(n)	(_AC(1, UL) << (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT))
+#define IOMMU_LEVEL_MASK(n)	(~(IOMMU_LEVEL_SIZE(n) - 1))
+#define IOMMU_LEVEL_MAX		DIV_ROUND_UP((IOVA_WIDTH - IOMMU_PAGE_SHIFT), IOMMU_LEVEL_STRIDE)
+#define IOMMU_PAGE_MASK		(~(IOMMU_PAGE_SIZE - 1))
+
+#define IOMMU_HPAGE_SIZE	(1UL << IOMMU_LEVEL_SHIFT(IOMMU_PT_LEVEL1))
+#define IOMMU_HPAGE_MASK	(~(IOMMU_HPAGE_SIZE - 1))
+
+/* wired | index | domain | shift */
+#define LA_IOMMU_WIDS		0x10
+/* valid | busy | tlbar/aw | cmd */
+#define LA_IOMMU_VBTC		0x14
+#define IOMMU_PGTABLE_BUSY	(1 << 16)
+/* enable | index | valid | domain | bdf */
+#define LA_IOMMU_EIVDB		0x18
+/* enable | valid | cmd */
+#define LA_IOMMU_CMD		0x1C
+#define LA_IOMMU_PGD0_LO	0x20
+#define LA_IOMMU_PGD0_HI	0x24
+#define STEP_PGD		0x8
+#define STEP_PGD_SHIFT		3
+#define LA_IOMMU_PGD_LO(domain_id)	\
+	(LA_IOMMU_PGD0_LO + ((domain_id) << STEP_PGD_SHIFT))
+#define LA_IOMMU_PGD_HI(domain_id)	\
+	(LA_IOMMU_PGD0_HI + ((domain_id) << STEP_PGD_SHIFT))
+
+#define LA_IOMMU_DIR_CTRL0	0xA0
+#define LA_IOMMU_DIR_CTRL1	0xA4
+#define LA_IOMMU_DIR_CTRL(x)	(LA_IOMMU_DIR_CTRL0 + ((x) << 2))
+
+#define LA_IOMMU_SAFE_BASE_HI	0xE0
+#define LA_IOMMU_SAFE_BASE_LO	0xE4
+#define LA_IOMMU_EX_ADDR_LO	0xE8
+#define LA_IOMMU_EX_ADDR_HI	0xEC
+
+#define LA_IOMMU_PFM_CNT_EN	0x100
+
+#define LA_IOMMU_RD_HIT_CNT_0	0x110
+#define LA_IOMMU_RD_MISS_CNT_0	0x114
+#define LA_IOMMU_WR_HIT_CNT_0	0x118
+#define LA_IOMMU_WR_MISS_CNT_0	0x11C
+#define LA_IOMMU_RD_HIT_CNT_1	0x120
+#define LA_IOMMU_RD_MISS_CNT_1	0x124
+#define LA_IOMMU_WR_HIT_CNT_1	0x128
+#define LA_IOMMU_WR_MISS_CNT_1	0x12C
+#define LA_IOMMU_RD_HIT_CNT_2	0x130
+#define LA_IOMMU_RD_MISS_CNT_2	0x134
+#define LA_IOMMU_WR_HIT_CNT_2	0x138
+#define LA_IOMMU_WR_MISS_CNT_2	0x13C
+
+#define MAX_DOMAIN_ID		16
+#define MAX_ATTACHED_DEV_ID	16
+
+#define iommu_ptable_end(addr, end, level)				\
+({	unsigned long __boundary = ((addr) + IOMMU_LEVEL_SIZE(level)) &	\
+		IOMMU_LEVEL_MASK(level);				\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
+})
+
+/* To find an entry in an iommu page table directory */
+#define iommu_page_index(addr, level)					\
+	(((addr) >> (((level) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) \
+	 & (IOMMU_PTRS_PER_LEVEL - 1))
+
+struct loongarch_iommu {
+	struct list_head list;			/* for la_iommu_list */
+	spinlock_t domain_bitmap_lock;		/* Lock for domain allocation */
+	spinlock_t dom_info_lock;		/* Lock for dom_list */
+	void *domain_bitmap;			/* Bitmap of global domains */
+	void *devtable_bitmap;			/* Bitmap of devtable */
+	struct list_head dom_list;		/* List of all domain privates */
+	/* PCI device id of the IOMMU device */
+	u16 devid;
+	int segment;				/* PCI segment# */
+	/* iommu configuration register space base address */
+	void *confbase;
+	/* iommu configuration register space physical base address */
+	resource_size_t confbase_phy;
+	/* iommu configuration register space size */
+	resource_size_t conf_size;
+	struct pci_dev *pdev;
+	/* Handle for IOMMU core code */
+	struct iommu_device iommu_dev;
+};
+
+struct iommu_rlookup_entry {
+	struct list_head list;
+	struct loongarch_iommu **rlookup_table;
+	int pcisegment;
+};
+
+struct iommu_info {
+	struct list_head list;		/* for dom_info->iommu_devlist */
+	struct loongarch_iommu *iommu;
+	spinlock_t devlock;		/* priv dev list lock */
+	struct list_head dev_list;	/* List of all devices in this domain iommu */
+	unsigned int dev_cnt;		/* devices assigned to this domain iommu */
+	short id;
+};
+
+/* One VM maps to one domain; each domain has one priv */
+struct dom_info {
+	struct list_head iommu_devlist;
+	struct iommu_domain domain;
+	struct mutex ptl_lock;		/* Lock for page table */
+	void *pgd;
+	spinlock_t lock;		/* Lock for dom_info->iommu_devlist */
+};
+
+struct dom_entry {
+	struct list_head list;		/* for loongarch_iommu->dom_list */
+	struct dom_info *domain_info;
+};
+
+/* A device for passthrough */
+struct la_iommu_dev_data {
+	struct list_head list;		/* for iommu_entry->dev_list */
+	struct loongarch_iommu *iommu;
+	struct iommu_info *iommu_entry;
+	struct iommu_domain *domain;
+	struct device *dev;
+	unsigned short bdf;
+	int count;
+	int index;			/* index in device table */
+};
+
+static inline unsigned long *iommu_pte_offset(unsigned long *ptep, unsigned long addr, int level)
+{
+	return ptep + iommu_page_index(addr, level);
+}
+#endif /* LOONGARCH_IOMMU_H */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index d80ab4e084d8..356791637dab 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4457,6 +4457,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000,
 			 quirk_bridge_cavm_thrx2_pcie_root);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084,
 			 quirk_bridge_cavm_thrx2_pcie_root);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LOONGSON, 0x3c09,
+			 quirk_bridge_cavm_thrx2_pcie_root);
 
 /*
  * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
@@ -5177,6 +5179,8 @@ static const struct pci_dev_acs_enabled {
 	{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
 	/* Wangxun nics */
 	{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+	{ PCI_VENDOR_ID_LOONGSON, 0x3c09, pci_quirk_xgene_acs },
+	{ PCI_VENDOR_ID_LOONGSON, 0x3c19, pci_quirk_xgene_acs },
 	{ 0 }
 };
 
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 6bda6dbb4878..f7103ae71202 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -39,7 +39,7 @@ config VFIO_GROUP
 
 config VFIO_CONTAINER
 	bool "Support for the VFIO container /dev/vfio/vfio"
-	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || LOONGARCH)
 	depends on VFIO_GROUP
 	default y
 	help
-- 
Gitee
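
Appendix (placed after the signature delimiter, so "git am" ignores it): the
multi-level page-table layout declared in loongarch_iommu.h is easier to
follow with a worked example. The sketch below is a standalone user-space
program, not part of the patch; it assumes 16KB base pages (PAGE_SHIFT = 14,
the usual LoongArch configuration), which gives an 11-bit index per level and
three levels for the 47-bit IOVA space.

	#include <stdio.h>

	#define IOVA_WIDTH		47
	#define IOMMU_PAGE_SHIFT	14	/* assumed PAGE_SHIFT */
	#define IOMMU_LEVEL_STRIDE	(IOMMU_PAGE_SHIFT - 3)	/* 11 bits */
	#define IOMMU_PTRS_PER_LEVEL	(1UL << IOMMU_LEVEL_STRIDE)
	#define IOMMU_LEVEL_MAX \
		(((IOVA_WIDTH - IOMMU_PAGE_SHIFT) + IOMMU_LEVEL_STRIDE - 1) / \
		 IOMMU_LEVEL_STRIDE)

	/* Same index extraction as iommu_page_index() in loongarch_iommu.h */
	#define iommu_page_index(addr, level) \
		(((addr) >> (((level) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) \
		 & (IOMMU_PTRS_PER_LEVEL - 1))

	int main(void)
	{
		unsigned long iova = 0x123456789aUL;
		int level;

		/* Walk from the top level (2) down to the leaf level (0) */
		for (level = IOMMU_LEVEL_MAX - 1; level >= 0; level--)
			printf("level %d index: %lu\n", level,
			       iommu_page_index(iova, level));
		return 0;
	}

With these values, iommu_pte_offset() applies the level-2 index to the root
table (pgd), the level-1 index to the intermediate table, and the level-0
index to the leaf table whose entries carry the IOMMU_PTE_* bits.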