diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5102b9fd8da4300768aba6877dc4a99288975d8e..805636deeaaf9682cb4cc7121b2406a3e7e5da6c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3948,9 +3948,10 @@ vulnerability. System may allow data leaks with this option. - no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized - steal time accounting. steal time is computed, but - won't influence scheduler behaviour + no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY] + Disable paravirtualized steal time accounting. steal time + is computed, but won't influence scheduler behaviour + nosync [HW,M68K] Disables sync negotiation for all devices. diff --git a/MAINTAINERS b/MAINTAINERS index 0c7b717f34aa1714678982f744cb8a4553eba343..4851b22f86726e29c00e1d5d6d5afcbe6a4ecad7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11551,6 +11551,7 @@ L: kvm@vger.kernel.org L: loongarch@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: Documentation/virt/kvm/loongarch/ F: arch/loongarch/include/asm/kvm* F: arch/loongarch/include/uapi/asm/kvm* F: arch/loongarch/kvm/ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index a3b39567442fe57136dcaa07125d00a113aefc70..8c1854ef95616c8e83d4ccfdeee00a3c60e59afa 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -619,6 +619,26 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. +config PARAVIRT + bool "Enable paravirtualization code" + depends on AS_HAS_LVZ_EXTENSION + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + endmenu config ARCH_SELECT_MEMORY_MODEL
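A minimal guest-side configuration sketch for the two symbols added above (illustrative only; PARAVIRT_TIME_ACCOUNTING can stay off if the coarser default accounting is acceptable):
CONFIG_PARAVIRT=y
CONFIG_PARAVIRT_TIME_ACCOUNTING=y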
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index c65573d4a1802fe542ebf63549b98a9029cc5eb3..8b5976ee228ff7bf7b96e8bda7547b7fcb6ab6fc 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -2200,3 +2200,6 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_STRICT_DEVMEM is not set # CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_LOONGARCH_IOMMU=m +CONFIG_CMDLINE_EXTEND=y +CONFIG_CMDLINE="vfio_iommu_type1.allow_unsafe_interrupts=1 nokaslr" diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 27f66930ab6a51173c229a89055a4510b0486701..22991a6f0e2b5500890f49b5c1d51848163b58c2 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/device.h b/arch/loongarch/include/asm/device.h new file mode 100644 index 0000000000000000000000000000000000000000..30cc6b61033545b3c2f8a8b6078e4e1782f7f1b8 --- /dev/null +++ b/arch/loongarch/include/asm/device.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_LOONGARCH_DEVICE_H +#define _ASM_LOONGARCH_DEVICE_H + +struct dev_archdata { + /* hook for IOMMU specific extension */ + void *iommu; + struct bus_dma_region *dma_range_map; + /* + * On some old 7A chipset, dma address is different from physical + * address, the main difference is that node id. For dma address + * node id starts from bit 36, physical node id starts from + * bit 44. The remaining address below node id is the same.
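 * (Worked example, assuming node id 1: physical address 0x0000100000000000
 *  carries the node id at bit 44, while the corresponding DMA address
 *  0x0000001000000000 carries the same node id at bit 36; all lower bits
 *  are identical. The dma_node_mask/dma_node_off fields below presumably
 *  record where the node id field sits so addresses can be converted
 *  between the two forms.)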
+ */ + unsigned long dma_node_mask; + unsigned int dma_node_off; +}; + +struct pdev_archdata { +}; + +struct dma_domain { + struct list_head node; + const struct dma_map_ops *dma_ops; + int domain_nr; +}; +void add_dma_domain(struct dma_domain *domain); +void del_dma_domain(struct dma_domain *domain); + +#endif /* _ASM_LOONGARCH_DEVICE_H*/ diff --git a/arch/loongarch/include/asm/kvm_csr.h b/arch/loongarch/include/asm/kvm_csr.h index 476c9f620dd52b64a9fb4b98757bf08da5173f78..4a76ce796f1f401eba8ce246fe8fb5892aee07c4 100644 --- a/arch/loongarch/include/asm/kvm_csr.h +++ b/arch/loongarch/include/asm/kvm_csr.h @@ -30,6 +30,7 @@ : [val] "+r" (__v) \ : [reg] "i" (csr) \ : "memory"); \ + __v; \ }) #define gcsr_xchg(v, m, csr) \ @@ -181,6 +182,8 @@ __BUILD_GCSR_OP(tlbidx) #define kvm_save_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_read(gid)) #define kvm_restore_hw_gcsr(csr, gid) (gcsr_write(csr->csrs[gid], gid)) +#define kvm_read_clear_hw_gcsr(csr, gid) (csr->csrs[gid] = gcsr_write(0, gid)) + int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu); static __always_inline unsigned long kvm_read_sw_gcsr(struct loongarch_csrs *csr, int gid) @@ -208,9 +211,7 @@ static __always_inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, csr->csrs[gid] |= val & _mask; } -#define KVM_PMU_PLV_ENABLE (CSR_PERFCTRL_PLV0 | \ - CSR_PERFCTRL_PLV1 | \ - CSR_PERFCTRL_PLV2 | \ - CSR_PERFCTRL_PLV3) +#define KVM_PMU_EVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3) #endif /* __ASM_LOONGARCH_KVM_CSR_H__ */ diff --git a/arch/loongarch/include/asm/kvm_extioi.h b/arch/loongarch/include/asm/kvm_extioi.h new file mode 100644 index 0000000000000000000000000000000000000000..d2af039a7d6f145d45d8ad74ab2cfaf86f4dfd81 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_extioi.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_EXTIOI_H +#define LOONGARCH_EXTIOI_H + +#include + +#define EXTIOI_IRQS 256 +#define EXTIOI_ROUTE_MAX_VCPUS 256 +#define EXTIOI_IRQS_U8_NUMS (EXTIOI_IRQS / 8) +#define EXTIOI_IRQS_U32_NUMS (EXTIOI_IRQS_U8_NUMS / 4) +#define EXTIOI_IRQS_U64_NUMS (EXTIOI_IRQS_U32_NUMS / 2) +/* map to ipnum per 32 irqs */ +#define EXTIOI_IRQS_NODETYPE_COUNT 16 + +#define EXTIOI_BASE 0x1400 +#define EXTIOI_SIZE 0x900 + +#define EXTIOI_NODETYPE_START 0xa0 +#define EXTIOI_NODETYPE_END 0xbf +#define EXTIOI_IPMAP_START 0xc0 +#define EXTIOI_IPMAP_END 0xc7 +#define EXTIOI_ENABLE_START 0x200 +#define EXTIOI_ENABLE_END 0x21f +#define EXTIOI_BOUNCE_START 0x280 +#define EXTIOI_BOUNCE_END 0x29f +#define EXTIOI_ISR_START 0x300 +#define EXTIOI_ISR_END 0x31f +#define EXTIOI_COREISR_START 0x400 +#define EXTIOI_COREISR_END 0x71f +#define EXTIOI_COREMAP_START 0x800 +#define EXTIOI_COREMAP_END 0x8ff + +#define LS3A_INTC_IP 8 + +#define EXTIOI_SW_COREMAP_FLAG (1 << 0) + +struct loongarch_extioi { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + /* hardware state */ + union nodetype { + u64 reg_u64[EXTIOI_IRQS_NODETYPE_COUNT / 4]; + u32 reg_u32[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint16_t reg_u16[EXTIOI_IRQS_NODETYPE_COUNT]; + u8 reg_u8[EXTIOI_IRQS_NODETYPE_COUNT * 2]; + } nodetype; + + /* one bit shows the state of one irq */ + union bounce { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS]; + } bounce; + + union isr { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 
reg_u8[EXTIOI_IRQS_U8_NUMS]; + } isr; + union coreisr { + u64 reg_u64[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_ROUTE_MAX_VCPUS][EXTIOI_IRQS_U8_NUMS]; + } coreisr; + union enable { + u64 reg_u64[EXTIOI_IRQS_U64_NUMS]; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS]; + } enable; + + /* use one byte to config ipmap for 32 irqs at once */ + union ipmap { + u64 reg_u64; + u32 reg_u32[EXTIOI_IRQS_U32_NUMS / 4]; + u8 reg_u8[EXTIOI_IRQS_U8_NUMS / 4]; + } ipmap; + /* use one byte to config coremap for one irq */ + union coremap { + u64 reg_u64[EXTIOI_IRQS / 8]; + u32 reg_u32[EXTIOI_IRQS / 4]; + u8 reg_u8[EXTIOI_IRQS]; + } coremap; + + DECLARE_BITMAP(sw_coreisr[EXTIOI_ROUTE_MAX_VCPUS][LS3A_INTC_IP], EXTIOI_IRQS); + uint8_t sw_coremap[EXTIOI_IRQS]; +}; + +void extioi_set_irq(struct loongarch_extioi *s, int irq, int level); +int kvm_loongarch_register_extioi_device(void); +#endif /* LOONGARCH_EXTIOI_H */ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index c146d2ebdb90128fa737d441fa5fedadf04fddf0..f2f4813cc8c3a9ec00cdd26e204c9cf29ff55016 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include /* Loongarch KVM register ids */ #define KVM_GET_IOC_CSR_IDX(id) ((id & KVM_CSR_IDX_MASK) >> LOONGARCH_REG_SHIFT) @@ -26,18 +29,49 @@ #define KVM_MAX_VCPUS 256 #define KVM_MAX_CPUCFG_REGS 21 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 -#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(1) +#define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(1) +#define KVM_REQ_PMU KVM_ARCH_REQ(2) + +#define KVM_GUESTDBG_SW_BP_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) +#define KVM_GUESTDBG_VALID_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_LOONGARCH_IRQ_TYPE_SHIFT 24 +#define KVM_LOONGARCH_IRQ_TYPE_MASK 0xff +#define KVM_LOONGARCH_IRQ_VCPU_SHIFT 16 +#define KVM_LOONGARCH_IRQ_VCPU_MASK 0xff +#define KVM_LOONGARCH_IRQ_NUM_SHIFT 0 +#define KVM_LOONGARCH_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_LOONGARCH_IRQ_TYPE_CPU_IP 0 +#define KVM_LOONGARCH_IRQ_TYPE_CPU_IO 1 +#define KVM_LOONGARCH_IRQ_TYPE_HT 2 +#define KVM_LOONGARCH_IRQ_TYPE_MSI 3 +#define KVM_LOONGARCH_IRQ_TYPE_IOAPIC 4 +#define KVM_LOONGARCH_IRQ_TYPE_ROUTE 5 #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) + +#define KVM_DIRTY_LOG_MANUAL_CAPS \ + (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET) + struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; u64 hugepages; + u64 ipi_read_exits; + u64 ipi_write_exits; + u64 extioi_read_exits; + u64 extioi_write_exits; + u64 pch_pic_read_exits; + u64 pch_pic_write_exits; }; struct kvm_vcpu_stat { @@ -55,9 +89,13 @@ struct kvm_arch_memory_slot { unsigned long flags; }; +#define HOST_MAX_PMNUM 16 struct kvm_context { unsigned long vpid_cache; struct kvm_vcpu *last_vcpu; + /* Host PMU CSR */ + u64 perf_ctrl[HOST_MAX_PMNUM]; + u64 perf_cntr[HOST_MAX_PMNUM]; }; struct kvm_world_switch { @@ -69,12 +107,13 @@ struct kvm_world_switch { #define MAX_PGTABLE_LEVELS 4 /* - * Physical cpu id is used for interrupt routing, there are different + * Physical CPUID is used 
for interrupt routing, there are different * definitions about physical cpuid on different hardwares. - * For LOONGARCH_CSR_CPUID register, max cpuid size if 512 - * For IPI HW, max dest CPUID size 1024 - * For extioi interrupt controller, max dest CPUID size is 256 - * For MSI interrupt controller, max supported CPUID size is 65536 + * + * For LOONGARCH_CSR_CPUID register, max CPUID size if 512 + * For IPI hardware, max destination CPUID size 1024 + * For extioi interrupt controller, max destination CPUID size is 256 + * For msgint interrupt controller, max supported CPUID size is 65536 * * Currently max CPUID is defined as 256 for KVM hypervisor, in future * it will be expanded to 4096, including 16 packages at most. And every @@ -101,9 +140,14 @@ struct kvm_arch { unsigned int root_level; spinlock_t phyid_map_lock; struct kvm_phyid_map *phyid_map; + /* Enabled PV features */ + unsigned long pv_features; s64 time_offset; struct kvm_context __percpu *vmcs; + struct loongarch_ipi *ipi; + struct loongarch_extioi *extioi; + struct loongarch_pch_pic *pch_pic; }; #define CSR_MAX_NUMS 0x800 @@ -127,9 +171,15 @@ enum emulation_result { #define KVM_LARCH_FPU (0x1 << 0) #define KVM_LARCH_LSX (0x1 << 1) #define KVM_LARCH_LASX (0x1 << 2) -#define KVM_LARCH_SWCSR_LATEST (0x1 << 3) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 4) -#define KVM_LARCH_PERF (0x1 << 5) +#define KVM_LARCH_LBT (0x1 << 3) +#define KVM_LARCH_PMU (0x1 << 4) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 5) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 6) + +#define LOONGARCH_PV_FEAT_UPDATED BIT_ULL(63) +#define LOONGARCH_PV_FEAT_MASK (BIT(KVM_FEATURE_IPI) | \ + BIT(KVM_FEATURE_STEAL_TIME) | \ + BIT(KVM_FEATURE_VIRT_EXTIOI)) struct kvm_vcpu_arch { /* @@ -163,10 +213,14 @@ struct kvm_vcpu_arch { /* FPU state */ struct loongarch_fpu fpu FPU_ALIGN; + struct loongarch_lbt lbt; /* CSR state */ struct loongarch_csrs *csr; + /* Guest max PMU CSR id */ + int max_pmu_csrid; + /* GPR used as IO source/target */ u32 io_gpr; @@ -188,6 +242,7 @@ struct kvm_vcpu_arch { /* vcpu's vpid */ u64 vpid; + gpa_t flush_gpa; /* Frequency of stable timer in Hz */ u64 timer_mhz; @@ -197,6 +252,8 @@ struct kvm_vcpu_arch { int last_sched_cpu; /* mp state */ struct kvm_mp_state mp_state; + /* ipi state */ + struct ipi_state ipi_state; /* cpucfg */ u32 cpucfg[KVM_MAX_CPUCFG_REGS]; /* paravirt steal time */ @@ -235,6 +292,11 @@ static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) return arch->cpucfg[2] & CPUCFG2_LASX; } +static inline bool kvm_guest_has_lbt(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & (CPUCFG2_X86BT | CPUCFG2_ARMBT | CPUCFG2_MIPSBT); +} + static inline bool kvm_guest_has_pmu(struct kvm_vcpu_arch *arch) { return arch->cpucfg[6] & CPUCFG6_PMP; diff --git a/arch/loongarch/include/asm/kvm_ipi.h b/arch/loongarch/include/asm/kvm_ipi.h new file mode 100644 index 0000000000000000000000000000000000000000..729dfc1e3f401758601a9bf8673f9ee51f8156b4 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_ipi.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef __LS3A_KVM_IPI_H +#define __LS3A_KVM_IPI_H + +#include + +#define LARCH_INT_IPI 12 + +struct loongarch_ipi { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + struct kvm_io_device mail_dev; +}; + +struct ipi_state { + spinlock_t lock; + uint32_t status; + uint32_t en; + uint32_t set; + uint32_t clear; + uint64_t buf[4]; +}; + +#define SMP_MAILBOX 0x1000 +#define KVM_IOCSR_IPI_ADDR_SIZE 
0x48 + +#define CORE_STATUS_OFF 0x000 +#define CORE_EN_OFF 0x004 +#define CORE_SET_OFF 0x008 +#define CORE_CLEAR_OFF 0x00c +#define CORE_BUF_20 0x020 +#define CORE_BUF_28 0x028 +#define CORE_BUF_30 0x030 +#define CORE_BUF_38 0x038 +#define IOCSR_IPI_SEND 0x040 + +#define IOCSR_MAIL_SEND 0x048 +#define IOCSR_ANY_SEND 0x158 + +#define MAIL_SEND_ADDR (SMP_MAILBOX + IOCSR_MAIL_SEND) +#define KVM_IOCSR_MAIL_ADDR_SIZE 0x118 + +#define MAIL_SEND_OFFSET 0 +#define ANY_SEND_OFFSET (IOCSR_ANY_SEND - IOCSR_MAIL_SEND) + +int kvm_loongarch_register_ipi_device(void); +#endif diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 032101b941d92fea374afbb18877ed67314007dc..710ca8c4b61d63567cc98495a11a118e2b17f253 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -2,28 +2,34 @@ #ifndef _ASM_LOONGARCH_KVM_PARA_H #define _ASM_LOONGARCH_KVM_PARA_H +#include + /* * Hypercall code field */ #define HYPERVISOR_KVM 1 #define HYPERVISOR_VENDOR_SHIFT 8 -#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) -#define KVM_HCALL_CODE_PV_SERVICE 0 +#define HYPERCALL_ENCODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) + +#define KVM_HCALL_CODE_SERVICE 0 #define KVM_HCALL_CODE_SWDBG 1 -#define KVM_HCALL_PV_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_PV_SERVICE) -#define KVM_HCALL_FUNC_PV_IPI 1 + +#define KVM_HCALL_SERVICE HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE) +#define KVM_HCALL_FUNC_IPI 1 #define KVM_HCALL_FUNC_NOTIFY 2 -#define KVM_HCALL_SWDBG HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) + +#define KVM_HCALL_SWDBG HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) /* * LoongArch hypercall return code */ -#define KVM_HCALL_STATUS_SUCCESS 0 +#define KVM_HCALL_SUCCESS 0 #define KVM_HCALL_INVALID_CODE -1UL #define KVM_HCALL_INVALID_PARAMETER -2UL #define KVM_STEAL_PHYS_VALID BIT_ULL(0) #define KVM_STEAL_PHYS_MASK GENMASK_ULL(63, 6) + struct kvm_steal_time { __u64 steal; __u32 version; @@ -31,21 +37,22 @@ struct kvm_steal_time { __u32 pad[12]; }; + /* * Hypercall interface for KVM hypervisor * * a0: function identifier - * a1-a6: args - * Return value will be placed in v0. - * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + * a1-a5: args + * Return value will be placed in a0. + * Up to 5 arguments are passed in a1, a2, a3, a4, a5. 
*/ -static __always_inline long kvm_hypercall(u64 fid) +static __always_inline long kvm_hypercall0(u64 fid) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun) : "memory" @@ -56,12 +63,12 @@ static __always_inline long kvm_hypercall(u64 fid) static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun), "r" (a1) : "memory" @@ -73,17 +80,17 @@ static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) static __always_inline long kvm_hypercall2(u64 fid, unsigned long arg0, unsigned long arg1) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) - : "=r" (ret) - : "r" (fun), "r" (a1), "r" (a2) - : "memory" - ); + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2) + : "memory" + ); return ret; } @@ -91,14 +98,14 @@ static __always_inline long kvm_hypercall2(u64 fid, static __always_inline long kvm_hypercall3(u64 fid, unsigned long arg0, unsigned long arg1, unsigned long arg2) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; register unsigned long a3 asm("a3") = arg2; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r" (fun), "r" (a1), "r" (a2), "r" (a3) : "memory" @@ -108,10 +115,10 @@ static __always_inline long kvm_hypercall3(u64 fid, } static __always_inline long kvm_hypercall4(u64 fid, - unsigned long arg0, unsigned long arg1, unsigned long arg2, - unsigned long arg3) + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; @@ -119,7 +126,7 @@ static __always_inline long kvm_hypercall4(u64 fid, register unsigned long a4 asm("a4") = arg3; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl "__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4) : "memory" @@ -129,10 +136,10 @@ static __always_inline long kvm_hypercall4(u64 fid, } static __always_inline long kvm_hypercall5(u64 fid, - unsigned long arg0, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4) + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, unsigned long arg4) { - register long ret asm("v0"); + register long ret asm("a0"); register unsigned long fun asm("a0") = fid; register unsigned long a1 asm("a1") = arg0; register unsigned long a2 asm("a2") = arg1; @@ -141,7 +148,7 @@ static __always_inline long kvm_hypercall5(u64 fid, register unsigned long a5 asm("a5") = arg4; __asm__ __volatile__( - "hvcl "__stringify(KVM_HCALL_PV_SERVICE) + "hvcl 
"__stringify(KVM_HCALL_SERVICE) : "=r" (ret) : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5) : "memory" @@ -150,11 +157,20 @@ static __always_inline long kvm_hypercall5(u64 fid, return ret; } +#ifdef CONFIG_PARAVIRT +bool kvm_para_available(void); +unsigned int kvm_arch_para_features(void); +#else +static inline bool kvm_para_available(void) +{ + return false; +} static inline unsigned int kvm_arch_para_features(void) { return 0; } +#endif static inline unsigned int kvm_arch_para_hints(void) { @@ -165,4 +181,5 @@ static inline bool kvm_check_and_clear_guest_paused(void) { return false; } + #endif /* _ASM_LOONGARCH_KVM_PARA_H */ diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h new file mode 100644 index 0000000000000000000000000000000000000000..91bd5a5ec575d23662df8e85a28ce44024841db7 --- /dev/null +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_PCH_PIC_H +#define LOONGARCH_PCH_PIC_H + +#include + +#define PCH_PIC_SIZE 0x3e8 + +#define PCH_PIC_INT_ID_START 0x0 +#define PCH_PIC_INT_ID_END 0x7 +#define PCH_PIC_MASK_START 0x20 +#define PCH_PIC_MASK_END 0x27 +#define PCH_PIC_HTMSI_EN_START 0x40 +#define PCH_PIC_HTMSI_EN_END 0x47 +#define PCH_PIC_EDGE_START 0x60 +#define PCH_PIC_EDGE_END 0x67 +#define PCH_PIC_CLEAR_START 0x80 +#define PCH_PIC_CLEAR_END 0x87 +#define PCH_PIC_AUTO_CTRL0_START 0xc0 +#define PCH_PIC_AUTO_CTRL0_END 0xc7 +#define PCH_PIC_AUTO_CTRL1_START 0xe0 +#define PCH_PIC_AUTO_CTRL1_END 0xe7 +#define PCH_PIC_ROUTE_ENTRY_START 0x100 +#define PCH_PIC_ROUTE_ENTRY_END 0x13f +#define PCH_PIC_HTMSI_VEC_START 0x200 +#define PCH_PIC_HTMSI_VEC_END 0x23f +#define PCH_PIC_INT_IRR_START 0x380 +#define PCH_PIC_INT_IRR_END 0x38f +#define PCH_PIC_INT_ISR_START 0x3a0 +#define PCH_PIC_INT_ISR_END 0x3af +#define PCH_PIC_POLARITY_START 0x3e0 +#define PCH_PIC_POLARITY_END 0x3e7 +#define PCH_PIC_INT_ID_VAL 0x7000000UL +#define PCH_PIC_INT_ID_VER 0x1UL + +struct loongarch_pch_pic { + spinlock_t lock; + struct kvm *kvm; + struct kvm_io_device device; + uint64_t mask; /* 1:disable irq, 0:enable irq */ + uint64_t htmsi_en; /* 1:msi */ + uint64_t edge; /* 1:edge triggered, 0:level triggered */ + uint64_t auto_ctrl0; /* only use default value 00b */ + uint64_t auto_ctrl1; /* only use default value 00b */ + uint64_t last_intirr; /* edge detection */ + uint64_t irr; /* interrupt request register */ + uint64_t isr; /* interrupt service register */ + uint64_t polarity; /* 0: high level trigger, 1: low level trigger */ + uint8_t route_entry[64]; /* default value 0, route to int0: extioi */ + uint8_t htmsi_vector[64]; /* irq route table for routing to extioi */ + uint64_t pch_pic_base; +}; + +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level); +void pch_msi_set_irq(struct kvm *kvm, int irq, int level); +int kvm_loongarch_register_pch_pic_device(void); +#endif /* LOONGARCH_PCH_PIC_H */ diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 1da24994b838a9acbc905e5130cba451261651d2..d7e8f7d50ee0ce13360336f6c66ba24c746989ea 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -75,10 +75,13 @@ static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif -int kvm_own_pmu(struct kvm_vcpu *vcpu); +#ifdef CONFIG_CPU_HAS_LBT +int 
kvm_own_lbt(struct kvm_vcpu *vcpu); +#else +static inline int kvm_own_lbt(struct kvm_vcpu *vcpu) { return -EINVAL; } +#endif void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); -void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); @@ -112,4 +115,24 @@ static inline int kvm_queue_exception(struct kvm_vcpu *vcpu, return -1; } +static inline unsigned long kvm_read_reg(struct kvm_vcpu *vcpu, int num) +{ + return vcpu->arch.gprs[num]; +} + +static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val) +{ + vcpu->arch.gprs[num] = val; +} + +static inline bool kvm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + +static inline bool kvm_guest_has_pv_feature(struct kvm_vcpu *vcpu, unsigned int feature) +{ + return vcpu->kvm->arch.pv_features & BIT(feature); +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index d62af76f4bb7e96a451e0a150aae0b3b219d5b12..ab5ec51293adfafc33e8c86d680f90e7321a8666 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -161,15 +161,8 @@ /* * cpucfg index area: 0x40000000 -- 0x400000ff - * SW emulation for KVM hypervirsor + * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#define CPUCFG_KVM_BASE 0x40000000UL -#define CPUCFG_KVM_SIZE 0x100 -#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE -#define KVM_SIGNATURE "KVM\0" -#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) -#define KVM_FEATURE_PV_IPI BIT(1) -#define KVM_FEATURE_STEAL_TIME BIT(2) #ifndef __ASSEMBLY__ diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index fe27fb5e82b88fa3546b56441ae2cbc0b590350f..dabc5aec179c4a37d330c5078b801f8f77d6aa66 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -18,6 +18,7 @@ static inline u64 paravirt_steal_clock(int cpu) int pv_ipi_init(void); int __init pv_time_init(void); +int __init pv_spinlock_init(void); #else static inline int pv_ipi_init(void) { @@ -28,5 +29,11 @@ static inline int pv_time_init(void) { return 0; } + +static inline int pv_spinlock_init(void) +{ + return 0; +} + #endif // CONFIG_PARAVIRT #endif diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h new file mode 100644 index 0000000000000000000000000000000000000000..e76d3aa1e1ebe7dcae953bf6dc89d55802e05cfc --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_QSPINLOCK_H +#define _ASM_LOONGARCH_QSPINLOCK_H + +#include + +#ifdef CONFIG_PARAVIRT + +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +#define virt_spin_lock virt_spin_lock + +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + int val; + + if (!static_branch_unlikely(&virt_spin_lock_key)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. 
+ */ + +__retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } + + return true; +} + +#endif /* CONFIG_PARAVIRT */ + +#include + +#endif // _ASM_LOONGARCH_QSPINLOCK_H diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 9891ed93816a3af2cf08713602ccd8d5b8e35556..13c1280662ae45b5cbf7f14ee55c7121e0ddf339 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -19,6 +19,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 +#define __KVM_HAVE_IRQ_LINE #define KVM_GUESTDBG_USE_SW_BP 0x00010000 /* @@ -66,6 +67,7 @@ struct kvm_fpu { #define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) #define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL) #define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) +#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x50000ULL) #define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) #define KVM_CSR_IDX_MASK 0x7fff #define KVM_CPUCFG_IDX_MASK 0x7fff @@ -79,11 +81,30 @@ struct kvm_fpu { /* Debugging: Special instruction for software breakpoint */ #define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) +/* LBT registers */ +#define KVM_REG_LOONGARCH_LBT_SCR0 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1) +#define KVM_REG_LOONGARCH_LBT_SCR1 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2) +#define KVM_REG_LOONGARCH_LBT_SCR2 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3) +#define KVM_REG_LOONGARCH_LBT_SCR3 (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4) +#define KVM_REG_LOONGARCH_LBT_EFLAGS (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5) +#define KVM_REG_LOONGARCH_LBT_FTOP (KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6) + #define LOONGARCH_REG_SHIFT 3 #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) +/* Device Control API on vm fd */ +#define KVM_LOONGARCH_VM_FEAT_CTRL 0 +#define KVM_LOONGARCH_VM_FEAT_LSX 0 +#define KVM_LOONGARCH_VM_FEAT_LASX 1 +#define KVM_LOONGARCH_VM_FEAT_X86BT 2 +#define KVM_LOONGARCH_VM_FEAT_ARMBT 3 +#define KVM_LOONGARCH_VM_FEAT_MIPSBT 4 +#define KVM_LOONGARCH_VM_FEAT_PMU 5 +#define KVM_LOONGARCH_VM_FEAT_PV_IPI 6 +#define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7 + /* Device Control API on vcpu fd */ #define KVM_LOONGARCH_VCPU_CPUCFG 0 #define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 @@ -114,4 +135,15 @@ struct kvm_iocsr_entry { #define KVM_IRQCHIP_NUM_PINS 64 #define KVM_MAX_CORES 256 +#define KVM_LOONGARCH_VM_HAVE_IRQCHIP 0x40000001 + +#define KVM_DEV_LOONGARCH_IPI_GRP_REGS 0x40000002 + +#define KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS 0x40000003 + +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL 0x40000004 +#define KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT 0 + +#define KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS 0x40000005 + #endif /* __UAPI_ASM_LOONGARCH_KVM_H */ diff --git a/arch/loongarch/include/uapi/asm/kvm_para.h b/arch/loongarch/include/uapi/asm/kvm_para.h new file mode 100644 index 0000000000000000000000000000000000000000..b0604aa9b4bbd29dfafedbdf59f571bd445c2c2c --- /dev/null +++ b/arch/loongarch/include/uapi/asm/kvm_para.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_KVM_PARA_H +#define _UAPI_ASM_KVM_PARA_H + +#include + +/* + * CPUCFG index area: 0x40000000 -- 
0x400000ff + * SW emulation for KVM hypervirsor + */ +#define CPUCFG_KVM_BASE 0x40000000 +#define CPUCFG_KVM_SIZE 0x100 +#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0) +#define KVM_SIGNATURE "KVM\0" +#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) +#define KVM_FEATURE_IPI 1 +#define KVM_FEATURE_STEAL_TIME 2 +/* BIT 24 - 31 are features configurable by user space vmm */ +#define KVM_FEATURE_VIRT_EXTIOI 24 + +#endif /* _UAPI_ASM_KVM_PARA_H */ diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 56182c64ab38ebfd7b218a45e45aa725a0a9d48e..92e37d0e6b229a699b4c1825eab3bf1a55e5dd14 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -8,10 +8,11 @@ #include #include +static int has_steal_clock; struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); -static int has_steal_clock; +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); static u64 native_steal_clock(int cpu) { @@ -71,6 +72,62 @@ static int pv_register_steal_time(void) return 0; } +static bool steal_acc = true; + +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 paravt_steal_clock(int cpu) +{ + int version; + u64 steal; + struct kvm_steal_time *src; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + virt_rmb(); /* Make sure that the version is read before the steal */ + steal = src->steal; + virt_rmb(); /* Make sure that the steal is read before the next version */ + + } while ((version & 1) || (version != src->version)); + + return steal; +} + +static bool steal_acc = true; + +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + +static u64 paravt_steal_clock(int cpu) +{ + int version; + u64 steal; + struct kvm_steal_time *src; + + src = &per_cpu(steal_time, cpu); + do { + + version = src->version; + virt_rmb(); /* Make sure that the version is read before the steal */ + steal = src->steal; + virt_rmb(); /* Make sure that the steal is read before the next version */ + + } while ((version & 1) || (version != src->version)); + + return steal; +} + #ifdef CONFIG_SMP static void pv_send_ipi_single(int cpu, unsigned int action) { @@ -192,11 +249,14 @@ static int pv_cpu_down_prepare(unsigned int cpu) } #endif -static bool kvm_para_available(void) +bool kvm_para_available(void) { static int hypervisor_type; int config; + if (!cpu_has_hypervisor) + return false; + if (!hypervisor_type) { config = read_cpucfg(CPUCFG_KVM_SIG); if (!memcmp(&config, KVM_SIGNATURE, 4)) @@ -206,28 +266,31 @@ static bool kvm_para_available(void) return hypervisor_type == HYPERVISOR_KVM; } -int __init pv_ipi_init(void) +unsigned int kvm_arch_para_features(void) { - int feature; + static unsigned int feature; - if (!cpu_has_hypervisor) - return 0; if (!kvm_para_available()) return 0; - /* - * check whether KVM hypervisor supports pv_ipi or not - */ - feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!feature) + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + + return feature; +} + +int __init pv_ipi_init(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_IPI)) + return 0; + #ifdef CONFIG_SMP - if (feature & KVM_FEATURE_PV_IPI) { - smp_ops.init_ipi = pv_init_ipi; - smp_ops.send_ipi_single = pv_send_ipi_single; - smp_ops.send_ipi_mask = pv_send_ipi_mask; - } + smp_ops.init_ipi = pv_init_ipi; + 
smp_ops.send_ipi_single = pv_send_ipi_single; + smp_ops.send_ipi_mask = pv_send_ipi_mask; #endif - return 1; + return 0; } static void pv_cpu_reboot(void *unused) @@ -279,3 +342,235 @@ int __init pv_time_init(void) pr_info("Using stolen time PV\n"); return 0; } + +static int pv_enable_steal_time(void) +{ + int cpu = smp_processor_id(); + unsigned long addr; + struct kvm_steal_time *st; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be in one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), addr); + + return 0; +} + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), 0); +} + +#ifdef CONFIG_SMP +static int pv_time_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_enable_steal_time(); + local_irq_restore(flags); + + return 0; +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + + return 0; +} +#endif + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int r; + + if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + r = pv_enable_steal_time(); + if (r < 0) { + has_steal_clock = 0; + return 0; + } + register_reboot_notifier(&pv_reboot_nb); + +#ifdef CONFIG_SMP + r = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "loongarch/pv_time:online", + pv_time_cpu_online, pv_time_cpu_down_prepare); + if (r < 0) { + has_steal_clock = 0; + pr_err("Failed to install cpu hotplug callbacks\n"); + return r; + } +#endif + + static_call_update(pv_steal_clock, paravt_steal_clock); + + static_key_slow_inc(¶virt_steal_enabled); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); +#endif + + pr_info("Using paravirt steal-time\n"); + + return 0; +} + +static int pv_enable_steal_time(void) +{ + int cpu = smp_processor_id(); + unsigned long addr; + struct kvm_steal_time *st; + + if (!has_steal_clock) + return -EPERM; + + st = &per_cpu(steal_time, cpu); + addr = per_cpu_ptr_to_phys(st); + + /* The whole structure kvm_steal_time should be in one page */ + if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { + pr_warn("Illegal PV steal time addr %lx\n", addr); + return -EFAULT; + } + + addr |= KVM_STEAL_PHYS_VALID; + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, addr); + + return 0; +} + +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, KVM_FEATURE_STEAL_TIME, 0); +} + +#ifdef CONFIG_SMP +static int pv_time_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_enable_steal_time(); + local_irq_restore(flags); + + return 0; +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + 
+ return 0; +} +#endif + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + int r, feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_STEAL_TIME)) + return 0; + + has_steal_clock = 1; + r = pv_enable_steal_time(); + if (r < 0) { + has_steal_clock = 0; + return 0; + } + register_reboot_notifier(&pv_reboot_nb); + +#ifdef CONFIG_SMP + r = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "loongarch/pv_time:online", + pv_time_cpu_online, pv_time_cpu_down_prepare); + if (r < 0) { + has_steal_clock = 0; + pr_err("Failed to install cpu hotplug callbacks\n"); + return r; + } +#endif + + static_call_update(pv_steal_clock, paravt_steal_clock); + + static_key_slow_inc(¶virt_steal_enabled); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); +#endif + + pr_info("Using paravirt steal-time\n"); + + return 0; +} + +int __init pv_spinlock_init(void) +{ + if (!cpu_has_hypervisor) + return 0; + + static_branch_enable(&virt_spin_lock_key); + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index fe7b6e05582952bcd13b7c9a6649273a7607318b..6ccffb2ab7c854738d988e3560532c6af2620a14 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -665,6 +665,8 @@ void __init setup_arch(char **cmdline_p) arch_mem_init(cmdline_p); resource_init(); + jump_label_init(); /* Initialise the static keys for paravirtualization */ + #ifdef CONFIG_SMP plat_smp_setup(); prefill_possible_map(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index bd0bd3decd325aa3f41b297beb2a701be829deb8..8af0398dd786aa5f6d6c7a66fa22b49e6e13bb1c 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -477,7 +477,7 @@ core_initcall(ipi_pm_init); #endif /* Preload SMP state for boot cpu */ -void smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { unsigned int cpu, node, rr_node; @@ -510,6 +510,8 @@ void smp_prepare_boot_cpu(void) rr_node = next_node_in(rr_node, node_online_map); } } + + pv_spinlock_init(); } /* called from main before smp_init() */ diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index fda425babfb203d3fc36052b9722515ab7c568e6..e899d96f4da61c923cf879d2a535defdf0061b93 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -24,12 +24,18 @@ config KVM select HAVE_KVM_DIRTY_RING_ACQ_REL select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_MSI select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select KVM_XFER_TO_GUEST_WORK + select SCHED_INFO select MMU_NOTIFIER select PREEMPT_NOTIFIERS + select KVM_VFIO help Support hosting virtualized guest machines using hardware virtualization extensions. 
You will need diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index 244467d7792a97c80c6558c39b2b7bf8a898fafd..f363e4b6fcf35ae6e615a40555ec6e744fa84687 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -18,5 +18,9 @@ kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vm.o +kvm-y += intc/ipi.o +kvm-y += intc/extioi.o +kvm-y += intc/pch_pic.o +kvm-y += irqfd.o CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 8affc6d4a66e6973473673fb176b39064b53287c..19cb22da35de7a79265269362dbf462182bea882 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,47 @@ #include #include "trace.h" +static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) +{ + int rd, rj; + unsigned int index, ret; + + if (inst.reg2_format.opcode != cpucfg_op) + return EMULATE_FAIL; + + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + + /* + * By LoongArch Reference Manual 2.2.10.5 + * Return value is 0 for undefined CPUCFG index + * + * Disable preemption since hw gcsr is accessed + */ + preempt_disable(); + switch (index) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; + break; + case CPUCFG_KVM_SIG: + /* CPUCFG emulation between 0x40000000 -- 0x400000ff */ + vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; + break; + case CPUCFG_KVM_FEATURE: + ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + vcpu->arch.gprs[rd] = ret; + break; + default: + vcpu->arch.gprs[rd] = 0; + break; + } + preempt_enable(); + + return EMULATE_DONE; +} + static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid) { unsigned long val = 0; @@ -83,9 +125,10 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) rj = inst.reg2csr_format.rj; csrid = inst.reg2csr_format.csr; - if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= LOONGARCH_CSR_PERFCNTR3) { - if (!kvm_own_pmu(vcpu)) { + if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) { + if (kvm_guest_has_pmu(&vcpu->arch)) { vcpu->arch.pc -= 4; + kvm_make_request(KVM_REQ_PMU, vcpu); return EMULATE_DONE; } } @@ -114,7 +157,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret; - unsigned long val; + unsigned long *val; u32 addr, rd, rj, opcode; /* @@ -127,6 +170,7 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; + val = &vcpu->arch.gprs[rd]; /* LoongArch is Little endian */ switch (opcode) { @@ -158,17 +202,26 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) run->iocsr_io.len = 8; run->iocsr_io.is_write = 1; break; + case CPUCFG_KVM_FEATURE: + vcpu->arch.gprs[rd] = KVM_FEATURE_IPI; + break; default: ret = EMULATE_FAIL; - break; + return ret; } - if (ret == EMULATE_DO_IOCSR) { - if (run->iocsr_io.is_write) { - val = vcpu->arch.gprs[rd]; - memcpy(run->iocsr_io.data, &val, run->iocsr_io.len); - } - vcpu->arch.io_gpr = rd; + if (run->iocsr_io.is_write) { + if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save data and let user space to write it */ + memcpy(run->iocsr_io.data, val, 
run->iocsr_io.len); + } else { + if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + ret = EMULATE_DONE; + else + /* Save register id for iocsr read completion */ + vcpu->arch.io_gpr = rd; } return ret; @@ -213,57 +266,6 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) -{ - int rd, rj; - unsigned int index, ret; - unsigned long plv; - - rd = inst.reg2_format.rd; - rj = inst.reg2_format.rj; - ++vcpu->stat.cpucfg_exits; - index = vcpu->arch.gprs[rj]; - - /* - * By LoongArch Reference Manual 2.2.10.5 - * Return value is 0 for undefined cpucfg index - * - * Disable preemption since hw gcsr is accessed - */ - preempt_disable(); - plv = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD) >> CSR_CRMD_PLV_SHIFT; - switch (index) { - case 0 ... (KVM_MAX_CPUCFG_REGS - 1): - vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; - break; - case CPUCFG_KVM_SIG: - /* - * Cpucfg emulation between 0x40000000 -- 0x400000ff - * Return value with 0 if executed in user mode - */ - if ((plv & CSR_CRMD_PLV) == PLV_KERN) - vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; - else - vcpu->arch.gprs[rd] = 0; - break; - case CPUCFG_KVM_FEATURE: - ret = 0; - if ((plv & CSR_CRMD_PLV) == PLV_KERN) { - ret = KVM_FEATURE_PV_IPI; - if (sched_info_on()) - ret |= KVM_FEATURE_STEAL_TIME; - } - vcpu->arch.gprs[rd] = ret; - break; - default: - vcpu->arch.gprs[rd] = 0; - break; - } - - preempt_enable(); - return EMULATE_DONE; -} - static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) { unsigned long curr_pc; @@ -280,8 +282,7 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) er = EMULATE_FAIL; switch (((inst.word >> 24) & 0xff)) { case 0x0: /* CPUCFG GSPR */ - if (inst.reg2_format.opcode == cpucfg_op) - er = kvm_emu_cpucfg(vcpu, inst); + er = kvm_emu_cpucfg(vcpu, inst); break; case 0x4: /* CSR{RD,WR,XCHG} GSPR */ er = kvm_handle_csr(vcpu, inst); @@ -456,17 +457,33 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + /* + * if mmio device such as pch pic is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. + */ + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, + run->mmio.len, &vcpu->arch.gprs[rd]); + if (!ret) { + update_pc(&vcpu->arch); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + /* Set for kvm_complete_mmio_read() use */ vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; - } else { - kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - vcpu->mmio_needed = 0; + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, run->mmio.len, + run->mmio.phys_addr, NULL); + return EMULATE_DO_MMIO; } + kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->mmio_needed = 0; + return ret; } @@ -506,6 +523,9 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) break; } + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, + run->mmio.phys_addr, run->mmio.data); + return er; } @@ -604,17 +624,30 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) } if (ret == EMULATE_DO_MMIO) { + /* + * if mmio device such as pch pic is emulated in KVM, + * it need not return to user space to handle the mmio + * exception. 
+ */ + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, + run->mmio.len, data); + if (!ret) + return EMULATE_DONE; + run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; - } else { - vcpu->arch.pc = curr_pc; - kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", - inst.word, vcpu->arch.pc, vcpu->arch.badv); - kvm_arch_vcpu_dump_regs(vcpu); - /* Rollback PC if emulation was unsuccessful */ + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, + run->mmio.phys_addr, data); + return EMULATE_DO_MMIO; } + vcpu->arch.pc = curr_pc; + kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", + inst.word, vcpu->arch.pc, vcpu->arch.badv); + kvm_arch_vcpu_dump_regs(vcpu); + /* Rollback PC if emulation was unsuccessful */ + return ret; } @@ -698,6 +731,31 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static long kvm_save_notify(struct kvm_vcpu *vcpu) +{ + unsigned long id, data; + + id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1); + data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2); + switch (id) { + case BIT(KVM_FEATURE_STEAL_TIME): + if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return KVM_HCALL_INVALID_PARAMETER; + + vcpu->arch.st.guest_addr = data; + if (!(data & KVM_STEAL_PHYS_VALID)) + return 0; + + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + return 0; + default: + return KVM_HCALL_INVALID_CODE; + }; + + return KVM_HCALL_INVALID_CODE; +}; + /* * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. * @vcpu: Virtual CPU context. @@ -728,29 +786,34 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } -static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu) +static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lbt(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + +static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) { - unsigned long ipi_bitmap; unsigned int min, cpu, i; + unsigned long ipi_bitmap; struct kvm_vcpu *dest; - min = vcpu->arch.gprs[LOONGARCH_GPR_A3]; + min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3); for (i = 0; i < 2; i++, min += BITS_PER_LONG) { - ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1 + i]; + ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i); if (!ipi_bitmap) continue; cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG); while (cpu < BITS_PER_LONG) { dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min); - cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, - cpu + 1); + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1); if (!dest) continue; - /* - * Send SWI0 to dest vcpu to emulate IPI interrupt - */ + /* Send SWI0 to dest vcpu to emulate IPI interrupt */ kvm_queue_irq(dest, INT_SWI0); kvm_vcpu_kick(dest); } @@ -759,75 +822,58 @@ static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu) return 0; } -static int kvm_save_notify(struct kvm_vcpu *vcpu) -{ - unsigned long id, data; - - id = vcpu->arch.gprs[LOONGARCH_GPR_A1]; - data = vcpu->arch.gprs[LOONGARCH_GPR_A2]; - switch (id) { - case KVM_FEATURE_STEAL_TIME: - vcpu->arch.st.guest_addr = data; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - break; - default: - break; - }; - - return 0; -}; - /* - * hypercall emulation always return to guest, Caller should check retval. + * Hypercall emulation always return to guest, Caller should check retval. 
*/ -static void kvm_handle_pv_service(struct kvm_vcpu *vcpu) +static void kvm_handle_service(struct kvm_vcpu *vcpu) { - unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0]; - long ret; + long ret = KVM_HCALL_INVALID_CODE; + unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); switch (func) { - case KVM_HCALL_FUNC_PV_IPI: - kvm_pv_send_ipi(vcpu); - ret = KVM_HCALL_STATUS_SUCCESS; + case KVM_HCALL_FUNC_IPI: + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) { + kvm_send_pv_ipi(vcpu); + ret = KVM_HCALL_SUCCESS; + } break; case KVM_HCALL_FUNC_NOTIFY: - ret = kvm_save_notify(vcpu); + if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)) + ret = kvm_save_notify(vcpu); break; default: - ret = KVM_HCALL_INVALID_CODE; break; - }; + } - vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret; + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); } static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) { + int ret; larch_inst inst; unsigned int code; - int ret; inst.word = vcpu->arch.badi; code = inst.reg0i15_format.immediate; ret = RESUME_GUEST; switch (code) { - case KVM_HCALL_PV_SERVICE: + case KVM_HCALL_SERVICE: vcpu->stat.hypercall_exits++; - kvm_handle_pv_service(vcpu); + kvm_handle_service(vcpu); break; case KVM_HCALL_SWDBG: - /* KVM_HC_SWDBG only in effective when SW_BP is enabled */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + /* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */ + if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) { vcpu->run->exit_reason = KVM_EXIT_DEBUG; ret = RESUME_HOST; - } else - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; - break; + break; + } + fallthrough; default: /* Treat it as noop intruction, only set return value */ - vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HCALL_INVALID_CODE; + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE); break; } @@ -867,6 +913,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, + [EXCCODE_BTDIS] = kvm_handle_lbt_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, [EXCCODE_HVC] = kvm_handle_hypercall, }; diff --git a/arch/loongarch/kvm/intc/extioi.c b/arch/loongarch/kvm/intc/extioi.c new file mode 100644 index 0000000000000000000000000000000000000000..48141823aaa3aa9a413901279b9a287b531c6894 --- /dev/null +++ b/arch/loongarch/kvm/intc/extioi.c @@ -0,0 +1,783 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +#define loongarch_ext_irq_lock(s, flags) spin_lock_irqsave(&s->lock, flags) +#define loongarch_ext_irq_unlock(s, flags) spin_unlock_irqrestore(&s->lock, flags) + +static void extioi_update_irq(struct loongarch_extioi *s, int irq, int level) +{ + int ipnum, cpu, found, irq_index, irq_mask; + struct kvm_interrupt vcpu_irq; + struct kvm_vcpu *vcpu; + + ipnum = s->ipmap.reg_u8[irq / 32]; + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? 
ipnum : 0; + + cpu = s->sw_coremap[irq]; + vcpu = kvm_get_vcpu(s->kvm, cpu); + irq_index = irq / 32; + /* length of accessing core isr is 4 bytes */ + irq_mask = 1 << (irq & 0x1f); + + if (level) { + /* if not enable return false */ + if (((s->enable.reg_u32[irq_index]) & irq_mask) == 0) + return; + s->coreisr.reg_u32[cpu][irq_index] |= irq_mask; + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EXTIOI_IRQS); + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + } else { + s->coreisr.reg_u32[cpu][irq_index] &= ~irq_mask; + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + found = find_first_bit(s->sw_coreisr[cpu][ipnum], EXTIOI_IRQS); + } + + if (found < EXTIOI_IRQS) + /* other irq is handling, need not update parent irq level */ + return; + + vcpu_irq.irq = level ? INT_HWI0 + ipnum : -(INT_HWI0 + ipnum); + kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq); +} + +static void extioi_set_sw_coreisr(struct loongarch_extioi *s) +{ + int ipnum, cpu, irq_index, irq_mask, irq; + + for (irq = 0; irq < EXTIOI_IRQS; irq++) { + ipnum = s->ipmap.reg_u8[irq / 32]; + ipnum = count_trailing_zeros(ipnum); + ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0; + irq_index = irq / 32; + /* length of accessing core isr is 4 bytes */ + irq_mask = 1 << (irq & 0x1f); + + cpu = s->coremap.reg_u8[irq]; + if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) + set_bit(irq, s->sw_coreisr[cpu][ipnum]); + else + clear_bit(irq, s->sw_coreisr[cpu][ipnum]); + } +} + +void extioi_set_irq(struct loongarch_extioi *s, int irq, int level) +{ + unsigned long *isr = (unsigned long *)s->isr.reg_u8; + unsigned long flags; + + level ? set_bit(irq, isr) : clear_bit(irq, isr); + if (!level) + return; + loongarch_ext_irq_lock(s, flags); + extioi_update_irq(s, irq, level); + loongarch_ext_irq_unlock(s, flags); +} + +static inline void extioi_enable_irq(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + int index, u8 mask, int level) +{ + u8 val; + int irq; + + val = mask & s->isr.reg_u8[index]; + irq = ffs(val); + while (irq != 0) { + /* + * enable bit change from 0 to 1, + * need to update irq by pending bits + */ + extioi_update_irq(s, irq - 1 + index * 8, level); + val &= ~(1 << (irq - 1)); + irq = ffs(val); + } +} + +static int loongarch_extioi_writeb(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int index, irq, ret = 0; + u8 data, old_data, cpu; + u8 coreisr, old_coreisr; + gpa_t offset; + + data = *(u8 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START); + s->nodetype.reg_u8[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START); + s->ipmap.reg_u8[index] = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START); + old_data = s->enable.reg_u8[index]; + s->enable.reg_u8[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u8[index] & ~old_data & s->isr.reg_u8[index]; + extioi_enable_irq(vcpu, s, index, data, 1); + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u8[index] & old_data & s->isr.reg_u8[index]; + extioi_enable_irq(vcpu, s, index, data, 0); + break; + case EXTIOI_BOUNCE_START ... 
EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = offset - EXTIOI_BOUNCE_START; + s->bounce.reg_u8[index] = data; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START); + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u8[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u8[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + irq = ffs(coreisr); + while (irq != 0) { + extioi_update_irq(s, irq - 1 + index * 8, 0); + coreisr &= ~(1 << (irq - 1)); + irq = ffs(coreisr); + } + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq; + s->coremap.reg_u8[index] = data; + + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 0 : cpu; + + if (s->sw_coremap[irq] == cpu) + break; + + if (test_bit(irq, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq, 0); + s->sw_coremap[irq] = cpu; + extioi_update_irq(s, irq, 1); + } else + s->sw_coremap[irq] = cpu; + + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int loongarch_extioi_writew(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, ret = 0; + u8 cpu; + u32 data, old_data; + u32 coreisr, old_coreisr; + gpa_t offset; + + data = *(u32 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 2; + s->nodetype.reg_u32[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START) >> 2; + s->ipmap.reg_u32[index] = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 2; + old_data = s->enable.reg_u32[index]; + s->enable.reg_u32[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; + index = index << 2; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EXTIOI_BOUNCE_START) >> 2; + s->bounce.reg_u32[index] = data; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u32[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u32[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + irq = ffs(coreisr); + while (irq != 0) { + extioi_update_irq(s, irq - 1 + index * 32, 0); + coreisr &= ~(1 << (irq - 1)); + irq = ffs(coreisr); + } + break; + case EXTIOI_COREMAP_START ... 
EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq >> 2; + + s->coremap.reg_u32[index] = data; + + for (i = 0; i < sizeof(data); i++) { + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 0 : cpu; + data = data >> 8; + + if (s->sw_coremap[irq + i] == cpu) + continue; + + if (test_bit(irq, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq + i, 0); + s->sw_coremap[irq + i] = cpu; + extioi_update_irq(s, irq + i, 1); + } else + s->sw_coremap[irq + i] = cpu; + } + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int loongarch_extioi_writel(struct kvm_vcpu *vcpu, + struct loongarch_extioi *s, + gpa_t addr, int len, const void *val) +{ + int i, index, irq, bits, ret = 0; + u8 cpu; + u64 data, old_data; + u64 coreisr, old_coreisr; + gpa_t offset; + + data = *(u64 *)val; + offset = addr - EXTIOI_BASE; + + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 3; + s->nodetype.reg_u64[index] = data; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + /* + * ipmap cannot be set at runtime, can be set only at the beginning + * of intr driver, need not update upper irq level + */ + index = (offset - EXTIOI_IPMAP_START) >> 3; + s->ipmap.reg_u64 = data; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 3; + old_data = s->enable.reg_u64[index]; + s->enable.reg_u64[index] = data; + /* + * 1: enable irq. + * update irq when isr is set. + */ + data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; + index = index << 3; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index + i, mask, 1); + } + /* + * 0: disable irq. + * update irq when isr is set. + */ + data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; + for (i = 0; i < sizeof(data); i++) { + u8 mask = (data >> (i * 8)) & 0xff; + + extioi_enable_irq(vcpu, s, index, mask, 0); + } + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + /* do not emulate hw bounced irq routing */ + index = (offset - EXTIOI_BOUNCE_START) >> 3; + s->bounce.reg_u64[index] = data; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 3; + /* using attrs to get current cpu index */ + cpu = vcpu->vcpu_id; + coreisr = data; + old_coreisr = s->coreisr.reg_u64[cpu][index]; + /* write 1 to clear interrupt */ + s->coreisr.reg_u64[cpu][index] = old_coreisr & ~coreisr; + coreisr &= old_coreisr; + + bits = sizeof(u64) * 8; + irq = find_first_bit((void *)&coreisr, bits); + while (irq < bits) { + extioi_update_irq(s, irq + index * bits, 0); + bitmap_clear((void *)&coreisr, irq, 1); + irq = find_first_bit((void *)&coreisr, bits); + } + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + irq = offset - EXTIOI_COREMAP_START; + index = irq >> 3; + + s->coremap.reg_u64[index] = data; + + for (i = 0; i < sizeof(data); i++) { + cpu = data & 0xff; + cpu = ffs(cpu) - 1; + cpu = (cpu >= 4) ? 
0 : cpu; + data = data >> 8; + + if (s->sw_coremap[irq + i] == cpu) + continue; + + if (test_bit(irq, (unsigned long *)s->isr.reg_u8)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ + extioi_update_irq(s, irq + i, 0); + s->sw_coremap[irq + i] = cpu; + extioi_update_irq(s, irq + i, 1); + } else + s->sw_coremap[irq + i] = cpu; + } + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int kvm_loongarch_extioi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret; + struct loongarch_extioi *extioi = vcpu->kvm->arch.extioi; + unsigned long flags; + + if (!extioi) { + kvm_err("%s: extioi irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.extioi_write_exits++; + loongarch_ext_irq_lock(extioi, flags); + + switch (len) { + case 1: + ret = loongarch_extioi_writeb(vcpu, extioi, addr, len, val); + break; + case 4: + ret = loongarch_extioi_writew(vcpu, extioi, addr, len, val); + break; + case 8: + ret = loongarch_extioi_writel(vcpu, extioi, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx,size %d\n", + __func__, addr, len); + } + + loongarch_ext_irq_unlock(extioi, flags); + + + return ret; +} + +static int loongarch_extioi_readb(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = offset - EXTIOI_NODETYPE_START; + data = s->nodetype.reg_u8[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = offset - EXTIOI_IPMAP_START; + data = s->ipmap.reg_u8[index]; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = offset - EXTIOI_ENABLE_START; + data = s->enable.reg_u8[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = offset - EXTIOI_BOUNCE_START; + data = s->bounce.reg_u8[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = offset - EXTIOI_COREISR_START; + data = s->coreisr.reg_u8[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + index = offset - EXTIOI_COREMAP_START; + data = s->coremap.reg_u8[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u8 *)val = data; + + return ret; +} + +static int loongarch_extioi_readw(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 2; + data = s->nodetype.reg_u32[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = (offset - EXTIOI_IPMAP_START) >> 2; + data = s->ipmap.reg_u32[index]; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 2; + data = s->enable.reg_u32[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = (offset - EXTIOI_BOUNCE_START) >> 2; + data = s->bounce.reg_u32[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 2; + data = s->coreisr.reg_u32[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... 
EXTIOI_COREMAP_END: + index = (offset - EXTIOI_COREMAP_START) >> 2; + data = s->coremap.reg_u32[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u32 *)val = data; + + return ret; +} + +static int loongarch_extioi_readl(struct kvm_vcpu *vcpu, struct loongarch_extioi *s, + gpa_t addr, int len, void *val) +{ + int index, ret = 0; + gpa_t offset; + u64 data; + + offset = addr - EXTIOI_BASE; + switch (offset) { + case EXTIOI_NODETYPE_START ... EXTIOI_NODETYPE_END: + index = (offset - EXTIOI_NODETYPE_START) >> 3; + data = s->nodetype.reg_u64[index]; + break; + case EXTIOI_IPMAP_START ... EXTIOI_IPMAP_END: + index = (offset - EXTIOI_IPMAP_START) >> 3; + data = s->ipmap.reg_u64; + break; + case EXTIOI_ENABLE_START ... EXTIOI_ENABLE_END: + index = (offset - EXTIOI_ENABLE_START) >> 3; + data = s->enable.reg_u64[index]; + break; + case EXTIOI_BOUNCE_START ... EXTIOI_BOUNCE_END: + index = (offset - EXTIOI_BOUNCE_START) >> 3; + data = s->bounce.reg_u64[index]; + break; + case EXTIOI_COREISR_START ... EXTIOI_COREISR_END: + /* length of accessing core isr is 8 bytes */ + index = (offset - EXTIOI_COREISR_START) >> 3; + data = s->coreisr.reg_u64[vcpu->vcpu_id][index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END: + index = (offset - EXTIOI_COREMAP_START) >> 3; + data = s->coremap.reg_u64[index]; + break; + default: + ret = -EINVAL; + break; + } + + *(u64 *)val = data; + + return ret; +} + +static int kvm_loongarch_extioi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + int ret; + struct loongarch_extioi *extioi = vcpu->kvm->arch.extioi; + unsigned long flags; + + if (!extioi) { + kvm_err("%s: extioi irqchip not valid!\n", __func__); + return -EINVAL; + } + + vcpu->kvm->stat.extioi_read_exits++; + loongarch_ext_irq_lock(extioi, flags); + + switch (len) { + case 1: + ret = loongarch_extioi_readb(vcpu, extioi, addr, len, val); + break; + case 4: + ret = loongarch_extioi_readw(vcpu, extioi, addr, len, val); + break; + case 8: + ret = loongarch_extioi_readl(vcpu, extioi, addr, len, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx,size %d\n", + __func__, addr, len); + } + + loongarch_ext_irq_unlock(extioi, flags); + + return ret; +} + +static const struct kvm_io_device_ops kvm_loongarch_extioi_ops = { + .read = kvm_loongarch_extioi_read, + .write = kvm_loongarch_extioi_write, +}; + +static int kvm_loongarch_extioi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int len, addr; + void __user *data; + void *p = NULL; + struct loongarch_extioi *s; + unsigned long flags; + + s = dev->kvm->arch.extioi; + addr = attr->attr; + data = (void __user *)attr->addr; + + loongarch_ext_irq_lock(s, flags); + switch (addr) { + case EXTIOI_NODETYPE_START: + p = s->nodetype.reg_u8; + len = sizeof(s->nodetype); + break; + case EXTIOI_IPMAP_START: + p = s->ipmap.reg_u8; + len = sizeof(s->ipmap); + break; + case EXTIOI_ENABLE_START: + p = s->enable.reg_u8; + len = sizeof(s->enable); + break; + case EXTIOI_BOUNCE_START: + p = s->bounce.reg_u8; + len = sizeof(s->bounce); + break; + case EXTIOI_ISR_START: + p = s->isr.reg_u8; + len = sizeof(s->isr); + break; + case EXTIOI_COREISR_START: + p = s->coreisr.reg_u8; + len = sizeof(s->coreisr); + break; + case EXTIOI_COREMAP_START: + p = s->coremap.reg_u8; + len = sizeof(s->coremap); + break; + case EXTIOI_SW_COREMAP_FLAG: + p = s->sw_coremap; + len = sizeof(s->sw_coremap); + break; + default: + loongarch_ext_irq_unlock(s, flags); + 
kvm_err("%s: unknown extioi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + loongarch_ext_irq_unlock(s, flags); + + if (is_write) { + if (copy_from_user(p, data, len)) + return -EFAULT; + } else { + if (copy_to_user(data, p, len)) + return -EFAULT; + } + + if ((addr == EXTIOI_COREISR_START) && is_write) { + loongarch_ext_irq_lock(s, flags); + extioi_set_sw_coreisr(s); + loongarch_ext_irq_unlock(s, flags); + } + + return 0; +} + +static int kvm_loongarch_extioi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + if (attr->group == KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS) + return kvm_loongarch_extioi_regs_access(dev, attr, false); + + return -EINVAL; +} + +static int kvm_loongarch_extioi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + if (attr->group == KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS) + return kvm_loongarch_extioi_regs_access(dev, attr, true); + + return -EINVAL; +} + +static void kvm_loongarch_extioi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_extioi *extioi; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + extioi = kvm->arch.extioi; + if (!extioi) + return; + + device = &extioi->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + kfree(extioi); +} + +static int kvm_loongarch_extioi_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct loongarch_extioi *s; + struct kvm_io_device *device; + struct kvm *kvm = dev->kvm; + + /* extioi has been created */ + if (kvm->arch.extioi) + return -EINVAL; + + s = kzalloc(sizeof(struct loongarch_extioi), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_loongarch_extioi_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, EXTIOI_BASE, EXTIOI_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kfree(s); + return -EFAULT; + } + + kvm->arch.extioi = s; + + kvm_info("create extioi device successfully\n"); + return 0; +} + +static struct kvm_device_ops kvm_loongarch_extioi_dev_ops = { + .name = "kvm-loongarch-extioi", + .create = kvm_loongarch_extioi_create, + .destroy = kvm_loongarch_extioi_destroy, + .set_attr = kvm_loongarch_extioi_set_attr, + .get_attr = kvm_loongarch_extioi_get_attr, +}; + +int kvm_loongarch_register_extioi_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_extioi_dev_ops, + KVM_DEV_TYPE_LA_EXTIOI); +} diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c new file mode 100644 index 0000000000000000000000000000000000000000..12024d9fdd0b5a751474cedbc45bab6bf624c760 --- /dev/null +++ b/arch/loongarch/kvm/intc/ipi.c @@ -0,0 +1,538 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + struct kvm_interrupt irq; + int cpu, action, status; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + + action = 1 << (data & 0x1f); + + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + vcpu->arch.ipi_state.status |= action; + if (status == 0) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + 
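To show how a VMM is expected to consume the new extioi device type registered here, a hedged user-space sketch follows: the controller is created with KVM_CREATE_DEVICE, and its register state is saved or restored through the KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS attribute group, where attr carries the register offset and addr points at a user buffer, mirroring kvm_loongarch_extioi_regs_access() above. The vm_fd plumbing is assumed, and whether EXTIOI_ENABLE_START and the other offsets are exported to user space depends on the series' uapi headers, so treat the constants as placeholders:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Sketch only: create the in-kernel extioi controller and read back its
 * enable register block.  KVM_DEV_TYPE_LA_EXTIOI, the GRP_REGS group and
 * EXTIOI_ENABLE_START come from the headers added by this series; vm_fd
 * is an already-created KVM VM file descriptor (assumption).
 */
static int extioi_create_and_dump(int vm_fd, void *enable_buf)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_LA_EXTIOI,
	};
	struct kvm_device_attr attr;

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS;
	attr.attr  = EXTIOI_ENABLE_START;		/* register offset selects the block */
	attr.addr  = (uint64_t)(unsigned long)enable_buf;

	/* GET copies the whole enable[] block out; SET restores it on migration */
	return ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr);
}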
spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) +{ + struct kvm_interrupt irq; + + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.status &= ~data; + if (!vcpu->arch.ipi_state.status) { + irq.irq = -LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static uint64_t read_mailbox(struct kvm_vcpu *vcpu, int offset, int len) +{ + void *pbuf; + uint64_t ret = 0; + + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + if (len == 1) + ret = *(unsigned char *)pbuf; + else if (len == 2) + ret = *(unsigned short *)pbuf; + else if (len == 4) + ret = *(unsigned int *)pbuf; + else if (len == 8) + ret = *(unsigned long *)pbuf; + else + kvm_err("%s: unknown data len: %d\n", __func__, len); + spin_unlock(&vcpu->arch.ipi_state.lock); + + return ret; +} + +static void write_mailbox(struct kvm_vcpu *vcpu, int offset, + uint64_t data, int len) +{ + void *pbuf; + + spin_lock(&vcpu->arch.ipi_state.lock); + pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20); + if (len == 1) + *(unsigned char *)pbuf = (unsigned char)data; + else if (len == 2) + *(unsigned short *)pbuf = (unsigned short)data; + else if (len == 4) + *(unsigned int *)pbuf = (unsigned int)data; + else if (len == 8) + *(unsigned long *)pbuf = (unsigned long)data; + else + kvm_err("%s: unknown data len: %d\n", __func__, len); + spin_unlock(&vcpu->arch.ipi_state.lock); +} + +static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, + int len, const void *val) +{ + uint64_t data; + uint32_t offset; + int ret = 0; + + data = *(uint64_t *)val; + + offset = (uint32_t)(addr & 0xff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case CORE_STATUS_OFF: + kvm_err("CORE_SET_OFF Can't be write\n"); + ret = -EINVAL; + break; + case CORE_EN_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + vcpu->arch.ipi_state.en = data; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case IOCSR_IPI_SEND: + ipi_send(vcpu->kvm, data); + break; + case CORE_SET_OFF: + kvm_info("CORE_SET_OFF simulation is required\n"); + ret = -EINVAL; + break; + case CORE_CLEAR_OFF: + /* Just clear the status of the current vcpu */ + ipi_clear(vcpu, data); + break; + case CORE_BUF_20 ... CORE_BUF_38 + 7: + if (offset + len > CORE_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + write_mailbox(vcpu, offset, data, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + return ret; +} + +static int loongarch_ipi_readl(struct kvm_vcpu *vcpu, gpa_t addr, + int len, void *val) +{ + uint32_t offset; + uint64_t res = 0; + int ret = 0; + + offset = (uint32_t)(addr & 0xff); + WARN_ON_ONCE(offset & (len - 1)); + + switch (offset) { + case CORE_STATUS_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.status; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case CORE_EN_OFF: + spin_lock(&vcpu->arch.ipi_state.lock); + res = vcpu->arch.ipi_state.en; + spin_unlock(&vcpu->arch.ipi_state.lock); + break; + case CORE_SET_OFF: + res = 0; + break; + case CORE_CLEAR_OFF: + res = 0; + break; + case CORE_BUF_20 ... 
CORE_BUF_38 + 7: + if (offset + len > CORE_BUF_38 + 8) { + kvm_err("%s: invalid offset or len: offset = %d, len = %d\n", + __func__, offset, len); + ret = -EINVAL; + break; + } + res = read_mailbox(vcpu, offset, len); + break; + default: + kvm_err("%s: unknown addr: %llx\n", __func__, addr); + ret = -EINVAL; + break; + } + + *(uint64_t *)val = res; + + return ret; +} + +static int kvm_loongarch_ipi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct loongarch_ipi *ipi; + int ret; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + ipi->kvm->stat.ipi_write_exits++; + ret = loongarch_ipi_writel(vcpu, addr, len, val); + + return ret; +} + +static int kvm_loongarch_ipi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct loongarch_ipi *ipi; + int ret; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + ipi->kvm->stat.ipi_read_exits++; + ret = loongarch_ipi_readl(vcpu, addr, len, val); + + return ret; +} + +static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) +{ + int i, ret; + uint32_t val = 0, mask = 0; + /* + * Bit 27-30 is mask for byte writing. + * If the mask is 0, we need not to do anything. + */ + if ((data >> 27) & 0xf) { + /* Read the old val */ + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) { + kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + return ret; + } + /* Construct the mask by scanning the bit 27-30 */ + for (i = 0; i < 4; i++) { + if (data & (0x1 << (27 + i))) + mask |= (0xff << (i * 8)); + } + /* Save the old part of val */ + val &= mask; + } + + val |= ((uint32_t)(data >> 32) & ~mask); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + if (unlikely(ret)) + kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + + return ret; +} + +static int mail_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + int cpu, mailbox; + int offset, ret; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = SMP_MAILBOX + CORE_BUF_20 + mailbox * 4; + ret = send_ipi_data(vcpu, offset, data); + + return ret; +} + +static int any_send(struct kvm *kvm, uint64_t data) +{ + struct kvm_vcpu *vcpu; + int cpu, offset, ret; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + + offset = data & 0xffff; + ret = send_ipi_data(vcpu, offset, data); + return ret; +} + +static int kvm_loongarch_mail_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct loongarch_ipi *ipi; + int ret; + + ipi = vcpu->kvm->arch.ipi; + if (!ipi) { + kvm_err("%s: ipi irqchip not valid!\n", __func__); + return -EINVAL; + } + + addr &= 0xfff; + addr -= IOCSR_MAIL_SEND; + + switch (addr) { + case MAIL_SEND_OFFSET: + ret = mail_send(vcpu->kvm, *(uint64_t *)val); + break; + case ANY_SEND_OFFSET: + ret = any_send(vcpu->kvm, *(uint64_t *)val); + break; + default: + kvm_err("%s: invalid addr %llx!\n", __func__, addr); + ret = -EINVAL; + break; + } + + 
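The byte-mask handling in send_ipi_data() above is easy to read backwards, so here is a small standalone model of it (illustrative only): bits 27-30 of the IOCSR mail/any-send command mark bytes that must be preserved, the payload sits in bits 32-63, and only bytes whose mask bit is clear get overwritten:

#include <stdint.h>
#include <stdio.h>

/*
 * Model of the merge done by send_ipi_data(): combine the old 32-bit
 * mailbox word with the payload in bits 32..63 of the command, keeping
 * the bytes whose mask bit (bits 27..30) is set.
 */
static uint32_t merge_mail_word(uint32_t old, uint64_t cmd)
{
	uint32_t mask = 0;
	int i;

	for (i = 0; i < 4; i++)
		if (cmd & (1u << (27 + i)))
			mask |= 0xffu << (i * 8);

	return (old & mask) | ((uint32_t)(cmd >> 32) & ~mask);
}

int main(void)
{
	/* keep byte 0 (mask bit 27 set), overwrite bytes 1..3 with 0xaabbccdd */
	uint64_t cmd = (0xaabbccddULL << 32) | (1u << 27);

	printf("0x%08x\n", merge_mail_word(0x11223344, cmd));	/* prints 0xaabbcc44 */
	return 0;
}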
return ret; +} + +static const struct kvm_io_device_ops kvm_loongarch_ipi_ops = { + .read = kvm_loongarch_ipi_read, + .write = kvm_loongarch_ipi_write, +}; + +static const struct kvm_io_device_ops kvm_loongarch_mail_ops = { + .write = kvm_loongarch_mail_write, +}; + +static int kvm_loongarch_ipi_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + uint64_t val; + int cpu, addr; + void *p = NULL; + int len = 4; + struct kvm_vcpu *vcpu; + + cpu = (attr->attr >> 16) & 0x3ff; + addr = attr->attr & 0xff; + + vcpu = kvm_get_vcpu(dev->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + switch (addr) { + case CORE_STATUS_OFF: + p = &vcpu->arch.ipi_state.status; + break; + case CORE_EN_OFF: + p = &vcpu->arch.ipi_state.en; + break; + case CORE_SET_OFF: + p = &vcpu->arch.ipi_state.set; + break; + case CORE_CLEAR_OFF: + p = &vcpu->arch.ipi_state.clear; + break; + case CORE_BUF_20: + p = &vcpu->arch.ipi_state.buf[0]; + len = 8; + break; + case CORE_BUF_28: + p = &vcpu->arch.ipi_state.buf[1]; + len = 8; + break; + case CORE_BUF_30: + p = &vcpu->arch.ipi_state.buf[2]; + len = 8; + break; + case CORE_BUF_38: + p = &vcpu->arch.ipi_state.buf[3]; + len = 8; + break; + default: + kvm_err("%s: unknown ipi register, addr = %d\n", __func__, addr); + return -EINVAL; + } + + if (is_write) { + if (len == 4) { + if (get_user(val, (uint32_t __user *)attr->addr)) + return -EFAULT; + *(uint32_t *)p = (uint32_t)val; + } else if (len == 8) { + if (get_user(val, (uint64_t __user *)attr->addr)) + return -EFAULT; + *(uint64_t *)p = val; + } + } else { + if (len == 4) { + val = *(uint32_t *)p; + return put_user(val, (uint32_t __user *)attr->addr); + } else if (len == 8) { + val = *(uint64_t *)p; + return put_user(val, (uint64_t __user *)attr->addr); + } + } + + return 0; +} + +static int kvm_loongarch_ipi_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_loongarch_ipi_regs_access(dev, attr, false); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } +} + +static int kvm_loongarch_ipi_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_LOONGARCH_IPI_GRP_REGS: + return kvm_loongarch_ipi_regs_access(dev, attr, true); + default: + kvm_err("%s: unknown group (%d)\n", __func__, attr->group); + return -EINVAL; + } +} + +static void kvm_loongarch_ipi_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_ipi *ipi; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + ipi = kvm->arch.ipi; + if (!ipi) + return; + + device = &ipi->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + + device = &ipi->mail_dev; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + + kfree(ipi); +} + +static int kvm_loongarch_ipi_create(struct kvm_device *dev, u32 type) +{ + struct kvm *kvm; + struct loongarch_ipi *s; + unsigned long addr; + struct kvm_io_device *device; + int ret; + + kvm_info("begin create loongarch ipi in kvm ...\n"); + if (!dev) { + kvm_err("%s: kvm_device ptr is invalid!\n", __func__); + return -EINVAL; + } + + kvm = dev->kvm; + if (kvm->arch.ipi) { + kvm_err("%s: loongarch ipi has been created!\n", __func__); + return -EINVAL; + } + + s = kzalloc(sizeof(struct loongarch_ipi), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + 
s->kvm = kvm; + + /* + * Initialize IOCSR device + */ + device = &s->device; + kvm_iodevice_init(device, &kvm_loongarch_ipi_ops); + addr = SMP_MAILBOX; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, addr, + KVM_IOCSR_IPI_ADDR_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm_err("%s: initialize IOCSR dev failed, ret = %d\n", __func__, ret); + goto err; + } + + device = &s->mail_dev; + kvm_iodevice_init(device, &kvm_loongarch_mail_ops); + addr = MAIL_SEND_ADDR; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, addr, + KVM_IOCSR_MAIL_ADDR_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + device = &s->device; + kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, device); + kvm_err("%s: initialize mail box dev failed, ret = %d\n", __func__, ret); + goto err; + } + + kvm->arch.ipi = s; + kvm_info("create loongarch ipi in kvm done!\n"); + + return 0; + +err: + kfree(s); + return -EFAULT; +} + +static struct kvm_device_ops kvm_loongarch_ipi_dev_ops = { + .name = "kvm-loongarch-ipi", + .create = kvm_loongarch_ipi_create, + .destroy = kvm_loongarch_ipi_destroy, + .set_attr = kvm_loongarch_ipi_set_attr, + .get_attr = kvm_loongarch_ipi_get_attr, +}; + +int kvm_loongarch_register_ipi_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_ipi_dev_ops, + KVM_DEV_TYPE_LA_IPI); +} diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c new file mode 100644 index 0000000000000000000000000000000000000000..7d053dbcd5c066829fa6c12ab15a6aac4d7957be --- /dev/null +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +/* update the isr according to irq level and route irq to extioi */ +static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = (1 << irq); + + /* + * set isr and route irq to extioi and + * the route table is in htmsi_vector[] + */ + if (level) { + if (mask & s->irr & ~s->mask) { + s->isr |= mask; + irq = s->htmsi_vector[irq]; + extioi_set_irq(s->kvm->arch.extioi, irq, level); + } + } else { + if (mask & s->isr & ~s->irr) { + s->isr &= ~mask; + irq = s->htmsi_vector[irq]; + extioi_set_irq(s->kvm->arch.extioi, irq, level); + } + } +} + +/* msi irq handler */ +void pch_msi_set_irq(struct kvm *kvm, int irq, int level) +{ + extioi_set_irq(kvm->arch.extioi, irq, level); +} + +/* called when a irq is triggered in pch pic */ +void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level) +{ + u64 mask = (1 << irq); + + spin_lock(&s->lock); + if (level) + /* set irr */ + s->irr |= mask; + else { + /* 0 level signal in edge triggered irq does not mean to clear irq + * The irr register variable is cleared when the cpu writes to the + * PCH_PIC_CLEAR_START address area + */ + if (s->edge & mask) { + spin_unlock(&s->lock); + return; + } + s->irr &= ~mask; + } + pch_pic_update_irq(s, irq, level); + spin_unlock(&s->lock); +} + +/* update batch irqs, the irq_mask is a bitmap of irqs */ +static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) +{ + int irq, bits; + + /* find each irq by irqs bitmap and update each irq */ + bits = sizeof(irq_mask) * 8; + irq = find_first_bit((void *)&irq_mask, bits); + while (irq < bits) { + pch_pic_update_irq(s, irq, level); + bitmap_clear((void *)&irq_mask, irq, 1); + irq = find_first_bit((void *)&irq_mask, 
bits); + } +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to read. + */ +static u32 pch_pic_read_reg(u64 *s, int high) +{ + u64 val = *s; + + /* read the high 32 bits when the high is 1 */ + return high ? (u32)(val >> 32) : (u32)val; +} + +/* + * pch pic register is 64-bit, but it is accessed by 32-bit, + * so we use high to get whether low or high 32 bits we want + * to write. + */ +static u32 pch_pic_write_reg(u64 *s, int high, u32 v) +{ + u64 val = *s, data = v; + + if (high) { + /* + * Clear val high 32 bits + * write the high 32 bits when the high is 1 + */ + *s = (val << 32 >> 32) | (data << 32); + val >>= 32; + } else + /* + * Clear val low 32 bits + * write the low 32 bits when the high is 0 + */ + *s = (val >> 32 << 32) | v; + + return (u32)val; +} + +static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, + int len, const void *val) +{ + u32 old, data, offset, index; + u64 irq; + int ret; + + ret = 0; + data = *(u32 *)val; + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + /* get whether high or low 32 bits we want to write */ + index = offset >> 2; + old = pch_pic_write_reg(&s->mask, index, data); + + /* enable irq when mask value change to 0 */ + irq = (old & ~data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 1); + + /* disable irq when mask value change to 1 */ + irq = (~old & data) << (32 * index); + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + pch_pic_write_reg(&s->htmsi_en, index, data); + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* 1: edge triggered, 0: level triggered */ + pch_pic_write_reg(&s->edge, index, data); + break; + case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END: + offset -= PCH_PIC_CLEAR_START; + index = offset >> 2; + /* write 1 to clear edge irq */ + old = pch_pic_read_reg(&s->irr, index); + /* + * get the irq bitmap which is edge triggered and + * already set and to be cleared + */ + irq = old & pch_pic_read_reg(&s->edge, index) & data; + /* write irr to the new state where irqs have been cleared */ + pch_pic_write_reg(&s->irr, index, old & ~irq); + /* update cleared irqs */ + pch_pic_update_batch_irqs(s, irq, 0); + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + offset -= PCH_PIC_AUTO_CTRL0_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl0, index, 0); + break; + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + offset -= PCH_PIC_AUTO_CTRL1_START; + index = offset >> 2; + /* we only use default mode: fixed interrupt distribution mode */ + pch_pic_write_reg(&s->auto_ctrl1, index, 0); + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + offset -= PCH_PIC_ROUTE_ENTRY_START; + /* only route to int0: extioi */ + s->route_entry[offset] = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + /* route table to extioi */ + offset -= PCH_PIC_HTMSI_VEC_START; + s->htmsi_vector[offset] = (u8)data; + break; + case PCH_PIC_POLARITY_START ... 
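The pch-pic state words are 64-bit but the guest accesses them 32 bits at a time, so pch_pic_read_reg()/pch_pic_write_reg() above split every access by a 'high' index. A standalone rendering of the same idea (not code from the patch):

#include <stdint.h>
#include <stdio.h>

/*
 * Read or update one 32-bit half of a 64-bit register, as
 * pch_pic_read_reg()/pch_pic_write_reg() do; high selects bits 32..63.
 * write_half() returns the half's previous value.
 */
static uint32_t read_half(uint64_t reg, int high)
{
	return high ? (uint32_t)(reg >> 32) : (uint32_t)reg;
}

static uint32_t write_half(uint64_t *reg, int high, uint32_t v)
{
	uint32_t old = read_half(*reg, high);

	if (high)
		*reg = (*reg & 0x00000000ffffffffULL) | ((uint64_t)v << 32);
	else
		*reg = (*reg & 0xffffffff00000000ULL) | v;

	return old;
}

int main(void)
{
	uint64_t mask = 0;

	write_half(&mask, 0, 0xffffffff);	/* mask irqs 0..31  */
	write_half(&mask, 1, 0x0000ffff);	/* mask irqs 32..47 */
	printf("mask = 0x%016llx, low half = 0x%08x\n",
	       (unsigned long long)mask, read_half(mask, 0));
	return 0;
}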
PCH_PIC_POLARITY_END: + offset -= PCH_PIC_POLARITY_START; + index = offset >> 2; + + /* we only use defalut value 0: high level triggered */ + pch_pic_write_reg(&s->polarity, index, 0); + break; + default: + ret = -EINVAL; + break; + } + + spin_unlock(&s->lock); + return ret; +} + +static int kvm_loongarch_pch_pic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic writing */ + vcpu->kvm->stat.pch_pic_write_exits++; + ret = loongarch_pch_pic_write(s, addr, len, val); + + return ret; +} + +static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) +{ + int offset, index, ret = 0; + u32 data = 0; + u64 int_id = 0; + + offset = addr - s->pch_pic_base; + + spin_lock(&s->lock); + switch (offset) { + case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: + /* int id version */ + int_id |= (u64)PCH_PIC_INT_ID_VER << 32; + /* irq number */ + int_id |= (u64)31 << (32 + 16); + /* int id value */ + int_id |= PCH_PIC_INT_ID_VAL; + *(u64 *)val = int_id; + break; + case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: + offset -= PCH_PIC_MASK_START; + index = offset >> 2; + /* read mask reg */ + data = pch_pic_read_reg(&s->mask, index); + *(u32 *)val = data; + break; + case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: + offset -= PCH_PIC_HTMSI_EN_START; + index = offset >> 2; + /* read htmsi enable reg */ + data = pch_pic_read_reg(&s->htmsi_en, index); + *(u32 *)val = data; + break; + case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: + offset -= PCH_PIC_EDGE_START; + index = offset >> 2; + /* read edge enable reg */ + data = pch_pic_read_reg(&s->edge, index); + *(u32 *)val = data; + break; + case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: + case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: + /* we only use default mode: fixed interrupt distribution mode */ + *(u32 *)val = 0; + break; + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: + /* only route to int0: extioi */ + *(u8 *)val = 1; + break; + case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: + offset -= PCH_PIC_HTMSI_VEC_START; + /* read htmsi vector */ + data = s->htmsi_vector[offset]; + *(u8 *)val = data; + break; + case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: + /* we only use defalut value 0: high level triggered */ + *(u32 *)val = 0; + break; + default: + ret = -EINVAL; + } + spin_unlock(&s->lock); + return ret; +} + +static int kvm_loongarch_pch_pic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + int ret; + struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic; + + if (!s) { + kvm_err("%s: pch pic irqchip not valid!\n", __func__); + return -EINVAL; + } + + /* statistics of pch pic reading */ + vcpu->kvm->stat.pch_pic_read_exits++; + ret = loongarch_pch_pic_read(s, addr, len, val); + return ret; +} + +static const struct kvm_io_device_ops kvm_loongarch_pch_pic_ops = { + .read = kvm_loongarch_pch_pic_read, + .write = kvm_loongarch_pch_pic_write, +}; + +static int kvm_loongarch_pch_pic_init(struct kvm_device *dev, u64 addr) +{ + int ret; + struct loongarch_pch_pic *s = dev->kvm->arch.pch_pic; + struct kvm_io_device *device; + struct kvm *kvm = dev->kvm; + + s->pch_pic_base = addr; + device = &s->device; + /* init device by pch pic writing and reading ops */ + kvm_iodevice_init(device, &kvm_loongarch_pch_pic_ops); + mutex_lock(&kvm->slots_lock); + /* register pch pic device */ + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, PCH_PIC_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) + return -EFAULT; + + return 0; +} + +/* used by user space to get or set pch pic registers */ +static int kvm_loongarch_pch_pic_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + int addr, len = 8, ret = 0; + void __user *data; + void *p = NULL; + struct loongarch_pch_pic *s; + + s = dev->kvm->arch.pch_pic; + addr = attr->attr; + data = (void __user *)attr->addr; + + spin_lock(&s->lock); + /* get pointer to pch pic register by addr */ + switch (addr) { + case PCH_PIC_MASK_START: + p = &s->mask; + break; + case PCH_PIC_HTMSI_EN_START: + p = &s->htmsi_en; + break; + case PCH_PIC_EDGE_START: + p = &s->edge; + break; + case PCH_PIC_AUTO_CTRL0_START: + p = &s->auto_ctrl0; + break; + case PCH_PIC_AUTO_CTRL1_START: + p = &s->auto_ctrl1; + break; + case PCH_PIC_ROUTE_ENTRY_START: + p = s->route_entry; + len = 64; + break; + case PCH_PIC_HTMSI_VEC_START: + p = s->htmsi_vector; + len = 64; + break; + case PCH_PIC_INT_IRR_START: + p = &s->irr; + break; + case PCH_PIC_INT_ISR_START: + p = &s->isr; + break; + case PCH_PIC_POLARITY_START: + p = &s->polarity; + break; + default: + ret = -EINVAL; + } + + /* write or read value according to is_write */ + if (is_write) { + if (copy_from_user(p, data, len)) + ret = -EFAULT; + } else { + if (copy_to_user(data, p, len)) + ret = -EFAULT; + } + + spin_unlock(&s->lock); + return ret; +} + +static int kvm_loongarch_pch_pic_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + /* only support pch pic group registers */ + if (attr->group == KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS) + return kvm_loongarch_pch_pic_regs_access(dev, attr, false); + + return -EINVAL; +} + +static int kvm_loongarch_pch_pic_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret = -EINVAL; + u64 addr; + void __user *uaddr = (void __user *)(long)attr->addr; + + switch (attr->group) { + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT: + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + if (!dev->kvm->arch.pch_pic) { + kvm_err("%s: please create pch_pic irqchip first!\n", __func__); + ret = -EFAULT; + break; + } + + ret = 
kvm_loongarch_pch_pic_init(dev, addr); + break; + default: + kvm_err("%s: unknown group (%d) attr (%lld)\n", __func__, attr->group, + attr->attr); + ret = -EINVAL; + break; + } + break; + case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS: + ret = kvm_loongarch_pch_pic_regs_access(dev, attr, true); + break; + default: + break; + } + + return ret; +} + +static int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + + u32 nr = KVM_IRQCHIP_NUM_PINS; + int i, ret; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + + return 0; +} + +static void kvm_loongarch_pch_pic_destroy(struct kvm_device *dev) +{ + struct kvm *kvm; + struct loongarch_pch_pic *s; + struct kvm_io_device *device; + + if (!dev) + return; + + kvm = dev->kvm; + if (!kvm) + return; + + s = kvm->arch.pch_pic; + if (!s) + return; + + device = &s->device; + /* unregister pch pic device and free it's memory */ + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, device); + kfree(s); +} + +static int kvm_loongarch_pch_pic_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct loongarch_pch_pic *s; + struct kvm *kvm = dev->kvm; + + /* pch pic should not has been created */ + if (kvm->arch.pch_pic) + return -EINVAL; + + ret = kvm_setup_default_irq_routing(kvm); + if (ret) + return -ENOMEM; + + s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + s->kvm = kvm; + + + kvm->arch.pch_pic = s; + + kvm_info("create pch pic device successfully\n"); + return 0; +} + +static struct kvm_device_ops kvm_loongarch_pch_pic_dev_ops = { + .name = "kvm-loongarch-pch-pic", + .create = kvm_loongarch_pch_pic_create, + .destroy = kvm_loongarch_pch_pic_destroy, + .set_attr = kvm_loongarch_pch_pic_set_attr, + .get_attr = kvm_loongarch_pch_pic_get_attr, +}; + +int kvm_loongarch_register_pch_pic_device(void) +{ + return kvm_register_device_ops(&kvm_loongarch_pch_pic_dev_ops, + KVM_DEV_TYPE_LA_IOAPIC); +} diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c new file mode 100644 index 0000000000000000000000000000000000000000..bf67f329ebc962cc6b7e9320074c998e99713921 --- /dev/null +++ b/arch/loongarch/kvm/irqfd.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + /* ioapic pin (0 ~ 64) <---> gsi(0 ~ 64) */ + pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level); + + return 0; +} + +/* + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. 
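For the consumer side of the identity gsi-to-pin routing installed by kvm_setup_default_irq_routing() above, a VMM can bind an eventfd to a GSI with the standard KVM_IRQFD ioctl; signalling that eventfd then ends up in kvm_set_ioapic_irq() and raises the matching pch-pic input line. Minimal sketch, assuming vm_fd is a ready VM descriptor and the interrupt controller devices from the previous files have already been created:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Bind an eventfd to a GSI.  With the default identity routing set up by
 * this series, writing to the returned eventfd raises pch-pic input 'gsi'.
 */
static int bind_irqfd(int vm_fd, uint32_t gsi)
{
	struct kvm_irqfd irqfd;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = efd;
	irqfd.gsi = gsi;

	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
		close(efd);
		return -1;
	}

	return efd;	/* write(efd, &(uint64_t){ 1 }, 8) injects the interrupt */
}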
+ */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = kvm_set_ioapic_irq; + + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + if (e->type == KVM_IRQ_ROUTING_MSI) { + pch_msi_set_irq(kvm, e->msi.data, 1); + return 0; + } + + return -EWOULDBLOCK; +} + +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + if (!level) + return -1; + + pch_msi_set_irq(kvm, e->msi.data, level); + return 0; +} diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 86a2f2d0cb27e3d213012d6987abde4ab1dae60e..1f50f6723739cde230b961ed429cee62b693db44 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "trace.h" unsigned long vpid_mask; @@ -242,6 +244,7 @@ void kvm_check_vpid(struct kvm_vcpu *vcpu) kvm_update_vpid(vcpu, cpu); trace_kvm_vpid_change(vcpu, vcpu->arch.vpid); vcpu->cpu = cpu; + kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); } /* Restore GSTAT(0x50).vpid */ @@ -312,7 +315,7 @@ void kvm_arch_hardware_disable(void) static int kvm_loongarch_env_init(void) { - int cpu, order; + int cpu, order, ret; void *addr; struct kvm_context *context; @@ -367,7 +370,20 @@ static int kvm_loongarch_env_init(void) kvm_init_gcsr_flag(); - return 0; + /* Register loongarch ipi interrupt controller interface. */ + ret = kvm_loongarch_register_ipi_device(); + if (ret) + return ret; + + /* Register loongarch extioi interrupt controller interface. */ + ret = kvm_loongarch_register_extioi_device(); + if (ret) + return ret; + + /* Register loongarch pch pic interrupt controller interface. */ + ret = kvm_loongarch_register_pch_pic_device(); + + return ret; } static void kvm_loongarch_env_exit(void) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 915f175278931f26164c1b970663542cf0661a12..d312921f3ab9eaaee3a90c5860f8dc0b7657a7cc 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -163,6 +163,7 @@ static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm, child = kvm_mmu_memory_cache_alloc(cache); _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]); + smp_wmb(); /* Make pte visible before pmd */ kvm_set_pte(entry, __pa(child)); } else if (kvm_pte_huge(*entry)) { return entry; @@ -444,6 +445,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { int needs_flush; + u32 old_flags = old ? old->flags : 0; + u32 new_flags = new ? 
new->flags : 0; + bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES; + + /* Only track memslot flags changed */ + if (change != KVM_MR_FLAGS_ONLY) + return; + + /* Discard dirty page tracking on readonly memslot */ + if ((old_flags & new_flags) & KVM_MEM_READONLY) + return; /* * If dirty page logging is enabled, write protect all pages in the slot @@ -454,9 +466,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * MOVE/DELETE: The old mappings will already have been cleaned up by * kvm_arch_flush_shadow_memslot() */ - if (change == KVM_MR_FLAGS_ONLY && - (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && - new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) { + /* + * Initially-all-set does not require write protecting any page + * because they're all assumed to be dirty. + */ + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + return; + spin_lock(&kvm->mmu_lock); /* Write protect GPA page table entries */ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, @@ -572,6 +589,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; struct kvm_memory_slot *slot; + struct page *page; spin_lock(&kvm->mmu_lock); @@ -583,10 +601,8 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ } /* Track access to pages marked old */ - new = *ptep; - if (!kvm_pte_young(new)) - new = kvm_pte_mkyoung(new); - /* call kvm_set_pfn_accessed() after unlock */ + new = kvm_pte_mkyoung(*ptep); + /* call kvm_set_pfn_accessed() after unlock */ if (write && !kvm_pte_dirty(new)) { if (!kvm_pte_write(new)) { @@ -614,19 +630,22 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ if (changed) { kvm_set_pte(ptep, new); pfn = kvm_pte_pfn(new); + page = kvm_pfn_to_refcounted_page(pfn); + if (page) + get_page(page); } spin_unlock(&kvm->mmu_lock); - /* - * Fixme: pfn may be freed after mmu_lock - * kvm_try_get_pfn(pfn)/kvm_release_pfn pair to prevent this? 
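The early return added above only fires when kvm_dirty_log_manual_protect_and_init_set() is true, that is, when the VMM opted into manual dirty-log protection with the initially-all-set mode, in which case no page needs to be write protected when logging is turned on. For context, a hedged sketch of how user space selects that mode with the standard KVM capability (generic uapi, not something this patch introduces):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Opt in to manual dirty-log protect with the initially-all-set mode, so
 * that enabling KVM_MEM_LOG_DIRTY_PAGES does not write protect every page
 * up front (the case the early return above short-circuits).
 */
static int enable_dirty_log_init_all_set(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	cap.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
		      KVM_DIRTY_LOG_INITIALLY_SET;

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}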
- */ - if (kvm_pte_young(changed)) - kvm_set_pfn_accessed(pfn); + if (changed) { + if (kvm_pte_young(changed)) + kvm_set_pfn_accessed(pfn); - if (kvm_pte_dirty(changed)) { - mark_page_dirty(kvm, gfn); - kvm_set_pfn_dirty(pfn); + if (kvm_pte_dirty(changed)) { + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + if (page) + put_page(page); } return ret; out: @@ -769,6 +788,7 @@ static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t g val += PAGE_SIZE; } + smp_wmb(); /* Make pte visible before pmd */ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ kvm_set_pte(ptep, __pa(child)); @@ -890,10 +910,20 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, write)) { - level = 0; - } else { + if (fault_supports_huge_mapping(memslot, hva, write)) { + /* Check page level about host mmu*/ level = host_pfn_mapping_level(kvm, gfn, memslot); + if (level == 1) { + /* + * Check page level about secondary mmu + * Disable hugepage if it is normal page on + * secondary mmu already + */ + ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); + if (ptep && !kvm_pte_huge(*ptep)) + level = 0; + } + if (level == 1) { gfn = gfn & ~(PTRS_PER_PTE - 1); pfn = pfn & ~(PTRS_PER_PTE - 1); @@ -924,7 +954,6 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) kvm_set_pfn_dirty(pfn); } - kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); out: srcu_read_unlock(&kvm->srcu, srcu_idx); @@ -940,7 +969,8 @@ int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) return ret; /* Invalidate this entry in the TLB */ - kvm_flush_tlb_gpa(vcpu, gpa); + vcpu->arch.flush_gpa = gpa; + kvm_make_request(KVM_REQ_TLB_FLUSH_GPA, vcpu); return 0; } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index ba976509bfe819ec51fdaa08f2a1ba4a334755cd..3634431db18a4a4992ea9b7d1ed44545214ff3df 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -213,12 +213,6 @@ SYM_FUNC_START(kvm_enter_guest) /* Save host GPRs */ kvm_save_host_gpr a2 - /* Save host CRMD, PRMD to stack */ - csrrd a3, LOONGARCH_CSR_CRMD - st.d a3, a2, PT_CRMD - csrrd a3, LOONGARCH_CSR_PRMD - st.d a3, a2, PT_PRMD - addi.d a2, a1, KVM_VCPU_ARCH st.d sp, a2, KVM_ARCH_HSP st.d tp, a2, KVM_ARCH_HTP diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index bcc6b6d063d914dbf820b43f2c1308803646b395..74a4b5c272d60e99523e12e89d5e663d53009c2b 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -188,10 +188,3 @@ void kvm_save_timer(struct kvm_vcpu *vcpu) kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); preempt_enable(); } - -void kvm_reset_timer(struct kvm_vcpu *vcpu) -{ - write_gcsr_timercfg(0); - kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG, 0); - hrtimer_cancel(&vcpu->arch.swtimer); -} diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c index 02535df6b51fbebdf12c23373c36b6f44e2cd144..ebdbe9264e9c60f57f316545572097c0362fcbf0 100644 --- a/arch/loongarch/kvm/tlb.c +++ b/arch/loongarch/kvm/tlb.c @@ -23,10 +23,7 @@ void kvm_flush_tlb_all(void) void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa) { - unsigned long flags; - - local_irq_save(flags); + lockdep_assert_irqs_disabled(); gpa &= (PAGE_MASK << 1); invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa); - local_irq_restore(flags); } diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index 
c2484ad4cffa2102a61f2bd4d0a6f537a6501906..1783397b1bc88e852b5b424549e00acc1cce0ff8 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -19,14 +19,16 @@ DECLARE_EVENT_CLASS(kvm_transition, TP_PROTO(struct kvm_vcpu *vcpu), TP_ARGS(vcpu), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; ), - TP_printk("PC: 0x%08lx", __entry->pc) + TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc) ); DEFINE_EVENT(kvm_transition, kvm_enter, @@ -54,19 +56,22 @@ DECLARE_EVENT_CLASS(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) __field(unsigned int, reason) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), - TP_printk("[%s]PC: 0x%08lx", - __print_symbolic(__entry->reason, - kvm_trace_symbol_exit_types), - __entry->pc) + TP_printk("vcpu %u [%s] PC: 0x%08lx", + __entry->vcpu_id, + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) ); DEFINE_EVENT(kvm_exit, kvm_exit_idle, @@ -85,14 +90,17 @@ TRACE_EVENT(kvm_exit_gspr, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word), TP_ARGS(vcpu, inst_word), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned int, inst_word) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->inst_word = inst_word; ), - TP_printk("Inst word: 0x%08x", __entry->inst_word) + TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id, + __entry->inst_word) ); #define KVM_TRACE_AUX_SAVE 0 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 685f2826d022f1d1e3148e530219db6e4a240f3b..e9b397543fdf7d9a7b2c32214ad07f8ac65fd67f 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -31,14 +32,134 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; +static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + context->perf_cntr[0] = read_csr_perfcntr0(); + context->perf_cntr[1] = read_csr_perfcntr1(); + context->perf_cntr[2] = read_csr_perfcntr2(); + context->perf_cntr[3] = read_csr_perfcntr3(); + context->perf_ctrl[0] = write_csr_perfctrl0(0); + context->perf_ctrl[1] = write_csr_perfctrl1(0); + context->perf_ctrl[2] = write_csr_perfctrl2(0); + context->perf_ctrl[3] = write_csr_perfctrl3(0); +} + +static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + + context = this_cpu_ptr(vcpu->kvm->arch.vmcs); + write_csr_perfcntr0(context->perf_cntr[0]); + write_csr_perfcntr1(context->perf_cntr[1]); + write_csr_perfcntr2(context->perf_cntr[2]); + write_csr_perfcntr3(context->perf_cntr[3]); + write_csr_perfctrl0(context->perf_ctrl[0]); + write_csr_perfctrl1(context->perf_ctrl[1]); + write_csr_perfctrl2(context->perf_ctrl[2]); + write_csr_perfctrl3(context->perf_ctrl[3]); +} + + +static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + 
kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); +} + +static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); +} + +static int kvm_own_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + + if (!kvm_guest_has_pmu(&vcpu->arch)) + return -EINVAL; + + kvm_save_host_pmu(vcpu); + + /* Set PM0-PM(num) to guest */ + val = read_csr_gcfg() & ~CSR_GCFG_GPERF; + val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; + write_csr_gcfg(val); + + kvm_restore_guest_pmu(vcpu); + + return 0; +} + +static void kvm_lose_pmu(struct kvm_vcpu *vcpu) +{ + unsigned long val; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU)) + return; + + kvm_save_guest_pmu(vcpu); + + /* Disable pmu access from guest */ + write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); + + /* + * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when + * exiting the guest, so that the next time trap into the guest. + * We don't need to deal with PMU CSRs contexts. + */ + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); + val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + if (!(val & KVM_PMU_EVENT_ENABLED)) + vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + + kvm_restore_host_pmu(vcpu); +} + +static void kvm_restore_pmu(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) + kvm_make_request(KVM_REQ_PMU, vcpu); +} + +static void kvm_check_pmu(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_PMU, vcpu)) { + kvm_own_pmu(vcpu); + vcpu->arch.aux_inuse |= KVM_LARCH_PMU; + } +} + static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { + u32 version; + u64 steal; + gpa_t gpa; + struct kvm_memslots *slots; struct kvm_steal_time __user *st; struct gfn_to_hva_cache *ghc; - struct kvm_memslots *slots; - gpa_t gpa; - u64 steal; - u32 version; ghc = &vcpu->arch.st.cache; gpa = vcpu->arch.st.guest_addr; @@ -48,8 +169,7 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) gpa &= KVM_STEAL_PHYS_MASK; slots = kvm_memslots(vcpu->kvm); if (slots->generation != ghc->generation || gpa != ghc->gpa) { - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, - sizeof(*st))) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) { ghc->gpa = INVALID_GPA; return; } @@ -58,19 +178,17 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) st = (struct kvm_steal_time __user *)ghc->hva; unsafe_get_user(version, &st->version, out); if (version & 1) - version += 1; + version += 1; /* first time write, random junk */ + version += 1; unsafe_put_user(version, &st->version, out); - /* Make sure st->version is written first */ smp_wmb(); unsafe_get_user(steal, &st->steal, out); - steal += current->sched_info.run_delay - - vcpu->arch.st.last_steal; + steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = 
current->sched_info.run_delay; unsafe_put_user(steal, &st->steal, out); - /* Make sure st->steal is written first */ smp_wmb(); version += 1; unsafe_put_user(version, &st->version, out); @@ -78,68 +196,6 @@ static void kvm_update_stolen_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } -static bool kvm_pvtime_supported(void) -{ - return !!sched_info_on(); -} - -static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - u64 __user *user = (u64 __user *)attr->addr; - struct kvm *kvm = vcpu->kvm; - u64 gpa; - int ret = 0; - int idx; - - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) - return -ENXIO; - - if (get_user(gpa, user)) - return -EFAULT; - - /* Check the address is in a valid memslot */ - idx = srcu_read_lock(&kvm->srcu); - if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) - ret = -EINVAL; - srcu_read_unlock(&kvm->srcu, idx); - - if (!ret) - vcpu->arch.st.guest_addr = gpa; - - return ret; -} - -static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - u64 __user *user = (u64 __user *)attr->addr; - u64 gpa; - - if (!kvm_pvtime_supported() || - attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) - return -ENXIO; - - gpa = vcpu->arch.st.guest_addr; - if (put_user(gpa, user)) - return -EFAULT; - - return 0; -} - -static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - switch (attr->attr) { - case KVM_LOONGARCH_VCPU_PVTIME_GPA: - if (kvm_pvtime_supported()) - return 0; - } - - return -ENXIO; -} - /* * kvm_check_requests - check and handle pending vCPU requests * @@ -157,12 +213,22 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu) if (kvm_dirty_ring_check_request(vcpu)) return RESUME_HOST; - if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) kvm_update_stolen_time(vcpu); return RESUME_GUEST; } +static void kvm_late_check_requests(struct kvm_vcpu *vcpu) +{ + lockdep_assert_irqs_disabled(); + if (kvm_check_request(KVM_REQ_TLB_FLUSH_GPA, vcpu)) + if (vcpu->arch.flush_gpa != INVALID_GPA) { + kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa); + vcpu->arch.flush_gpa = INVALID_GPA; + } +} + /* * Check and handle pending signal and vCPU requests etc * Run with irq enabled and preempt enabled @@ -213,6 +279,14 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) /* Make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); kvm_check_vpid(vcpu); + kvm_check_pmu(vcpu); + + /* + * Called after function kvm_check_vpid() + * Since it updates CSR.GSTAT used by kvm_flush_tlb_gpa(), + * and it may also clear KVM_REQ_TLB_FLUSH_GPA pending bit + */ + kvm_late_check_requests(vcpu); vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY); /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; @@ -243,6 +317,8 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Set a default exit reason */ run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_lose_pmu(vcpu); + guest_timing_exit_irqoff(); guest_state_exit_irqoff(); local_irq_enable(); @@ -371,117 +447,125 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return 0; } -static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) -{ - unsigned long gintc; - struct loongarch_csrs *csr = vcpu->arch.csr; - - if (get_gcsr_flag(id) & INVALID_GCSR) - return -EINVAL; - - 
if (id == LOONGARCH_CSR_ESTAT) { - /* ESTAT IP0~IP7 get from GINTC */ - gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; - *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); - return 0; - } - - /* - * Get software CSR state since software state is consistent - * with hardware for synchronous ioctl - */ - *val = kvm_read_sw_gcsr(csr, id); - - return 0; -} - static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val) { int cpuid; + struct kvm_phyid_map *map; struct loongarch_csrs *csr = vcpu->arch.csr; - struct kvm_phyid_map *map; if (val >= KVM_MAX_PHYID) return -EINVAL; - cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + spin_lock(&vcpu->kvm->arch.phyid_map_lock); - if (map->phys_map[cpuid].enabled) { - /* - * Cpuid is already set before - * Forbid changing different cpuid at runtime - */ - if (cpuid != val) { - /* - * Cpuid 0 is initial value for vcpu, maybe invalid - * unset value for vcpu - */ - if (cpuid) { - spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return -EINVAL; - } - } else { - /* Discard duplicated cpuid set */ + if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) { + /* Discard duplicated CPUID set operation */ + if (cpuid == val) { spin_unlock(&vcpu->kvm->arch.phyid_map_lock); return 0; } - } - if (map->phys_map[val].enabled) { /* - * New cpuid is already set with other vcpu - * Forbid sharing the same cpuid between different vcpus + * CPUID is already set before + * Forbid changing to a different CPUID at runtime */ - if (map->phys_map[val].vcpu != vcpu) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + if (map->phys_map[val].enabled) { + /* Discard duplicated CPUID set operation */ + if (vcpu == map->phys_map[val].vcpu) { spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return -EINVAL; + return 0; } - /* Discard duplicated cpuid set operation*/ + /* + * New CPUID is already set with other vcpu + * Forbid sharing the same CPUID between different vcpus + */ spin_unlock(&vcpu->kvm->arch.phyid_map_lock); - return 0; + return -EINVAL; } kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val); map->phys_map[val].enabled = true; map->phys_map[val].vcpu = vcpu; - if (map->max_phyid < val) - map->max_phyid = val; spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; } +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + if (cpuid >= KVM_MAX_PHYID) + return; + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if (map->phys_map[cpuid].enabled) { + map->phys_map[cpuid].vcpu = NULL; + map->phys_map[cpuid].enabled = false; + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); + } + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); +} + struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) { - struct kvm_phyid_map *map; + struct kvm_phyid_map *map; if (cpuid >= KVM_MAX_PHYID) return NULL; map = kvm->arch.phyid_map; - if (map->phys_map[cpuid].enabled) - return map->phys_map[cpuid].vcpu; + if (!map->phys_map[cpuid].enabled) + return NULL; - return NULL; + return map->phys_map[cpuid].vcpu; } -static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) { - int cpuid; + unsigned long gintc; struct loongarch_csrs *csr = vcpu->arch.csr; - struct kvm_phyid_map *map; - map 
= vcpu->kvm->arch.phyid_map; - cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); - if (cpuid >= KVM_MAX_PHYID) - return; + if (get_gcsr_flag(id) & INVALID_GCSR) + return -EINVAL; - if (map->phys_map[cpuid].enabled) { - map->phys_map[cpuid].vcpu = NULL; - map->phys_map[cpuid].enabled = false; - kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, 0); + if (id == LOONGARCH_CSR_ESTAT) { + preempt_disable(); + vcpu_load(vcpu); + /* + * Sync pending interrupts into ESTAT so that interrupt + * remains during VM migration stage + */ + kvm_deliver_intr(vcpu); + vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; + vcpu_put(vcpu); + preempt_enable(); + + /* ESTAT IP0~IP7 get from GINTC */ + gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff; + *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2); + return 0; } + + /* + * Get software CSR state since software state is consistent + * with hardware for synchronous ioctl + */ + *val = kvm_read_sw_gcsr(csr, id); + + return 0; } static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) @@ -492,6 +576,9 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) if (get_gcsr_flag(id) & INVALID_GCSR) return -EINVAL; + if (id == LOONGARCH_CSR_CPUID) + return kvm_set_cpuid(vcpu, val); + if (id == LOONGARCH_CSR_ESTAT) { /* ESTAT IP0~IP7 inject through GINTC */ gintc = (val >> 2) & 0xff; @@ -501,11 +588,26 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc); return ret; - } else if (id == LOONGARCH_CSR_CPUID) - return kvm_set_cpuid(vcpu, val); + } kvm_write_sw_gcsr(csr, id, val); + /* + * After modifying the PMU CSR register value of the vcpu. + * If the PMU CSRs are used, we need to set KVM_REQ_PMU. + */ + if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) { + unsigned long val; + + val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | + kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); + + if (val & KVM_PMU_EVENT_ENABLED) + kvm_make_request(KVM_REQ_PMU, vcpu); + } + return ret; } @@ -535,6 +637,12 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) *v |= CPUCFG2_LSX; if (cpu_has_lasx) *v |= CPUCFG2_LASX; + if (cpu_has_lbt_x86) + *v |= CPUCFG2_X86BT; + if (cpu_has_lbt_arm) + *v |= CPUCFG2_ARMBT; + if (cpu_has_lbt_mips) + *v |= CPUCFG2_MIPSBT; return 0; case LOONGARCH_CPUCFG3: @@ -568,7 +676,7 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) static int kvm_check_cpucfg(int id, u64 val) { - int ret, host; + int ret; u64 mask = 0; ret = _kvm_get_cpucfg_mask(id, &mask); @@ -596,9 +704,8 @@ static int kvm_check_cpucfg(int id, u64 val) return 0; case LOONGARCH_CPUCFG6: if (val & CPUCFG6_PMP) { - host = read_cpucfg(6); + u32 host = read_cpucfg(LOONGARCH_CPUCFG6); if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS)) - /* Guest pmbits must be the same with host */ return -EINVAL; if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM)) return -EINVAL; @@ -633,13 +740,41 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, else ret = -EINVAL; break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case KVM_REG_LOONGARCH_LBT_SCR0: + *v = vcpu->arch.lbt.scr0; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + *v = vcpu->arch.lbt.scr1; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + *v = vcpu->arch.lbt.scr2; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + *v = vcpu->arch.lbt.scr3; + break; + case 
KVM_REG_LOONGARCH_LBT_EFLAGS: + *v = vcpu->arch.lbt.eflags; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + *v = vcpu->arch.fpu.ftop; + break; + default: + ret = -EINVAL; + break; + } + break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { case KVM_REG_LOONGARCH_COUNTER: *v = drdtime() + vcpu->kvm->arch.time_offset; break; case KVM_REG_LOONGARCH_DEBUG_INST: - *v = INSN_HVCL + KVM_HCALL_SWDBG; + *v = INSN_HVCL | KVM_HCALL_SWDBG; break; default: ret = -EINVAL; @@ -691,6 +826,37 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, if (ret) break; vcpu->arch.cpucfg[id] = (u32)v; + if (id == LOONGARCH_CPUCFG6) + vcpu->arch.max_pmu_csrid = + LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1; + break; + case KVM_REG_LOONGARCH_LBT: + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -ENXIO; + + switch (reg->id) { + case KVM_REG_LOONGARCH_LBT_SCR0: + vcpu->arch.lbt.scr0 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR1: + vcpu->arch.lbt.scr1 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR2: + vcpu->arch.lbt.scr2 = v; + break; + case KVM_REG_LOONGARCH_LBT_SCR3: + vcpu->arch.lbt.scr3 = v; + break; + case KVM_REG_LOONGARCH_LBT_EFLAGS: + vcpu->arch.lbt.eflags = v; + break; + case KVM_REG_LOONGARCH_LBT_FTOP: + vcpu->arch.fpu.ftop = v; + break; + default: + ret = -EINVAL; + break; + } break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { @@ -703,7 +869,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->kvm->arch.time_offset = (signed long)(v - drdtime()); break; case KVM_REG_LOONGARCH_VCPU_RESET: - kvm_reset_timer(vcpu); + vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); break; @@ -784,8 +950,10 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { switch (attr->attr) { - case 2: - case 6: + case LOONGARCH_CPUCFG2: + case LOONGARCH_CPUCFG6: + return 0; + case CPUCFG_KVM_FEATURE: return 0; default: return -ENXIO; @@ -794,6 +962,16 @@ static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, return -ENXIO; } +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + return 0; +} + static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -813,22 +991,48 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, return ret; } -static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, +static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int ret = 0; uint64_t val; uint64_t __user *uaddr = (uint64_t __user *)attr->addr; - ret = _kvm_get_cpucfg_mask(attr->attr, &val); - if (ret) - return ret; + switch (attr->attr) { + case 0 ... 
(KVM_MAX_CPUCFG_REGS - 1): + ret = _kvm_get_cpucfg_mask(attr->attr, &val); + if (ret) + return ret; + break; + case CPUCFG_KVM_FEATURE: + val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK; + break; + default: + return -ENXIO; + } put_user(val, uaddr); return ret; } +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 gpa; + u64 __user *user = (u64 __user *)attr->addr; + + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + gpa = vcpu->arch.st.guest_addr; + if (put_user(gpa, user)) + return -EFAULT; + + return 0; +} + static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -836,7 +1040,7 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_LOONGARCH_VCPU_CPUCFG: - ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + ret = kvm_loongarch_cpucfg_get_attr(vcpu, attr); break; case KVM_LOONGARCH_VCPU_PVTIME_CTRL: ret = kvm_loongarch_pvtime_get_attr(vcpu, attr); @@ -851,7 +1055,65 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - return -ENXIO; + u64 val, valid; + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + switch (attr->attr) { + case CPUCFG_KVM_FEATURE: + if (get_user(val, user)) + return -EFAULT; + + valid = LOONGARCH_PV_FEAT_MASK; + if (val & ~valid) + return -EINVAL; + + /* All vCPUs need set the same PV features */ + if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED) + && ((kvm->arch.pv_features & valid) != val)) + return -EINVAL; + kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED; + return 0; + default: + return -ENXIO; + } +} + +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int idx, ret = 0; + u64 gpa, __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + + if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME) + || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA) + return -ENXIO; + + if (get_user(gpa, user)) + return -EFAULT; + + if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID)) + return -EINVAL; + + if (!(gpa & KVM_STEAL_PHYS_VALID)) { + vcpu->arch.st.guest_addr = gpa; + return 0; + } + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) { + vcpu->arch.st.guest_addr = gpa; + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + } + + return ret; } static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, @@ -970,12 +1232,66 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +#ifdef CONFIG_CPU_HAS_LBT +int kvm_own_lbt(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_lbt(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + preempt_enable(); + + return 0; +} + +static void kvm_lose_lbt(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) { + _save_lbt(&vcpu->arch.lbt); + clear_csr_euen(CSR_EUEN_LBTEN); + vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT; + } + preempt_enable(); +} + +static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long 
fcsr) +{ + /* + * If TM is enabled, top register save/restore will + * cause lbt exception, here enable lbt in advance + */ + if (fcsr & FPU_CSR_TM) + kvm_own_lbt(vcpu); +} + +static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) + return; + kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0)); + } +} +#else +static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { } +static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { } +static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { } +#endif + /* Enable FPU and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - /* Enable FPU */ + /* + * Enable FPU for guest + * Set FR and FRE according to guest context + */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN); kvm_restore_fpu(&vcpu->arch.fpu); @@ -995,6 +1311,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu) preempt_disable(); /* Enable LSX for guest */ + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN); switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { case KVM_LARCH_FPU: @@ -1029,6 +1346,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu) preempt_disable(); + kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr); set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) { case KVM_LARCH_LSX: @@ -1060,6 +1378,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); + kvm_check_fcsr_alive(vcpu); if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) { kvm_save_lasx(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX); @@ -1082,81 +1401,11 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) /* Disable FPU */ clear_csr_euen(CSR_EUEN_FPEN); } + kvm_lose_lbt(vcpu); preempt_enable(); } -int kvm_own_pmu(struct kvm_vcpu *vcpu) -{ - unsigned long val; - - if (!kvm_guest_has_pmu(&vcpu->arch)) - return -EINVAL; - - preempt_disable(); - val = read_csr_gcfg() & ~CSR_GCFG_GPERF; - val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; - write_csr_gcfg(val); - - vcpu->arch.aux_inuse |= KVM_LARCH_PERF; - preempt_enable(); - return 0; -} - -static void kvm_lose_pmu(struct kvm_vcpu *vcpu) -{ - struct loongarch_csrs *csr = vcpu->arch.csr; - - if (!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) - return; - - /* save guest pmu csr */ - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL0, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL1, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL2, 0); - kvm_write_hw_gcsr(LOONGARCH_CSR_PERFCTRL3, 0); - /* Disable pmu access from guest */ - write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF); - - if (((kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) | - kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3)) - & KVM_PMU_PLV_ENABLE) == 0) - vcpu->arch.aux_inuse &= ~KVM_LARCH_PERF; -} - -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - unsigned long val; - struct loongarch_csrs *csr = vcpu->arch.csr; - - if 
(!(vcpu->arch.aux_inuse & KVM_LARCH_PERF)) - return; - - /* Set PM0-PM(num) to Guest */ - val = read_csr_gcfg() & ~CSR_GCFG_GPERF; - val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT; - write_csr_gcfg(val); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); - kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3); -} - - int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { int intr = (int)irq->irq; @@ -1206,6 +1455,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct loongarch_csrs *csr; vcpu->arch.vpid = 0; + vcpu->arch.flush_gpa = INVALID_GPA; hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); vcpu->arch.swtimer.function = kvm_swtimer_wakeup; @@ -1225,6 +1475,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Init */ vcpu->arch.last_sched_cpu = -1; + /* Init ipi_state lock */ + spin_lock_init(&vcpu->arch.ipi_state.lock); + /* * Initialize guest register state to valid architectural reset state. */ @@ -1237,6 +1490,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Set cpuid */ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id); + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); /* Start with no pending virtual guest interrupts */ csr->csrs[LOONGARCH_CSR_GINTC] = 0; @@ -1255,8 +1509,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.swtimer); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); - kfree(vcpu->arch.csr); kvm_drop_cpuid(vcpu); + kfree(vcpu->arch.csr); /* * If the vCPU is freed and reused as another vCPU, we don't want the @@ -1295,12 +1549,11 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Control guest page CCA attribute */ change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Restore hardware perf csr */ + /* Restore hardware PMU CSRs */ kvm_restore_pmu(vcpu); - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; @@ -1384,7 +1637,6 @@ static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) struct loongarch_csrs *csr = vcpu->arch.csr; kvm_lose_fpu(vcpu); - kvm_lose_pmu(vcpu); /* * Update CSR state from hardware if software CSR state is stale, diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 06fd746b03b6193b9560a0fce85ef9db5fbcb8a1..5f65610aa9fce27ce7947707dd08aca0b3c21506 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -5,6 +5,9 @@ #include #include +#include +#include +#include const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -30,15 +33,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.pgd) return -ENOMEM; - kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), - GFP_KERNEL_ACCOUNT); + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT); if (!kvm->arch.phyid_map) { free_page((unsigned long)kvm->arch.pgd); kvm->arch.pgd = NULL; return -ENOMEM; } + spin_lock_init(&kvm->arch.phyid_map_lock); kvm_init_vmcs(kvm); + + /* Enable all PV features by default */ + kvm->arch.pv_features = 
BIT(KVM_FEATURE_IPI); + if (kvm_pvtime_supported()) + kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); + kvm->arch.gpa_size = BIT(cpu_vabits - 1); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; @@ -52,7 +61,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) for (i = 0; i <= kvm->arch.root_level; i++) kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3); - spin_lock_init(&kvm->arch.phyid_map_lock); return 0; } @@ -60,8 +68,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_destroy_vcpus(kvm); free_page((unsigned long)kvm->arch.pgd); - kvfree(kvm->arch.phyid_map); kvm->arch.pgd = NULL; + kvfree(kvm->arch.phyid_map); kvm->arch.phyid_map = NULL; } @@ -70,6 +78,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { + case KVM_CAP_IRQCHIP: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: @@ -78,6 +87,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VM_ATTRIBUTES: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -100,7 +110,114 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_LOONGARCH_VM_FEAT_LSX: + if (cpu_has_lsx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_LASX: + if (cpu_has_lasx) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_X86BT: + if (cpu_has_lbt_x86) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_ARMBT: + if (cpu_has_lbt_arm) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_MIPSBT: + if (cpu_has_lbt_mips) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PMU: + if (cpu_has_pmp) + return 0; + return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PV_IPI: + return 0; + case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME: + if (kvm_pvtime_supported()) + return 0; + return -ENXIO; + default: + return -ENXIO; + } +} + +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_LOONGARCH_VM_FEAT_CTRL: + return kvm_vm_feature_has_attr(kvm, attr); + case KVM_LOONGARCH_VM_HAVE_IRQCHIP: + return 0; + default: + return -ENXIO; + } +} + int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -ENOIOCTLCMD; + int r; + void __user *argp = (void __user *)arg; + struct kvm *kvm = filp->private_data; + struct kvm_device_attr attr; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + r = 1; + break; + } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + } + default: + return -EINVAL; + } + + return r; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *data, + bool line_status) +{ + bool level; + struct loongarch_pch_pic *s; + int type, vcpu, irq, vcpus, val, ret = 0; + + level = data->level; + val = data->irq; + s = kvm->arch.pch_pic; + vcpus = atomic_read(&kvm->online_vcpus); + + type = (val >> KVM_LOONGARCH_IRQ_TYPE_SHIFT) & KVM_LOONGARCH_IRQ_TYPE_MASK; + vcpu = (val >> KVM_LOONGARCH_IRQ_VCPU_SHIFT) & KVM_LOONGARCH_IRQ_VCPU_MASK; + irq = (val >> KVM_LOONGARCH_IRQ_NUM_SHIFT) & KVM_LOONGARCH_IRQ_NUM_MASK; + + switch (type) { + case KVM_LOONGARCH_IRQ_TYPE_IOAPIC: + if (irq < KVM_IRQCHIP_NUM_PINS) + pch_pic_set_irq(s, irq, level); + else if (irq < 256) + pch_msi_set_irq(kvm, irq, level); + else + ret = -EINVAL; + break; + 
default: + ret = -EINVAL; + } + + return ret; +} + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return (bool)((!!kvm->arch.extioi) && (!!kvm->arch.pch_pic)); } diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b49f20ab20d1029848f4d8532bb06db4e10f5473..b1492ecfff0bb175ccdc695ee78ae30f572c8ce7 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -150,7 +150,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || IA64 || X86 + def_bool ARM64 || IA64 || X86 || LOONGARCH select DMA_OPS select IOMMU_API select IOMMU_IOVA @@ -498,4 +498,17 @@ config SPRD_IOMMU Say Y here if you want to use the multimedia devices listed above. +# LOONGARCH IOMMU support +config LOONGARCH_IOMMU + tristate "LOONGARCH IOMMU support" + select IOMMU_API + select IOMMU_DEFAULT_PASSTHROUGH + depends on LOONGARCH + help + With this option you can enable support for LOONGARCH IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an LOONGARCH IOMMU you + can isolate the DMA memory of different devices and protect the + system from misbehaving device drivers or hardware. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 769e43d780ce89810033064bfd3baa8420889bed..3f4592bbd7f91013cce165e957c8e965873501a6 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o +obj-$(CONFIG_LOONGARCH_IOMMU) += loongarch_iommu.o diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index b3a70eb8a34965c12d51ac23d73688c814cf2ad8..70349fb5e8905221b5c7b93c69081742926ce4f6 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1743,7 +1743,7 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dma_ops = { +static const struct dma_map_ops iommu_dmafops = { .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1786,7 +1786,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; - dev->dma_ops = &iommu_dma_ops; + dev->dma_ops = &iommu_dmafops; } return; diff --git a/drivers/iommu/loongarch_iommu.c b/drivers/iommu/loongarch_iommu.c new file mode 100644 index 0000000000000000000000000000000000000000..7dfc6459045b98edc57a60b23ebf1269c59f9f26 --- /dev/null +++ b/drivers/iommu/loongarch_iommu.c @@ -0,0 +1,1728 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2024 Loongson Technology Ltd. + * Author: Lv Chen + * Wang Yang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "loongarch_iommu.h" + +MODULE_LICENSE("GPL"); + +#define LOOP_TIMEOUT 100000 + +#define IVRS_HEADER_LENGTH 48 +#define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_ACPI_HID 0xf0 + +#define IVHD_HEAD_TYPE10 0x10 +#define IVHD_HEAD_TYPE11 0x11 +#define IVHD_HEAD_TYPE40 0x40 + +#define MAX_BDF_NUM 0xffff + +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 efr_attr; + + /* Following only valid on IVHD type 11h and 40h */ + u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ + u64 res; +} __packed; + +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; + u32 hidh; + u64 cid; + u8 uidf; + u8 uidl; + u8 uid; +} __packed; + +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + +LIST_HEAD(la_rlookup_iommu_list); +LIST_HEAD(la_iommu_list); /* list of all loongarch + * IOMMUs in the system + */ + +static u32 rlookup_table_size; /* size if the rlookup table */ +static int la_iommu_target_ivhd_type; +u16 la_iommu_last_bdf; /* largest PCI device id + * we have to handle + */ + +int loongarch_iommu_disable; + +#define iommu_write_regl(iommu, off, val) \ + writel(val, iommu->confbase + off) +#define iommu_read_regl(iommu, off) readl(iommu->confbase + off) + +static void iommu_translate_disable(struct loongarch_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Disable */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val &= ~(1 << 31); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static void iommu_translate_enable(struct loongarch_iommu *iommu) +{ + u32 val = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Enable use mem */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (1 << 29); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Enable */ + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (1 << 31); + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static bool la_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + return false; + } +} + +static struct dom_info *to_dom_info(struct iommu_domain *dom) +{ + return container_of(dom, struct dom_info, domain); +} + +static int update_dev_table(struct la_iommu_dev_data *dev_data, int flag) +{ + u32 val = 0; + int index; + unsigned short bdf; + struct loongarch_iommu *iommu; + u16 domain_id; + + if (dev_data == NULL) { + pr_err("%s dev_data is NULL", __func__); + 
return 0; + } + + if (dev_data->iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + if (dev_data->iommu_entry == NULL) { + pr_err("%s iommu_entry is NULL", __func__); + return 0; + } + + iommu = dev_data->iommu; + domain_id = dev_data->iommu_entry->id; + bdf = dev_data->bdf; + + /* Set device table */ + if (flag) { + index = find_first_zero_bit(iommu->devtable_bitmap, + MAX_ATTACHED_DEV_ID); + if (index < MAX_ATTACHED_DEV_ID) { + __set_bit(index, iommu->devtable_bitmap); + dev_data->index = index; + } else { + pr_err("%s get id from dev table failed\n", __func__); + return 0; + } + + pr_info("%s bdf %x domain_id %d iommu devid %x iommu segment %d flag %x\n", + __func__, bdf, domain_id, iommu->devid, + iommu->segment, flag); + + val = bdf & 0xffff; + val |= ((domain_id & 0xf) << 16); /* domain id */ + val |= ((index & 0xf) << 24); /* index */ + val |= (0x1 << 20); /* valid */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = (0x1 << 31) | (0xf << 0); + val |= (0x1 << 29); /* 1: use main memory */ + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); + } else { + /* Flush device table */ + index = dev_data->index; + pr_info("%s bdf %x domain_id %d iommu devid %x iommu segment %d flag %x\n", + __func__, bdf, domain_id, iommu->devid, + iommu->segment, flag); + + val = iommu_read_regl(iommu, LA_IOMMU_EIVDB); + val &= ~(0xffffffff); + val |= ((index & 0xf) << 24); /* index */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(iommu, LA_IOMMU_PFM_CNT_EN); + val |= (0x1 << 29); /* 1: use main memory */ + iommu_write_regl(iommu, LA_IOMMU_PFM_CNT_EN, val); + + if (index < MAX_ATTACHED_DEV_ID) + __clear_bit(index, iommu->devtable_bitmap); + } + return 0; +} + +static void flush_iotlb(struct loongarch_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + /* Flush all tlb */ + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + val &= ~0x1f; + val |= 0x5; + iommu_write_regl(iommu, LA_IOMMU_VBTC, val); +} + +static int flush_pgtable_is_busy(struct loongarch_iommu *iommu) +{ + u32 val; + + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + return val & IOMMU_PGTABLE_BUSY; +} + +static int iommu_flush_iotlb(struct loongarch_iommu *iommu) +{ + u32 retry = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + flush_iotlb(iommu); + while (flush_pgtable_is_busy(iommu)) { + if (retry == LOOP_TIMEOUT) { + pr_err("LA-IOMMU: iotlb flush busy\n"); + return -EIO; + } + retry++; + udelay(1); + } + iommu_translate_enable(iommu); + return 0; +} + +static void la_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct dom_info *priv = to_dom_info(domain); + struct iommu_info *info; + + spin_lock(&priv->lock); + list_for_each_entry(info, &priv->iommu_devlist, list) + iommu_flush_iotlb(info->iommu); + spin_unlock(&priv->lock); +} + +static void do_attach(struct iommu_info *info, struct la_iommu_dev_data *dev_data) +{ + if (dev_data->count) + return; + + dev_data->count++; + dev_data->iommu_entry = info; + + spin_lock(&info->devlock); + list_add(&dev_data->list, &info->dev_list); + info->dev_cnt += 1; + spin_unlock(&info->devlock); + + update_dev_table(dev_data, 1); + if (info->dev_cnt > 0) + iommu_flush_iotlb(dev_data->iommu); +} + +static void do_detach(struct la_iommu_dev_data *dev_data) +{ + struct iommu_info *info; + + if (!dev_data || !dev_data->iommu_entry || 
(dev_data->count == 0)) { + pr_err("%s dev_data or iommu_entry is NULL", __func__); + return; + } + dev_data->count--; + info = dev_data->iommu_entry; + list_del(&dev_data->list); + info->dev_cnt -= 1; + update_dev_table(dev_data, 0); + dev_data->iommu_entry = NULL; +} + +static void detach_all_dev_by_domain(struct iommu_info *info) +{ + struct la_iommu_dev_data *dev_data = NULL; + + spin_lock(&info->devlock); + while (!list_empty(&info->dev_list)) { + dev_data = list_first_entry(&info->dev_list, + struct la_iommu_dev_data, list); + do_detach(dev_data); + } + spin_unlock(&info->devlock); +} + +static int domain_id_alloc(struct loongarch_iommu *iommu) +{ + int id = -1; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return id; + } + spin_lock(&iommu->domain_bitmap_lock); + id = find_first_zero_bit(iommu->domain_bitmap, MAX_DOMAIN_ID); + if (id < MAX_DOMAIN_ID) + __set_bit(id, iommu->domain_bitmap); + spin_unlock(&iommu->domain_bitmap_lock); + if (id >= MAX_DOMAIN_ID) + pr_err("LA-IOMMU: Alloc domain id over max domain id\n"); + return id; +} + +static void domain_id_free(struct loongarch_iommu *iommu, int id) +{ + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + if ((id >= 0) && (id < MAX_DOMAIN_ID)) { + spin_lock(&iommu->domain_bitmap_lock); + __clear_bit(id, iommu->domain_bitmap); + spin_unlock(&iommu->domain_bitmap_lock); + } +} + +/* + * Check whether the system has a priv. + * If yes, it returns 1 and if not, it returns 0 + */ +static int has_dom(struct loongarch_iommu *iommu) +{ + int ret = 0; + + spin_lock(&iommu->dom_info_lock); + while (!list_empty(&iommu->dom_list)) { + ret = 1; + break; + } + spin_unlock(&iommu->dom_info_lock); + return ret; +} + +/* + * This function adds a private domain to the global domain list + */ +static struct dom_entry *find_domain_in_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry, *found = NULL; + + if (priv == NULL) + return found; + spin_lock(&iommu->dom_info_lock); + list_for_each_entry(entry, &iommu->dom_list, list) { + if (entry->domain_info == priv) { + found = entry; + break; + } + } + spin_unlock(&iommu->dom_info_lock); + return found; +} + +static void add_domain_to_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry; + + if (priv == NULL) + return; + entry = find_domain_in_list(iommu, priv); + if (entry != NULL) + return; + entry = kzalloc(sizeof(struct dom_entry), GFP_KERNEL); + entry->domain_info = priv; + spin_lock(&iommu->dom_info_lock); + list_add(&entry->list, &iommu->dom_list); + spin_unlock(&iommu->dom_info_lock); +} + +static void del_domain_from_list(struct loongarch_iommu *iommu, struct dom_info *priv) +{ + struct dom_entry *entry; + + entry = find_domain_in_list(iommu, priv); + if (entry == NULL) + return; + spin_lock(&iommu->dom_info_lock); + list_del(&entry->list); + spin_unlock(&iommu->dom_info_lock); + kfree(entry); +} + +static void free_pagetable(void *pt_base, int level) +{ + int i; + unsigned long *ptep, *pgtable; + + ptep = pt_base; + if (level == IOMMU_PT_LEVEL1) { + kfree(pt_base); + return; + } + for (i = 0; i < IOMMU_PTRS_PER_LEVEL; i++, ptep++) { + if (!iommu_pte_present(ptep)) + continue; + + if (((level - 1) == IOMMU_PT_LEVEL1) && iommu_pte_huge(ptep)) { + *ptep = 0; + continue; + } + + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + free_pagetable(pgtable, level - 1); + } + kfree(pt_base); +} + +static void iommu_free_pagetable(struct dom_info *info) +{ + free_pagetable(info->pgd, 
IOMMU_LEVEL_MAX); + info->pgd = NULL; +} + +static struct dom_info *alloc_dom_info(void) +{ + struct dom_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return NULL; + + info->pgd = kzalloc(IOMMU_PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (info->pgd == NULL) { + kfree(info); + return NULL; + } + INIT_LIST_HEAD(&info->iommu_devlist); + spin_lock_init(&info->lock); + mutex_init(&info->ptl_lock); + info->domain.geometry.aperture_start = 0; + info->domain.geometry.aperture_end = ~0ULL; + info->domain.geometry.force_aperture = true; + + return info; +} + +static void dom_info_free(struct dom_info *info) +{ + if (info->pgd != NULL) { + kfree(info->pgd); + info->pgd = NULL; + } + kfree(info); +} + +static struct iommu_domain *la_iommu_domain_alloc(unsigned int type) +{ + struct dom_info *info; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + info = alloc_dom_info(); + if (info == NULL) + return NULL; + break; + default: + return NULL; + } + return &info->domain; +} + +void domain_deattach_iommu(struct dom_info *priv, struct iommu_info *info) +{ + if ((priv == NULL) || (info == NULL) || + (info->dev_cnt != 0) || (info->iommu == NULL)) { + pr_err("%s invalid parameter", __func__); + return; + } + del_domain_from_list(info->iommu, priv); + domain_id_free(info->iommu, info->id); + spin_lock(&priv->lock); + list_del(&info->list); + spin_unlock(&priv->lock); + kfree(info); +} + +static void la_iommu_domain_free(struct iommu_domain *domain) +{ + struct dom_info *priv; + struct loongarch_iommu *iommu = NULL; + struct iommu_info *info, *tmp; + + priv = to_dom_info(domain); + spin_lock(&priv->lock); + list_for_each_entry_safe(info, tmp, &priv->iommu_devlist, list) { + if (info->dev_cnt > 0) + detach_all_dev_by_domain(info); + iommu = info->iommu; + spin_unlock(&priv->lock); + domain_deattach_iommu(priv, info); + spin_lock(&priv->lock); + iommu_flush_iotlb(iommu); + if (!has_dom(iommu)) + iommu_translate_disable(iommu); + } + spin_unlock(&priv->lock); + mutex_lock(&priv->ptl_lock); + iommu_free_pagetable(priv); + mutex_unlock(&priv->ptl_lock); + dom_info_free(priv); +} + +struct iommu_rlookup_entry *lookup_rlooptable(int pcisegment) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + list_for_each_entry(rlookupentry, &la_rlookup_iommu_list, list) { + if (rlookupentry->pcisegment == pcisegment) + return rlookupentry; + } + return NULL; +} + +struct loongarch_iommu *find_iommu_by_dev(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + struct iommu_rlookup_entry *rlookupentry = NULL; + struct loongarch_iommu *iommu = NULL; + struct pci_bus *bus = pdev->bus; + + devid = PCI_DEVID(bus->number, pdev->devfn); + pcisegment = pci_domain_nr(bus); + rlookupentry = lookup_rlooptable(pcisegment); + if (rlookupentry == NULL) { + pr_info("%s find segment %d rlookupentry failed\n", __func__, + pcisegment); + return iommu; + } + iommu = rlookupentry->rlookup_table[devid]; + if (iommu && (!iommu->confbase)) + iommu = NULL; + return iommu; +} + +struct iommu_device *iommu_init_device(struct device *dev) +{ + struct la_iommu_dev_data *dev_data; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned short devid; + struct loongarch_iommu *iommu = NULL; + struct iommu_device *iommu_dev = ERR_PTR(-ENODEV); + + if (!dev_is_pci(dev)) + return iommu_dev; + + if (dev->archdata.iommu != NULL || bus == NULL) { + pr_info("LA-IOMMU: bdf:0x%x has added\n", pdev->devfn); + return iommu_dev; + } + iommu = find_iommu_by_dev(pdev); + if (iommu == NULL) { + 
pci_info(pdev, "%s find iommu failed by dev\n", __func__); + return iommu_dev; + } + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return iommu_dev; + devid = PCI_DEVID(bus->number, pdev->devfn); + dev_data->bdf = devid; + + pci_info(pdev, "%s bdf %#x iommu dev id %#x\n", __func__, dev_data->bdf, iommu->devid); + /* The initial state is 0, and 1 is added only when attach dev */ + dev_data->count = 0; + dev_data->iommu = iommu; + dev_data->dev = dev; + dev->archdata.iommu = dev_data; + iommu_dev = &iommu->iommu_dev; + return iommu_dev; +} + +struct iommu_device *la_iommu_probe_device(struct device *dev) +{ + return iommu_init_device(dev); +} + +static struct iommu_group *la_iommu_device_group(struct device *dev) +{ + struct iommu_group *group; + + /* + * We don't support devices sharing stream IDs other than PCI RID + * aliases, since the necessary ID-to-device lookup becomes rather + * impractical given a potential sparse 32-bit stream ID space. + */ + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + return group; +} + +static void la_iommu_remove_device(struct device *dev) +{ + struct la_iommu_dev_data *dev_data; + + iommu_group_remove_device(dev); + dev_data = dev->archdata.iommu; + dev->archdata.iommu = NULL; + kfree(dev_data); +} + +struct iommu_info *get_iommu_info_from_dom(struct dom_info *priv, struct loongarch_iommu *iommu) +{ + struct iommu_info *info; + + spin_lock(&priv->lock); + list_for_each_entry(info, &priv->iommu_devlist, list) { + if (info->iommu == iommu) { + spin_unlock(&priv->lock); + return info; + } + } + spin_unlock(&priv->lock); + return NULL; +} + +struct iommu_info *domain_attach_iommu(struct dom_info *priv, struct loongarch_iommu *iommu) +{ + u32 dir_ctrl; + struct iommu_info *info; + unsigned long phys; + + info = get_iommu_info_from_dom(priv, iommu); + if (info) + return info; + + info = kzalloc(sizeof(struct iommu_info), GFP_KERNEL_ACCOUNT); + if (!info) + return NULL; + + INIT_LIST_HEAD(&info->dev_list); + info->iommu = iommu; + info->id = domain_id_alloc(iommu); + if (info->id == -1) { + pr_info("%s alloc id for domain failed\n", __func__); + kfree(info); + return NULL; + } + + phys = virt_to_phys(priv->pgd); + dir_ctrl = (IOMMU_LEVEL_STRIDE << 26) | (IOMMU_LEVEL_SHIFT(2) << 20); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 16) | (IOMMU_LEVEL_SHIFT(1) << 10); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 6) | IOMMU_LEVEL_SHIFT(0); + iommu_write_regl(iommu, LA_IOMMU_DIR_CTRL(info->id), dir_ctrl); + iommu_write_regl(iommu, LA_IOMMU_PGD_HI(info->id), phys >> 32); + iommu_write_regl(iommu, LA_IOMMU_PGD_LO(info->id), phys & UINT_MAX); + + spin_lock(&priv->lock); + list_add(&info->list, &priv->iommu_devlist); + spin_unlock(&priv->lock); + add_domain_to_list(iommu, priv); + return info; +} + +static struct la_iommu_dev_data *get_devdata_from_iommu_info(struct dom_info *info, + struct loongarch_iommu *iommu, unsigned long bdf) +{ + struct iommu_info *entry; + struct la_iommu_dev_data *dev_data, *found = NULL; + + entry = get_iommu_info_from_dom(info, iommu); + if (!entry) + return found; + spin_lock(&entry->devlock); + list_for_each_entry(dev_data, &entry->dev_list, list) { + if (dev_data->bdf == bdf) { + found = dev_data; + break; + } + } + spin_unlock(&entry->devlock); + return found; +} +static void la_iommu_detach_dev(struct device *dev); + +static int la_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct dom_info *priv = to_dom_info(domain); + struct pci_dev *pdev = 
to_pci_dev(dev); + unsigned char busnum = pdev->bus->number; + struct la_iommu_dev_data *dev_data; + struct loongarch_iommu *iommu; + struct iommu_info *info; + unsigned short bdf; + + la_iommu_detach_dev(dev); + + if (domain == NULL) + return 0; + + bdf = PCI_DEVID(busnum, pdev->devfn); + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return 0; + } + + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return 0; + } + + pci_info(pdev, "%s bdf %#x priv %lx iommu devid %#x\n", __func__, + bdf, (unsigned long)priv, iommu->devid); + dev_data = get_devdata_from_iommu_info(priv, iommu, bdf); + if (dev_data) { + pci_info(pdev, "LA-IOMMU: bdf 0x%x devfn %x has attached, count:0x%x\n", + bdf, pdev->devfn, dev_data->count); + return 0; + } + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + + info = domain_attach_iommu(priv, iommu); + if (!info) { + pci_info(pdev, "domain attach iommu failed\n"); + return 0; + } + dev_data->domain = domain; + do_attach(info, dev_data); + return 0; +} + +static void la_iommu_detach_dev(struct device *dev) +{ + struct iommu_domain *domain; + struct dom_info *priv; + struct pci_dev *pdev = to_pci_dev(dev); + unsigned char busnum = pdev->bus->number; + struct la_iommu_dev_data *dev_data; + struct loongarch_iommu *iommu; + struct iommu_info *iommu_entry = NULL; + unsigned short bdf; + + bdf = PCI_DEVID(busnum, pdev->devfn); + dev_data = (struct la_iommu_dev_data *)dev->archdata.iommu; + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return; + } + + domain = dev_data->domain; + if (domain == NULL) + return; + + priv = to_dom_info(domain); + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return; + } + dev_data = get_devdata_from_iommu_info(priv, iommu, bdf); + if (dev_data == NULL) { + pci_info(pdev, "%s bdf %#x hasn't attached\n", + __func__, bdf); + return; + } + + iommu = dev_data->iommu; + dev_data->dev = NULL; + iommu_entry = get_iommu_info_from_dom(priv, iommu); + if (iommu_entry == NULL) { + pci_info(pdev, "%s get iommu_entry failed\n", __func__); + return; + } + + spin_lock(&iommu_entry->devlock); + do_detach(dev_data); + spin_unlock(&iommu_entry->devlock); + + pci_info(pdev, "%s iommu devid %x sigment %x\n", __func__, + iommu->devid, iommu->segment); +} + +static unsigned long *iommu_get_pte(void *pt_base, unsigned long vaddr, int level) +{ + int i; + unsigned long *ptep, *pgtable; + + if (level > (IOMMU_LEVEL_MAX - 1)) + return NULL; + pgtable = pt_base; + for (i = IOMMU_LEVEL_MAX - 1; i >= level; i--) { + ptep = iommu_pte_offset(pgtable, vaddr, i); + if (!iommu_pte_present(ptep)) + break; + if (iommu_pte_huge(ptep)) + break; + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + } + return ptep; +} + +static int iommu_get_page_table(unsigned long *ptep) +{ + void *addr; + unsigned long pte; + + if (!iommu_pte_present(ptep)) { + addr = kzalloc(IOMMU_PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!addr) + return -ENOMEM; + pte = virt_to_phys(addr) & IOMMU_PAGE_MASK; + pte |= IOMMU_PTE_RW; + *ptep = pte; + } + return 0; +} + +static size_t iommu_page_map(void *pt_base, + unsigned long start, unsigned long end, + phys_addr_t paddr, int level) +{ + unsigned long next, old, step; + unsigned long pte, *ptep, *pgtable; + int ret, huge; + + old = start; + ptep = iommu_pte_offset(pt_base, start, level); + if (level == IOMMU_PT_LEVEL0) 
{ + paddr = paddr & IOMMU_PAGE_MASK; + do { + pte = paddr | IOMMU_PTE_RW; + *ptep = pte; + ptep++; + start += IOMMU_PAGE_SIZE; + paddr += IOMMU_PAGE_SIZE; + } while (start < end); + + return start - old; + } + + do { + next = iommu_ptable_end(start, end, level); + step = next - start; + huge = 0; + if ((level == IOMMU_PT_LEVEL1) && (step == IOMMU_HPAGE_SIZE)) + if (!iommu_pte_present(ptep) || iommu_pte_huge(ptep)) + huge = 1; + + if (huge) { + pte = (paddr & IOMMU_HPAGE_MASK) | + IOMMU_PTE_RW | IOMMU_PTE_HP; + *ptep = pte; + } else { + ret = iommu_get_page_table(ptep); + if (ret != 0) + break; + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + iommu_page_map(pgtable, start, next, paddr, level - 1); + } + + ptep++; + paddr += step; + start = next; + } while (start < end); + return start - old; +} + +static int domain_map_page(struct dom_info *priv, unsigned long start, + phys_addr_t paddr, size_t size) +{ + int ret = 0; + phys_addr_t end; + size_t map_size; + + end = start + size; + mutex_lock(&priv->ptl_lock); + map_size = iommu_page_map(priv->pgd, start, + end, paddr, IOMMU_LEVEL_MAX - 1); + if (map_size != size) + ret = -EFAULT; + mutex_unlock(&priv->ptl_lock); + la_iommu_flush_iotlb_all(&priv->domain); + return ret; +} + +static size_t iommu_page_unmap(void *pt_base, + unsigned long start, unsigned long end, int level) +{ + unsigned long next, old; + unsigned long *ptep, *pgtable; + + old = start; + ptep = iommu_pte_offset(pt_base, start, level); + if (level == IOMMU_PT_LEVEL0) { + do { + *ptep++ = 0; + start += IOMMU_PAGE_SIZE; + } while (start < end); + } else { + do { + next = iommu_ptable_end(start, end, level); + if (!iommu_pte_present(ptep)) + continue; + + if (iommu_pte_huge(ptep)) { + if ((next - start) != IOMMU_HPAGE_SIZE) + pr_err( + "Map pte on hugepage not supported now\n"); + *ptep = 0; + } else { + pgtable = phys_to_virt(*ptep & IOMMU_PAGE_MASK); + iommu_page_unmap(pgtable, start, + next, level - 1); + } + } while (ptep++, start = next, start < end); + } + return start - old; +} + +static size_t domain_unmap_page(struct dom_info *priv, + unsigned long start, size_t size) +{ + size_t unmap_len; + unsigned long end; + + end = start + size; + mutex_lock(&priv->ptl_lock); + unmap_len = iommu_page_unmap(priv->pgd, start, + end, (IOMMU_LEVEL_MAX - 1)); + mutex_unlock(&priv->ptl_lock); + la_iommu_flush_iotlb_all(&priv->domain); + return unmap_len; +} + +static int la_iommu_map(struct iommu_domain *domain, unsigned long vaddr, + phys_addr_t paddr, size_t len, int prot, gfp_t gfp) +{ + int ret; + struct dom_info *priv = to_dom_info(domain); + + ret = domain_map_page(priv, vaddr, paddr, len); + return ret; +} + +static size_t la_iommu_unmap(struct iommu_domain *domain, unsigned long vaddr, + size_t len, struct iommu_iotlb_gather *iotlb_gather) +{ + struct dom_info *priv = to_dom_info(domain); + + return domain_unmap_page(priv, vaddr, len); +} + +static phys_addr_t _iommu_iova_to_phys(struct dom_info *info, dma_addr_t vaddr) +{ + unsigned long *ptep; + unsigned long page_size, page_mask; + phys_addr_t paddr; + + mutex_lock(&info->ptl_lock); + ptep = iommu_get_pte(info->pgd, vaddr, IOMMU_PT_LEVEL0); + mutex_unlock(&info->ptl_lock); + + if (!ptep || !iommu_pte_present(ptep)) { + pr_warn_once( + "LA-IOMMU: shadow pte is null or not present with vaddr %llx\n", + vaddr); + paddr = 0; + return paddr; + } + + if (iommu_pte_huge(ptep)) { + page_size = IOMMU_HPAGE_SIZE; + page_mask = IOMMU_HPAGE_MASK; + } else { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + } + paddr = 
*ptep & page_mask; + paddr |= vaddr & (page_size - 1); + return paddr; +} + +static phys_addr_t la_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t vaddr) +{ + struct dom_info *priv = to_dom_info(domain); + phys_addr_t phys; + + spin_lock(&priv->lock); + phys = _iommu_iova_to_phys(priv, vaddr); + spin_unlock(&priv->lock); + return phys; +} + +static void la_domain_set_plaform_dma_ops(struct device *dev) +{ + /* + * loongarch doesn't setup default domains because we can't hook into the + * normal probe path + */ +} + +const struct iommu_ops la_iommu_ops = { + .capable = la_iommu_capable, + .domain_alloc = la_iommu_domain_alloc, + .probe_device = la_iommu_probe_device, + .release_device = la_iommu_remove_device, + .device_group = la_iommu_device_group, + .pgsize_bitmap = LA_IOMMU_PGSIZE, + .owner = THIS_MODULE, + .set_platform_dma_ops = la_domain_set_plaform_dma_ops, + .default_domain_ops = &(const struct iommu_domain_ops) { + .attach_dev = la_iommu_attach_dev, + .map = la_iommu_map, + .unmap = la_iommu_unmap, + .iova_to_phys = la_iommu_iova_to_phys, + .flush_iotlb_all = la_iommu_flush_iotlb_all, + .free = la_iommu_domain_free, + } +}; + + +struct loongarch_iommu *loongarch_get_iommu_by_devid(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + struct loongarch_iommu *iommu = NULL; + struct pci_bus *bus = pdev->bus; + + devid = PCI_DEVID(bus->number, pdev->devfn); + pcisegment = pci_domain_nr(pdev->bus); + list_for_each_entry(iommu, &la_iommu_list, list) { + if ((iommu->segment == pcisegment) && + (iommu->devid == devid)) { + return iommu; + } + } + return NULL; +} + +bool check_device_compat(struct pci_dev *pdev) +{ + bool compat = true; + + if ((pdev->revision == 0) && (pdev->device == 0x7a1f)) + compat = false; + return compat; +} + +static int loongarch_iommu_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret = 1; + int bitmap_sz = 0; + int tmp; + bool compat = false; + struct loongarch_iommu *iommu = NULL; + resource_size_t base, size; + + iommu = loongarch_get_iommu_by_devid(pdev); + if (iommu == NULL) { + pci_info(pdev, "%s can't find iommu\n", __func__); + return -ENODEV; + } + + compat = check_device_compat(pdev); + if (!compat) { + pci_info(pdev, + "%s The iommu driver is not compatible with this device\n", + __func__); + return -ENODEV; + } + + iommu->pdev = pdev; + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + if (!request_mem_region(base, size, "loongarch_iommu")) { + pci_err(pdev, + "%d can't reserve mmio registers base %llx size %llx\n", + __LINE__, base, size); + return -ENOMEM; + } + iommu->confbase_phy = base; + iommu->conf_size = size; + iommu->confbase = ioremap(base, size); + if (iommu->confbase == NULL) { + pci_info(pdev, "%s iommu pci dev bar0 is NULL\n", __func__); + return ret; + } + + pr_info("iommu confbase %llx pgtsize %llx\n", + (u64)iommu->confbase, size); + tmp = MAX_DOMAIN_ID / 8; + bitmap_sz = (MAX_DOMAIN_ID % 8) ? (tmp + 1) : tmp; + iommu->domain_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->domain_bitmap == NULL) { + pr_err("LA-IOMMU: domain bitmap alloc err bitmap_sz:%d\n", + bitmap_sz); + goto out_err; + } + + tmp = MAX_ATTACHED_DEV_ID / 8; + bitmap_sz = (MAX_ATTACHED_DEV_ID % 8) ? 
(tmp + 1) : tmp; + iommu->devtable_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->devtable_bitmap == NULL) { + pr_err("LA-IOMMU: devtable bitmap alloc err bitmap_sz:%d\n", + bitmap_sz); + goto out_err_1; + } + + ret = iommu_device_sysfs_add(&iommu->iommu_dev, &pdev->dev, + NULL, "ivhd-%#x", iommu->devid); + iommu_device_register(&iommu->iommu_dev, &la_iommu_ops, NULL); + return 0; + +out_err_1: + iommu->pdev = NULL; + iounmap(iommu->confbase); + iommu->confbase = NULL; + release_mem_region(iommu->confbase_phy, iommu->conf_size); + iommu->confbase_phy = 0; + iommu->conf_size = 0; + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; +out_err: + return ret; +} + +static void loongarch_iommu_remove(struct pci_dev *pdev) +{ + struct loongarch_iommu *iommu = NULL; + + iommu = loongarch_get_iommu_by_devid(pdev); + if (iommu == NULL) + return; + if (iommu->domain_bitmap != NULL) { + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; + } + if (iommu->devtable_bitmap != NULL) { + kfree(iommu->devtable_bitmap); + iommu->devtable_bitmap = NULL; + } + if (iommu->confbase != NULL) { + iounmap(iommu->confbase); + iommu->confbase = NULL; + } + if (iommu->confbase_phy != 0) { + release_mem_region(iommu->confbase_phy, iommu->conf_size); + iommu->confbase_phy = 0; + iommu->conf_size = 0; + } +} + +static int __init check_ivrs_checksum(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table; + + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) { + /* ACPI table corrupt */ + pr_err("IVRS invalid checksum\n"); + return -ENODEV; + } + return 0; +} + +struct iommu_rlookup_entry *create_rlookup_entry(int pcisegment) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = kzalloc(sizeof(struct iommu_rlookup_entry), + GFP_KERNEL); + if (rlookupentry == NULL) + return rlookupentry; + + rlookupentry->pcisegment = pcisegment; + /* IOMMU rlookup table - find the IOMMU for a specific device */ + rlookupentry->rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (rlookupentry->rlookup_table == NULL) { + kfree(rlookupentry); + rlookupentry = NULL; + } else { + list_add(&rlookupentry->list, &la_rlookup_iommu_list); + } + return rlookupentry; +} + +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct loongarch_iommu *iommu, + u16 devid) +{ + struct iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry == NULL) + rlookupentry = create_rlookup_entry(iommu->segment); + if (rlookupentry != NULL) + rlookupentry->rlookup_table[devid] = iommu; +} + +static inline u32 get_ivhd_header_size(struct ivhd_header *h) +{ + u32 size = 0; + + switch (h->type) { + case IVHD_HEAD_TYPE10: + size = 24; + break; + case IVHD_HEAD_TYPE11: + case IVHD_HEAD_TYPE40: + size = 40; + break; + } + return size; +} + +static inline void update_last_devid(u16 devid) +{ + if (devid > la_iommu_last_bdf) + la_iommu_last_bdf = devid; +} + +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + u32 type = ((struct ivhd_entry *)ivhd)->type; + + if (type < 0x80) { + return 0x04 << (*ivhd >> 6); + } else if (type == IVHD_DEV_ACPI_HID) { + /* For ACPI_HID, offset 21 is uid len */ + return *((u8 *)ivhd + 21) + 22; + } + return 0; +} + +/* + * After reading the highest device id from the IOMMU PCI capability header + * this 
function looks if there is a higher device id defined in the ACPI table + */ +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + u32 ivhd_size = get_ivhd_header_size(h); + + if (!ivhd_size) { + pr_err("la-iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + p += ivhd_size; + end += h->length; + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(MAX_BDF_NUM); + break; + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); + break; + default: + break; + } + p += ivhd_entry_length(p); + } + + WARN_ON(p != end); + + return 0; +} + +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + if (h->type == la_iommu_target_ivhd_type) { + int ret = find_last_devid_from_ivhd(h); + + if (ret) + return ret; + } + + if (h->length == 0) + break; + + p += h->length; + } + + if (p != end) + return -EINVAL; + return 0; +} + +/* + * Takes a pointer to an loongarch IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ +static int __init init_iommu_from_acpi(struct loongarch_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p; + u16 devid = 0, devid_start = 0; + u32 dev_i; + struct ivhd_entry *e; + u32 ivhd_size; + + /* + * Done. Now parse the device entries + */ + ivhd_size = get_ivhd_header_size(h); + if (!ivhd_size) { + pr_err("loongarch iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + if (h->length == 0) + return -EINVAL; + + p += ivhd_size; + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = 0; dev_i <= la_iommu_last_bdf; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + case IVHD_DEV_SELECT: + + pr_info(" DEV_SELECT\t\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + set_iommu_for_device(iommu, devid); + break; + case IVHD_DEV_SELECT_RANGE_START: + + pr_info(" DEV_SELECT_RANGE_START\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid_start = e->devid; + break; + case IVHD_DEV_RANGE_END: + + pr_info(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + default: + break; + } + + p += ivhd_entry_length(p); + } + + return 0; +} + +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. 
+ */ +static int __init init_iommu_one(struct loongarch_iommu *iommu, + struct ivhd_header *h) +{ + int ret; + struct iommu_rlookup_entry *rlookupentry = NULL; + + spin_lock_init(&iommu->domain_bitmap_lock); + spin_lock_init(&iommu->dom_info_lock); + + /* Add IOMMU to internal data structures */ + INIT_LIST_HEAD(&iommu->dom_list); + + list_add_tail(&iommu->list, &la_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->segment = h->pci_seg; + ret = init_iommu_from_acpi(iommu, h); + if (ret) { + pr_err("%s init iommu from acpi failed\n", __func__); + return ret; + } + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry != NULL) { + /* + * Make sure IOMMU is not considered to translate itself. + * The IVRS table tells us so, but this is a lie! + */ + rlookupentry->rlookup_table[iommu->devid] = NULL; + } + return 0; +} + +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct loongarch_iommu *iommu; + int ret; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + + if (h->length == 0) + break; + + if (*p == la_iommu_target_ivhd_type) { + + pr_info("device: %02x:%02x.%01x seg: %d\n", + PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), + PCI_FUNC(h->devid), h->pci_seg); + + iommu = kzalloc(sizeof(struct loongarch_iommu), + GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + + ret = init_iommu_one(iommu, h); + if (ret) { + kfree(iommu); + pr_info("%s init iommu failed\n", __func__); + return ret; + } + } + p += h->length; + } + if (p != end) + return -EINVAL; + return 0; +} + +/** + * get_highest_supported_ivhd_type - Look up the appropriate IVHD type + * @ivrs Pointer to the IVRS header + * + * This function search through all IVDB of the maximum supported IVHD + */ +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) +{ + u8 *base = (u8 *)ivrs; + struct ivhd_header *ivhd = (struct ivhd_header *) + (base + IVRS_HEADER_LENGTH); + u8 last_type = ivhd->type; + u16 devid = ivhd->devid; + + while (((u8 *)ivhd - base < ivrs->length) && + (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED) && + (ivhd->length > 0)) { + u8 *p = (u8 *) ivhd; + + if (ivhd->devid == devid) + last_type = ivhd->type; + ivhd = (struct ivhd_header *)(p + ivhd->length); + } + return last_type; +} + +static inline unsigned long tbl_size(int entry_size) +{ + unsigned int shift = PAGE_SHIFT + + get_order(((int)la_iommu_last_bdf + 1) * entry_size); + + return 1UL << shift; +} + +static int __init loongarch_iommu_ivrs_init(void) +{ + struct acpi_table_header *ivrs_base; + acpi_status status; + int ret = 0; + + status = acpi_get_table("IVRS", 0, &ivrs_base); + if (status == AE_NOT_FOUND) { + pr_info("%s get ivrs table failed\n", __func__); + return -ENODEV; + } + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + ret = check_ivrs_checksum(ivrs_base); + if (ret) + goto out; + + la_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); + pr_info("Using IVHD type %#x\n", la_iommu_target_ivhd_type); + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. 
Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + ret = find_last_devid_acpi(ivrs_base); + if (ret) { + pr_err("%s find last devid failed\n", __func__); + goto out; + } + + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = init_iommu_all(ivrs_base); +out: + /* Don't leak any ACPI memory */ + acpi_put_table(ivrs_base); + ivrs_base = NULL; + return ret; +} + +static void free_iommu_rlookup_entry(void) +{ + struct loongarch_iommu *iommu = NULL; + struct iommu_rlookup_entry *rlookupentry = NULL; + + while (!list_empty(&la_iommu_list)) { + iommu = list_first_entry(&la_iommu_list, struct loongarch_iommu, list); + list_del(&iommu->list); + kfree(iommu); + } + + while (!list_empty(&la_rlookup_iommu_list)) { + rlookupentry = list_first_entry(&la_rlookup_iommu_list, + struct iommu_rlookup_entry, list); + + list_del(&rlookupentry->list); + if (rlookupentry->rlookup_table != NULL) { + free_pages( + (unsigned long)rlookupentry->rlookup_table, + get_order(rlookup_table_size)); + + rlookupentry->rlookup_table = NULL; + } + kfree(rlookupentry); + } +} + +static int __init la_iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + while (*str) { + if (!strncmp(str, "on", 2)) { + loongarch_iommu_disable = 0; + pr_info("IOMMU enabled\n"); + } else if (!strncmp(str, "off", 3)) { + loongarch_iommu_disable = 1; + pr_info("IOMMU disabled\n"); + } + str += strcspn(str, ","); + while (*str == ',') + str++; + } + return 0; +} +__setup("loongarch_iommu=", la_iommu_setup); + +static const struct pci_device_id loongson_iommu_pci_tbl[] = { + { PCI_DEVICE(0x14, 0x3c0f) }, + { PCI_DEVICE(0x14, 0x7a1f) }, + { 0, } +}; + +static struct pci_driver loongarch_iommu_driver = { + .name = "loongarch-iommu", + .id_table = loongson_iommu_pci_tbl, + .probe = loongarch_iommu_probe, + .remove = loongarch_iommu_remove, +}; + +static int __init loongarch_iommu_driver_init(void) +{ + int ret = 0; + + if (loongarch_iommu_disable == 0) { + ret = loongarch_iommu_ivrs_init(); + if (ret != 0) { + free_iommu_rlookup_entry(); + pr_err("Failed to init iommu by ivrs\n"); + } + + ret = pci_register_driver(&loongarch_iommu_driver); + if (ret != 0) { + pr_err("Failed to register IOMMU driver\n"); + return ret; + } + } + return ret; +} + +static void __exit loongarch_iommu_driver_exit(void) +{ + struct loongarch_iommu *iommu = NULL; + + if (loongarch_iommu_disable == 0) { + list_for_each_entry(iommu, &la_iommu_list, list) { + iommu_device_sysfs_remove(&iommu->iommu_dev); + iommu_device_unregister(&iommu->iommu_dev); + loongarch_iommu_remove(iommu->pdev); + } + free_iommu_rlookup_entry(); + pci_unregister_driver(&loongarch_iommu_driver); + } +} + +module_init(loongarch_iommu_driver_init); +module_exit(loongarch_iommu_driver_exit); diff --git a/drivers/iommu/loongarch_iommu.h b/drivers/iommu/loongarch_iommu.h new file mode 100644 index 0000000000000000000000000000000000000000..cf5640d95900bde0c2c4cd72832f5e1d184e16c6 --- /dev/null +++ b/drivers/iommu/loongarch_iommu.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2020-2021 Loongson Technology Ltd. + * Author: Lv Chen + * Wang Yang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef LOONGARCH_IOMMU_H +#define LOONGARCH_IOMMU_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOVA_WIDTH 47 + +/* Bit value definition for I/O PTE fields */ +#define IOMMU_PTE_PR (1ULL << 0) /* Present */ +#define IOMMU_PTE_HP (1ULL << 1) /* HugePage */ +#define IOMMU_PTE_IR (1ULL << 2) /* Readable */ +#define IOMMU_PTE_IW (1ULL << 3) /* Writeable */ +#define IOMMU_PTE_RW (IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW) + +#define iommu_pte_present(ptep) ((*ptep != 0)) +#define iommu_pte_huge(ptep) ((*ptep) & IOMMU_PTE_HP) + +#define LA_IOMMU_PGSIZE (SZ_16K | SZ_32M) + +#define IOMMU_PT_LEVEL0 0x00 +#define IOMMU_PT_LEVEL1 0x01 + +/* IOMMU page table */ +#define IOMMU_PAGE_SHIFT PAGE_SHIFT +#define IOMMU_PAGE_SIZE (_AC(1, UL) << IOMMU_PAGE_SHIFT) +#define IOMMU_LEVEL_STRIDE (IOMMU_PAGE_SHIFT - 3) +#define IOMMU_PTRS_PER_LEVEL (IOMMU_PAGE_SIZE >> 3) +#define IOMMU_LEVEL_SHIFT(n) (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT) +#define IOMMU_LEVEL_SIZE(n) (_AC(1, UL) << (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) +#define IOMMU_LEVEL_MASK(n) (~(IOMMU_LEVEL_SIZE(n) - 1)) +#define IOMMU_LEVEL_MAX DIV_ROUND_UP((IOVA_WIDTH - IOMMU_PAGE_SHIFT), IOMMU_LEVEL_STRIDE) +#define IOMMU_PAGE_MASK (~(IOMMU_PAGE_SIZE - 1)) + +#define IOMMU_HPAGE_SIZE (1UL << IOMMU_LEVEL_SHIFT(IOMMU_PT_LEVEL1)) +#define IOMMU_HPAGE_MASK (~(IOMMU_HPAGE_SIZE - 1)) + +/* wired | index | domain | shift */ +#define LA_IOMMU_WIDS 0x10 +/* valid | busy | tlbar/aw | cmd */ +#define LA_IOMMU_VBTC 0x14 +#define IOMMU_PGTABLE_BUSY (1 << 16) +/* enable |index | valid | domain | bdf */ +#define LA_IOMMU_EIVDB 0x18 +/* enable | valid | cmd */ +#define LA_IOMMU_CMD 0x1C +#define LA_IOMMU_PGD0_LO 0x20 +#define LA_IOMMU_PGD0_HI 0x24 +#define STEP_PGD 0x8 +#define STEP_PGD_SHIFT 3 +#define LA_IOMMU_PGD_LO(domain_id) \ + (LA_IOMMU_PGD0_LO + ((domain_id) << STEP_PGD_SHIFT)) +#define LA_IOMMU_PGD_HI(domain_id) \ + (LA_IOMMU_PGD0_HI + ((domain_id) << STEP_PGD_SHIFT)) + +#define LA_IOMMU_DIR_CTRL0 0xA0 +#define LA_IOMMU_DIR_CTRL1 0xA4 +#define LA_IOMMU_DIR_CTRL(x) (LA_IOMMU_DIR_CTRL0 + ((x) << 2)) + +#define LA_IOMMU_SAFE_BASE_HI 0xE0 +#define LA_IOMMU_SAFE_BASE_LO 0xE4 +#define LA_IOMMU_EX_ADDR_LO 0xE8 +#define LA_IOMMU_EX_ADDR_HI 0xEC + +#define LA_IOMMU_PFM_CNT_EN 0x100 + +#define LA_IOMMU_RD_HIT_CNT_0 0x110 +#define LA_IOMMU_RD_MISS_CNT_O 0x114 +#define LA_IOMMU_WR_HIT_CNT_0 0x118 +#define LA_IOMMU_WR_MISS_CNT_0 0x11C +#define LA_IOMMU_RD_HIT_CNT_1 0x120 +#define LA_IOMMU_RD_MISS_CNT_1 0x124 +#define LA_IOMMU_WR_HIT_CNT_1 0x128 +#define LA_IOMMU_WR_MISS_CNT_1 0x12C +#define LA_IOMMU_RD_HIT_CNT_2 0x130 +#define LA_IOMMU_RD_MISS_CNT_2 0x134 +#define LA_IOMMU_WR_HIT_CNT_2 0x138 +#define LA_IOMMU_WR_MISS_CNT_2 0x13C + +#define MAX_DOMAIN_ID 16 +#define MAX_ATTACHED_DEV_ID 16 + +#define iommu_ptable_end(addr, end, level) \ +({ unsigned long __boundary = ((addr) + IOMMU_LEVEL_SIZE(level)) & \ + IOMMU_LEVEL_MASK(level); \ + (__boundary - 1 < (end) - 1) ? 
__boundary : (end); \ +}) + +/* To find an entry in an iommu page table directory */ +#define iommu_page_index(addr, level) \ + (((addr) >> ((level * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) \ + & (IOMMU_PTRS_PER_LEVEL - 1)) + +struct loongarch_iommu { + struct list_head list; /* for la_iommu_list */ + spinlock_t domain_bitmap_lock; /* Lock for domain allocing */ + spinlock_t dom_info_lock; /* Lock for dom_list */ + void *domain_bitmap; /* Bitmap of global domains */ + void *devtable_bitmap; /* Bitmap of devtable */ + struct list_head dom_list; /* List of all domain privates */ + /* PCI device id of the IOMMU device */ + u16 devid; + int segment; /* PCI segment# */ + /* iommu configures the register space base address */ + void *confbase; + /* iommu configures the register space physical base address */ + resource_size_t confbase_phy; + /* iommu configures the register space size */ + resource_size_t conf_size; + struct pci_dev *pdev; + /* Handle for IOMMU core code */ + struct iommu_device iommu_dev; +} loongarch_iommu; + +struct iommu_rlookup_entry { + struct list_head list; + struct loongarch_iommu **rlookup_table; + int pcisegment; +}; + +struct iommu_info { + struct list_head list; /* for dom_info->iommu_devlist */ + struct loongarch_iommu *iommu; + spinlock_t devlock; /* priv dev list lock */ + struct list_head dev_list; /* List of all devices in this domain iommu */ + unsigned int dev_cnt; /* devices assigned to this domain iommu */ + short id; +} iommu_info; + +/* One vm is equal to a domain,one domain has a priv */ +struct dom_info { + struct list_head iommu_devlist; + struct iommu_domain domain; + struct mutex ptl_lock; /* Lock for page table */ + void *pgd; + spinlock_t lock; /* Lock for dom_info->iommu_devlist */ +} dom_info; + +struct dom_entry { + struct list_head list; /* for loongarch_iommu->dom_list */ + struct dom_info *domain_info; +} dom_entry; + +/* A device for passthrough */ +struct la_iommu_dev_data { + struct list_head list; /* for iommu_entry->dev_list */ + struct loongarch_iommu *iommu; + struct iommu_info *iommu_entry; + struct iommu_domain *domain; + struct device *dev; + unsigned short bdf; + int count; + int index; /* index in device table */ +}; + +static inline unsigned long *iommu_pte_offset(unsigned long *ptep, unsigned long addr, int level) +{ + return ptep + iommu_page_index(addr, level); +} +#endif /* LOONGARCH_IOMMU_H */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d80ab4e084d8bdb545ee6a17d2d08bb371f77b16..356791637dabf3450a9bc0c5f6c08c4b1ad3b741 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4457,6 +4457,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000, quirk_bridge_cavm_thrx2_pcie_root); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084, quirk_bridge_cavm_thrx2_pcie_root); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_LOONGSON, 0x3c09, + quirk_bridge_cavm_thrx2_pcie_root); /* * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero) @@ -5177,6 +5179,8 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + { PCI_VENDOR_ID_LOONGSON, 0x3c09, pci_quirk_xgene_acs}, + { PCI_VENDOR_ID_LOONGSON, 0x3c19, pci_quirk_xgene_acs}, { 0 } }; diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 6bda6dbb48784b7047c467388575f9e35c8e2c0b..f7103ae7120235ceaf57317233cff1cea8214c6c 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ 
-39,7 +39,7 @@ config VFIO_GROUP config VFIO_CONTAINER bool "Support for the VFIO container /dev/vfio/vfio" - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || LOONGARCH) depends on VFIO_GROUP default y help diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a66645eb5103265848f8ba137bdb7330a2293ed2..a077b9e4989c0f812702f897972172025ad4a354 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -216,6 +216,7 @@ enum kvm_bus { KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, + KVM_IOCSR_BUS, KVM_NR_BUSES }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ae3a863bc8f66658a1f106a0cd697329bcd9bc85..4a8a497325e0185b93773b4b4ccbce167accff90 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1475,7 +1475,15 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LA_IOAPIC = 0x100, +#define KVM_DEV_TYPE_LA_IOAPIC KVM_DEV_TYPE_LA_IOAPIC + KVM_DEV_TYPE_LA_IPI, +#define KVM_DEV_TYPE_LA_IPI KVM_DEV_TYPE_LA_IPI + KVM_DEV_TYPE_LA_EXTIOI, +#define KVM_DEV_TYPE_LA_EXTIOI KVM_DEV_TYPE_LA_EXTIOI + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index c392e7af474332e2c6ebd5a2e108207fc08e4786..c8be64c5cdb445514a8be026efec93da3b41b295 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index d776125a2d06832b5841c798bf39460357737ce9..b12d374d70964929ef39c211dfd5edd9cd51f01e 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,5 +1,7 @@ +perf-y += header.o perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c new file mode 100644 index 0000000000000000000000000000000000000000..d962dff55512b39c9d6adfb4f5ad76f9857df404 --- /dev/null +++ b/tools/perf/arch/loongarch/util/header.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). 
+ * + * Author: Nikita Shubin + * Bibo Mao + * Huacai Chen + */ + +#include +#include +#include +#include +#include "util/debug.h" +#include "util/header.h" + +/* + * Output example from /proc/cpuinfo + * CPU Family : Loongson-64bit + * Model Name : Loongson-3C5000 + * CPU Revision : 0x10 + * FPU Revision : 0x01 + */ +#define CPUINFO_MODEL "Model Name" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + unsigned long line_sz; + char *line, *model, *cpuid; + FILE *file; + + file = fopen(CPUINFO, "r"); + if (file == NULL) + return NULL; + + line = model = cpuid = NULL; + while (getline(&line, &line_sz, file) != -1) { + if (strncmp(line, CPUINFO_MODEL, strlen(CPUINFO_MODEL))) + continue; + + model = _get_field(line); + if (!model) + goto out_free; + break; + } + + if (model && (asprintf(&cpuid, "%s", model) < 0)) + cpuid = NULL; + +out_free: + fclose(file); + free(model); + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + int ret = 0; + char *cpuid = _get_cpuid(); + + if (!cpuid) + return EINVAL; + + if (sz < strlen(cpuid)) { + ret = ENOBUFS; + goto out_free; + } + + scnprintf(buffer, sz, "%s", cpuid); + +out_free: + free(cpuid); + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} diff --git a/tools/perf/arch/loongarch/util/kvm-stat.c b/tools/perf/arch/loongarch/util/kvm-stat.c new file mode 100644 index 0000000000000000000000000000000000000000..a7859a3a9a51b37e9581c3ae3a8a9b32a77bb8e8 --- /dev/null +++ b/tools/perf/arch/loongarch/util/kvm-stat.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/pmus.h" + +#define LOONGARCH_EXCEPTION_INT 0 +#define LOONGARCH_EXCEPTION_PIL 1 +#define LOONGARCH_EXCEPTION_PIS 2 +#define LOONGARCH_EXCEPTION_PIF 3 +#define LOONGARCH_EXCEPTION_PME 4 +#define LOONGARCH_EXCEPTION_FPD 15 +#define LOONGARCH_EXCEPTION_SXD 16 +#define LOONGARCH_EXCEPTION_ASXD 17 +#define LOONGARCH_EXCEPTION_GSPR 22 +#define LOONGARCH_EXCEPTION_CPUCFG 100 +#define LOONGARCH_EXCEPTION_CSR 101 +#define LOONGARCH_EXCEPTION_IOCSR 102 +#define LOONGARCH_EXCEPTION_IDLE 103 +#define LOONGARCH_EXCEPTION_OTHERS 104 +#define LOONGARCH_EXCEPTION_HVC 23 + +#define loongarch_exception_type \ + {LOONGARCH_EXCEPTION_INT, "Interrupt" }, \ + {LOONGARCH_EXCEPTION_PIL, "Mem Read" }, \ + {LOONGARCH_EXCEPTION_PIS, "Mem Store" }, \ + {LOONGARCH_EXCEPTION_PIF, "Inst Fetch" }, \ + {LOONGARCH_EXCEPTION_PME, "Mem Modify" }, \ + {LOONGARCH_EXCEPTION_FPD, "FPU" }, \ + {LOONGARCH_EXCEPTION_SXD, "LSX" }, \ + {LOONGARCH_EXCEPTION_ASXD, "LASX" }, \ + {LOONGARCH_EXCEPTION_GSPR, "Privilege Error" }, \ + {LOONGARCH_EXCEPTION_HVC, "Hypercall" }, \ + {LOONGARCH_EXCEPTION_CPUCFG, "CPUCFG" }, \ + {LOONGARCH_EXCEPTION_CSR, "CSR" }, \ + {LOONGARCH_EXCEPTION_IOCSR, "IOCSR" }, \ + {LOONGARCH_EXCEPTION_IDLE, "Idle" }, \ + {LOONGARCH_EXCEPTION_OTHERS, "Others" } + +define_exit_reasons_table(loongarch_exit_reasons, loongarch_exception_type); + +const char *vcpu_id_str = "vcpu_id"; +const char *kvm_exit_reason = "reason"; +const char *kvm_entry_trace = "kvm:kvm_enter"; +const char *kvm_reenter_trace = "kvm:kvm_reenter"; +const char 
*kvm_exit_trace = "kvm:kvm_exit"; +const char *kvm_events_tp[] = { + "kvm:kvm_enter", + "kvm:kvm_reenter", + "kvm:kvm_exit", + "kvm:kvm_exit_gspr", + NULL, +}; + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + return exit_event_begin(evsel, sample, key); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + /* + * LoongArch kvm is different with other architectures + * + * There is kvm:kvm_reenter or kvm:kvm_enter event adjacent with + * kvm:kvm_exit event. + * kvm:kvm_enter means returning to vmm and then to guest + * kvm:kvm_reenter means returning to guest immediately + */ + return evsel__name_is(evsel, kvm_entry_trace) || evsel__name_is(evsel, kvm_reenter_trace); +} + +static void event_gspr_get_key(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + unsigned int insn; + + key->key = LOONGARCH_EXCEPTION_OTHERS; + insn = evsel__intval(evsel, sample, "inst_word"); + + switch (insn >> 24) { + case 0: + /* CPUCFG inst trap */ + if ((insn >> 10) == 0x1b) + key->key = LOONGARCH_EXCEPTION_CPUCFG; + break; + case 4: + /* CSR inst trap */ + key->key = LOONGARCH_EXCEPTION_CSR; + break; + case 6: + /* IOCSR inst trap */ + if ((insn >> 15) == 0xc90) + key->key = LOONGARCH_EXCEPTION_IOCSR; + else if ((insn >> 15) == 0xc91) + /* Idle inst trap */ + key->key = LOONGARCH_EXCEPTION_IDLE; + break; + default: + key->key = LOONGARCH_EXCEPTION_OTHERS; + break; + } +} + +static struct child_event_ops child_events[] = { + { .name = "kvm:kvm_exit_gspr", .get_key = event_gspr_get_key }, + { NULL, NULL }, +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .child_ops = child_events, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events, }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "loongarch64"; + kvm->exit_reasons = loongarch_exit_reasons; + return 0; +} diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index c1cd7dfe4a9088fc046c73e3e24dca4293ff99f5..27e50190d419be6c7c657a3dd41914f150891027 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) struct kvm_memory_slot *memslot; int as_id, id; + if (!mask) + return; + as_id = slot >> 16; id = (u16)slot;
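
For reference when reading the page-table code above: the sketch below restates the index arithmetic from loongarch_iommu.h as a stand-alone user-space program. The 16 KiB base page size (IOMMU_PAGE_SHIFT == 14) is an assumption inferred from the SZ_16K | SZ_32M pgsize_bitmap rather than something the excerpt states; with it the table is three levels deep and a level-1 entry covers 32 MiB, which is the huge-page size the map/unmap paths special-case.

#include <stdio.h>

#define IOVA_WIDTH           47
#define IOMMU_PAGE_SHIFT     14				/* assumed 16 KiB base pages */
#define IOMMU_PAGE_SIZE      (1UL << IOMMU_PAGE_SHIFT)
#define IOMMU_LEVEL_STRIDE   (IOMMU_PAGE_SHIFT - 3)	/* 2048 8-byte PTEs per table */
#define IOMMU_PTRS_PER_LEVEL (IOMMU_PAGE_SIZE >> 3)
#define IOMMU_LEVEL_SHIFT(n) (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)
#define IOMMU_LEVEL_MAX      (((IOVA_WIDTH - IOMMU_PAGE_SHIFT) + \
			       IOMMU_LEVEL_STRIDE - 1) / IOMMU_LEVEL_STRIDE)

/* same bit-slicing as iommu_page_index() in loongarch_iommu.h */
static unsigned long iommu_page_index(unsigned long addr, int level)
{
	return (addr >> IOMMU_LEVEL_SHIFT(level)) & (IOMMU_PTRS_PER_LEVEL - 1);
}

int main(void)
{
	unsigned long iova = 0x12345678000UL;	/* arbitrary example IOVA */
	int level;

	printf("levels: %d, level-1 block size: %lu MiB\n",
	       (int)IOMMU_LEVEL_MAX, (1UL << IOMMU_LEVEL_SHIFT(1)) >> 20);
	for (level = IOMMU_LEVEL_MAX - 1; level >= 0; level--)
		printf("level %d index: %lu\n", level, iommu_page_index(iova, level));
	return 0;
}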
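
In the same spirit, a minimal model of how _iommu_iova_to_phys() recombines a translation: the high bits come from the leaf PTE, the low bits from the IOVA itself, with the granularity picked by the huge-page bit. The PTE value used in main() is hypothetical; only its low flag bits (PR | IR | IW) follow the bit definitions in the header, and the 16 KiB / 32 MiB sizes again assume 16 KiB base pages.

#include <stdio.h>

static unsigned long pte_to_phys(unsigned long pte, unsigned long iova, int huge)
{
	/* 32 MiB granularity for huge mappings, 16 KiB otherwise */
	unsigned long page_size = huge ? (1UL << 25) : (1UL << 14);
	unsigned long page_mask = ~(page_size - 1);

	return (pte & page_mask) | (iova & (page_size - 1));
}

int main(void)
{
	/* hypothetical leaf PTE at 0x90000000 with PR|IR|IW (0xd) set */
	printf("%#lx\n", pte_to_phys(0x9000000dUL, 0x7f0043210UL, 0));
	return 0;	/* prints 0x90003210 */
}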
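
The IVHD walkers above step through variable-sized ACPI device entries with ivhd_entry_length(). The sketch below restates that length rule outside the kernel; the IVHD_DEV_ACPI_HID and IVHD_DEV_SELECT values are assumptions carried over from the AMD-style IVRS layout this parser follows, since their definitions are not visible in this excerpt.

#include <stdio.h>
#include <stdint.h>

#define IVHD_DEV_ACPI_HID 0xf0	/* assumed, not shown in the excerpt */

/*
 * Mirror of ivhd_entry_length(): fixed-size entries (type < 0x80) encode
 * their length in the top two bits of the type byte (4, 8, 16 or 32 bytes);
 * ACPI_HID entries are 22 bytes plus a UID whose length byte sits at
 * offset 21.
 */
static int ivhd_entry_length(const uint8_t *ivhd)
{
	uint8_t type = ivhd[0];

	if (type < 0x80)
		return 0x04 << (type >> 6);
	if (type == IVHD_DEV_ACPI_HID)
		return ivhd[21] + 22;
	return 0;
}

int main(void)
{
	uint8_t entry[4] = { 0x02 };	/* DEV_SELECT-style entry, assumed type 0x02 */

	printf("%d\n", ivhd_entry_length(entry));	/* prints 4 */
	return 0;
}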
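
Finally, the perf kvm-stat support classifies kvm:kvm_exit_gspr events by decoding the trapped instruction word carried in the "inst_word" field. The classifier below repeats the same opcode checks outside of perf, which can be handy for sanity-checking recorded trace data; the constants are taken from event_gspr_get_key() itself and are not re-derived from the ISA manual.

#include <stdio.h>

enum gspr_kind { GSPR_CPUCFG, GSPR_CSR, GSPR_IOCSR, GSPR_IDLE, GSPR_OTHER };

/*
 * Same decision tree as event_gspr_get_key(): major opcode in bits 31:24,
 * then CPUCFG / IOCSR / IDLE narrowed by the higher-order fields.
 */
static enum gspr_kind classify_gspr(unsigned int insn)
{
	switch (insn >> 24) {
	case 0:
		if ((insn >> 10) == 0x1b)
			return GSPR_CPUCFG;
		break;
	case 4:
		return GSPR_CSR;
	case 6:
		if ((insn >> 15) == 0xc90)
			return GSPR_IOCSR;
		if ((insn >> 15) == 0xc91)
			return GSPR_IDLE;
		break;
	}
	return GSPR_OTHER;
}

int main(void)
{
	/* 0x06488000 has bits 31:15 == 0xc91, so it lands in the Idle bucket */
	printf("%d\n", classify_gspr(0x06488000));	/* prints 3 (GSPR_IDLE) */
	return 0;
}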